From 24b2210fd2dd0164686522fdaa7e55de94a1a6ab Mon Sep 17 00:00:00 2001 From: PrashantDixit-dev Date: Tue, 14 Jan 2025 18:25:29 +0530 Subject: [PATCH] added data --- .../geospatial-recommendation.ipynb | 2456 +++---- examples/RASA_Customer-support-bot/main.ipynb | 3132 ++++---- .../Food_recommendation/main.ipynb | 6382 +++++++++++++---- .../Food_recommendation/main_food.csv | 401 ++ .../Food_recommendation/ratings.csv | 513 ++ .../cognee_multimedia_demo.ipynb | 26 +- 6 files changed, 8807 insertions(+), 4103 deletions(-) create mode 100644 examples/archived_examples/Food_recommendation/main_food.csv create mode 100644 examples/archived_examples/Food_recommendation/ratings.csv diff --git a/examples/Geospatial-Recommendation-System/geospatial-recommendation.ipynb b/examples/Geospatial-Recommendation-System/geospatial-recommendation.ipynb index e3140d1..1a306c8 100644 --- a/examples/Geospatial-Recommendation-System/geospatial-recommendation.ipynb +++ b/examples/Geospatial-Recommendation-System/geospatial-recommendation.ipynb @@ -1,1253 +1,1261 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "h3z5i2X45vTk" - }, - "source": [ - "### Geospatial Recommendation System\n", - "\n", - "![image-for-the-concept](https://github.com/lancedb/vectordb-recipes/blob/main/examples/Geospatial-Recommendation-System/Geospatial%20Recommendation%20System.png?raw=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BHGgH_UH5vTl" - }, - "source": [ - "In this tutorial, we'll enhance our restaurant recommendation system using Full Text Search (FTS) Indexes and Geospatial APIs.\n", - "\n", - "1. Extract User Preferences: Identify key details from user input such as preferred cuisines and location.\n", - "2. Construct Query String: Synthesize these details into a structured query string for searching.\n", - "3. Perform FTS Index Search: Use the query string to find relevant restaurant recommendations.\n", - "4. 
Apply Geospatial Filtering: Use a Geospatial API to locate the user and refine recommendations based on proximity.\n", - "\n", - "We can enhance later on by adding a filter to sort the recommendations based on distance" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "a0k_ssua5vTn" - }, - "source": [ - "### Importing the relevant libraires" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "id": "A9QFbWf05yUT" - }, - "outputs": [], - "source": [ - "%%capture\n", - "!pip install lancedb pandas sentence-transformers requests openai tantivy" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "tFWV_ONGB2qN", - "outputId": "1b1cb49b-63d9-41c5-ea37-99e1446474b5" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2025-01-05 10:34:14-- https://drive.google.com/uc?export=download&id=17Div0ml4Nelr1C4QaGVJzC7lnMx--BkM\n", - "Resolving drive.google.com (drive.google.com)... 74.125.126.139, 74.125.126.138, 74.125.126.102, ...\n", - "Connecting to drive.google.com (drive.google.com)|74.125.126.139|:443... connected.\n", - "HTTP request sent, awaiting response... 303 See Other\n", - "Location: https://drive.usercontent.google.com/download?id=17Div0ml4Nelr1C4QaGVJzC7lnMx--BkM&export=download [following]\n", - "--2025-01-05 10:34:14-- https://drive.usercontent.google.com/download?id=17Div0ml4Nelr1C4QaGVJzC7lnMx--BkM&export=download\n", - "Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 74.125.202.132, 2607:f8b0:4001:c06::84\n", - "Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|74.125.202.132|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 883287 (863K) [application/octet-stream]\n", - "Saving to: ‘data.csv’\n", - "\n", - "data.csv 100%[===================>] 862.58K --.-KB/s in 0.007s \n", - "\n", - "2025-01-05 10:34:17 (121 MB/s) - ‘data.csv’ saved [883287/883287]\n", - "\n" - ] - } - ], - "source": [ - "import pandas as pd\n", - "\n", - "!wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=17Div0ml4Nelr1C4QaGVJzC7lnMx--BkM' -O data.csv" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 310 - }, - "id": "0-KnjGti5vTn", - "outputId": "f54bfd38-dd7d-4cff-ef57-9bbf53d41441" - }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"restaurant_data\",\n \"rows\": 8679,\n \"fields\": [\n {\n \"column\": \"Area\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 833,\n \"samples\": [\n \"Lakdi Ka Pul\",\n \"Umra Jakat\",\n \"Salt Lake Area\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"City\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"Ahmedabad\",\n \"Hyderabad\",\n \"Delhi\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Restaurant\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7865,\n \"samples\": [\n \"Sri Radhe Chills And Thrills\",\n \"Rcb Bar & Cafe\",\n \"Momo Nation Cafe\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 230.9379787288186,\n \"min\": 0.0,\n \"max\": 2500.0,\n \"num_unique_values\": 120,\n \"samples\": [\n 0.0,\n 140.0,\n 800.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Avg ratings\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6476154836286703,\n 
\"min\": 2.0,\n \"max\": 5.0,\n \"num_unique_values\": 30,\n \"samples\": [\n 5.0,\n 3.5,\n 2.7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Total ratings\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 391,\n \"min\": 20,\n \"max\": 10000,\n \"num_unique_values\": 8,\n \"samples\": [\n 500,\n 5000,\n 100\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Food type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3734,\n \"samples\": [\n \"Punjabi,Thalis,North Indian,Tandoor,Snacks\",\n \"Gujarati,North Indian,Thalis\",\n \"Street Food,Fast Food,Snacks,Beverages\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Address\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2339,\n \"samples\": [\n \"Bheemanna Garden St Sri Ram Nagar\",\n \"Shiva Road Sector 7\",\n \"Oposite Hedua Park\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Delivery time\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14,\n \"min\": 20,\n \"max\": 109,\n \"num_unique_values\": 81,\n \"samples\": [\n 35,\n 59,\n 37\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe", - "variable_name": "restaurant_data" - }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AreaCityRestaurantPriceAvg ratingsTotal ratingsFood typeAddressDelivery time
0KoramangalaBangaloreTandoor Hut300.04.4100Biryani,Chinese,North Indian,South Indian5Th Block59
1KoramangalaBangaloreTunday Kababi300.04.1100Mughlai,Lucknowi5Th Block56
2JogupalyaBangaloreKim Lee650.04.4100ChineseDouble Road50
3IndiranagarBangaloreNew Punjabi Hotel250.03.9500North Indian,Punjabi,Tandoor,Chinese80 Feet Road57
4IndiranagarBangaloreNh8350.04.050Rajasthani,Gujarati,North Indian,Snacks,Desser...80 Feet Road63
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " Area City Restaurant Price Avg ratings \\\n", - "0 Koramangala Bangalore Tandoor Hut 300.0 4.4 \n", - "1 Koramangala Bangalore Tunday Kababi 300.0 4.1 \n", - "2 Jogupalya Bangalore Kim Lee 650.0 4.4 \n", - "3 Indiranagar Bangalore New Punjabi Hotel 250.0 3.9 \n", - "4 Indiranagar Bangalore Nh8 350.0 4.0 \n", - "\n", - " Total ratings Food type \\\n", - "0 100 Biryani,Chinese,North Indian,South Indian \n", - "1 100 Mughlai,Lucknowi \n", - "2 100 Chinese \n", - "3 500 North Indian,Punjabi,Tandoor,Chinese \n", - "4 50 Rajasthani,Gujarati,North Indian,Snacks,Desser... \n", - "\n", - " Address Delivery time \n", - "0 5Th Block 59 \n", - "1 5Th Block 56 \n", - "2 Double Road 50 \n", - "3 80 Feet Road 57 \n", - "4 80 Feet Road 63 " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import lancedb\n", - "import pandas as pd\n", - "\n", - "restaurant_data = pd.read_csv(\"data.csv\")\n", - "restaurant_data = restaurant_data[restaurant_data.columns[1:]]\n", - "restaurant_data.dropna(inplace=True)\n", - "restaurant_data.drop_duplicates(inplace=True)\n", - "restaurant_data.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PTSumB1b5vTo" - }, - "source": [ - "### Embedding the relevant parts of the data." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jcVaV65s5vTp" - }, - "source": [ - "We will extract key information from the restaurant dataset columns and create a query string. This string will be encoded using our embedding model and then combined with additional data for storage in the Vector Database." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "5FUfug2G5vTp", - "outputId": "57b00934-da22-44d1-ff2b-15e8531231db" - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.10/dist-packages/sentence_transformers/SentenceTransformer.py:195: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v4 of SentenceTransformers.\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "import os\n", - "from sentence_transformers import SentenceTransformer\n", - "from huggingface_hub import hf_hub_download\n", - "\n", - "os.environ[\"HUGGING_FACE_HUB_TOKEN\"] = \"****\"\n", - "\n", - "model = SentenceTransformer('paraphrase-MiniLM-L6-v2', use_auth_token=True)\n", - "data_points_vectors = []\n", - "\n", - "for _, row in restaurant_data.iterrows():\n", - " filter_cols = ['Food type', 'Avg ratings', 'Address']\n", - " data_point = \"#\".join(f\"{col}/{row[col]}\" for col in filter_cols)\n", - " data_points_vectors.append(data_point)\n", - "\n", - "# Add the new column to the DataFrame\n", - "restaurant_data[\"query_string\"] = data_points_vectors\n", - "\n", - "list_of_payloads = []\n", - "\n", - "for index, row in restaurant_data.iterrows():\n", - " encoded_vector = model.encode(row['query_string'])\n", - " payload = {\n", - " 'Area': row['Area'],\n", - " 'City': row['City'],\n", - " 'Restaurant': row['Restaurant'],\n", - " 'Price': row['Price'],\n", - " 'Avg_ratings': row['Avg ratings'],\n", - " 'Total_ratings': row['Total ratings'],\n", - " 'Food_type': row['Food type'],\n", - " 'Address': row['Address'],\n", - " 'Delivery_time': row['Delivery time'],\n", - " 'query_string': row['query_string'],\n", - " 'vector': encoded_vector\n", - " }\n", - "\n", - " list_of_payloads.append(payload)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jb9pMFm45vTq" - }, - "source": [ - "### Using the 
LanceDB database" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "V1XxLYrB5vTq" - }, - "outputs": [], - "source": [ - "# Connect to the LanceDB instance\n", - "uri = \"data\"\n", - "db = lancedb.connect(uri)\n", - "\n", - "lancedb_table = db.create_table(\"restaurant-geocoding-app\", data=list_of_payloads)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 487 - }, - "id": "_RTMOwEr5vTr", - "outputId": "7645d5b2-43ea-4d0c-e57c-5771a1f4216c" - }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"df\",\n \"rows\": 8679,\n \"fields\": [\n {\n \"column\": \"Area\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 833,\n \"samples\": [\n \"Lakdi Ka Pul\",\n \"Umra Jakat\",\n \"Salt Lake Area\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"City\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"Ahmedabad\",\n \"Hyderabad\",\n \"Delhi\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Restaurant\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7865,\n \"samples\": [\n \"Sri Radhe Chills And Thrills\",\n \"Rcb Bar & Cafe\",\n \"Momo Nation Cafe\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 230.9379787288186,\n \"min\": 0.0,\n \"max\": 2500.0,\n \"num_unique_values\": 120,\n \"samples\": [\n 0.0,\n 140.0,\n 800.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Avg_ratings\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6476154836286703,\n \"min\": 2.0,\n \"max\": 5.0,\n \"num_unique_values\": 30,\n \"samples\": [\n 5.0,\n 3.5,\n 2.7\n ],\n \"semantic_type\": 
\"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Total_ratings\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 391,\n \"min\": 20,\n \"max\": 10000,\n \"num_unique_values\": 8,\n \"samples\": [\n 500,\n 5000,\n 100\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Food_type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3734,\n \"samples\": [\n \"Punjabi,Thalis,North Indian,Tandoor,Snacks\",\n \"Gujarati,North Indian,Thalis\",\n \"Street Food,Fast Food,Snacks,Beverages\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Address\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2339,\n \"samples\": [\n \"Bheemanna Garden St Sri Ram Nagar\",\n \"Shiva Road Sector 7\",\n \"Oposite Hedua Park\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Delivery_time\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14,\n \"min\": 20,\n \"max\": 109,\n \"num_unique_values\": 81,\n \"samples\": [\n 35,\n 59,\n 37\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"query_string\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8231,\n \"samples\": [\n \"Food type/Chinese,Indian#Avg ratings/3.7#Address/Rohini\",\n \"Food type/North Indian,Biryani,Chinese#Avg ratings/3.9#Address/Nampally\",\n \"Food type/North Indian,South Indian,Chinese,Continental,Punjabi#Avg ratings/4.1#Address/Warje\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vector\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe", - "variable_name": "df" - }, - "text/html": [ - "\n", - "
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
AreaCityRestaurantPriceAvg_ratingsTotal_ratingsFood_typeAddressDelivery_timequery_stringvector
0KoramangalaBangaloreTandoor Hut300.04.4100Biryani,Chinese,North Indian,South Indian5Th Block59Food type/Biryani,Chinese,North Indian,South I...[0.12830292, 0.14721094, -0.086350575, 0.08263...
1KoramangalaBangaloreTunday Kababi300.04.1100Mughlai,Lucknowi5Th Block56Food type/Mughlai,Lucknowi#Avg ratings/4.1#Add...[-0.10582731, 0.15009499, -0.35311985, 0.12081...
2JogupalyaBangaloreKim Lee650.04.4100ChineseDouble Road50Food type/Chinese#Avg ratings/4.4#Address/Doub...[-0.09362272, 0.16319357, 0.12415688, 0.012913...
3IndiranagarBangaloreNew Punjabi Hotel250.03.9500North Indian,Punjabi,Tandoor,Chinese80 Feet Road57Food type/North Indian,Punjabi,Tandoor,Chinese...[0.12705283, 0.17128171, 0.013174878, 0.239679...
4IndiranagarBangaloreNh8350.04.050Rajasthani,Gujarati,North Indian,Snacks,Desser...80 Feet Road63Food type/Rajasthani,Gujarati,North Indian,Sna...[0.08238438, 0.014472998, -0.11513413, 0.28430...
\n", - "
\n", - "
\n", - "\n", - "
\n", - " \n", - "\n", - " \n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "\n", - "
\n", - "
\n" - ], - "text/plain": [ - " Area City Restaurant Price Avg_ratings \\\n", - "0 Koramangala Bangalore Tandoor Hut 300.0 4.4 \n", - "1 Koramangala Bangalore Tunday Kababi 300.0 4.1 \n", - "2 Jogupalya Bangalore Kim Lee 650.0 4.4 \n", - "3 Indiranagar Bangalore New Punjabi Hotel 250.0 3.9 \n", - "4 Indiranagar Bangalore Nh8 350.0 4.0 \n", - "\n", - " Total_ratings Food_type \\\n", - "0 100 Biryani,Chinese,North Indian,South Indian \n", - "1 100 Mughlai,Lucknowi \n", - "2 100 Chinese \n", - "3 500 North Indian,Punjabi,Tandoor,Chinese \n", - "4 50 Rajasthani,Gujarati,North Indian,Snacks,Desser... \n", - "\n", - " Address Delivery_time \\\n", - "0 5Th Block 59 \n", - "1 5Th Block 56 \n", - "2 Double Road 50 \n", - "3 80 Feet Road 57 \n", - "4 80 Feet Road 63 \n", - "\n", - " query_string \\\n", - "0 Food type/Biryani,Chinese,North Indian,South I... \n", - "1 Food type/Mughlai,Lucknowi#Avg ratings/4.1#Add... \n", - "2 Food type/Chinese#Avg ratings/4.4#Address/Doub... \n", - "3 Food type/North Indian,Punjabi,Tandoor,Chinese... \n", - "4 Food type/Rajasthani,Gujarati,North Indian,Sna... \n", - "\n", - " vector \n", - "0 [0.12830292, 0.14721094, -0.086350575, 0.08263... \n", - "1 [-0.10582731, 0.15009499, -0.35311985, 0.12081... \n", - "2 [-0.09362272, 0.16319357, 0.12415688, 0.012913... \n", - "3 [0.12705283, 0.17128171, 0.013174878, 0.239679... \n", - "4 [0.08238438, 0.014472998, -0.11513413, 0.28430... 
" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = lancedb_table.to_pandas()\n", - "df.head()" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "h3z5i2X45vTk" + }, + "source": [ + "### Geospatial Recommendation System\n", + "\n", + "![image-for-the-concept](https://github.com/lancedb/vectordb-recipes/blob/main/examples/Geospatial-Recommendation-System/Geospatial%20Recommendation%20System.png?raw=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BHGgH_UH5vTl" + }, + "source": [ + "In this tutorial, we'll enhance our restaurant recommendation system using Full Text Search (FTS) Indexes and Geospatial APIs.\n", + "\n", + "1. Extract User Preferences: Identify key details from user input such as preferred cuisines and location.\n", + "2. Construct Query String: Synthesize these details into a structured query string for searching.\n", + "3. Perform FTS Index Search: Use the query string to find relevant restaurant recommendations.\n", + "4. 
Apply Geospatial Filtering: Use a Geospatial API to locate the user and refine recommendations based on proximity.\n", + "\n", + "We can enhance later on by adding a filter to sort the recommendations based on distance" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a0k_ssua5vTn" + }, + "source": [ + "### Importing the relevant libraires" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "A9QFbWf05yUT" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!pip install lancedb pandas sentence-transformers requests openai tantivy" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "tFWV_ONGB2qN", + "outputId": "1b1cb49b-63d9-41c5-ea37-99e1446474b5" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "pLEhtZhX5vTr" - }, - "source": [ - "### Query Transformation" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "--2025-01-05 10:34:14-- https://drive.google.com/uc?export=download&id=17Div0ml4Nelr1C4QaGVJzC7lnMx--BkM\n", + "Resolving drive.google.com (drive.google.com)... 74.125.126.139, 74.125.126.138, 74.125.126.102, ...\n", + "Connecting to drive.google.com (drive.google.com)|74.125.126.139|:443... connected.\n", + "HTTP request sent, awaiting response... 303 See Other\n", + "Location: https://drive.usercontent.google.com/download?id=17Div0ml4Nelr1C4QaGVJzC7lnMx--BkM&export=download [following]\n", + "--2025-01-05 10:34:14-- https://drive.usercontent.google.com/download?id=17Div0ml4Nelr1C4QaGVJzC7lnMx--BkM&export=download\n", + "Resolving drive.usercontent.google.com (drive.usercontent.google.com)... 74.125.202.132, 2607:f8b0:4001:c06::84\n", + "Connecting to drive.usercontent.google.com (drive.usercontent.google.com)|74.125.202.132|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 883287 (863K) [application/octet-stream]\n", + "Saving to: ‘data.csv’\n", + "\n", + "data.csv 100%[===================>] 862.58K --.-KB/s in 0.007s \n", + "\n", + "2025-01-05 10:34:17 (121 MB/s) - ‘data.csv’ saved [883287/883287]\n", + "\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "!wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=17Div0ml4Nelr1C4QaGVJzC7lnMx--BkM' -O data.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 310 }, + "id": "0-KnjGti5vTn", + "outputId": "f54bfd38-dd7d-4cff-ef57-9bbf53d41441" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 36 - }, - "id": "mv9NzOea5vTs", - "outputId": "f1cc4e0a-57cd-4a76-ff2a-5bd5f403af50" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"restaurant_data\",\n \"rows\": 8679,\n \"fields\": [\n {\n \"column\": \"Area\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 833,\n \"samples\": [\n \"Lakdi Ka Pul\",\n \"Umra Jakat\",\n \"Salt Lake Area\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"City\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"Ahmedabad\",\n \"Hyderabad\",\n \"Delhi\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Restaurant\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7865,\n \"samples\": [\n \"Sri Radhe Chills And Thrills\",\n \"Rcb Bar & Cafe\",\n \"Momo Nation Cafe\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 230.9379787288186,\n \"min\": 0.0,\n \"max\": 2500.0,\n \"num_unique_values\": 120,\n 
\"samples\": [\n 0.0,\n 140.0,\n 800.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Avg ratings\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6476154836286703,\n \"min\": 2.0,\n \"max\": 5.0,\n \"num_unique_values\": 30,\n \"samples\": [\n 5.0,\n 3.5,\n 2.7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Total ratings\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 391,\n \"min\": 20,\n \"max\": 10000,\n \"num_unique_values\": 8,\n \"samples\": [\n 500,\n 5000,\n 100\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Food type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3734,\n \"samples\": [\n \"Punjabi,Thalis,North Indian,Tandoor,Snacks\",\n \"Gujarati,North Indian,Thalis\",\n \"Street Food,Fast Food,Snacks,Beverages\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Address\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2339,\n \"samples\": [\n \"Bheemanna Garden St Sri Ram Nagar\",\n \"Shiva Road Sector 7\",\n \"Oposite Hedua Park\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Delivery time\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14,\n \"min\": 20,\n \"max\": 109,\n \"num_unique_values\": 81,\n \"samples\": [\n 35,\n 59,\n 37\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe", + "variable_name": "restaurant_data" }, - "outputs": [ - { - "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "type": "string" - }, - "text/plain": [ - "'Food type/Biryani,Chinese,North Indian,South Indian#Avg ratings/4.4#Address/5Th Block'" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AreaCityRestaurantPriceAvg ratingsTotal ratingsFood typeAddressDelivery time
0KoramangalaBangaloreTandoor Hut300.04.4100Biryani,Chinese,North Indian,South Indian5Th Block59
1KoramangalaBangaloreTunday Kababi300.04.1100Mughlai,Lucknowi5Th Block56
2JogupalyaBangaloreKim Lee650.04.4100ChineseDouble Road50
3IndiranagarBangaloreNew Punjabi Hotel250.03.9500North Indian,Punjabi,Tandoor,Chinese80 Feet Road57
4IndiranagarBangaloreNh8350.04.050Rajasthani,Gujarati,North Indian,Snacks,Desser...80 Feet Road63
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "source": [ - "df[\"query_string\"][0]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DKGCJRSz5vTs" - }, - "source": [ - "### Extracting the specifics from the query\n", - "\n", - "Just like we pulled out the key details from our CSV to craft query strings, we’ll do the same with user queries. This step is important because it makes searching for the right recommendations much smoother. I mean doing so we can easily run the FTS Index Search." + "text/plain": [ + " Area City Restaurant Price Avg ratings \\\n", + "0 Koramangala Bangalore Tandoor Hut 300.0 4.4 \n", + "1 Koramangala Bangalore Tunday Kababi 300.0 4.1 \n", + "2 Jogupalya Bangalore Kim Lee 650.0 4.4 \n", + "3 Indiranagar Bangalore New Punjabi Hotel 250.0 3.9 \n", + "4 Indiranagar Bangalore Nh8 350.0 4.0 \n", + "\n", + " Total ratings Food type \\\n", + "0 100 Biryani,Chinese,North Indian,South Indian \n", + "1 100 Mughlai,Lucknowi \n", + "2 100 Chinese \n", + "3 500 North Indian,Punjabi,Tandoor,Chinese \n", + "4 50 Rajasthani,Gujarati,North Indian,Snacks,Desser... \n", + "\n", + " Address Delivery time \n", + "0 5Th Block 59 \n", + "1 5Th Block 56 \n", + "2 Double Road 50 \n", + "3 80 Feet Road 57 \n", + "4 80 Feet Road 63 " ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import lancedb\n", + "import pandas as pd\n", + "\n", + "restaurant_data = pd.read_csv(\"data.csv\")\n", + "restaurant_data = restaurant_data[restaurant_data.columns[1:]]\n", + "restaurant_data.dropna(inplace=True)\n", + "restaurant_data.drop_duplicates(inplace=True)\n", + "restaurant_data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PTSumB1b5vTo" + }, + "source": [ + "### Embedding the relevant parts of the data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jcVaV65s5vTp" + }, + "source": [ + "We will extract key information from the restaurant dataset columns and create a query string. 
This string will be encoded using our embedding model and then combined with additional data for storage in the Vector Database." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "5FUfug2G5vTp", + "outputId": "57b00934-da22-44d1-ff2b-15e8531231db" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "9zlQCuSy5vTs", - "outputId": "0a2124e9-7ec5-4cee-e33d-366499d501d8" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"Food type\": \"Indian or Italian\",\n", - " \"Avg ratings\": None,\n", - " \"Address\": \"HSR Bangalore\"\n", - "}\n" - ] - } - ], - "source": [ - "from openai import OpenAI\n", - "OPENAI_API_KEY = \"****\"\n", - "client = OpenAI(api_key = OPENAI_API_KEY)\n", - "\n", - "\n", - "query_string = \"Hi, I am looking for a casual dining restaurant where Indian or Italian food is served near the HSR Bangalore\"\n", - "\n", - "# Helper prompt to extract structured data from ip_prompt\n", - "total_prompt = f\"\"\"Query String: {query_string}\\n\\n\\\n", - "Now from the query string above extract these following entities pinpoints:\n", - "1. Food type : Extract the food type\n", - "2. Avg ratings : Extract the average ratings\n", - "3. Address : Extract the current exact location, don't consider the fillers like \"near\" or \"nearby\".\n", - "\n", - "NOTE : For the Current location, try to understand the pin point location in the query string. Do not give any extra information. If you make the mistakes, bad things\n", - "will happen.\n", - "\n", - "Finally return a python dictionary using those points as keys and don't write the markdown of python. 
If value of a key is not mentioned, then set it as None.\n", - "\"\"\"\n", - "\n", - "# Make a request to OpenAI's API\n", - "completion = client.chat.completions.create(\n", - " model=\"gpt-4o\", # Use the appropriate model\n", - " store=True,\n", - " messages=[\n", - " {\"role\": \"user\", \"content\": total_prompt}\n", - " ]\n", - ")\n", - "\n", - "# Extract the generated text\n", - "content = completion.choices[0].message.content\n", - "print(content)" - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sentence_transformers/SentenceTransformer.py:195: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v4 of SentenceTransformers.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "import os\n", + "from sentence_transformers import SentenceTransformer\n", + "from huggingface_hub import hf_hub_download\n", + "\n", + "os.environ[\"HUGGING_FACE_HUB_TOKEN\"] = \"****\"\n", + "\n", + "model = SentenceTransformer(\"paraphrase-MiniLM-L6-v2\", use_auth_token=True)\n", + "data_points_vectors = []\n", + "\n", + "for _, row in restaurant_data.iterrows():\n", + " filter_cols = [\"Food type\", \"Avg ratings\", \"Address\"]\n", + " data_point = \"#\".join(f\"{col}/{row[col]}\" for col in filter_cols)\n", + " data_points_vectors.append(data_point)\n", + "\n", + "# Add the new column to the DataFrame\n", + "restaurant_data[\"query_string\"] = data_points_vectors\n", + "\n", + "list_of_payloads = []\n", + "\n", + "for index, row in restaurant_data.iterrows():\n", + " encoded_vector = model.encode(row[\"query_string\"])\n", + " payload = {\n", + " \"Area\": row[\"Area\"],\n", + " \"City\": row[\"City\"],\n", + " \"Restaurant\": row[\"Restaurant\"],\n", + " \"Price\": row[\"Price\"],\n", + " \"Avg_ratings\": row[\"Avg ratings\"],\n", + " \"Total_ratings\": row[\"Total ratings\"],\n", + " \"Food_type\": row[\"Food type\"],\n", + " \"Address\": row[\"Address\"],\n", + " \"Delivery_time\": 
row[\"Delivery time\"],\n", + " \"query_string\": row[\"query_string\"],\n", + " \"vector\": encoded_vector,\n", + " }\n", + "\n", + " list_of_payloads.append(payload)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jb9pMFm45vTq" + }, + "source": [ + "### Using the LanceDB database" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "V1XxLYrB5vTq" + }, + "outputs": [], + "source": [ + "# Connect to the LanceDB instance\n", + "uri = \"data\"\n", + "db = lancedb.connect(uri)\n", + "\n", + "lancedb_table = db.create_table(\"restaurant-geocoding-app\", data=list_of_payloads)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 487 }, + "id": "_RTMOwEr5vTr", + "outputId": "7645d5b2-43ea-4d0c-e57c-5771a1f4216c" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "e-aRJyDl5vTt", - "outputId": "302894c4-f2c0-4a3f-acf9-076c70188492" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "summary": "{\n \"name\": \"df\",\n \"rows\": 8679,\n \"fields\": [\n {\n \"column\": \"Area\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 833,\n \"samples\": [\n \"Lakdi Ka Pul\",\n \"Umra Jakat\",\n \"Salt Lake Area\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"City\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 9,\n \"samples\": [\n \"Ahmedabad\",\n \"Hyderabad\",\n \"Delhi\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Restaurant\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7865,\n \"samples\": [\n \"Sri Radhe Chills And Thrills\",\n \"Rcb Bar & Cafe\",\n \"Momo Nation Cafe\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Price\",\n 
\"properties\": {\n \"dtype\": \"number\",\n \"std\": 230.9379787288186,\n \"min\": 0.0,\n \"max\": 2500.0,\n \"num_unique_values\": 120,\n \"samples\": [\n 0.0,\n 140.0,\n 800.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Avg_ratings\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6476154836286703,\n \"min\": 2.0,\n \"max\": 5.0,\n \"num_unique_values\": 30,\n \"samples\": [\n 5.0,\n 3.5,\n 2.7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Total_ratings\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 391,\n \"min\": 20,\n \"max\": 10000,\n \"num_unique_values\": 8,\n \"samples\": [\n 500,\n 5000,\n 100\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Food_type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3734,\n \"samples\": [\n \"Punjabi,Thalis,North Indian,Tandoor,Snacks\",\n \"Gujarati,North Indian,Thalis\",\n \"Street Food,Fast Food,Snacks,Beverages\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Address\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2339,\n \"samples\": [\n \"Bheemanna Garden St Sri Ram Nagar\",\n \"Shiva Road Sector 7\",\n \"Oposite Hedua Park\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Delivery_time\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14,\n \"min\": 20,\n \"max\": 109,\n \"num_unique_values\": 81,\n \"samples\": [\n 35,\n 59,\n 37\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"query_string\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 8231,\n \"samples\": [\n \"Food type/Chinese,Indian#Avg ratings/3.7#Address/Rohini\",\n \"Food type/North Indian,Biryani,Chinese#Avg ratings/3.9#Address/Nampally\",\n \"Food type/North Indian,South Indian,Chinese,Continental,Punjabi#Avg 
ratings/4.1#Address/Warje\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"vector\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", + "type": "dataframe", + "variable_name": "df" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Food type/Indian or Italian#Address/HSR Bangalore\n" - ] - } + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AreaCityRestaurantPriceAvg_ratingsTotal_ratingsFood_typeAddressDelivery_timequery_stringvector
0KoramangalaBangaloreTandoor Hut300.04.4100Biryani,Chinese,North Indian,South Indian5Th Block59Food type/Biryani,Chinese,North Indian,South I...[0.12830292, 0.14721094, -0.086350575, 0.08263...
1KoramangalaBangaloreTunday Kababi300.04.1100Mughlai,Lucknowi5Th Block56Food type/Mughlai,Lucknowi#Avg ratings/4.1#Add...[-0.10582731, 0.15009499, -0.35311985, 0.12081...
2JogupalyaBangaloreKim Lee650.04.4100ChineseDouble Road50Food type/Chinese#Avg ratings/4.4#Address/Doub...[-0.09362272, 0.16319357, 0.12415688, 0.012913...
3IndiranagarBangaloreNew Punjabi Hotel250.03.9500North Indian,Punjabi,Tandoor,Chinese80 Feet Road57Food type/North Indian,Punjabi,Tandoor,Chinese...[0.12705283, 0.17128171, 0.013174878, 0.239679...
4IndiranagarBangaloreNh8350.04.050Rajasthani,Gujarati,North Indian,Snacks,Desser...80 Feet Road63Food type/Rajasthani,Gujarati,North Indian,Sna...[0.08238438, 0.014472998, -0.11513413, 0.28430...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "source": [ - "import ast\n", - "\n", - "# Convert the string content to a dictionary\n", - "try:\n", - " response_dict = ast.literal_eval(content)\n", - "except (ValueError, SyntaxError) as e:\n", - " print(\"Error parsing the response:\", e)\n", - " response_dict = {}\n", - "\n", - "\n", - "filter_cols = ['Food type', 'Avg ratings', 'Address']\n", - "query_string_parts = [f\"{col}/{response_dict.get(col)}\" for col in filter_cols if response_dict.get(col)]\n", - "\n", - "query_string = \"#\".join(query_string_parts)\n", - "print((query_string))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Hc5lv7685vTt" - }, - "source": [ - "### Using LanceDB FTS for searching" + "text/plain": [ + " Area City Restaurant Price Avg_ratings \\\n", + "0 Koramangala Bangalore Tandoor Hut 300.0 4.4 \n", + "1 Koramangala Bangalore Tunday Kababi 300.0 4.1 \n", + "2 Jogupalya Bangalore Kim Lee 650.0 4.4 \n", + "3 Indiranagar Bangalore New Punjabi Hotel 250.0 3.9 \n", + "4 Indiranagar Bangalore Nh8 350.0 4.0 \n", + "\n", + " Total_ratings Food_type \\\n", + "0 100 Biryani,Chinese,North Indian,South Indian \n", + "1 100 Mughlai,Lucknowi \n", + "2 100 Chinese \n", + "3 500 North Indian,Punjabi,Tandoor,Chinese \n", + "4 50 Rajasthani,Gujarati,North Indian,Snacks,Desser... \n", + "\n", + " Address Delivery_time \\\n", + "0 5Th Block 59 \n", + "1 5Th Block 56 \n", + "2 Double Road 50 \n", + "3 80 Feet Road 57 \n", + "4 80 Feet Road 63 \n", + "\n", + " query_string \\\n", + "0 Food type/Biryani,Chinese,North Indian,South I... \n", + "1 Food type/Mughlai,Lucknowi#Avg ratings/4.1#Add... \n", + "2 Food type/Chinese#Avg ratings/4.4#Address/Doub... \n", + "3 Food type/North Indian,Punjabi,Tandoor,Chinese... \n", + "4 Food type/Rajasthani,Gujarati,North Indian,Sna... \n", + "\n", + " vector \n", + "0 [0.12830292, 0.14721094, -0.086350575, 0.08263... \n", + "1 [-0.10582731, 0.15009499, -0.35311985, 0.12081... \n", + "2 [-0.09362272, 0.16319357, 0.12415688, 0.012913... 
\n", + "3 [0.12705283, 0.17128171, 0.013174878, 0.239679... \n", + "4 [0.08238438, 0.014472998, -0.11513413, 0.28430... " ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = lancedb_table.to_pandas()\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pLEhtZhX5vTr" + }, + "source": [ + "### Query Transformation" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 }, + "id": "mv9NzOea5vTs", + "outputId": "f1cc4e0a-57cd-4a76-ff2a-5bd5f403af50" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "id": "AF3TVoPx5vTt" + "data": { + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" }, - "outputs": [], - "source": [ - "# Create the FTS index and search\n", - "lancedb_table.create_fts_index(\"query_string\", replace=True)\n", - "results = lancedb_table.search(query_string).to_pandas()" + "text/plain": [ + "'Food type/Biryani,Chinese,North Indian,South Indian#Avg ratings/4.4#Address/5Th Block'" ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"query_string\"][0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DKGCJRSz5vTs" + }, + "source": [ + "### Extracting the specifics from the query\n", + "\n", + "Just like we pulled out the key details from our CSV to craft query strings, we’ll do the same with user queries. This step is important because it makes searching for the right recommendations much smoother. I mean doing so we can easily run the FTS Index Search." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "9zlQCuSy5vTs", + "outputId": "0a2124e9-7ec5-4cee-e33d-366499d501d8" + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "_Hpg2rzL5vTu" - }, - "source": [ - "### GeoSpatial Recommendation\n", - "\n", - "Ok now we will use the Google Geospatial API to pinpoint the exact locations of restaurants and find their coordinates. The next step is to calculate the distance between these restaurants and the user's location. For this, I am going to use the Haversine formula, which uses the coordinates of two points to draw an imaginary straight line between them, measuring the distance across the Earth's surface. There's some math behind how this formula works, but we'll keep things simple and focus on its application for now. " - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"Food type\": \"Indian or Italian\",\n", + " \"Avg ratings\": None,\n", + " \"Address\": \"HSR Bangalore\"\n", + "}\n" + ] + } + ], + "source": [ + "from openai import OpenAI\n", + "\n", + "OPENAI_API_KEY = \"****\"\n", + "client = OpenAI(api_key=OPENAI_API_KEY)\n", + "\n", + "\n", + "query_string = \"Hi, I am looking for a casual dining restaurant where Indian or Italian food is served near the HSR Bangalore\"\n", + "\n", + "# Helper prompt to extract structured data from ip_prompt\n", + "total_prompt = f\"\"\"Query String: {query_string}\\n\\n\\\n", + "Now from the query string above extract these following entities pinpoints:\n", + "1. Food type : Extract the food type\n", + "2. Avg ratings : Extract the average ratings\n", + "3. Address : Extract the current exact location, don't consider the fillers like \"near\" or \"nearby\".\n", + "\n", + "NOTE : For the Current location, try to understand the pin point location in the query string. Do not give any extra information. 
If you make the mistakes, bad things\n", + "will happen.\n", + "\n", + "Finally return a python dictionary using those points as keys and don't write the markdown of python. If value of a key is not mentioned, then set it as None.\n", + "\"\"\"\n", + "\n", + "# Make a request to OpenAI's API\n", + "completion = client.chat.completions.create(\n", + " model=\"gpt-4o\", # Use the appropriate model\n", + " store=True,\n", + " messages=[{\"role\": \"user\", \"content\": total_prompt}],\n", + ")\n", + "\n", + "# Extract the generated text\n", + "content = completion.choices[0].message.content\n", + "print(content)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "e-aRJyDl5vTt", + "outputId": "302894c4-f2c0-4a3f-acf9-076c70188492" + }, + "outputs": [ { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "6M7wLlfO5vTu", - "outputId": "beaac5bb-30bf-4951-f516-1b758087dbbc" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Restaurant Name: Cafe Azzure\n", - "Distance: 8.06 km\n", - "Area: Ashok Nagar\n", - "Price: 1000.0\n", - "Coordinates: (12.975012, 77.6076558)\n", - "Cuisines Type: American,Italian\n", - "----------------------------------------\n", - "Restaurant Name: Hyderabad Biryaani House\n", - "Distance: 8.55 km\n", - "Area: Victoria Layout\n", - "Price: 499.0\n", - "Coordinates: (12.9715987, 77.5945627)\n", - "Cuisines Type: Indian\n", - "----------------------------------------\n", - "Restaurant Name: Aaliyar Ambur Dum Biryani\n", - "Distance: 7.53 km\n", - "Area: Ashok Nagar\n", - "Price: 200.0\n", - "Coordinates: (12.9694702, 77.60761529999999)\n", - "Cuisines Type: Indian\n", - "----------------------------------------\n", - "Restaurant Name: Jw Kitchen - Jw Marriott\n", - "Distance: 8.58 km\n", - "Area: Ashok Nagar\n", - "Price: 1000.0\n", - "Coordinates: 
(12.972231, 77.59495299999999)\n", - "Cuisines Type: Indian,Continental\n", - "----------------------------------------\n", - "Restaurant Name: The Ritz-Carlton - Ganache\n", - "Distance: 8.55 km\n", - "Area: Ashok Nagar\n", - "Price: 1000.0\n", - "Coordinates: (12.9715987, 77.5945627)\n", - "Cuisines Type: Indian,Bakery\n", - "----------------------------------------\n" - ] - } - ], - "source": [ - "import requests\n", - "import math\n", - "\n", - "def get_google_geocoding(address, api_key):\n", - " base_url = \"https://maps.googleapis.com/maps/api/geocode/json\"\n", - " params = {\"address\": address, \"key\": api_key}\n", - " response = requests.get(base_url, params=params)\n", - "\n", - " if response.status_code == 200:\n", - " result = response.json()\n", - " if result[\"status\"] == \"OK\":\n", - " latitude = result[\"results\"][0][\"geometry\"][\"location\"][\"lat\"]\n", - " longitude = result[\"results\"][0][\"geometry\"][\"location\"][\"lng\"]\n", - " return (latitude, longitude)\n", - " else:\n", - " print(f\"Google API: No results found for address: {address}\")\n", - " return None\n", - " else:\n", - " print(f\"Google API: Request failed for address: {address}\")\n", - " return None\n", - "\n", - "def haversine(coord1, coord2):\n", - " R = 6371.0 # Radius of the Earth in kilometers\n", - " lat1, lon1 = map(math.radians, coord1)\n", - " lat2, lon2 = map(math.radians, coord2)\n", - " dlat = lat2 - lat1\n", - " dlon = lon2 - lon1\n", - " a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2\n", - " c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))\n", - " distance = R * c\n", - " return distance\n", - "\n", - "def process_top_restaurants(data, current_location, api_key, top_n=5):\n", - " current_coords = get_google_geocoding(current_location, api_key)\n", - " if not current_coords:\n", - " return\n", - "\n", - " for index, row in data.head(top_n).iterrows():\n", - " complete_address = f\"{row['Restaurant']}, 
{row['City']}\"\n", - " restaurant_coords = get_google_geocoding(complete_address, api_key)\n", - " if restaurant_coords:\n", - " distance = haversine(current_coords, restaurant_coords)\n", - " print(f\"Restaurant Name: {row['Restaurant']}\")\n", - " print(f\"Distance: {distance:.2f} km\")\n", - " print(f\"Area: {row['Area']}\")\n", - " print(f\"Price: {row['Price']}\")\n", - " print(f\"Coordinates: {restaurant_coords}\")\n", - " print(f\"Cuisines Type: {row['Food_type']}\")\n", - " print(\"-\" * 40)\n", - "\n", - "# Example usage\n", - "GOOGLE_GEOSPATIAL_API_KEY = '****'\n", - "current_location = 'HSR, Bengaluru, India'\n", - "process_top_restaurants(results, current_location, GOOGLE_GEOSPATIAL_API_KEY, top_n=5)" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "Food type/Indian or Italian#Address/HSR Bangalore\n" + ] } - ], - "metadata": { + ], + "source": [ + "import ast\n", + "\n", + "# Convert the string content to a dictionary\n", + "try:\n", + " response_dict = ast.literal_eval(content)\n", + "except (ValueError, SyntaxError) as e:\n", + " print(\"Error parsing the response:\", e)\n", + " response_dict = {}\n", + "\n", + "\n", + "filter_cols = [\"Food type\", \"Avg ratings\", \"Address\"]\n", + "query_string_parts = [\n", + " f\"{col}/{response_dict.get(col)}\" for col in filter_cols if response_dict.get(col)\n", + "]\n", + "\n", + "query_string = \"#\".join(query_string_parts)\n", + "print((query_string))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hc5lv7685vTt" + }, + "source": [ + "### Using LanceDB FTS for searching" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "AF3TVoPx5vTt" + }, + "outputs": [], + "source": [ + "# Create the FTS index and search\n", + "lancedb_table.create_fts_index(\"query_string\", replace=True)\n", + "results = lancedb_table.search(query_string).to_pandas()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_Hpg2rzL5vTu" + }, + "source": [ + 
"### GeoSpatial Recommendation\n", + "\n", + "Ok now we will use the Google Geospatial API to pinpoint the exact locations of restaurants and find their coordinates. The next step is to calculate the distance between these restaurants and the user's location. For this, I am going to use the Haversine formula, which uses the coordinates of two points to draw an imaginary straight line between them, measuring the distance across the Earth's surface. There's some math behind how this formula works, but we'll keep things simple and focus on its application for now. " + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "env", - "language": "python", - "name": "python3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.1" + "id": "6M7wLlfO5vTu", + "outputId": "beaac5bb-30bf-4951-f516-1b758087dbbc" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Restaurant Name: Cafe Azzure\n", + "Distance: 8.06 km\n", + "Area: Ashok Nagar\n", + "Price: 1000.0\n", + "Coordinates: (12.975012, 77.6076558)\n", + "Cuisines Type: American,Italian\n", + "----------------------------------------\n", + "Restaurant Name: Hyderabad Biryaani House\n", + "Distance: 8.55 km\n", + "Area: Victoria Layout\n", + "Price: 499.0\n", + "Coordinates: (12.9715987, 77.5945627)\n", + "Cuisines Type: Indian\n", + "----------------------------------------\n", + "Restaurant Name: Aaliyar Ambur Dum Biryani\n", + "Distance: 7.53 km\n", + "Area: Ashok Nagar\n", + "Price: 200.0\n", + "Coordinates: (12.9694702, 77.60761529999999)\n", + "Cuisines Type: Indian\n", + "----------------------------------------\n", + "Restaurant Name: Jw Kitchen - Jw 
Marriott\n", + "Distance: 8.58 km\n", + "Area: Ashok Nagar\n", + "Price: 1000.0\n", + "Coordinates: (12.972231, 77.59495299999999)\n", + "Cuisines Type: Indian,Continental\n", + "----------------------------------------\n", + "Restaurant Name: The Ritz-Carlton - Ganache\n", + "Distance: 8.55 km\n", + "Area: Ashok Nagar\n", + "Price: 1000.0\n", + "Coordinates: (12.9715987, 77.5945627)\n", + "Cuisines Type: Indian,Bakery\n", + "----------------------------------------\n" + ] } + ], + "source": [ + "import requests\n", + "import math\n", + "\n", + "\n", + "def get_google_geocoding(address, api_key):\n", + " base_url = \"https://maps.googleapis.com/maps/api/geocode/json\"\n", + " params = {\"address\": address, \"key\": api_key}\n", + " response = requests.get(base_url, params=params)\n", + "\n", + " if response.status_code == 200:\n", + " result = response.json()\n", + " if result[\"status\"] == \"OK\":\n", + " latitude = result[\"results\"][0][\"geometry\"][\"location\"][\"lat\"]\n", + " longitude = result[\"results\"][0][\"geometry\"][\"location\"][\"lng\"]\n", + " return (latitude, longitude)\n", + " else:\n", + " print(f\"Google API: No results found for address: {address}\")\n", + " return None\n", + " else:\n", + " print(f\"Google API: Request failed for address: {address}\")\n", + " return None\n", + "\n", + "\n", + "def haversine(coord1, coord2):\n", + " R = 6371.0 # Radius of the Earth in kilometers\n", + " lat1, lon1 = map(math.radians, coord1)\n", + " lat2, lon2 = map(math.radians, coord2)\n", + " dlat = lat2 - lat1\n", + " dlon = lon2 - lon1\n", + " a = (\n", + " math.sin(dlat / 2) ** 2\n", + " + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2\n", + " )\n", + " c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))\n", + " distance = R * c\n", + " return distance\n", + "\n", + "\n", + "def process_top_restaurants(data, current_location, api_key, top_n=5):\n", + " current_coords = get_google_geocoding(current_location, api_key)\n", + " if not 
current_coords:\n", + " return\n", + "\n", + " for index, row in data.head(top_n).iterrows():\n", + " complete_address = f\"{row['Restaurant']}, {row['City']}\"\n", + " restaurant_coords = get_google_geocoding(complete_address, api_key)\n", + " if restaurant_coords:\n", + " distance = haversine(current_coords, restaurant_coords)\n", + " print(f\"Restaurant Name: {row['Restaurant']}\")\n", + " print(f\"Distance: {distance:.2f} km\")\n", + " print(f\"Area: {row['Area']}\")\n", + " print(f\"Price: {row['Price']}\")\n", + " print(f\"Coordinates: {restaurant_coords}\")\n", + " print(f\"Cuisines Type: {row['Food_type']}\")\n", + " print(\"-\" * 40)\n", + "\n", + "\n", + "# Example usage\n", + "GOOGLE_GEOSPATIAL_API_KEY = \"****\"\n", + "current_location = \"HSR, Bengaluru, India\"\n", + "process_top_restaurants(results, current_location, GOOGLE_GEOSPATIAL_API_KEY, top_n=5)" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "env", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.1" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/examples/RASA_Customer-support-bot/main.ipynb b/examples/RASA_Customer-support-bot/main.ipynb index 59a77e3..14a649c 100644 --- a/examples/RASA_Customer-support-bot/main.ipynb +++ b/examples/RASA_Customer-support-bot/main.ipynb @@ -1,1606 +1,1588 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# RASA x LanceDB x LLM : 
Conversational chatbot" + ], + "metadata": { + "id": "5AUJAGW2Rqmm" + } + }, + { + "cell_type": "markdown", + "source": [ + "Rasa is an open-source framework for building intelligent, contextual, and scalable conversational agents. It provides robust tools for **natural language understanding (NLU)**,**dialogue management** and **custom actions**, enabling the creation of sophisticated chatbots and virtual assistants tailored to specific business needs. With its flexible architecture, Rasa allows seamless integration with various APIs, databases, and machine learning models, facilitating the development of highly responsive and intelligent conversational experiences.\n", + "\n", + "Explore RASA at : https://rasa.com/docs/rasa/\n", + "\n", + "Explore LanceDB at : https://lancedb.github.io/lancedb/\n", + "\n", + "![](https://media4.giphy.com/media/v1.Y2lkPTc5MGI3NjExenZjZDhsbnp2cGxibW1zbmQ1Ymc2ZTgzbzBzbDRwZ2t3MzZsd3hvaCZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/S0hxMGYFhEMzm/giphy.webp)\n", + "\n", + "Today we will be building an **Advanced Customer Support Chatbot** using Rasa, LanceDB, and OpenAI api to deliver a great customer service experience. 
This chatbot is designed to handle a wide range of customer inquiries, provide accurate information, and offer personalized assistance, all while maintaining a natural and engaging conversational flow.\n", + "\n", + "\n", + "\n", + "\n" + ], + "metadata": { + "id": "vEkX0bDnPU15" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3Bz7xn6a6OdX" + }, + "outputs": [], + "source": [ + "# Install required packages\n", + "!pip install rasa lancedb openai==0.28 python-dotenv -q" + ] + }, + { + "cell_type": "code", + "source": [ + "# Initialize a new Rasa project which sets up the necessary directory structure and files.\n", + "!rasa init --no-prompt" + ], + "metadata": { "colab": { - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python" + "id": "9fpWSlpm6iA4", + "outputId": "8c342841-5f2b-423e-8267-4a5ae8d241d1" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/usr/local/lib/python3.10/dist-packages/rasa/core/tracker_store.py:1044: MovedIn20Warning: \u001b[31mDeprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. \u001b[32mTo prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to \"sqlalchemy<2.0\". \u001b[36mSet environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message.\u001b[0m (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", + " Base: DeclarativeMeta = declarative_base()\n", + "\u001b(0lqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqk\u001b(B\n", + "\u001b(0x\u001b(B Rasa Open Source reports anonymous usage telemetry to help improve the product \u001b(0x\u001b(B\n", + "\u001b(0x\u001b(B for all its users. 
\u001b(0x\u001b(B\n", + "\u001b(0x\u001b(B \u001b(0x\u001b(B\n", + "\u001b(0x\u001b(B If you'd like to opt-out, you can use `rasa telemetry disable`. \u001b(0x\u001b(B\n", + "\u001b(0x\u001b(B To learn more, check out https://rasa.com/docs/rasa/telemetry/telemetry. \u001b(0x\u001b(B\n", + "\u001b(0mqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqj\u001b(B\n", + "/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/validation.py:134: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html\n", + " import pkg_resources\n", + "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.\n", + "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", + " declare_namespace(pkg)\n", + "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.cloud')`.\n", + "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", + " declare_namespace(pkg)\n", + "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('mpl_toolkits')`.\n", + "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. 
See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", + " declare_namespace(pkg)\n", + "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('ruamel')`.\n", + "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", + " declare_namespace(pkg)\n", + "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('sphinxcontrib')`.\n", + "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", + " declare_namespace(pkg)\n", + "\u001b[92mWelcome to Rasa! 🤖\n", + "\u001b[0m\n", + "To get started quickly, an initial project will be created.\n", + "If you need some help, check out the documentation at https://rasa.com/docs/rasa.\n", + "\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - creating tests\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/tests/test_stories.yml -> ./tests\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/config.yml -> .\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/endpoints.yml -> .\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/credentials.yml -> .\n", + "2024-12-30 20:25:43 
\u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/domain.yml -> .\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - creating data\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/data/stories.yml -> ./data\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/data/nlu.yml -> ./data\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/data/rules.yml -> ./data\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - creating actions\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/actions/__init__.py -> ./actions\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/actions/actions.py -> ./actions\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - creating actions/__pycache__\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/actions/__pycache__/__init__.cpython-310.pyc -> ./actions/__pycache__\n", + "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/actions/__pycache__/actions.cpython-310.pyc -> ./actions/__pycache__\n", + "Created project directory at '/content'.\n", + "\u001b[92mFinished creating project structure.\u001b[0m\n", + "\u001b[92mTraining an initial model...\u001b[0m\n", + 
"/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/util.py:52: DeprecationWarning: jax.xla_computation is deprecated. Please use the AOT APIs.\n", + " from jax import xla_computation as _xla_computation\n", + "2024-12-30 20:25:49 \u001b[1;30mINFO \u001b[0m \u001b[34mnumexpr.utils\u001b[0m - NumExpr defaulting to 2 threads.\n", + "\u001b[94mThe configuration for policies and pipeline was chosen automatically. It was written into the config file at 'config.yml'.\u001b[0m\n", + "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'RegexFeaturizer'.\n", + "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'RegexFeaturizer'.\n", + "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'LexicalSyntacticFeaturizer'.\n", + "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'LexicalSyntacticFeaturizer'.\n", + "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'CountVectorsFeaturizer'.\n", + "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer\u001b[0m - 80 vocabulary items were created for text attribute.\n", + "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'CountVectorsFeaturizer'.\n", + "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'CountVectorsFeaturizer'.\n", + "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer\u001b[0m - 697 vocabulary items were created for text attribute.\n", + "2024-12-30 20:25:53 
\u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'CountVectorsFeaturizer'.\n", + "2024-12-30 20:25:54 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'DIETClassifier'.\n", + "Epochs: 100% 100/100 [00:42<00:00, 2.33it/s, t_loss=1.15, i_acc=1]\n", + "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'DIETClassifier'.\n", + "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'EntitySynonymMapper'.\n", + "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'EntitySynonymMapper'.\n", + "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'ResponseSelector'.\n", + "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.nlu.selectors.response_selector\u001b[0m - Retrieval intent parameter was left to its default value. 
This response selector will be trained on training examples combining all retrieval intents.\n", + "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'ResponseSelector'.\n", + "Processed story blocks: 100% 3/3 [00:00<00:00, 2026.89it/s, # trackers=1]\n", + "Processed story blocks: 100% 3/3 [00:00<00:00, 971.43it/s, # trackers=3]\n", + "Processed story blocks: 100% 3/3 [00:00<00:00, 265.83it/s, # trackers=12]\n", + "Processed story blocks: 100% 3/3 [00:00<00:00, 70.43it/s, # trackers=39]\n", + "Processed rules: 100% 2/2 [00:00<00:00, 2641.25it/s, # trackers=1]\n", + "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'MemoizationPolicy'.\n", + "Processed trackers: 100% 3/3 [00:00<00:00, 1329.27it/s, # action=12]\n", + "Processed actions: 12it [00:00, 5129.08it/s, # examples=12]\n", + "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'MemoizationPolicy'.\n", + "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'RulePolicy'.\n", + "Processed trackers: 100% 2/2 [00:00<00:00, 1590.86it/s, # action=5]\n", + "Processed actions: 5it [00:00, 11137.29it/s, # examples=4]\n", + "Processed trackers: 100% 3/3 [00:00<00:00, 1967.92it/s, # action=12]\n", + "Processed trackers: 100% 2/2 [00:00<00:00, 1624.44it/s]\n", + "Processed trackers: 100% 5/5 [00:00<00:00, 1416.71it/s]\n", + "2024-12-30 20:26:51 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'RulePolicy'.\n", + "2024-12-30 20:26:51 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'TEDPolicy'.\n", + "Processed trackers: 100% 120/120 [00:00<00:00, 1240.87it/s, # action=30]\n", + "Epochs: 100% 100/100 [00:22<00:00, 4.52it/s, 
t_loss=1.96, loss=1.8, acc=1]\n", + "2024-12-30 20:27:14 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'TEDPolicy'.\n", + "2024-12-30 20:27:14 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'UnexpecTEDIntentPolicy'.\n", + "2024-12-30 20:27:14 \u001b[1;30mWARNING \u001b[0m \u001b[34mrasa.shared.utils.common\u001b[0m - \u001b[33mThe UnexpecTED Intent Policy is currently experimental and might change or be removed in the future 🔬 Please share your feedback on it in the forum (https://forum.rasa.com) to help us make this feature ready for production.\u001b[0m\n", + "Processed trackers: 100% 120/120 [00:00<00:00, 3661.95it/s, # intent=12]\n", + "Epochs: 100% 100/100 [00:23<00:00, 4.17it/s, t_loss=0.134, loss=0.0164, acc=1]\n", + "2024-12-30 20:27:42 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'UnexpecTEDIntentPolicy'.\n", + "\u001b[92mYour Rasa model is trained and saved at 'models/20241230-202552-figurative-chronology.tar.gz'.\u001b[0m\n", + "If you want to speak to the assistant, run 'rasa shell' at any time inside the project directory.\n" + ] } + ] }, - "cells": [ - { - "cell_type": "markdown", - "source": [ - "# RASA x LanceDB x LLM : Conversational chatbot" - ], - "metadata": { - "id": "5AUJAGW2Rqmm" - } - }, - { - "cell_type": "markdown", - "source": [ - "Rasa is an open-source framework for building intelligent, contextual, and scalable conversational agents. It provides robust tools for **natural language understanding (NLU)**,**dialogue management** and **custom actions**, enabling the creation of sophisticated chatbots and virtual assistants tailored to specific business needs. 
With its flexible architecture, Rasa allows seamless integration with various APIs, databases, and machine learning models, facilitating the development of highly responsive and intelligent conversational experiences.\n", - "\n", - "Explore RASA at : https://rasa.com/docs/rasa/\n", - "\n", - "Explore LanceDB at : https://lancedb.github.io/lancedb/\n", - "\n", - "![](https://media4.giphy.com/media/v1.Y2lkPTc5MGI3NjExenZjZDhsbnp2cGxibW1zbmQ1Ymc2ZTgzbzBzbDRwZ2t3MzZsd3hvaCZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/S0hxMGYFhEMzm/giphy.webp)\n", - "\n", - "Today we will be building an **Advanced Customer Support Chatbot** using Rasa, LanceDB, and OpenAI api to deliver a great customer service experience. This chatbot is designed to handle a wide range of customer inquiries, provide accurate information, and offer personalized assistance, all while maintaining a natural and engaging conversational flow.\n", - "\n", - "\n", - "\n", - "\n" - ], - "metadata": { - "id": "vEkX0bDnPU15" - } - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3Bz7xn6a6OdX" - }, - "outputs": [], - "source": [ - "# Install required packages\n", - "!pip install rasa lancedb openai==0.28 python-dotenv -q" - ] - }, - { - "cell_type": "code", - "source": [ - "#Initialize a new Rasa project which sets up the necessary directory structure and files.\n", - "!rasa init --no-prompt" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "9fpWSlpm6iA4", - "outputId": "8c342841-5f2b-423e-8267-4a5ae8d241d1" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "/usr/local/lib/python3.10/dist-packages/rasa/core/tracker_store.py:1044: MovedIn20Warning: \u001b[31mDeprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. \u001b[32mTo prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to \"sqlalchemy<2.0\". 
\u001b[36mSet environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message.\u001b[0m (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", - " Base: DeclarativeMeta = declarative_base()\n", - "\u001b(0lqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqk\u001b(B\n", - "\u001b(0x\u001b(B Rasa Open Source reports anonymous usage telemetry to help improve the product \u001b(0x\u001b(B\n", - "\u001b(0x\u001b(B for all its users. \u001b(0x\u001b(B\n", - "\u001b(0x\u001b(B \u001b(0x\u001b(B\n", - "\u001b(0x\u001b(B If you'd like to opt-out, you can use `rasa telemetry disable`. \u001b(0x\u001b(B\n", - "\u001b(0x\u001b(B To learn more, check out https://rasa.com/docs/rasa/telemetry/telemetry. \u001b(0x\u001b(B\n", - "\u001b(0mqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqj\u001b(B\n", - "/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/validation.py:134: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html\n", - " import pkg_resources\n", - "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.\n", - "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", - " declare_namespace(pkg)\n", - "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.cloud')`.\n", - "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. 
See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", - " declare_namespace(pkg)\n", - "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('mpl_toolkits')`.\n", - "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", - " declare_namespace(pkg)\n", - "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('ruamel')`.\n", - "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", - " declare_namespace(pkg)\n", - "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('sphinxcontrib')`.\n", - "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", - " declare_namespace(pkg)\n", - "\u001b[92mWelcome to Rasa! 
🤖\n", - "\u001b[0m\n", - "To get started quickly, an initial project will be created.\n", - "If you need some help, check out the documentation at https://rasa.com/docs/rasa.\n", - "\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - creating tests\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/tests/test_stories.yml -> ./tests\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/config.yml -> .\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/endpoints.yml -> .\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/credentials.yml -> .\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/domain.yml -> .\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - creating data\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/data/stories.yml -> ./data\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/data/nlu.yml -> ./data\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/data/rules.yml -> ./data\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - creating actions\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying 
/usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/actions/__init__.py -> ./actions\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/actions/actions.py -> ./actions\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - creating actions/__pycache__\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/actions/__pycache__/__init__.cpython-310.pyc -> ./actions/__pycache__\n", - "2024-12-30 20:25:43 \u001b[1;30mINFO \u001b[0m \u001b[34mroot\u001b[0m - copying /usr/local/lib/python3.10/dist-packages/rasa/cli/initial_project/actions/__pycache__/actions.cpython-310.pyc -> ./actions/__pycache__\n", - "Created project directory at '/content'.\n", - "\u001b[92mFinished creating project structure.\u001b[0m\n", - "\u001b[92mTraining an initial model...\u001b[0m\n", - "/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/util.py:52: DeprecationWarning: jax.xla_computation is deprecated. Please use the AOT APIs.\n", - " from jax import xla_computation as _xla_computation\n", - "2024-12-30 20:25:49 \u001b[1;30mINFO \u001b[0m \u001b[34mnumexpr.utils\u001b[0m - NumExpr defaulting to 2 threads.\n", - "\u001b[94mThe configuration for policies and pipeline was chosen automatically. 
It was written into the config file at 'config.yml'.\u001b[0m\n", - "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'RegexFeaturizer'.\n", - "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'RegexFeaturizer'.\n", - "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'LexicalSyntacticFeaturizer'.\n", - "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'LexicalSyntacticFeaturizer'.\n", - "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'CountVectorsFeaturizer'.\n", - "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer\u001b[0m - 80 vocabulary items were created for text attribute.\n", - "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'CountVectorsFeaturizer'.\n", - "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'CountVectorsFeaturizer'.\n", - "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.nlu.featurizers.sparse_featurizer.count_vectors_featurizer\u001b[0m - 697 vocabulary items were created for text attribute.\n", - "2024-12-30 20:25:53 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'CountVectorsFeaturizer'.\n", - "2024-12-30 20:25:54 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'DIETClassifier'.\n", - "Epochs: 100% 100/100 [00:42<00:00, 2.33it/s, t_loss=1.15, i_acc=1]\n", - "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m 
\u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'DIETClassifier'.\n", - "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'EntitySynonymMapper'.\n", - "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'EntitySynonymMapper'.\n", - "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'ResponseSelector'.\n", - "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.nlu.selectors.response_selector\u001b[0m - Retrieval intent parameter was left to its default value. This response selector will be trained on training examples combining all retrieval intents.\n", - "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'ResponseSelector'.\n", - "Processed story blocks: 100% 3/3 [00:00<00:00, 2026.89it/s, # trackers=1]\n", - "Processed story blocks: 100% 3/3 [00:00<00:00, 971.43it/s, # trackers=3]\n", - "Processed story blocks: 100% 3/3 [00:00<00:00, 265.83it/s, # trackers=12]\n", - "Processed story blocks: 100% 3/3 [00:00<00:00, 70.43it/s, # trackers=39]\n", - "Processed rules: 100% 2/2 [00:00<00:00, 2641.25it/s, # trackers=1]\n", - "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'MemoizationPolicy'.\n", - "Processed trackers: 100% 3/3 [00:00<00:00, 1329.27it/s, # action=12]\n", - "Processed actions: 12it [00:00, 5129.08it/s, # examples=12]\n", - "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'MemoizationPolicy'.\n", - "2024-12-30 20:26:50 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'RulePolicy'.\n", - "Processed trackers: 100% 2/2 [00:00<00:00, 
1590.86it/s, # action=5]\n", - "Processed actions: 5it [00:00, 11137.29it/s, # examples=4]\n", - "Processed trackers: 100% 3/3 [00:00<00:00, 1967.92it/s, # action=12]\n", - "Processed trackers: 100% 2/2 [00:00<00:00, 1624.44it/s]\n", - "Processed trackers: 100% 5/5 [00:00<00:00, 1416.71it/s]\n", - "2024-12-30 20:26:51 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'RulePolicy'.\n", - "2024-12-30 20:26:51 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'TEDPolicy'.\n", - "Processed trackers: 100% 120/120 [00:00<00:00, 1240.87it/s, # action=30]\n", - "Epochs: 100% 100/100 [00:22<00:00, 4.52it/s, t_loss=1.96, loss=1.8, acc=1]\n", - "2024-12-30 20:27:14 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'TEDPolicy'.\n", - "2024-12-30 20:27:14 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Starting to train component 'UnexpecTEDIntentPolicy'.\n", - "2024-12-30 20:27:14 \u001b[1;30mWARNING \u001b[0m \u001b[34mrasa.shared.utils.common\u001b[0m - \u001b[33mThe UnexpecTED Intent Policy is currently experimental and might change or be removed in the future 🔬 Please share your feedback on it in the forum (https://forum.rasa.com) to help us make this feature ready for production.\u001b[0m\n", - "Processed trackers: 100% 120/120 [00:00<00:00, 3661.95it/s, # intent=12]\n", - "Epochs: 100% 100/100 [00:23<00:00, 4.17it/s, t_loss=0.134, loss=0.0164, acc=1]\n", - "2024-12-30 20:27:42 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Finished training component 'UnexpecTEDIntentPolicy'.\n", - "\u001b[92mYour Rasa model is trained and saved at 'models/20241230-202552-figurative-chronology.tar.gz'.\u001b[0m\n", - "If you want to speak to the assistant, run 'rasa shell' at any time inside the project directory.\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ 
- "# Quick overview of RASA x LanceDB Integration Workflow\n", - "\n", - "**Step 1 : Define knowledge_data and store it in LanceDB:**\n", - "* Use LanceDB to store and manage structured knowledge data relevant to customer support.\n", - "\n", - "**Step 2 : Configure Rasa Files:**\n", - "- nlu.yml: Train intent recognition and entity extraction.\n", - "- stories.yml & rules.yml: Define conversational flows and rules.\n", - "- domain.yml: Specify intents, entities, actions, and responses.\n", - "- config.yml: Set up the NLP pipeline and policies.\n", - "\n", - "**Step 3 : Implement Custom Actions (actions.py):**\n", - "- Create actions that query LanceDB for relevant information based on user intents.\n", - "- Integrate OpenAI’s LLM to generate nuanced and context-aware responses.\n", - "\n", - "**Step 4 : Train the Model (rasa train):**\n", - "- Compile and optimize the Rasa model based on the latest configurations and training data.\n", - "\n", - "**Step 5 : Run Servers:**\n", - "- Action Server: Executes custom actions.\n", - "- Rasa Server: Handles user interactions and manages dialogue using the trained model.\n", - "\n", - "**Step 6 : Deploy and Test:**\n", - "- Interact with the chatbot to ensure it accurately understands queries, retrieves information from LanceDB, and generates appropriate responses via OpenAI’s LLM." - ], - "metadata": { - "id": "IFuOxNwrGbd4" - } - }, - { - "cell_type": "markdown", - "source": [ - "### Creating and storing the Knowledge base of Company Support in a Lance Table" - ], - "metadata": { - "id": "vC1Ce1j_NxwF" - } - }, - { - "cell_type": "markdown", - "source": [ - "Let's create a dataset containing customer support information. We will use this data to populate LanceDB table.\n", - "\n", - "It contains a list of dictionaries where each dictionary represents a piece of knowledge or an FAQ entry. Each entry has a \"content\" key corresponding to the support information." 
- ], - "metadata": { - "id": "oriMmk0BNBG1" - } - }, - { - "cell_type": "code", - "source": [ - "# This dataset covers various categories such as Account Management, Billing & Payments, Technical Support, Shipping & Delivery, Returns & Refunds, Product Information, and Company Policies.\n", - "\n", - "company_support_data = [\n", - " # === Account Management ===\n", - " {\n", - " \"content\": \"To reset your password, navigate to the login page and click on 'Forgot Password'. You'll receive an email with instructions to create a new password.\"\n", - " },\n", - " {\n", - " \"content\": \"To update your account information, log in to your profile and click on 'Edit Profile'. From there, you can change your email, phone number, and other personal details.\"\n", - " },\n", - " {\n", - " \"content\": \"If you want to delete your account, please contact our support team at support@yourcompany.com with the subject line 'Account Deletion Request'. We'll assist you promptly.\"\n", - " },\n", - " {\n", - " \"content\": \"To change your username, go to your account settings, select 'Username', and enter your desired new username. If it's available, the change will be applied immediately.\"\n", - " },\n", - "\n", - " # === Billing & Payments ===\n", - " {\n", - " \"content\": \"We accept various payment methods including Visa, MasterCard, American Express, PayPal, and Apple Pay.\"\n", - " },\n", - " {\n", - " \"content\": \"To view your billing history, log in to your account, navigate to the 'Billing' section, and select 'Billing History'. 
You'll see a list of all your past transactions.\"\n", - " },\n", - " {\n", - " \"content\": \"If you believe you've been incorrectly charged, please contact our billing support at billing@yourcompany.com with your order ID and details of the discrepancy.\"\n", - " },\n", - " {\n", - " \"content\": \"Your subscription will automatically renew on the renewal date unless you cancel it at least 24 hours before the renewal date.\"\n", - " },\n", - "\n", - " # === Technical Support ===\n", - " {\n", - " \"content\": \"If you encounter issues while using our platform, try clearing your browser cache and cookies, then restart your browser.\"\n", - " },\n", - " {\n", - " \"content\": \"For mobile app issues, ensure that you have the latest version of the app installed. You can update the app from the App Store or Google Play Store.\"\n", - " },\n", - " {\n", - " \"content\": \"To report a bug, please email us at support@yourcompany.com with a detailed description of the issue and any relevant screenshots.\"\n", - " },\n", - " {\n", - " \"content\": \"Our platform requires JavaScript to be enabled. Please check your browser settings to ensure JavaScript is turned on.\"\n", - " },\n", - "\n", - " # === Shipping & Delivery ===\n", - " {\n", - " \"content\": \"We offer free standard shipping on all orders over $50. Standard shipping typically takes 5-7 business days.\"\n", - " },\n", - " {\n", - " \"content\": \"Express shipping options are available at an additional cost. Delivery usually takes 1-3 business days.\"\n", - " },\n", - " {\n", - " \"content\": \"You can track your order status by logging into your account and visiting the 'Orders' section. 
Click on your order to see detailed tracking information.\"\n", - " },\n", - " {\n", - " \"content\": \"If your package is delayed, please contact our shipping department at shipping@yourcompany.com with your order number for assistance.\"\n", - " },\n", - "\n", - " # === Returns & Refunds ===\n", - " {\n", - " \"content\": \"You can return most items within 30 days of purchase. Items must be in original condition and packaging.\"\n", - " },\n", - " {\n", - " \"content\": \"To initiate a return, log in to your account, go to the 'Orders' section, select the order you wish to return, and click on 'Start Return'. Follow the on-screen instructions.\"\n", - " },\n", - " {\n", - " \"content\": \"Refunds are processed within 7-10 business days after we receive your returned item. The refund will be issued to your original payment method.\"\n", - " },\n", - " {\n", - " \"content\": \"Please note that shipping costs are non-refundable unless the return is due to a defect or error on our part.\"\n", - " },\n", - "\n", - " # === Order Management ===\n", - " {\n", - " \"content\": \"To modify your order after placing it, please contact our support team at support@yourcompany.com as soon as possible. Changes can only be made within one hour of order placement.\"\n", - " },\n", - " {\n", - " \"content\": \"If you received the wrong item, please contact us immediately with your order number and a photo of the received product. We'll arrange for a replacement or refund.\"\n", - " },\n", - " {\n", - " \"content\": \"To cancel your order, log in to your account, go to the 'Orders' section, select the order you wish to cancel, and click on 'Cancel Order'.\"\n", - " },\n", - " {\n", - " \"content\": \"Parts of your order may ship separately. You will receive separate tracking information for each shipment.\"\n", - " },\n", - "\n", - " # === Account Security ===\n", - " {\n", - " \"content\": \"For enhanced security, enable two-factor authentication (2FA) in your account settings. 
This adds an extra layer of protection to your account.\"\n", - " },\n", - " {\n", - " \"content\": \"If you suspect unauthorized access to your account, change your password immediately and contact our support team.\"\n", - " },\n", - " {\n", - " \"content\": \"Never share your account credentials with anyone. We will never ask for your password via email or phone.\"\n", - " },\n", - " {\n", - " \"content\": \"Regularly review your account activity to ensure all actions are authorized. You can view your recent activity in the 'Account Activity' section.\"\n", - " },\n", - "\n", - " # === Shipping Issues ===\n", - " {\n", - " \"content\": \"If your package hasn't arrived within the estimated delivery time, please use the tracking number in your order details to check its status, or contact our shipping support at shipping@yourcompany.com.\"\n", - " },\n", - " {\n", - " \"content\": \"For international shipments, please note that customs processing times may affect delivery dates. We are not responsible for delays caused by customs.\"\n", - " },\n", - " {\n", - " \"content\": \"Ensure that your shipping address is correct before placing an order. Incorrect addresses may lead to delivery delays or package returns.\"\n", - " },\n", - " {\n", - " \"content\": \"We offer shipment insurance for an additional fee. This covers lost or damaged packages during transit. To add insurance, select the option during checkout.\"\n", - " },\n", - "\n", - " # === Product Availability ===\n", - " {\n", - " \"content\": \"If a product is out of stock, you can sign up for restock notifications on the product page. We'll notify you via email once it's available again.\"\n", - " },\n", - " {\n", - " \"content\": \"Limited edition items are subject to availability. We recommend placing your orders early to secure these exclusive products.\"\n", - " },\n", - " {\n", - " \"content\": \"Pre-orders are accepted for upcoming releases. 
Pre-order now to ensure you receive the product as soon as it ships.\"\n", - " },\n", - " {\n", - " \"content\": \"Sold-out items cannot be purchased until new stock arrives. Please check back regularly or join our waitlist.\"\n", - " },\n", - "\n", - " # === Shipping Costs ===\n", - " {\n", - " \"content\": \"Shipping costs vary based on your location and the size of your order. You can view the estimated shipping cost during the checkout process.\"\n", - " },\n", - " {\n", - " \"content\": \"We offer free standard shipping on orders over $75. Orders below this threshold will incur standard shipping fees.\"\n", - " },\n", - " {\n", - " \"content\": \"Express shipping is available for an additional fee. If you need your order faster, select the express option at checkout.\"\n", - " },\n", - " {\n", - " \"content\": \"Please note that international shipping fees will apply for orders shipped outside the United States.\"\n", - " },\n", - "\n", - " # === Subscription Services ===\n", - " {\n", - " \"content\": \"Subscribe to our monthly box to receive exclusive products and discounts. You can manage your subscription in your account settings.\"\n", - " },\n", - " {\n", - " \"content\": \"Canceling your subscription can be done at any time. Please visit the 'Subscriptions' section in your account to make changes.\"\n", - " },\n", - " {\n", - " \"content\": \"Subscribers receive early access to new products and special promotions throughout the year.\"\n", - " },\n", - " {\n", - " \"content\": \"Pause your subscription if you need a break. 
You can resume it whenever you're ready by logging into your account.\"\n", - " },\n", - "\n", - " # === Order Tracking ===\n", - " {\n", - " \"content\": \"To track your order, log in to your account, go to the 'Orders' section, and click on 'Track Order' next to the relevant order.\"\n", - " },\n", - " {\n", - " \"content\": \"You'll receive email notifications with tracking updates once your order has been shipped.\"\n", - " },\n", - " {\n", - " \"content\": \"If you haven't received your tracking information, please check your spam folder or contact our support team.\"\n", - " },\n", - " {\n", - " \"content\": \"For real-time tracking, use the tracking number provided and visit the carrier's website directly.\"\n", - " },\n", - "\n", - " # === Account Reactivation ===\n", - " {\n", - " \"content\": \"If you've previously closed your account and wish to reactivate it, please contact our support team at support@yourcompany.com with your account details.\"\n", - " },\n", - " {\n", - " \"content\": \"Reactivating your account will restore your previous order history and saved preferences.\"\n", - " },\n", - " {\n", - " \"content\": \"Please note that some data may have been permanently deleted if the account was closed more than six months ago.\"\n", - " },\n", - " {\n", - " \"content\": \"Upon reactivation, you may need to update your password and verify your email address for security purposes.\"\n", - " },\n", - "\n", - " # === Payment Issues ===\n", - " {\n", - " \"content\": \"If your payment is declined, please verify that your billing information is correct and that your card has sufficient funds.\"\n", - " },\n", - " {\n", - " \"content\": \"For any issues with payments made through PayPal, please ensure your account is in good standing and that your linked bank accounts are active.\"\n", - " },\n", - " {\n", - " \"content\": \"Expired or invalid credit cards cannot be used. 
Please update your payment method in your account settings.\"\n", - " },\n", - " {\n", - " \"content\": \"If you've been double-charged, contact our billing support at billing@yourcompany.com with your order ID and transaction details.\"\n", - " },\n", - "\n", - " # === Warranty Information ===\n", - " {\n", - " \"content\": \"All our products come with a one-year warranty covering manufacturing defects and hardware malfunctions.\"\n", - " },\n", - " {\n", - " \"content\": \"To claim a warranty, contact our support team with your order number and a description of the issue.\"\n", - " },\n", - " {\n", - " \"content\": \"Warranty does not cover accidental damage, unauthorized repairs, or normal wear and tear.\"\n", - " },\n", - " {\n", - " \"content\": \"Extended warranty options are available at the time of purchase for an additional fee.\"\n", - " },\n", - "\n", - " # === Privacy & Data Security ===\n", - " {\n", - " \"content\": \"We prioritize your privacy and ensure that your personal data is protected. Please refer to our Privacy Policy for detailed information.\"\n", - " },\n", - " {\n", - " \"content\": \"You have control over your personal information. Manage your privacy settings in the 'Account Settings' section.\"\n", - " },\n", - " {\n", - " \"content\": \"We do not share your personal information with third parties without your explicit consent, except as required by law.\"\n", - " },\n", - " {\n", - " \"content\": \"For any concerns about data security or privacy, contact our privacy team at privacy@yourcompany.com.\"\n", - " },\n", - "\n", - " # === Shipping Restrictions ===\n", - " {\n", - " \"content\": \"We currently do not ship to P.O. Boxes or APO/FPO addresses. Please provide a valid residential or business address.\"\n", - " },\n", - " {\n", - " \"content\": \"Certain products may have shipping restrictions based on international regulations. 
Check the product page for any specific restrictions.\"\n", - " },\n", - " {\n", - " \"content\": \"Hazardous materials and perishable items have special shipping requirements. Ensure compliance by reviewing our shipping guidelines.\"\n", - " },\n", - " {\n", - " \"content\": \"Import duties and taxes are the responsibility of the recipient for international orders. These fees are not included in the shipping cost.\"\n", - " },\n", - "\n", - " # === Frequently Asked Questions (FAQs) ===\n", - " {\n", - " \"content\": \"Q: How can I track my order?\\nA: You can track your order by logging into your account, navigating to the 'Orders' section, and clicking on 'Track Order' next to your order.\"\n", - " },\n", - " {\n", - " \"content\": \"Q: What is your return policy?\\nA: We accept returns within 30 days of purchase. Items must be in original condition and packaging. To initiate a return, visit the 'Orders' section in your account.\"\n", - " },\n", - " {\n", - " \"content\": \"Q: Do you offer international shipping?\\nA: Yes, we ship to select countries internationally. Shipping fees and delivery times may vary based on location.\"\n", - " },\n", - " {\n", - " \"content\": \"Q: How do I change my shipping address?\\nA: To change your shipping address, go to your account settings, click on 'Edit Address', and update your details. Note that changes can only be made before the order is shipped.\"\n", - " },\n", - " {\n", - " \"content\": \"Q: Can I cancel my order?\\nA: Yes, you can cancel your order within one hour of placing it. To cancel, go to the 'Orders' section in your account and select 'Cancel Order'.\"\n", - " },\n", - " {\n", - " \"content\": \"Q: How secure is my personal information?\\nA: We use industry-standard encryption and security measures to protect your personal information. 
Refer to our Privacy Policy for more details.\"\n", - " },\n", - " {\n", - " \"content\": \"Q: What payment methods are accepted?\\nA: We accept Visa, MasterCard, American Express, PayPal, Apple Pay, and Google Pay.\"\n", - " },\n", - " {\n", - " \"content\": \"Q: How do I apply a promo code?\\nA: During checkout, enter your promo code in the 'Promo Code' field and click 'Apply' to receive your discount.\"\n", - " },\n", - " {\n", - " \"content\": \"Q: Who do I contact for support?\\nA: For any inquiries or assistance, contact our support team at support@yourcompany.com or call us at (123) 456-7890.\"\n", - " },\n", - "\n", - " # === Account Verification ===\n", - " {\n", - " \"content\": \"To verify your account, check your email for a verification link after signing up. Click on the link to activate your account.\"\n", - " },\n", - " {\n", - " \"content\": \"If you did not receive a verification email, please check your spam folder or resend the verification email from your account settings.\"\n", - " },\n", - " {\n", - " \"content\": \"Account verification is required to access all features of our platform, including order placement and account management.\"\n", - " },\n", - " {\n", - " \"content\": \"For any issues with account verification, contact our support team at support@yourcompany.com.\"\n", - " },\n", - "\n", - " # === Contact Information ===\n", - " {\n", - " \"content\": \"You can reach our support team via email at support@yourcompany.com or call us at (123) 456-7890 from Monday to Friday, 9 AM to 5 PM EST.\"\n", - " },\n", - " {\n", - " \"content\": \"Follow us on social media for the latest updates, promotions, and customer support: Facebook, Twitter, Instagram.\"\n", - " },\n", - " {\n", - " \"content\": \"Visit our 'Contact Us' page on our website for a direct contact form and additional support options.\"\n", - " },\n", - " {\n", - " \"content\": \"Our headquarters are located at 1234 Business Ave, Suite 100, City, State, ZIP Code.\"\n", - " 
},\n", - "\n", - " # === Order Confirmation ===\n", - " {\n", - " \"content\": \"After placing an order, you'll receive a confirmation email with your order details and estimated delivery date.\"\n", - " },\n", - " {\n", - " \"content\": \"If you haven't received your order confirmation, please check your spam folder or contact our support team for assistance.\"\n", - " },\n", - " {\n", - " \"content\": \"The order confirmation email includes a summary of your purchase, billing information, and contact details for support.\"\n", - " },\n", - " {\n", - " \"content\": \"For large orders or bulk purchases, you'll receive a separate confirmation email with additional details.\"\n", - " },\n", - "\n", - " # === Shipping Options ===\n", - " {\n", - " \"content\": \"We offer three shipping options: Standard, Express, and Overnight. Choose the one that best fits your needs during checkout.\"\n", - " },\n", - " {\n", - " \"content\": \"Standard shipping is free for orders over $75. It typically takes 5-7 business days for delivery.\"\n", - " },\n", - " {\n", - " \"content\": \"Express shipping costs are calculated based on the weight and destination of your order. Estimated delivery is 2-3 business days.\"\n", - " },\n", - " {\n", - " \"content\": \"Overnight shipping is available for urgent orders. Placing your order before 10 AM ensures next-day delivery.\"\n", - " },\n", - "\n", - " # === Custom Order Handling ===\n", - " {\n", - " \"content\": \"For custom orders, please contact our support team at support@yourcompany.com with detailed specifications and requirements.\"\n", - " },\n", - " {\n", - " \"content\": \"Custom orders may require additional processing time. Delivery dates will be provided after discussing your specific needs.\"\n", - " },\n", - " {\n", - " \"content\": \"Pricing for custom orders varies based on the complexity and materials involved. 
Our support team will provide a quote upon request.\"\n", - " },\n", - " {\n", - " \"content\": \"Please review all custom order details carefully before finalizing to ensure accuracy and satisfaction.\"\n", - " },\n", - " # === FAQs Specific ===\n", - " {\n", - " \"content\": \"Q: How do I track my order?\\nA: You can track your order by logging into your account, going to the 'Orders' section, and clicking on 'Track Order' next to your recent purchase.\"\n", - " },\n", - " {\n", - " \"content\": \"Q: What is your return policy?\\nA: We accept returns within 30 days of purchase. Items must be in their original condition and packaging. To initiate a return, visit the 'Orders' section in your account and select 'Return Item'.\"\n", - " },\n", - " {\n", - " \"content\": \"Q: Do you offer international shipping?\\nA: Yes, we ship to select countries internationally. Shipping fees and delivery times vary based on your location. Please refer to the 'Shipping Information' page for more details.\"\n", - " },\n", - " {\n", - " \"content\": \"Q: How can I contact customer support?\\nA: You can contact our support team via email at support@yourcompany.com or call us at (123) 456-7890 during our support hours, Monday to Friday, 9 AM to 5 PM EST.\"\n", - " },\n", - " {\n", - " \"content\": \"Q: What payment methods do you accept?\\nA: We accept Visa, MasterCard, American Express, PayPal, Apple Pay, and Google Pay.\"\n", - " },\n", - " # ... Add more FAQs as needed\n", - "]\n" - ], - "metadata": { - "id": "xEJy6e3t6iD1" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "Let's set up a robust knowledge base for an advanced customer support chatbot by leveraging LanceDB for efficient data storage and retrieval, and a Sentence Transformer model for generating vector embeddings.\n", - "\n", - "We will be using pre-trained Sentence Transformer model (BAAI/bge-small-en-v1.5) to convert textual support data into vector embeddings. 
You can change it accordingly.\n", - "\n", - "Then we will create a table - \"knowledge_base\" in the DB and insert all the customer support information in this table." - ], - "metadata": { - "id": "f3mhGwnfLN4H" - } - }, - { - "cell_type": "code", - "source": [ - "# Import necessary libraries\n", - "import os\n", - "import subprocess\n", - "import time\n", - "import threading\n", - "import lancedb\n", - "import requests\n", - "import json\n", - "from lancedb.pydantic import LanceModel, Vector\n", - "from lancedb.embeddings import get_registry" - ], - "metadata": { - "id": "TDf3zfjS6iGm" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# Initialize LanceDB\n", - "db = lancedb.connect(\"./content/lancedb\") # Local storage within Colab\n", - "\n", - "# Initialize the language model for generating embeddings\n", - "model = get_registry().get(\"sentence-transformers\").create(name=\"BAAI/bge-small-en-v1.5\", device=\"cpu\")\n", - "\n", - "# Create table from schema\n", - "class Documents(LanceModel):\n", - " vector: Vector(model.ndims()) = model.VectorField()\n", - " content: str = model.SourceField() # Field to store the actual content/response\n", - "\n", - "\n", - "# Knowledge data from the data above\n", - "knowledge_data = company_support_data\n", - "\n", - "# Define table name\n", - "table_name = \"knowledge_base\"\n", - "\n", - "# Retrieve existing table names\n", - "existing_tables = db.table_names()\n", - "\n", - "if table_name not in existing_tables:\n", - " # Create a new table with the schema and insert data\n", - " tbl = db.create_table(table_name, schema=Documents)\n", - " tbl.add(knowledge_data)\n", - " print(f\"Created new table '{table_name}' and inserted data.\")\n", - "else:\n", - " # Append data to the existing table\n", - " table = db.open_table(table_name)\n", - " table.add(knowledge_data, mode=\"overwrite\")\n", - " print(f\"Overwrited data to the existing table '{table_name}'.\")\n" - ], - 
"metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "kS6SZJYx6iJZ", - "outputId": "8f7b818f-0b15-4c2a-c9a3-2e4b3caa16f1" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Overwrited data to the existing table 'knowledge_base'.\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Configure Rasa Files" - ], - "metadata": { - "id": "SSc4gFYRWBjl" - } - }, - { - "cell_type": "markdown", - "source": [ - "#### domain.yml\n", - "The domain.yml file serves as the core configuration for your Rasa chatbot. It defines the chatbot’s intents, entities, slots, responses, actions, forms, and policies.\n", - "\n", - "It has the following Components -\n", - "1. **Intents**: Represents the purposes behind user messages (e.g., greetings, inquiries about account deletion).\n", - "2. **Entities**: Specific pieces of information extracted from user inputs (e.g., project names, dates).\n", - "3. **Responses**: Predefined messages the chatbot can send back to users.\n", - "4. **Actions**: Lists both built-in and custom actions that the chatbot can perform.\n", - "5. **Slots**: Variables that store information during a conversation to maintain context and continuity.\n", - "\n" - ], - "metadata": { - "id": "EzRfZD3mB6HI" - } - }, - { - "cell_type": "code", - "source": [ - "%%writefile domain.yml\n", - "version: \"3.0\"\n", - "\n", - "language: \"en\"\n", - "\n", - "intents:\n", - " - greet\n", - " - ask_knowledge\n", - " - goodbye\n", - "\n", - "entities:\n", - " - project\n", - " - service\n", - "\n", - "responses:\n", - " utter_greet:\n", - " - text: \"Hello! How can I assist you today?\"\n", - "\n", - " utter_goodbye:\n", - " - text: \"Goodbye! Have a great day!\"\n", - " - text: \"Bye! Let me know if you need anything else.\"\n", - " - text: \"See you later! 
Feel free to reach out anytime.\"\n", - "\n", - "actions:\n", - " - action_search_knowledge\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "jYTu3y516iOr", - "outputId": "155f888a-5fe1-486f-ca27-b53dd5ff93be" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Overwriting domain.yml\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "#### endpoints.yml\n", - "The endpoints.yml file configures various backend services that Rasa interacts with, such as action servers, event brokers, and tracker stores. It specifies the URLs and connection details for services like the custom action server (actions.py), enabling Rasa to communicate with it.\n", - "\n", - "Following configurations are relevant to this use-case -\n", - "1. **Action Endpoint**: Defines where Rasa should send requests to execute custom actions, typically pointing to the server running actions.py.\n", - "2. **Tracker Store**: (Optional) Configures how conversation states are stored, which can be essential for maintaining session information in more complex setups." 
- ], - "metadata": { - "id": "iLNhVSayCnc3" - } - }, - { - "cell_type": "code", - "source": [ - "%%writefile endpoints.yml\n", - "# This file contains the different endpoints your bot can use.\n", - "\n", - "# Server where the models are pulled from.\n", - "# https://rasa.com/docs/rasa/model-storage#fetching-models-from-a-server\n", - "\n", - "#models:\n", - "# url: http://my-server.com/models/default_core@latest\n", - "# wait_time_between_pulls: 10 # [optional](default: 100)\n", - "\n", - "# Server which runs your custom actions.\n", - "# https://rasa.com/docs/rasa/custom-actions\n", - "\n", - "action_endpoint:\n", - " url: \"http://localhost:5055/webhook\"\n", - "\n", - "# Tracker store which is used to store the conversations.\n", - "# By default the conversations are stored in memory.\n", - "# https://rasa.com/docs/rasa/tracker-stores\n", - "\n", - "#tracker_store:\n", - "# type: redis\n", - "# url: \n", - "# port: \n", - "# db: \n", - "# password: \n", - "# use_ssl: \n", - "\n", - "#tracker_store:\n", - "# type: mongod\n", - "# url: \n", - "# db: \n", - "# username: \n", - "# password: \n", - "\n", - "# Event broker which all conversation events should be streamed to.\n", - "# https://rasa.com/docs/rasa/event-brokers\n", - "\n", - "#event_broker:\n", - "# url: localhost\n", - "# username: username\n", - "# password: password\n", - "# queue: queue\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "9SHFq76-6iRm", - "outputId": "d1fd82c5-01ad-46f2-9ec9-2e5265e00941" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Overwriting endpoints.yml\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "#### stories.yml\n", - "\n", - "The stories.yml file contains training stories that define example conversational paths your chatbot can take. These stories help Rasa understand how to manage dialogues by learning from example interactions. 
It provides Rasa with sequences of user intents and corresponding actions, teaching it how to handle various conversational scenarios.\n" - ], - "metadata": { - "id": "Hi8C-oeNC85g" - } - }, - { - "cell_type": "code", - "source": [ - "%%writefile data/stories.yml\n", - "version: \"3.0\"\n", - "\n", - "stories:\n", - " - story: Greet and ask question\n", - " steps:\n", - " - intent: greet\n", - " - action: utter_greet\n", - " - intent: ask_knowledge\n", - " - action: action_search_knowledge\n", - "\n", - " - story: ask question\n", - " steps:\n", - " - intent: ask_knowledge\n", - " - action: action_search_knowledge\n", - "\n", - " - story: Goodbye\n", - " steps:\n", - " - intent: goodbye\n", - " - action: utter_goodbye\n", - "\n", - " - story: greet and goodbye\n", - " steps:\n", - " - intent: greet\n", - " - action: utter_greet\n", - " - intent: goodbye\n", - " - action: utter_goodbye" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "UUqa6mZb_x0m", - "outputId": "590517d2-0412-405d-dd54-81d0976824cd" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Overwriting data/stories.yml\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "#### rules.yml\n", - "The rules.yml file defines rule-based conversations that specify exact steps the chatbot should follow in certain situations. Unlike stories, rules are strict paths that Rasa should follow without deviation. It ensures that in specific scenarios, the chatbot responds in a predefined manner, which is crucial for critical interactions like error handling or confirmations.\n", - "\n", - "It works alongside stories to cover fixed conversational flows that require precise control, such as security checks or policy confirmations." 
- ], - "metadata": { - "id": "RdGtoB13DlMe" - } - }, - { - "cell_type": "code", - "source": [ - "%%writefile data/rules.yml\n", - "version: \"3.0\"\n", - "\n", - "rules:\n", - " - rule: Greet\n", - " steps:\n", - " - intent: greet\n", - " - action: utter_greet\n", - "\n", - " - rule: Goodbye\n", - " steps:\n", - " - intent: goodbye\n", - " - action: utter_goodbye\n", - "\n", - " - rule: Answer Knowledge Questions\n", - " steps:\n", - " - intent: ask_knowledge\n", - " - action: action_search_knowledge\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "pI0yN537_x3K", - "outputId": "8b7bfec5-3522-4e41-98cd-4e4cb385f5b2" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Overwriting data/rules.yml\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "#### nlu.yml\n", - "The nlu.yml file contains Natural Language Understanding (NLU) training data. It includes examples of user inputs categorized by intents and annotated with entities to train Rasa’s NLU component.\n", - "\n", - "It has the following role in the project -\n", - "1. **Intent Recognition**: Enables Rasa to accurately identify the user’s intent from their message.\n", - "2. **Entity Extraction**: Allows Rasa to extract specific information from user inputs, which can be used in custom actions to query LanceDB or generate responses via OpenAI’s LLM.\n", - "3. **Training Data Quality**: The quality and diversity of examples in nlu.yml directly impact the chatbot’s ability to understand and respond appropriately to a wide range of user queries." 
- ], - "metadata": { - "id": "CV1KKmQyD9VB" - } - }, - { - "cell_type": "code", - "source": [ - "%%writefile data/nlu.yml\n", - "# data/nlu.yml\n", - "\n", - "version: \"3.0\"\n", - "\n", - "nlu:\n", - " - intent: greet\n", - " examples: |\n", - " - hello\n", - " - hi\n", - " - hey\n", - " - good morning\n", - " - good evening\n", - " - greetings\n", - "\n", - " - intent: goodbye\n", - " examples: |\n", - " - bye\n", - " - goodbye\n", - " - see you later\n", - " - catch you later\n", - " - see ya\n", - " - take care\n", - "\n", - " - intent: ask_knowledge\n", - " examples: |\n", - " - I need help with my account\n", - " - Can you assist me with billing?\n", - " - How do I reset my password?\n", - " - I'm facing issues with my order\n", - " - Tell me about your support services\n", - " - How can I contact customer service?\n", - " - What are your support hours?\n", - " - I have a question about Project Alpha\n", - " - Help me understand Project Beta\n", - " - How can I track my purchase?\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "yhAW5Eh1_x5-", - "outputId": "b474f20e-9f7e-4dd6-ff2c-1ba12542ceb9" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Overwriting data/nlu.yml\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "#### config.yml\n", - "\n", - "The config.yml file defines the pipeline and policies used by Rasa for processing natural language inputs and managing dialogue workflows.\n", - "\n", - "Let us understand the components of the config.yml file\n", - "1. **NLP Pipeline**: Defines the components that handle preprocessing, feature extraction, intent classification, and entity recognition.\n", - "2. **Policies**: Dictate how the chatbot selects actions based on the current dialogue state, using rules, machine learning models, or a combination of both.\n", - "3. 
**Endpoint Integrations**: Can include configurations for connecting to external services or APIs, enhancing the chatbot’s capabilities.\n", - "\n", - "It specifies the sequence of components (like tokenizers, featurizers, classifiers) that process user inputs for intent recognition and entity extraction. Then it determines how Rasa decides the next action based on the conversation state and the trained model." - ], - "metadata": { - "id": "IDYPv5bPEnhb" - } - }, - { - "cell_type": "code", - "source": [ - "%%writefile config.yml\n", - "# config.yml\n", - "version: \"3.0\"\n", - "\n", - "language: \"en\"\n", - "\n", - "pipeline:\n", - "- name: WhitespaceTokenizer\n", - "- name: RegexFeaturizer\n", - "- name: LexicalSyntacticFeaturizer\n", - "- name: CountVectorsFeaturizer\n", - "- name: CountVectorsFeaturizer\n", - " analyzer: char_wb\n", - " min_ngram: 1\n", - " max_ngram: 4\n", - "- name: DIETClassifier\n", - " epochs: 100\n", - "- name: EntitySynonymMapper\n", - "- name: ResponseSelector\n", - " epochs: 100\n", - "\n", - "policies:\n", - "- name: RulePolicy\n", - "- name: UnexpecTEDIntentPolicy\n", - " max_history: 5\n", - " epochs: 100\n", - "- name: TEDPolicy\n", - " max_history: 5\n", - " epochs: 100\n", - "assistant_id: 20241227-151505-young-attachment" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "KDYvRppw_x8s", - "outputId": "9ddd9edb-f49e-4456-d8bd-379c18c6e0fb" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Overwriting config.yml\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Implement Custom Actions (actions.py) file\n", - "\n", - "The actions.py file is where you define custom actions for your Rasa chatbot. 
Custom actions are Python functions that can execute arbitrary logic, such as querying a database, calling external APIs, processing data, or integrating with services like LanceDB and OpenAI’s LLMs.\n", - "\n", - "We will be doing the following in this file -\n", - "1. **Get user input**: Processes user inputs and proceed to next step.\n", - "2. **Interacting with Databases**: Queries LanceDB to retrieve relevant information based on user queries.\n", - "3.**Enhancing Responses with LLMs**: Utilizes OpenAI’s API to generate detailed, contextually relevant responses that go beyond predefined replies." - ], - "metadata": { - "id": "48EX9SfzBQ_M" - } - }, - { - "cell_type": "code", - "source": [ - "%%writefile actions/actions.py\n", - "from typing import Any, Text, Dict, List\n", - "from rasa_sdk import Action, Tracker\n", - "from rasa_sdk.executor import CollectingDispatcher\n", - "import lancedb\n", - "import logging\n", - "from google.colab import userdata\n", - "import openai\n", - "import os\n", - "from dotenv import load_dotenv\n", - "\n", - "# Load environment variables from .env\n", - "load_dotenv()\n", - "\n", - "# Configure logging\n", - "logger = logging.getLogger(__name__)\n", - "logging.basicConfig(level=logging.INFO)\n", - "\n", - "class ActionSearchKnowledge(Action):\n", - " def name(self) -> Text:\n", - " return \"action_search_knowledge\"\n", - "\n", - " def __init__(self):\n", - "\n", - " # Initialize OpenAI API key from environment variables\n", - " self.openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n", - " if not self.openai_api_key:\n", - " logger.error(\"OpenAI API key not found. 
Please set OPENAI_API_KEY in your environment.\")\n", - " openai.api_key = self.openai_api_key\n", - "\n", - " # Initialize LanceDB connection once\n", - " try:\n", - " self.db = lancedb.connect(\"./content/lancedb\")\n", - " self.table_name = \"knowledge_base\"\n", - " if self.table_name not in self.db.table_names():\n", - " logger.error(f\"Table '{self.table_name}' does not exist in LanceDB.\")\n", - " self.table = None\n", - " else:\n", - " self.table = self.db.open_table(self.table_name)\n", - " logger.info(f\"Connected to table '{self.table_name}' in LanceDB.\")\n", - " except Exception as e:\n", - " logger.error(f\"Error connecting to LanceDB: {e}\")\n", - " self.table = None\n", - "\n", - " def run(self, dispatcher: CollectingDispatcher,\n", - " tracker: Tracker,\n", - " domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:\n", - "\n", - "\n", - " # Get the latest user message\n", - " user_message = tracker.latest_message.get('text')\n", - " logger.info(f\"User message: {user_message}\")\n", - "\n", - " if not user_message:\n", - " dispatcher.utter_message(text=\"Sorry, I didn't catch that. 
Could you please repeat?\")\n", - " return []\n", - "\n", - " try:\n", - " # Perform similarity search in LanceDB\n", - " query_result = self.table.search(user_message).limit(1).to_pandas()\n", - "\n", - " # Filter results based on the _distance parameter (smaller _distance means more similar)\n", - " relevant_content = [query_result.loc[0, \"content\"] if query_result.loc[0, \"_distance\"] < 0.65 else None][0]\n", - " response_text = \"Null\"\n", - "\n", - " # If we find relevant content , sent it to LLM or Else send automated reply\n", - " if not relevant_content == None:\n", - " logger.info(f\"Retrieved answer from knowledge base.\")\n", - " # Use OpenAI to generate a more refined response\n", - " response_text = self.generate_response(user_message, relevant_content)\n", - " else:\n", - " # If user has ask not a relevant question, reply with the following\n", - " response_text = \"I'm sorry, I don't have an answer to that question.\"\n", - " logger.info(f\"No matching content found in knowledge base.\")\n", - "\n", - " # Send the answer back to the user\n", - " dispatcher.utter_message(text=response_text)\n", - "\n", - " except Exception as e:\n", - " logger.error(f\"Error during search operation: {e}\")\n", - " dispatcher.utter_message(text=\"Sorry, something went wrong while processing your request.\")\n", - "\n", - " return []\n", - "\n", - " def generate_response(self, user_message: Text, relevant_content: Text) -> Text:\n", - " \"\"\"\n", - " Use OpenAI's API to generate a refined response based on user message and relevant content.\n", - " \"\"\"\n", - " try:\n", - " system_prompt = \"You are an company support assistant that provides helpful and accurate answers based on the provided information. 
You talk professionally and like a customer support executive.\"\n", - "\n", - " prompt = (\n", - " f\"User Question: {user_message}\\n\"\n", - " f\"Relevant Information: {relevant_content}\\n\\n\"\n", - " f\"Provide a detailed and helpful response to the user's question based on the relevant information above.\"\n", - " )\n", - "\n", - " response = openai.ChatCompletion.create(\n", - " model=\"gpt-4o-mini\",\n", - " messages=[\n", - " {\"role\": \"system\", \"content\": system_prompt},\n", - " {\"role\": \"user\", \"content\": prompt}\n", - " ],\n", - " max_tokens=450,\n", - " temperature=0.7,\n", - " )\n", - "\n", - " generated_text = response.choices[0].message['content'].strip()\n", - " logger.info(\"Generated response using OpenAI API.\")\n", - " return generated_text\n", - "\n", - " except Exception as e:\n", - " logger.error(f\"Error generating response with OpenAI API: {e}\")\n", - " return relevant_content # Fallback to relevant content if OpenAI fails" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "eenvEl8y6iL-", - "outputId": "f934500b-b4a9-4bd7-e0c8-743fbaf1f3d7" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Overwriting actions/actions.py\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "### Training RASA Model\n", - "The rasa train command is used to train the Rasa model based on the data and configurations defined in your project files (nlu.yml, stories.yml, rules.yml, etc.)\n", - "\n", - "It compiles the training data into a machine learning model that Rasa uses to interpret user inputs and manage conversations. The training process optimizes the model’s ability to accurately recognize intents, extract entities, and decide on appropriate actions, which is crucial for integrating seamless interactions with LanceDB and generating responses via LLMs." 
- ], - "metadata": { - "id": "NZyBVLzLFY0Z" - } + { + "cell_type": "markdown", + "source": [ + "# Quick overview of RASA x LanceDB Integration Workflow\n", + "\n", + "**Step 1: Define knowledge_data and store it in LanceDB:**\n", + "- Use LanceDB to store and manage structured knowledge data relevant to customer support.\n", + "\n", + "**Step 2: Configure Rasa Files:**\n", + "- nlu.yml: Train intent recognition and entity extraction.\n", + "- stories.yml & rules.yml: Define conversational flows and rules.\n", + "- domain.yml: Specify intents, entities, actions, and responses.\n", + "- config.yml: Set up the NLP pipeline and policies.\n", + "\n", + "**Step 3: Implement Custom Actions (actions.py):**\n", + "- Create actions that query LanceDB for relevant information based on user intents.\n", + "- Integrate OpenAI’s LLM to generate nuanced and context-aware responses.\n", + "\n", + "**Step 4: Train the Model (rasa train):**\n", + "- Compile and optimize the Rasa model based on the latest configurations and training data.\n", + "\n", + "**Step 5: Run Servers:**\n", + "- Action Server: Executes custom actions.\n", + "- Rasa Server: Handles user interactions and manages dialogue using the trained model.\n", + "\n", + "**Step 6: Deploy and Test:**\n", + "- Interact with the chatbot to ensure it accurately understands queries, retrieves information from LanceDB, and generates appropriate responses via OpenAI’s LLM." + ], + "metadata": { + "id": "IFuOxNwrGbd4" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Creating and storing the company support knowledge base in a LanceDB table" + ], + "metadata": { + "id": "vC1Ce1j_NxwF" + } + }, + { + "cell_type": "markdown", + "source": [ + "Let's create a dataset containing customer support information. We will use this data to populate a LanceDB table.\n", + "\n", + "It contains a list of dictionaries where each dictionary represents a piece of knowledge or an FAQ entry.
Each entry has a \"content\" key corresponding to the support information." + ], + "metadata": { + "id": "oriMmk0BNBG1" + } + }, + { + "cell_type": "code", + "source": [ + "# This dataset covers various categories such as Account Management, Billing & Payments, Technical Support, Shipping & Delivery, Returns & Refunds, Product Information, and Company Policies.\n", + "\n", + "company_support_data = [\n", + " # === Account Management ===\n", + " {\n", + " \"content\": \"To reset your password, navigate to the login page and click on 'Forgot Password'. You'll receive an email with instructions to create a new password.\"\n", + " },\n", + " {\n", + " \"content\": \"To update your account information, log in to your profile and click on 'Edit Profile'. From there, you can change your email, phone number, and other personal details.\"\n", + " },\n", + " {\n", + " \"content\": \"If you want to delete your account, please contact our support team at support@yourcompany.com with the subject line 'Account Deletion Request'. We'll assist you promptly.\"\n", + " },\n", + " {\n", + " \"content\": \"To change your username, go to your account settings, select 'Username', and enter your desired new username. If it's available, the change will be applied immediately.\"\n", + " },\n", + " # === Billing & Payments ===\n", + " {\n", + " \"content\": \"We accept various payment methods including Visa, MasterCard, American Express, PayPal, and Apple Pay.\"\n", + " },\n", + " {\n", + " \"content\": \"To view your billing history, log in to your account, navigate to the 'Billing' section, and select 'Billing History'. 
You'll see a list of all your past transactions.\"\n", + " },\n", + " {\n", + " \"content\": \"If you believe you've been incorrectly charged, please contact our billing support at billing@yourcompany.com with your order ID and details of the discrepancy.\"\n", + " },\n", + " {\n", + " \"content\": \"Your subscription will automatically renew on the renewal date unless you cancel it at least 24 hours before the renewal date.\"\n", + " },\n", + " # === Technical Support ===\n", + " {\n", + " \"content\": \"If you encounter issues while using our platform, try clearing your browser cache and cookies, then restart your browser.\"\n", + " },\n", + " {\n", + " \"content\": \"For mobile app issues, ensure that you have the latest version of the app installed. You can update the app from the App Store or Google Play Store.\"\n", + " },\n", + " {\n", + " \"content\": \"To report a bug, please email us at support@yourcompany.com with a detailed description of the issue and any relevant screenshots.\"\n", + " },\n", + " {\n", + " \"content\": \"Our platform requires JavaScript to be enabled. Please check your browser settings to ensure JavaScript is turned on.\"\n", + " },\n", + " # === Shipping & Delivery ===\n", + " {\n", + " \"content\": \"We offer free standard shipping on all orders over $50. Standard shipping typically takes 5-7 business days.\"\n", + " },\n", + " {\n", + " \"content\": \"Express shipping options are available at an additional cost. Delivery usually takes 1-3 business days.\"\n", + " },\n", + " {\n", + " \"content\": \"You can track your order status by logging into your account and visiting the 'Orders' section. 
Click on your order to see detailed tracking information.\"\n", + " },\n", + " {\n", + " \"content\": \"If your package is delayed, please contact our shipping department at shipping@yourcompany.com with your order number for assistance.\"\n", + " },\n", + " # === Returns & Refunds ===\n", + " {\n", + " \"content\": \"You can return most items within 30 days of purchase. Items must be in original condition and packaging.\"\n", + " },\n", + " {\n", + " \"content\": \"To initiate a return, log in to your account, go to the 'Orders' section, select the order you wish to return, and click on 'Start Return'. Follow the on-screen instructions.\"\n", + " },\n", + " {\n", + " \"content\": \"Refunds are processed within 7-10 business days after we receive your returned item. The refund will be issued to your original payment method.\"\n", + " },\n", + " {\n", + " \"content\": \"Please note that shipping costs are non-refundable unless the return is due to a defect or error on our part.\"\n", + " },\n", + " # === Order Management ===\n", + " {\n", + " \"content\": \"To modify your order after placing it, please contact our support team at support@yourcompany.com as soon as possible. Changes can only be made within one hour of order placement.\"\n", + " },\n", + " {\n", + " \"content\": \"If you received the wrong item, please contact us immediately with your order number and a photo of the received product. We'll arrange for a replacement or refund.\"\n", + " },\n", + " {\n", + " \"content\": \"To cancel your order, log in to your account, go to the 'Orders' section, select the order you wish to cancel, and click on 'Cancel Order'.\"\n", + " },\n", + " {\n", + " \"content\": \"Parts of your order may ship separately. You will receive separate tracking information for each shipment.\"\n", + " },\n", + " # === Account Security ===\n", + " {\n", + " \"content\": \"For enhanced security, enable two-factor authentication (2FA) in your account settings. 
This adds an extra layer of protection to your account.\"\n", + " },\n", + " {\n", + " \"content\": \"If you suspect unauthorized access to your account, change your password immediately and contact our support team.\"\n", + " },\n", + " {\n", + " \"content\": \"Never share your account credentials with anyone. We will never ask for your password via email or phone.\"\n", + " },\n", + " {\n", + " \"content\": \"Regularly review your account activity to ensure all actions are authorized. You can view your recent activity in the 'Account Activity' section.\"\n", + " },\n", + " # === Shipping Issues ===\n", + " {\n", + " \"content\": \"If your package hasn't arrived within the estimated delivery time, please use the tracking number in your order details to check its status, or contact our shipping support at shipping@yourcompany.com.\"\n", + " },\n", + " {\n", + " \"content\": \"For international shipments, please note that customs processing times may affect delivery dates. We are not responsible for delays caused by customs.\"\n", + " },\n", + " {\n", + " \"content\": \"Ensure that your shipping address is correct before placing an order. Incorrect addresses may lead to delivery delays or package returns.\"\n", + " },\n", + " {\n", + " \"content\": \"We offer shipment insurance for an additional fee. This covers lost or damaged packages during transit. To add insurance, select the option during checkout.\"\n", + " },\n", + " # === Product Availability ===\n", + " {\n", + " \"content\": \"If a product is out of stock, you can sign up for restock notifications on the product page. We'll notify you via email once it's available again.\"\n", + " },\n", + " {\n", + " \"content\": \"Limited edition items are subject to availability. We recommend placing your orders early to secure these exclusive products.\"\n", + " },\n", + " {\n", + " \"content\": \"Pre-orders are accepted for upcoming releases. 
Pre-order now to ensure you receive the product as soon as it ships.\"\n", + " },\n", + " {\n", + " \"content\": \"Sold-out items cannot be purchased until new stock arrives. Please check back regularly or join our waitlist.\"\n", + " },\n", + " # === Shipping Costs ===\n", + " {\n", + " \"content\": \"Shipping costs vary based on your location and the size of your order. You can view the estimated shipping cost during the checkout process.\"\n", + " },\n", + " {\n", + " \"content\": \"We offer free standard shipping on orders over $75. Orders below this threshold will incur standard shipping fees.\"\n", + " },\n", + " {\n", + " \"content\": \"Express shipping is available for an additional fee. If you need your order faster, select the express option at checkout.\"\n", + " },\n", + " {\n", + " \"content\": \"Please note that international shipping fees will apply for orders shipped outside the United States.\"\n", + " },\n", + " # === Subscription Services ===\n", + " {\n", + " \"content\": \"Subscribe to our monthly box to receive exclusive products and discounts. You can manage your subscription in your account settings.\"\n", + " },\n", + " {\n", + " \"content\": \"Canceling your subscription can be done at any time. Please visit the 'Subscriptions' section in your account to make changes.\"\n", + " },\n", + " {\n", + " \"content\": \"Subscribers receive early access to new products and special promotions throughout the year.\"\n", + " },\n", + " {\n", + " \"content\": \"Pause your subscription if you need a break. 
You can resume it whenever you're ready by logging into your account.\"\n", + " },\n", + " # === Order Tracking ===\n", + " {\n", + " \"content\": \"To track your order, log in to your account, go to the 'Orders' section, and click on 'Track Order' next to the relevant order.\"\n", + " },\n", + " {\n", + " \"content\": \"You'll receive email notifications with tracking updates once your order has been shipped.\"\n", + " },\n", + " {\n", + " \"content\": \"If you haven't received your tracking information, please check your spam folder or contact our support team.\"\n", + " },\n", + " {\n", + " \"content\": \"For real-time tracking, use the tracking number provided and visit the carrier's website directly.\"\n", + " },\n", + " # === Account Reactivation ===\n", + " {\n", + " \"content\": \"If you've previously closed your account and wish to reactivate it, please contact our support team at support@yourcompany.com with your account details.\"\n", + " },\n", + " {\n", + " \"content\": \"Reactivating your account will restore your previous order history and saved preferences.\"\n", + " },\n", + " {\n", + " \"content\": \"Please note that some data may have been permanently deleted if the account was closed more than six months ago.\"\n", + " },\n", + " {\n", + " \"content\": \"Upon reactivation, you may need to update your password and verify your email address for security purposes.\"\n", + " },\n", + " # === Payment Issues ===\n", + " {\n", + " \"content\": \"If your payment is declined, please verify that your billing information is correct and that your card has sufficient funds.\"\n", + " },\n", + " {\n", + " \"content\": \"For any issues with payments made through PayPal, please ensure your account is in good standing and that your linked bank accounts are active.\"\n", + " },\n", + " {\n", + " \"content\": \"Expired or invalid credit cards cannot be used. 
Please update your payment method in your account settings.\"\n", + " },\n", + " {\n", + " \"content\": \"If you've been double-charged, contact our billing support at billing@yourcompany.com with your order ID and transaction details.\"\n", + " },\n", + " # === Warranty Information ===\n", + " {\n", + " \"content\": \"All our products come with a one-year warranty covering manufacturing defects and hardware malfunctions.\"\n", + " },\n", + " {\n", + " \"content\": \"To claim a warranty, contact our support team with your order number and a description of the issue.\"\n", + " },\n", + " {\n", + " \"content\": \"Warranty does not cover accidental damage, unauthorized repairs, or normal wear and tear.\"\n", + " },\n", + " {\n", + " \"content\": \"Extended warranty options are available at the time of purchase for an additional fee.\"\n", + " },\n", + " # === Privacy & Data Security ===\n", + " {\n", + " \"content\": \"We prioritize your privacy and ensure that your personal data is protected. Please refer to our Privacy Policy for detailed information.\"\n", + " },\n", + " {\n", + " \"content\": \"You have control over your personal information. Manage your privacy settings in the 'Account Settings' section.\"\n", + " },\n", + " {\n", + " \"content\": \"We do not share your personal information with third parties without your explicit consent, except as required by law.\"\n", + " },\n", + " {\n", + " \"content\": \"For any concerns about data security or privacy, contact our privacy team at privacy@yourcompany.com.\"\n", + " },\n", + " # === Shipping Restrictions ===\n", + " {\n", + " \"content\": \"We currently do not ship to P.O. Boxes or APO/FPO addresses. Please provide a valid residential or business address.\"\n", + " },\n", + " {\n", + " \"content\": \"Certain products may have shipping restrictions based on international regulations. 
Check the product page for any specific restrictions.\"\n", + " },\n", + " {\n", + " \"content\": \"Hazardous materials and perishable items have special shipping requirements. Ensure compliance by reviewing our shipping guidelines.\"\n", + " },\n", + " {\n", + " \"content\": \"Import duties and taxes are the responsibility of the recipient for international orders. These fees are not included in the shipping cost.\"\n", + " },\n", + " # === Frequently Asked Questions (FAQs) ===\n", + " {\n", + " \"content\": \"Q: How can I track my order?\\nA: You can track your order by logging into your account, navigating to the 'Orders' section, and clicking on 'Track Order' next to your order.\"\n", + " },\n", + " {\n", + " \"content\": \"Q: What is your return policy?\\nA: We accept returns within 30 days of purchase. Items must be in original condition and packaging. To initiate a return, visit the 'Orders' section in your account.\"\n", + " },\n", + " {\n", + " \"content\": \"Q: Do you offer international shipping?\\nA: Yes, we ship to select countries internationally. Shipping fees and delivery times may vary based on location.\"\n", + " },\n", + " {\n", + " \"content\": \"Q: How do I change my shipping address?\\nA: To change your shipping address, go to your account settings, click on 'Edit Address', and update your details. Note that changes can only be made before the order is shipped.\"\n", + " },\n", + " {\n", + " \"content\": \"Q: Can I cancel my order?\\nA: Yes, you can cancel your order within one hour of placing it. To cancel, go to the 'Orders' section in your account and select 'Cancel Order'.\"\n", + " },\n", + " {\n", + " \"content\": \"Q: How secure is my personal information?\\nA: We use industry-standard encryption and security measures to protect your personal information. 
Refer to our Privacy Policy for more details.\"\n", + " },\n", + " {\n", + " \"content\": \"Q: What payment methods are accepted?\\nA: We accept Visa, MasterCard, American Express, PayPal, Apple Pay, and Google Pay.\"\n", + " },\n", + " {\n", + " \"content\": \"Q: How do I apply a promo code?\\nA: During checkout, enter your promo code in the 'Promo Code' field and click 'Apply' to receive your discount.\"\n", + " },\n", + " {\n", + " \"content\": \"Q: Who do I contact for support?\\nA: For any inquiries or assistance, contact our support team at support@yourcompany.com or call us at (123) 456-7890.\"\n", + " },\n", + " # === Account Verification ===\n", + " {\n", + " \"content\": \"To verify your account, check your email for a verification link after signing up. Click on the link to activate your account.\"\n", + " },\n", + " {\n", + " \"content\": \"If you did not receive a verification email, please check your spam folder or resend the verification email from your account settings.\"\n", + " },\n", + " {\n", + " \"content\": \"Account verification is required to access all features of our platform, including order placement and account management.\"\n", + " },\n", + " {\n", + " \"content\": \"For any issues with account verification, contact our support team at support@yourcompany.com.\"\n", + " },\n", + " # === Contact Information ===\n", + " {\n", + " \"content\": \"You can reach our support team via email at support@yourcompany.com or call us at (123) 456-7890 from Monday to Friday, 9 AM to 5 PM EST.\"\n", + " },\n", + " {\n", + " \"content\": \"Follow us on social media for the latest updates, promotions, and customer support: Facebook, Twitter, Instagram.\"\n", + " },\n", + " {\n", + " \"content\": \"Visit our 'Contact Us' page on our website for a direct contact form and additional support options.\"\n", + " },\n", + " {\n", + " \"content\": \"Our headquarters are located at 1234 Business Ave, Suite 100, City, State, ZIP Code.\"\n", + " },\n", + " # === 
Order Confirmation ===\n", + " {\n", + " \"content\": \"After placing an order, you'll receive a confirmation email with your order details and estimated delivery date.\"\n", + " },\n", + " {\n", + " \"content\": \"If you haven't received your order confirmation, please check your spam folder or contact our support team for assistance.\"\n", + " },\n", + " {\n", + " \"content\": \"The order confirmation email includes a summary of your purchase, billing information, and contact details for support.\"\n", + " },\n", + " {\n", + " \"content\": \"For large orders or bulk purchases, you'll receive a separate confirmation email with additional details.\"\n", + " },\n", + " # === Shipping Options ===\n", + " {\n", + " \"content\": \"We offer three shipping options: Standard, Express, and Overnight. Choose the one that best fits your needs during checkout.\"\n", + " },\n", + " {\n", + " \"content\": \"Standard shipping is free for orders over $75. It typically takes 5-7 business days for delivery.\"\n", + " },\n", + " {\n", + " \"content\": \"Express shipping costs are calculated based on the weight and destination of your order. Estimated delivery is 2-3 business days.\"\n", + " },\n", + " {\n", + " \"content\": \"Overnight shipping is available for urgent orders. Placing your order before 10 AM ensures next-day delivery.\"\n", + " },\n", + " # === Custom Order Handling ===\n", + " {\n", + " \"content\": \"For custom orders, please contact our support team at support@yourcompany.com with detailed specifications and requirements.\"\n", + " },\n", + " {\n", + " \"content\": \"Custom orders may require additional processing time. Delivery dates will be provided after discussing your specific needs.\"\n", + " },\n", + " {\n", + " \"content\": \"Pricing for custom orders varies based on the complexity and materials involved. 
Our support team will provide a quote upon request.\"\n", + " },\n", + " {\n", + " \"content\": \"Please review all custom order details carefully before finalizing to ensure accuracy and satisfaction.\"\n", + " },\n", + " # === FAQs Specific ===\n", + " {\n", + " \"content\": \"Q: How do I track my order?\\nA: You can track your order by logging into your account, going to the 'Orders' section, and clicking on 'Track Order' next to your recent purchase.\"\n", + " },\n", + " {\n", + " \"content\": \"Q: What is your return policy?\\nA: We accept returns within 30 days of purchase. Items must be in their original condition and packaging. To initiate a return, visit the 'Orders' section in your account and select 'Return Item'.\"\n", + " },\n", + " {\n", + " \"content\": \"Q: Do you offer international shipping?\\nA: Yes, we ship to select countries internationally. Shipping fees and delivery times vary based on your location. Please refer to the 'Shipping Information' page for more details.\"\n", + " },\n", + " {\n", + " \"content\": \"Q: How can I contact customer support?\\nA: You can contact our support team via email at support@yourcompany.com or call us at (123) 456-7890 during our support hours, Monday to Friday, 9 AM to 5 PM EST.\"\n", + " },\n", + " {\n", + " \"content\": \"Q: What payment methods do you accept?\\nA: We accept Visa, MasterCard, American Express, PayPal, Apple Pay, and Google Pay.\"\n", + " },\n", + " # ... Add more FAQs as needed\n", + "]" + ], + "metadata": { + "id": "xEJy6e3t6iD1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Let's set up a robust knowledge base for an advanced customer support chatbot by leveraging LanceDB for efficient data storage and retrieval, and a Sentence Transformer model for generating vector embeddings.\n", + "\n", + "We will be using pre-trained Sentence Transformer model (BAAI/bge-small-en-v1.5) to convert textual support data into vector embeddings. 
You can swap in a different embedding model if you prefer.\n", + "\n", + "Then we will create a table named \"knowledge_base\" in the database and insert all the customer support information into it." + ], + "metadata": { + "id": "f3mhGwnfLN4H" + } + }, + { + "cell_type": "code", + "source": [ + "# Import necessary libraries\n", + "import os\n", + "import subprocess\n", + "import time\n", + "import threading\n", + "import lancedb\n", + "import requests\n", + "import json\n", + "from lancedb.pydantic import LanceModel, Vector\n", + "from lancedb.embeddings import get_registry" + ], + "metadata": { + "id": "TDf3zfjS6iGm" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Initialize LanceDB\n", + "db = lancedb.connect(\"./content/lancedb\") # Local storage within Colab\n", + "\n", + "# Initialize the language model for generating embeddings\n", + "model = (\n", + " get_registry()\n", + " .get(\"sentence-transformers\")\n", + " .create(name=\"BAAI/bge-small-en-v1.5\", device=\"cpu\")\n", + ")\n", + "\n", + "\n", + "# Define the table schema\n", + "class Documents(LanceModel):\n", + " vector: Vector(model.ndims()) = model.VectorField()\n", + " content: str = model.SourceField() # Field to store the actual content/response\n", + "\n", + "\n", + "# Knowledge data from the data above\n", + "knowledge_data = company_support_data\n", + "\n", + "# Define table name\n", + "table_name = \"knowledge_base\"\n", + "\n", + "# Retrieve existing table names\n", + "existing_tables = db.table_names()\n", + "\n", + "if table_name not in existing_tables:\n", + " # Create a new table with the schema and insert data\n", + " tbl = db.create_table(table_name, schema=Documents)\n", + " tbl.add(knowledge_data)\n", + " print(f\"Created new table '{table_name}' and inserted data.\")\n", + "else:\n", + " # Overwrite the existing table's contents with the knowledge data\n", + " table = db.open_table(table_name)\n", + " table.add(knowledge_data, mode=\"overwrite\")\n", + " print(f\"Overwrited data to the
existing table '{table_name}'.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "kS6SZJYx6iJZ", + "outputId": "8f7b818f-0b15-4c2a-c9a3-2e4b3caa16f1" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "# Train the Rasa model\n", - "!rasa train" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "6FmKesSV_x_e", - "outputId": "2b87edf2-8a78-4c26-9845-688087eae709" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "/usr/local/lib/python3.10/dist-packages/rasa/core/tracker_store.py:1044: MovedIn20Warning: \u001b[31mDeprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. \u001b[32mTo prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to \"sqlalchemy<2.0\". \u001b[36mSet environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message.\u001b[0m (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", - " Base: DeclarativeMeta = declarative_base()\n", - "/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/validation.py:134: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html\n", - " import pkg_resources\n", - "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.\n", - "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. 
See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", - " declare_namespace(pkg)\n", - "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.cloud')`.\n", - "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", - " declare_namespace(pkg)\n", - "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('mpl_toolkits')`.\n", - "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", - " declare_namespace(pkg)\n", - "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('ruamel')`.\n", - "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", - " declare_namespace(pkg)\n", - "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('sphinxcontrib')`.\n", - "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. 
See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", - " declare_namespace(pkg)\n", - "2024-12-30 21:14:02 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.cli.train\u001b[0m - Started validating domain and training data...\n", - "/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/util.py:52: DeprecationWarning: jax.xla_computation is deprecated. Please use the AOT APIs.\n", - " from jax import xla_computation as _xla_computation\n", - "2024-12-30 21:14:07 \u001b[1;30mINFO \u001b[0m \u001b[34mnumexpr.utils\u001b[0m - NumExpr defaulting to 2 threads.\n", - "\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/domain.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", - "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/data/nlu.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", - "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/data/rules.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", - "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/data/stories.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. 
Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", - "\u001b[0m2024-12-30 21:14:08 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.validator\u001b[0m - Validating intents...\n", - "2024-12-30 21:14:08 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.validator\u001b[0m - Validating uniqueness of intents and stories...\n", - "2024-12-30 21:14:08 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.validator\u001b[0m - Validating utterances...\n", - "2024-12-30 21:14:08 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.validator\u001b[0m - Story structure validation...\n", - "Processed story blocks: 100% 4/4 [00:00<00:00, 1979.38it/s, # trackers=1]\n", - "2024-12-30 21:14:08 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.core.training.story_conflict\u001b[0m - Considering all preceding turns for conflict analysis.\n", - "2024-12-30 21:14:08 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.validator\u001b[0m - No story structure conflicts found.\n", - "\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/domain.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", - "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/data/rules.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", - "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/data/stories.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. 
Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", - "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/data/nlu.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", - "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/engine/recipes/recipe.py:35: FutureWarning: From Rasa Open Source 4.0.0 onwards it will be required to specify a recipe in your model configuration. Defaulting to recipe 'default.v1'.\n", - " rasa.shared.utils.io.raise_deprecation_warning(\n", - "\u001b[0m2024-12-30 21:14:11 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'CountVectorsFeaturizer' from cache.\n", - "2024-12-30 21:14:11 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'CountVectorsFeaturizer' from cache.\n", - "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'DIETClassifier' from cache.\n", - "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'EntitySynonymMapper' from cache.\n", - "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'LexicalSyntacticFeaturizer' from cache.\n", - "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'RegexFeaturizer' from cache.\n", - "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'ResponseSelector' from cache.\n", - "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m 
\u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'RulePolicy' from cache.\n", - "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'TEDPolicy' from cache.\n", - "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'UnexpecTEDIntentPolicy' from cache.\n", - "\u001b[92mYour Rasa model is trained and saved at 'models/20241230-211410-synchronic-diff.tar.gz'.\u001b[0m\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Overwrited data to the existing table 'knowledge_base'.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Configure Rasa Files" + ], + "metadata": { + "id": "SSc4gFYRWBjl" + } + }, + { + "cell_type": "markdown", + "source": [ + "#### domain.yml\n", + "The domain.yml file serves as the core configuration for your Rasa chatbot. It defines the chatbot’s intents, entities, slots, responses, actions, forms, and policies.\n", + "\n", + "It has the following Components -\n", + "1. **Intents**: Represents the purposes behind user messages (e.g., greetings, inquiries about account deletion).\n", + "2. **Entities**: Specific pieces of information extracted from user inputs (e.g., project names, dates).\n", + "3. **Responses**: Predefined messages the chatbot can send back to users.\n", + "4. **Actions**: Lists both built-in and custom actions that the chatbot can perform.\n", + "5. **Slots**: Variables that store information during a conversation to maintain context and continuity.\n", + "\n" + ], + "metadata": { + "id": "EzRfZD3mB6HI" + } + }, + { + "cell_type": "code", + "source": [ + "%%writefile domain.yml\n", + "version: \"3.0\"\n", + "\n", + "language: \"en\"\n", + "\n", + "intents:\n", + " - greet\n", + " - ask_knowledge\n", + " - goodbye\n", + "\n", + "entities:\n", + " - project\n", + " - service\n", + "\n", + "responses:\n", + " utter_greet:\n", + " - text: \"Hello! 
How can I assist you today?\"\n", + "\n", + " utter_goodbye:\n", + " - text: \"Goodbye! Have a great day!\"\n", + " - text: \"Bye! Let me know if you need anything else.\"\n", + " - text: \"See you later! Feel free to reach out anytime.\"\n", + "\n", + "actions:\n", + " - action_search_knowledge\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "jYTu3y516iOr", + "outputId": "155f888a-5fe1-486f-ca27-b53dd5ff93be" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "### Running Servers" - ], - "metadata": { - "id": "sQy3kIUMONDl" - } + "output_type": "stream", + "name": "stdout", + "text": [ + "Overwriting domain.yml\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "#### endpoints.yml\n", + "The endpoints.yml file configures various backend services that Rasa interacts with, such as action servers, event brokers, and tracker stores. It specifies the URLs and connection details for services like the custom action server (actions.py), enabling Rasa to communicate with it.\n", + "\n", + "Following configurations are relevant to this use-case -\n", + "1. **Action Endpoint**: Defines where Rasa should send requests to execute custom actions, typically pointing to the server running actions.py.\n", + "2. **Tracker Store**: (Optional) Configures how conversation states are stored, which can be essential for maintaining session information in more complex setups." 
+ ], + "metadata": { + "id": "iLNhVSayCnc3" + } + }, + { + "cell_type": "code", + "source": [ + "%%writefile endpoints.yml\n", + "# This file contains the different endpoints your bot can use.\n", + "\n", + "# Server where the models are pulled from.\n", + "# https://rasa.com/docs/rasa/model-storage#fetching-models-from-a-server\n", + "\n", + "#models:\n", + "# url: http://my-server.com/models/default_core@latest\n", + "# wait_time_between_pulls: 10 # [optional](default: 100)\n", + "\n", + "# Server which runs your custom actions.\n", + "# https://rasa.com/docs/rasa/custom-actions\n", + "\n", + "action_endpoint:\n", + " url: \"http://localhost:5055/webhook\"\n", + "\n", + "# Tracker store which is used to store the conversations.\n", + "# By default the conversations are stored in memory.\n", + "# https://rasa.com/docs/rasa/tracker-stores\n", + "\n", + "#tracker_store:\n", + "# type: redis\n", + "# url: \n", + "# port: \n", + "# db: \n", + "# password: \n", + "# use_ssl: \n", + "\n", + "#tracker_store:\n", + "# type: mongod\n", + "# url: \n", + "# db: \n", + "# username: \n", + "# password: \n", + "\n", + "# Event broker which all conversation events should be streamed to.\n", + "# https://rasa.com/docs/rasa/event-brokers\n", + "\n", + "#event_broker:\n", + "# url: localhost\n", + "# username: username\n", + "# password: password\n", + "# queue: queue\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "9SHFq76-6iRm", + "outputId": "d1fd82c5-01ad-46f2-9ec9-2e5265e00941" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "The **Rasa Server** is the core component that handles user interactions. It processes incoming messages, interprets user intents, manages conversations, and generates appropriate responses based on the trained model and defined dialogue flows." 
- ], - "metadata": { - "id": "zVnktY3tODA6" - } + "output_type": "stream", + "name": "stdout", + "text": [ + "Overwriting endpoints.yml\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "#### stories.yml\n", + "\n", + "The stories.yml file contains training stories that define example conversational paths your chatbot can take. These stories help Rasa understand how to manage dialogues by learning from example interactions. It provides Rasa with sequences of user intents and corresponding actions, teaching it how to handle various conversational scenarios.\n" + ], + "metadata": { + "id": "Hi8C-oeNC85g" + } + }, + { + "cell_type": "code", + "source": [ + "%%writefile data/stories.yml\n", + "version: \"3.0\"\n", + "\n", + "stories:\n", + " - story: Greet and ask question\n", + " steps:\n", + " - intent: greet\n", + " - action: utter_greet\n", + " - intent: ask_knowledge\n", + " - action: action_search_knowledge\n", + "\n", + " - story: ask question\n", + " steps:\n", + " - intent: ask_knowledge\n", + " - action: action_search_knowledge\n", + "\n", + " - story: Goodbye\n", + " steps:\n", + " - intent: goodbye\n", + " - action: utter_goodbye\n", + "\n", + " - story: greet and goodbye\n", + " steps:\n", + " - intent: greet\n", + " - action: utter_greet\n", + " - intent: goodbye\n", + " - action: utter_goodbye" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "UUqa6mZb_x0m", + "outputId": "590517d2-0412-405d-dd54-81d0976824cd" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "import threading\n", - "import os\n", - "\n", - "def run_rasa_server():\n", - " os.system(\"rasa run\")\n", - "\n", - "threading.Thread(target=run_rasa_server).start()\n", - "# Wait for the Rasa server to start\n", - "time.sleep(35)" - ], - "metadata": { - "id": "NZq0FZgO_yCL" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "Overwriting 
data/stories.yml\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "#### rules.yml\n", + "The rules.yml file defines rule-based conversations that specify exact steps the chatbot should follow in certain situations. Unlike stories, rules are strict paths that Rasa should follow without deviation. It ensures that in specific scenarios, the chatbot responds in a predefined manner, which is crucial for critical interactions like error handling or confirmations.\n", + "\n", + "It works alongside stories to cover fixed conversational flows that require precise control, such as security checks or policy confirmations." + ], + "metadata": { + "id": "RdGtoB13DlMe" + } + }, + { + "cell_type": "code", + "source": [ + "%%writefile data/rules.yml\n", + "version: \"3.0\"\n", + "\n", + "rules:\n", + " - rule: Greet\n", + " steps:\n", + " - intent: greet\n", + " - action: utter_greet\n", + "\n", + " - rule: Goodbye\n", + " steps:\n", + " - intent: goodbye\n", + " - action: utter_goodbye\n", + "\n", + " - rule: Answer Knowledge Questions\n", + " steps:\n", + " - intent: ask_knowledge\n", + " - action: action_search_knowledge\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "pI0yN537_x3K", + "outputId": "8b7bfec5-3522-4e41-98cd-4e4cb385f5b2" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "The **Action Server** is a separate service that executes Custom Actions defined in your project (actions.py). These actions perform specialized tasks such as querying databases, processing data, or integrating with external APIs." - ], - "metadata": { - "id": "FYV_JvcsOIuM" - } + "output_type": "stream", + "name": "stdout", + "text": [ + "Overwriting data/rules.yml\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "#### nlu.yml\n", + "The nlu.yml file contains Natural Language Understanding (NLU) training data. 
It includes examples of user inputs categorized by intents and annotated with entities to train Rasa’s NLU component.\n", + "\n", + "It has the following role in the project -\n", + "1. **Intent Recognition**: Enables Rasa to accurately identify the user’s intent from their message.\n", + "2. **Entity Extraction**: Allows Rasa to extract specific information from user inputs, which can be used in custom actions to query LanceDB or generate responses via OpenAI’s LLM.\n", + "3. **Training Data Quality**: The quality and diversity of examples in nlu.yml directly impact the chatbot’s ability to understand and respond appropriately to a wide range of user queries." + ], + "metadata": { + "id": "CV1KKmQyD9VB" + } + }, + { + "cell_type": "code", + "source": [ + "%%writefile data/nlu.yml\n", + "# data/nlu.yml\n", + "\n", + "version: \"3.0\"\n", + "\n", + "nlu:\n", + " - intent: greet\n", + " examples: |\n", + " - hello\n", + " - hi\n", + " - hey\n", + " - good morning\n", + " - good evening\n", + " - greetings\n", + "\n", + " - intent: goodbye\n", + " examples: |\n", + " - bye\n", + " - goodbye\n", + " - see you later\n", + " - catch you later\n", + " - see ya\n", + " - take care\n", + "\n", + " - intent: ask_knowledge\n", + " examples: |\n", + " - I need help with my account\n", + " - Can you assist me with billing?\n", + " - How do I reset my password?\n", + " - I'm facing issues with my order\n", + " - Tell me about your support services\n", + " - How can I contact customer service?\n", + " - What are your support hours?\n", + " - I have a question about Project Alpha\n", + " - Help me understand Project Beta\n", + " - How can I track my purchase?\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "yhAW5Eh1_x5-", + "outputId": "b474f20e-9f7e-4dd6-ff2c-1ba12542ceb9" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "import threading\n", - "import os\n", - "\n", - "def run_action_server():\n", - " 
os.system(\"rasa run actions\")\n", - "\n", - "threading.Thread(target=run_action_server).start()\n", - "# Wait for the Rasa server to start\n", - "time.sleep(35)" - ], - "metadata": { - "id": "op2C5AsO_yFA" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "Overwriting data/nlu.yml\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "#### config.yml\n", + "\n", + "The config.yml file defines the pipeline and policies used by Rasa for processing natural language inputs and managing dialogue workflows.\n", + "\n", + "Let us understand the components of the config.yml file\n", + "1. **NLP Pipeline**: Defines the components that handle preprocessing, feature extraction, intent classification, and entity recognition.\n", + "2. **Policies**: Dictate how the chatbot selects actions based on the current dialogue state, using rules, machine learning models, or a combination of both.\n", + "3. **Endpoint Integrations**: Can include configurations for connecting to external services or APIs, enhancing the chatbot’s capabilities.\n", + "\n", + "It specifies the sequence of components (like tokenizers, featurizers, classifiers) that process user inputs for intent recognition and entity extraction. Then it determines how Rasa decides the next action based on the conversation state and the trained model." 
+ ], + "metadata": { + "id": "IDYPv5bPEnhb" + } + }, + { + "cell_type": "code", + "source": [ + "%%writefile config.yml\n", + "# config.yml\n", + "version: \"3.0\"\n", + "\n", + "language: \"en\"\n", + "\n", + "pipeline:\n", + "- name: WhitespaceTokenizer\n", + "- name: RegexFeaturizer\n", + "- name: LexicalSyntacticFeaturizer\n", + "- name: CountVectorsFeaturizer\n", + "- name: CountVectorsFeaturizer\n", + " analyzer: char_wb\n", + " min_ngram: 1\n", + " max_ngram: 4\n", + "- name: DIETClassifier\n", + " epochs: 100\n", + "- name: EntitySynonymMapper\n", + "- name: ResponseSelector\n", + " epochs: 100\n", + "\n", + "policies:\n", + "- name: RulePolicy\n", + "- name: UnexpecTEDIntentPolicy\n", + " max_history: 5\n", + " epochs: 100\n", + "- name: TEDPolicy\n", + " max_history: 5\n", + " epochs: 100\n", + "assistant_id: 20241227-151505-young-attachment" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "KDYvRppw_x8s", + "outputId": "9ddd9edb-f49e-4456-d8bd-379c18c6e0fb" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "### Let's test out chatbot" - ], - "metadata": { - "id": "X_v2YzDvOUff" - } + "output_type": "stream", + "name": "stdout", + "text": [ + "Overwriting config.yml\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Implement Custom Actions (actions.py) file\n", + "\n", + "The actions.py file is where you define custom actions for your Rasa chatbot. Custom actions are Python functions that can execute arbitrary logic, such as querying a database, calling external APIs, processing data, or integrating with services like LanceDB and OpenAI’s LLMs.\n", + "\n", + "We will be doing the following in this file -\n", + "1. **Get user input**: Processes user inputs and proceed to next step.\n", + "2. 
**Interacting with Databases**: Queries LanceDB to retrieve relevant information based on user queries.\n", + "3.**Enhancing Responses with LLMs**: Utilizes OpenAI’s API to generate detailed, contextually relevant responses that go beyond predefined replies." + ], + "metadata": { + "id": "48EX9SfzBQ_M" + } + }, + { + "cell_type": "code", + "source": [ + "%%writefile actions/actions.py\n", + "from typing import Any, Text, Dict, List\n", + "from rasa_sdk import Action, Tracker\n", + "from rasa_sdk.executor import CollectingDispatcher\n", + "import lancedb\n", + "import logging\n", + "from google.colab import userdata\n", + "import openai\n", + "import os\n", + "from dotenv import load_dotenv\n", + "\n", + "# Load environment variables from .env\n", + "load_dotenv()\n", + "\n", + "# Configure logging\n", + "logger = logging.getLogger(__name__)\n", + "logging.basicConfig(level=logging.INFO)\n", + "\n", + "class ActionSearchKnowledge(Action):\n", + " def name(self) -> Text:\n", + " return \"action_search_knowledge\"\n", + "\n", + " def __init__(self):\n", + "\n", + " # Initialize OpenAI API key from environment variables\n", + " self.openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n", + " if not self.openai_api_key:\n", + " logger.error(\"OpenAI API key not found. 
Please set OPENAI_API_KEY in your environment.\")\n", + " openai.api_key = self.openai_api_key\n", + "\n", + " # Initialize LanceDB connection once\n", + " try:\n", + " self.db = lancedb.connect(\"./content/lancedb\")\n", + " self.table_name = \"knowledge_base\"\n", + " if self.table_name not in self.db.table_names():\n", + " logger.error(f\"Table '{self.table_name}' does not exist in LanceDB.\")\n", + " self.table = None\n", + " else:\n", + " self.table = self.db.open_table(self.table_name)\n", + " logger.info(f\"Connected to table '{self.table_name}' in LanceDB.\")\n", + " except Exception as e:\n", + " logger.error(f\"Error connecting to LanceDB: {e}\")\n", + " self.table = None\n", + "\n", + " def run(self, dispatcher: CollectingDispatcher,\n", + " tracker: Tracker,\n", + " domain: Dict[Text, Any]) -> List[Dict[Text, Any]]:\n", + "\n", + "\n", + " # Get the latest user message\n", + " user_message = tracker.latest_message.get('text')\n", + " logger.info(f\"User message: {user_message}\")\n", + "\n", + " if not user_message:\n", + " dispatcher.utter_message(text=\"Sorry, I didn't catch that. 
Could you please repeat?\")\n", + " return []\n", + "\n", + " try:\n", + " # Perform similarity search in LanceDB\n", + " query_result = self.table.search(user_message).limit(1).to_pandas()\n", + "\n", + " # Filter results based on the _distance parameter (smaller _distance means more similar)\n", + " relevant_content = [query_result.loc[0, \"content\"] if query_result.loc[0, \"_distance\"] < 0.65 else None][0]\n", + " response_text = \"Null\"\n", + "\n", + " # If we find relevant content , sent it to LLM or Else send automated reply\n", + " if not relevant_content == None:\n", + " logger.info(f\"Retrieved answer from knowledge base.\")\n", + " # Use OpenAI to generate a more refined response\n", + " response_text = self.generate_response(user_message, relevant_content)\n", + " else:\n", + " # If user has ask not a relevant question, reply with the following\n", + " response_text = \"I'm sorry, I don't have an answer to that question.\"\n", + " logger.info(f\"No matching content found in knowledge base.\")\n", + "\n", + " # Send the answer back to the user\n", + " dispatcher.utter_message(text=response_text)\n", + "\n", + " except Exception as e:\n", + " logger.error(f\"Error during search operation: {e}\")\n", + " dispatcher.utter_message(text=\"Sorry, something went wrong while processing your request.\")\n", + "\n", + " return []\n", + "\n", + " def generate_response(self, user_message: Text, relevant_content: Text) -> Text:\n", + " \"\"\"\n", + " Use OpenAI's API to generate a refined response based on user message and relevant content.\n", + " \"\"\"\n", + " try:\n", + " system_prompt = \"You are an company support assistant that provides helpful and accurate answers based on the provided information. 
You talk professionally and like a customer support executive.\"\n", + "\n", + " prompt = (\n", + " f\"User Question: {user_message}\\n\"\n", + " f\"Relevant Information: {relevant_content}\\n\\n\"\n", + " f\"Provide a detailed and helpful response to the user's question based on the relevant information above.\"\n", + " )\n", + "\n", + " response = openai.ChatCompletion.create(\n", + " model=\"gpt-4o-mini\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ],\n", + " max_tokens=450,\n", + " temperature=0.7,\n", + " )\n", + "\n", + " generated_text = response.choices[0].message['content'].strip()\n", + " logger.info(\"Generated response using OpenAI API.\")\n", + " return generated_text\n", + "\n", + " except Exception as e:\n", + " logger.error(f\"Error generating response with OpenAI API: {e}\")\n", + " return relevant_content # Fallback to relevant content if OpenAI fails" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "eenvEl8y6iL-", + "outputId": "f934500b-b4a9-4bd7-e0c8-743fbaf1f3d7" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "# Function to send messages to the Rasa server\n", - "def send_message(message):\n", - " url = \"http://localhost:5005/webhooks/rest/webhook\"\n", - " payload = {\n", - " \"sender\": \"test_user\",\n", - " \"message\": message\n", - " }\n", - " headers = {\n", - " \"Content-Type\": \"application/json\"\n", - " }\n", - " try:\n", - " response = requests.post(url, data=json.dumps(payload), headers=headers)\n", - " return response.json()\n", - " except requests.exceptions.ConnectionError:\n", - " return {\"error\": \"Could not connect to Rasa server.\"}\n", - "\n", - "# Example interactions\n", - "print(\"User: Hi\")\n", - "assistant_response = send_message(\"Hi\")\n", - "if assistant_response:\n", - " for resp in assistant_response:\n", - " if isinstance(resp, dict) and 
\"text\" in resp:\n", - " print(\"Assistant:\", resp[\"text\"])\n", - " else:\n", - " print(\"Assistant:\", resp)\n", - "\n", - "print(\"\\nUser: How do I reset my password? Explain in french\")\n", - "assistant_response = send_message(\"How do I delete my account? Explain in french\")\n", - "if assistant_response:\n", - " print(\"Assistant:\", end = \" \")\n", - " for resp in assistant_response:\n", - " if isinstance(resp, dict) and \"text\" in resp:\n", - " print(resp[\"text\"])\n", - " else:\n", - " print(resp)\n", - "\n", - "print(\"\\nUser: Standard shipping details. Hinglish mein btao\")\n", - "assistant_response = send_message(\"Standard shipping details. Hinglish mein btao\")\n", - "if assistant_response:\n", - " print(\"Assistant:\", end = \" \")\n", - " for resp in assistant_response:\n", - " if isinstance(resp, dict) and \"text\" in resp:\n", - " print(resp[\"text\"])\n", - " else:\n", - " print(resp)\n", - "\n", - "print(\"\\nUser: What is the weather today?\")\n", - "assistant_response = send_message(\"What is the weather today?\")\n", - "if assistant_response:\n", - " print(\"Assistant:\", end = \" \")\n", - " for resp in assistant_response:\n", - "\n", - " if isinstance(resp, dict) and \"text\" in resp:\n", - " print(resp[\"text\"])\n", - " else:\n", - " print( resp)\n", - "\n", - "print(\"\\nUser: Bye\")\n", - "assistant_response = send_message(\"Bye\")\n", - "if assistant_response:\n", - " print(\"Assistant:\", end = \" \")\n", - " for resp in assistant_response:\n", - " if isinstance(resp, dict) and \"text\" in resp:\n", - " print(resp[\"text\"])\n", - " else:\n", - " print(resp)\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "GxKWasrDOT5D", - "outputId": "9cb6f54e-5b3c-4bdb-fc3e-236937db13d5" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "User: Hi\n", - "Assistant: Hello! 
How can I assist you today?\n", - "\n", - "User: How do I reset my password? Explain in french\n", - "Assistant: Bonjour,\n", - "Pour supprimer votre compte, veuillez contacter notre équipe d'assistance à l'adresse suivante : support@yourcompany.com. Assurez-vous d'indiquer dans l'objet de votre message \"Demande de suppression de compte\". Nous vous assisterons rapidement dans cette démarche.\n", - "Si vous avez d'autres questions ou si vous avez besoin d'une aide supplémentaire, n'hésitez pas à nous le faire savoir.\n", - "Cordialement, \n", - "L'équipe de support\n", - "\n", - "User: Standard shipping details. Hinglish mein btao\n", - "Assistant: Aapka swaagat hai! Humein khushi hai ki aapne humse sampark kiya. \n", - "Hamara standard shipping policy yeh hai ki agar aapka order $75 se zyada hai, toh aapko standard shipping par koi bhi charges nahi dene honge, yani shipping free hai. Lekin agar aapka order is amount se kam hai, toh aapko standard shipping fees deni padegi.\n", - "Agar aapko aur koi sawaal hai ya madad chahiye, toh zaroor puchhiye!\n", - "\n", - "User: What is the weather today?\n", - "Assistant: I'm sorry, I don't have an answer to that question.\n", - "\n", - "User: Bye\n", - "Assistant: Goodbye! Have a great day!\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "Overwriting actions/actions.py\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Training RASA Model\n", + "The rasa train command is used to train the Rasa model based on the data and configurations defined in your project files (nlu.yml, stories.yml, rules.yml, etc.)\n", + "\n", + "It compiles the training data into a machine learning model that Rasa uses to interpret user inputs and manage conversations. 
The training process optimizes the model’s ability to accurately recognize intents, extract entities, and decide on appropriate actions, which is crucial for integrating seamless interactions with LanceDB and generating responses via LLMs." + ], + "metadata": { + "id": "NZyBVLzLFY0Z" + } + }, + { + "cell_type": "code", + "source": [ + "# Train the Rasa model\n", + "!rasa train" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "6FmKesSV_x_e", + "outputId": "2b87edf2-8a78-4c26-9845-688087eae709" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "markdown", - "source": [ - "### For debugging purposes" - ], - "metadata": { - "id": "eAli94VAZXpV" - } + "output_type": "stream", + "name": "stdout", + "text": [ + "/usr/local/lib/python3.10/dist-packages/rasa/core/tracker_store.py:1044: MovedIn20Warning: \u001b[31mDeprecated API features detected! These feature(s) are not compatible with SQLAlchemy 2.0. \u001b[32mTo prevent incompatible upgrades prior to updating applications, ensure requirements files are pinned to \"sqlalchemy<2.0\". \u001b[36mSet environment variable SQLALCHEMY_WARN_20=1 to show all deprecation warnings. Set environment variable SQLALCHEMY_SILENCE_UBER_WARNING=1 to silence this message.\u001b[0m (Background on SQLAlchemy 2.0 at: https://sqlalche.me/e/b8d9)\n", + " Base: DeclarativeMeta = declarative_base()\n", + "/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/validation.py:134: DeprecationWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html\n", + " import pkg_resources\n", + "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google')`.\n", + "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. 
See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", + " declare_namespace(pkg)\n", + "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('google.cloud')`.\n", + "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", + " declare_namespace(pkg)\n", + "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('mpl_toolkits')`.\n", + "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", + " declare_namespace(pkg)\n", + "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('ruamel')`.\n", + "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", + " declare_namespace(pkg)\n", + "/usr/local/lib/python3.10/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('sphinxcontrib')`.\n", + "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. 
See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", + " declare_namespace(pkg)\n", + "2024-12-30 21:14:02 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.cli.train\u001b[0m - Started validating domain and training data...\n", + "/usr/local/lib/python3.10/dist-packages/tensorflow/lite/python/util.py:52: DeprecationWarning: jax.xla_computation is deprecated. Please use the AOT APIs.\n", + " from jax import xla_computation as _xla_computation\n", + "2024-12-30 21:14:07 \u001b[1;30mINFO \u001b[0m \u001b[34mnumexpr.utils\u001b[0m - NumExpr defaulting to 2 threads.\n", + "\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/domain.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", + "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/data/nlu.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", + "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/data/rules.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", + "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/data/stories.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. 
Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", + "\u001b[0m2024-12-30 21:14:08 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.validator\u001b[0m - Validating intents...\n", + "2024-12-30 21:14:08 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.validator\u001b[0m - Validating uniqueness of intents and stories...\n", + "2024-12-30 21:14:08 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.validator\u001b[0m - Validating utterances...\n", + "2024-12-30 21:14:08 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.validator\u001b[0m - Story structure validation...\n", + "Processed story blocks: 100% 4/4 [00:00<00:00, 1979.38it/s, # trackers=1]\n", + "2024-12-30 21:14:08 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.core.training.story_conflict\u001b[0m - Considering all preceding turns for conflict analysis.\n", + "2024-12-30 21:14:08 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.validator\u001b[0m - No story structure conflicts found.\n", + "\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/domain.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", + "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/data/rules.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", + "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/data/stories.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. 
Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", + "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/shared/utils/io.py:99: UserWarning: Training data file /content/data/nlu.yml has a lower format version than your Rasa Open Source installation: 3.0 < 3.1. Rasa Open Source will read the file as a version 3.1 file. Please update your version key to 3.1. See https://rasa.com/docs/rasa/training-data-format.\n", + "\u001b[0m\u001b[93m/usr/local/lib/python3.10/dist-packages/rasa/engine/recipes/recipe.py:35: FutureWarning: From Rasa Open Source 4.0.0 onwards it will be required to specify a recipe in your model configuration. Defaulting to recipe 'default.v1'.\n", + " rasa.shared.utils.io.raise_deprecation_warning(\n", + "\u001b[0m2024-12-30 21:14:11 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'CountVectorsFeaturizer' from cache.\n", + "2024-12-30 21:14:11 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'CountVectorsFeaturizer' from cache.\n", + "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'DIETClassifier' from cache.\n", + "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'EntitySynonymMapper' from cache.\n", + "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'LexicalSyntacticFeaturizer' from cache.\n", + "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'RegexFeaturizer' from cache.\n", + "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'ResponseSelector' from cache.\n", + "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m 
\u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'RulePolicy' from cache.\n", + "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'TEDPolicy' from cache.\n", + "2024-12-30 21:14:12 \u001b[1;30mINFO \u001b[0m \u001b[34mrasa.engine.training.hooks\u001b[0m - Restored component 'UnexpecTEDIntentPolicy' from cache.\n", + "\u001b[92mYour Rasa model is trained and saved at 'models/20241230-211410-synchronic-diff.tar.gz'.\u001b[0m\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Running Servers" + ], + "metadata": { + "id": "sQy3kIUMONDl" + } + }, + { + "cell_type": "markdown", + "source": [ + "The **Rasa Server** is the core component that handles user interactions. It processes incoming messages, interprets user intents, manages conversations, and generates appropriate responses based on the trained model and defined dialogue flows." + ], + "metadata": { + "id": "zVnktY3tODA6" + } + }, + { + "cell_type": "code", + "source": [ + "import threading\n", + "import os\n", + "\n", + "\n", + "def run_rasa_server():\n", + " os.system(\"rasa run\")\n", + "\n", + "\n", + "threading.Thread(target=run_rasa_server).start()\n", + "# Wait for the Rasa server to start\n", + "time.sleep(35)" + ], + "metadata": { + "id": "NZq0FZgO_yCL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "The **Action Server** is a separate service that executes Custom Actions defined in your project (actions.py). These actions perform specialized tasks such as querying databases, processing data, or integrating with external APIs." 
+ ], + "metadata": { + "id": "FYV_JvcsOIuM" + } + }, + { + "cell_type": "code", + "source": [ + "import threading\n", + "import os\n", + "\n", + "\n", + "def run_action_server():\n", + " os.system(\"rasa run actions\")\n", + "\n", + "\n", + "threading.Thread(target=run_action_server).start()\n", + "# Wait for the action server to start\n", + "time.sleep(35)" + ], + "metadata": { + "id": "op2C5AsO_yFA" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### Let's test out our chatbot" + ], + "metadata": { + "id": "X_v2YzDvOUff" + } + }, + { + "cell_type": "code", + "source": [ + "# Function to send messages to the Rasa server\n", + "def send_message(message):\n", + " url = \"http://localhost:5005/webhooks/rest/webhook\"\n", + " payload = {\"sender\": \"test_user\", \"message\": message}\n", + " headers = {\"Content-Type\": \"application/json\"}\n", + " try:\n", + " response = requests.post(url, data=json.dumps(payload), headers=headers)\n", + " return response.json()\n", + " except requests.exceptions.ConnectionError:\n", + " return {\"error\": \"Could not connect to Rasa server.\"}\n", + "\n", + "\n", + "# Example interactions\n", + "print(\"User: Hi\")\n", + "assistant_response = send_message(\"Hi\")\n", + "if assistant_response:\n", + " for resp in assistant_response:\n", + " if isinstance(resp, dict) and \"text\" in resp:\n", + " print(\"Assistant:\", resp[\"text\"])\n", + " else:\n", + " print(\"Assistant:\", resp)\n", + "\n", + "print(\"\\nUser: How do I reset my password? Explain in french\")\n", + "assistant_response = send_message(\"How do I delete my account? Explain in french\")\n", + "if assistant_response:\n", + " print(\"Assistant:\", end=\" \")\n", + " for resp in assistant_response:\n", + " if isinstance(resp, dict) and \"text\" in resp:\n", + " print(resp[\"text\"])\n", + " else:\n", + " print(resp)\n", + "\n", + "print(\"\\nUser: Standard shipping details. 
Hinglish mein btao\")\n", + "assistant_response = send_message(\"Standard shipping details. Hinglish mein btao\")\n", + "if assistant_response:\n", + " print(\"Assistant:\", end=\" \")\n", + " for resp in assistant_response:\n", + " if isinstance(resp, dict) and \"text\" in resp:\n", + " print(resp[\"text\"])\n", + " else:\n", + " print(resp)\n", + "\n", + "print(\"\\nUser: What is the weather today?\")\n", + "assistant_response = send_message(\"What is the weather today?\")\n", + "if assistant_response:\n", + " print(\"Assistant:\", end=\" \")\n", + " for resp in assistant_response:\n", + " if isinstance(resp, dict) and \"text\" in resp:\n", + " print(resp[\"text\"])\n", + " else:\n", + " print(resp)\n", + "\n", + "print(\"\\nUser: Bye\")\n", + "assistant_response = send_message(\"Bye\")\n", + "if assistant_response:\n", + " print(\"Assistant:\", end=\" \")\n", + " for resp in assistant_response:\n", + " if isinstance(resp, dict) and \"text\" in resp:\n", + " print(resp[\"text\"])\n", + " else:\n", + " print(resp)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "GxKWasrDOT5D", + "outputId": "9cb6f54e-5b3c-4bdb-fc3e-236937db13d5" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "!lsof -i" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fqBXLg3BU0Pn", - "outputId": "bfa0a50f-f665-414d-ad69-95244dc81850" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME\n", - "node 6 root 21u IPv6 19762 0t0 TCP *:8080 (LISTEN)\n", - "node 6 root 27u IPv4 21268 0t0 TCP 11757d4d820b:56654->11757d4d820b:6000 (ESTABLISHED)\n", - "node 6 root 28u IPv6 68552 0t0 TCP 11757d4d820b:8080->172.28.0.1:49662 (ESTABLISHED)\n", - "node 6 root 29u IPv6 142453 0t0 TCP 11757d4d820b:8080->172.28.0.1:43440 (ESTABLISHED)\n", - "node 6 root 31u IPv6 72491 0t0 TCP 
11757d4d820b:8080->172.28.0.1:46072 (ESTABLISHED)\n", - "node 6 root 34u IPv6 143833 0t0 TCP 11757d4d820b:8080->172.28.0.1:43708 (ESTABLISHED)\n", - "kernel_ma 26 root 3u IPv4 19626 0t0 TCP 11757d4d820b:6000 (LISTEN)\n", - "kernel_ma 26 root 6u IPv4 21269 0t0 TCP 11757d4d820b:6000->11757d4d820b:56654 (ESTABLISHED)\n", - "kernel_ma 26 root 7u IPv4 20263 0t0 TCP 11757d4d820b:41868->11757d4d820b:9000 (ESTABLISHED)\n", - "kernel_ma 26 root 8u IPv4 21271 0t0 TCP 11757d4d820b:46128->11757d4d820b:9000 (ESTABLISHED)\n", - "kernel_ma 26 root 9u IPv4 21384 0t0 TCP 11757d4d820b:6000->172.28.0.1:58950 (ESTABLISHED)\n", - "colab-fil 73 root 3u IPv4 19800 0t0 TCP localhost:3453 (LISTEN)\n", - "jupyter-n 90 root 7u IPv4 20838 0t0 TCP 11757d4d820b:9000 (LISTEN)\n", - "jupyter-n 90 root 8u IPv4 20866 0t0 TCP 11757d4d820b:9000->11757d4d820b:41868 (ESTABLISHED)\n", - "jupyter-n 90 root 18u IPv4 21272 0t0 TCP 11757d4d820b:9000->11757d4d820b:46128 (ESTABLISHED)\n", - "dap_multi 91 root 9u IPv4 68985 0t0 TCP localhost:58084->localhost:38429 (ESTABLISHED)\n", - "python3 1728 root 21u IPv4 68701 0t0 TCP localhost:37327 (LISTEN)\n", - "python3 1728 root 34u IPv4 68878 0t0 TCP localhost:44974->localhost:41041 (ESTABLISHED)\n", - "python3 1728 root 51u IPv4 106476 0t0 TCP 11757d4d820b:42606->server-3-171-171-65.atl59.r.cloudfront.net:443 (CLOSE_WAIT)\n", - "python3 1728 root 52u IPv4 106845 0t0 TCP 11757d4d820b:46718->server-3-171-171-65.atl59.r.cloudfront.net:443 (CLOSE_WAIT)\n", - "python3 1728 root 54u IPv4 108879 0t0 TCP 11757d4d820b:34598->server-54-230-253-56.atl56.r.cloudfront.net:443 (CLOSE_WAIT)\n", - "python3 1753 root 3u IPv4 68533 0t0 TCP localhost:38429 (LISTEN)\n", - "python3 1753 root 4u IPv4 68875 0t0 TCP localhost:38429->localhost:58084 (ESTABLISHED)\n", - "python3 1753 root 5u IPv4 68534 0t0 TCP localhost:41041 (LISTEN)\n", - "python3 1753 root 6u IPv4 68876 0t0 TCP localhost:41041->localhost:44974 (ESTABLISHED)\n", - "rasa 6257 root 14u IPv4 140831 0t0 TCP *:5005 
(LISTEN)\n", - "rasa 6434 root 29u IPv4 139178 0t0 TCP *:5055 (LISTEN)\n", - "rasa 6434 root 32u IPv4 140924 0t0 TCP 11757d4d820b:51776->server-3-171-171-6.atl59.r.cloudfront.net:443 (ESTABLISHED)\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "User: Hi\n", + "Assistant: Hello! How can I assist you today?\n", + "\n", + "User: How do I reset my password? Explain in french\n", + "Assistant: Bonjour,\n", + "Pour supprimer votre compte, veuillez contacter notre équipe d'assistance à l'adresse suivante : support@yourcompany.com. Assurez-vous d'indiquer dans l'objet de votre message \"Demande de suppression de compte\". Nous vous assisterons rapidement dans cette démarche.\n", + "Si vous avez d'autres questions ou si vous avez besoin d'une aide supplémentaire, n'hésitez pas à nous le faire savoir.\n", + "Cordialement, \n", + "L'équipe de support\n", + "\n", + "User: Standard shipping details. Hinglish mein btao\n", + "Assistant: Aapka swaagat hai! Humein khushi hai ki aapne humse sampark kiya. \n", + "Hamara standard shipping policy yeh hai ki agar aapka order $75 se zyada hai, toh aapko standard shipping par koi bhi charges nahi dene honge, yani shipping free hai. Lekin agar aapka order is amount se kam hai, toh aapko standard shipping fees deni padegi.\n", + "Agar aapko aur koi sawaal hai ya madad chahiye, toh zaroor puchhiye!\n", + "\n", + "User: What is the weather today?\n", + "Assistant: I'm sorry, I don't have an answer to that question.\n", + "\n", + "User: Bye\n", + "Assistant: Goodbye! 
Have a great day!\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### For debugging purposes" + ], + "metadata": { + "id": "eAli94VAZXpV" + } + }, + { + "cell_type": "code", + "source": [ + "!lsof -i" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "fqBXLg3BU0Pn", + "outputId": "bfa0a50f-f665-414d-ad69-95244dc81850" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [ - "!kill -9 7966" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "RgVuA2MiVQfR", - "outputId": "4c515fc8-b2eb-4566-d681-b48c06e06d0b" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "/bin/bash: line 1: kill: (7966) - No such process\n" - ] - } - ] + "output_type": "stream", + "name": "stdout", + "text": [ + "COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME\n", + "node 6 root 21u IPv6 19762 0t0 TCP *:8080 (LISTEN)\n", + "node 6 root 27u IPv4 21268 0t0 TCP 11757d4d820b:56654->11757d4d820b:6000 (ESTABLISHED)\n", + "node 6 root 28u IPv6 68552 0t0 TCP 11757d4d820b:8080->172.28.0.1:49662 (ESTABLISHED)\n", + "node 6 root 29u IPv6 142453 0t0 TCP 11757d4d820b:8080->172.28.0.1:43440 (ESTABLISHED)\n", + "node 6 root 31u IPv6 72491 0t0 TCP 11757d4d820b:8080->172.28.0.1:46072 (ESTABLISHED)\n", + "node 6 root 34u IPv6 143833 0t0 TCP 11757d4d820b:8080->172.28.0.1:43708 (ESTABLISHED)\n", + "kernel_ma 26 root 3u IPv4 19626 0t0 TCP 11757d4d820b:6000 (LISTEN)\n", + "kernel_ma 26 root 6u IPv4 21269 0t0 TCP 11757d4d820b:6000->11757d4d820b:56654 (ESTABLISHED)\n", + "kernel_ma 26 root 7u IPv4 20263 0t0 TCP 11757d4d820b:41868->11757d4d820b:9000 (ESTABLISHED)\n", + "kernel_ma 26 root 8u IPv4 21271 0t0 TCP 11757d4d820b:46128->11757d4d820b:9000 (ESTABLISHED)\n", + "kernel_ma 26 root 9u IPv4 21384 0t0 TCP 11757d4d820b:6000->172.28.0.1:58950 (ESTABLISHED)\n", + "colab-fil 73 root 3u IPv4 19800 0t0 TCP localhost:3453 (LISTEN)\n", + 
"jupyter-n 90 root 7u IPv4 20838 0t0 TCP 11757d4d820b:9000 (LISTEN)\n", + "jupyter-n 90 root 8u IPv4 20866 0t0 TCP 11757d4d820b:9000->11757d4d820b:41868 (ESTABLISHED)\n", + "jupyter-n 90 root 18u IPv4 21272 0t0 TCP 11757d4d820b:9000->11757d4d820b:46128 (ESTABLISHED)\n", + "dap_multi 91 root 9u IPv4 68985 0t0 TCP localhost:58084->localhost:38429 (ESTABLISHED)\n", + "python3 1728 root 21u IPv4 68701 0t0 TCP localhost:37327 (LISTEN)\n", + "python3 1728 root 34u IPv4 68878 0t0 TCP localhost:44974->localhost:41041 (ESTABLISHED)\n", + "python3 1728 root 51u IPv4 106476 0t0 TCP 11757d4d820b:42606->server-3-171-171-65.atl59.r.cloudfront.net:443 (CLOSE_WAIT)\n", + "python3 1728 root 52u IPv4 106845 0t0 TCP 11757d4d820b:46718->server-3-171-171-65.atl59.r.cloudfront.net:443 (CLOSE_WAIT)\n", + "python3 1728 root 54u IPv4 108879 0t0 TCP 11757d4d820b:34598->server-54-230-253-56.atl56.r.cloudfront.net:443 (CLOSE_WAIT)\n", + "python3 1753 root 3u IPv4 68533 0t0 TCP localhost:38429 (LISTEN)\n", + "python3 1753 root 4u IPv4 68875 0t0 TCP localhost:38429->localhost:58084 (ESTABLISHED)\n", + "python3 1753 root 5u IPv4 68534 0t0 TCP localhost:41041 (LISTEN)\n", + "python3 1753 root 6u IPv4 68876 0t0 TCP localhost:41041->localhost:44974 (ESTABLISHED)\n", + "rasa 6257 root 14u IPv4 140831 0t0 TCP *:5005 (LISTEN)\n", + "rasa 6434 root 29u IPv4 139178 0t0 TCP *:5055 (LISTEN)\n", + "rasa 6434 root 32u IPv4 140924 0t0 TCP 11757d4d820b:51776->server-3-171-171-6.atl59.r.cloudfront.net:443 (ESTABLISHED)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!kill -9 7966" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, + "id": "RgVuA2MiVQfR", + "outputId": "4c515fc8-b2eb-4566-d681-b48c06e06d0b" + }, + "execution_count": null, + "outputs": [ { - "cell_type": "code", - "source": [], - "metadata": { - "id": "Wrg5YFSlWFd-" - }, - "execution_count": null, - "outputs": [] + "output_type": "stream", + "name": "stdout", + "text": [ + "/bin/bash: line 1: kill: 
(7966) - No such process\n" + ] } - ] + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "Wrg5YFSlWFd-" + }, + "execution_count": null, + "outputs": [] + } + ] } \ No newline at end of file diff --git a/examples/archived_examples/Food_recommendation/main.ipynb b/examples/archived_examples/Food_recommendation/main.ipynb index b54d821..17e88de 100644 --- a/examples/archived_examples/Food_recommendation/main.ipynb +++ b/examples/archived_examples/Food_recommendation/main.ipynb @@ -1,1326 +1,5124 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Food Recommendation System\n", - "\n", - "## Overview\n", - "\n", - "This project is a vector-based food recommendation system utilizing LanceDB for full-text search (FTS), hybrid search, and vector search. It integrates the reranker model to enhance search results and provide accurate food recommendations.\n", - "\n", - "## Features\n", - "\n", - "- **Vector-Based Recommendations**: Utilizes advanced vector search to find similar food items.\n", - "- **Full-Text Search (FTS)**: Enables efficient searching of food items based on text descriptions.\n", - "- **Hybrid Search**: Combines both vector search and full-text search for comprehensive results.\n", - "- **Jina Reranker Model**: Improves search result accuracy by reranking models. \n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Install required dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# install packages\n", - "!pip install pandas\n", - "!pip install lancedb" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Download Data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For this notebook walkthrough, we will use food recommendation data from Kaggle. 
You can download the dataset from the following link:\n", - "\n", - "Download the food recommendation data from Kaggle\n", - "\n", - "https://www.kaggle.com/datasets/schemersays/food-recommendation-system" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Loading and Merging Data into a Single File\n", - "import pandas as pd\n", - "\n", - "df = pd.read_csv(\"main_food.csv\")\n", - "df_rating = pd.read_csv(\"ratings.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "main_df = pd.merge(df_rating, df, on=\"Food_ID\", how=\"inner\")\n", - "main_df.to_csv(\"main_df.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# Now, open the main file which contains both merged datasets.\n", - "df = pd.read_csv(\"main_df.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ + "cells": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0User_IDFood_IDRatingNameC_TypeVeg_NonDescribe
001.088.04.0peri peri chicken sataySnacknon-vegboneless skinless chicken thigh (trimmed), sal...
111.046.03.0steam bunny chicken baoJapanesenon-vegbuns, all purpose white flour, dry yeast, suga...
221.024.05.0green lentil dessert fudgeDessertvegwhole moong beans, cow ghee, raisins, whole mi...
331.025.04.0cashew nut cookiesDessertvegcashew paste, ghee, khaand (a sweetening agent...
442.049.01.0christmas tree pizzaItalianvegpizza dough (2 boules), red pepper, red onion,...
\n", - "
" + "cell_type": "markdown", + "metadata": { + "id": "xYITU-6R0jHI" + }, + "source": [ + "# Food Recommendation System\n", + "\n", + "## Overview\n", + "\n", + "This project is a vector-based food recommendation system utilizing LanceDB for full-text search (FTS), hybrid search, and vector search. It integrates the reranker model to enhance search results and provide accurate food recommendations.\n", + "\n", + "## Features\n", + "\n", + "- **Vector-Based Recommendations**: Utilizes advanced vector search to find similar food items.\n", + "- **Full-Text Search (FTS)**: Enables efficient searching of food items based on text descriptions.\n", + "- **Hybrid Search**: Combines both vector search and full-text search for comprehensive results.\n", + "- **Jina Reranker Model**: Improves search result accuracy by reranking models.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YWArkiYk0jHL" + }, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QrvjHAfb0jHL" + }, + "source": [ + "### Install required dependencies" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "UNffzgyY0jHM", + "outputId": "b9c63f58-c61f-4523-c633-ba6e99fb448e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (2.2.2)\n", + "Requirement already satisfied: numpy>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from pandas) (1.26.4)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas) (2024.2)\n", + "Requirement already satisfied: six>=1.5 
in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n", + "Collecting lancedb\n", + " Downloading lancedb-0.18.0-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (4.0 kB)\n", + "Collecting deprecation (from lancedb)\n", + " Downloading deprecation-2.1.0-py2.py3-none-any.whl.metadata (4.6 kB)\n", + "Collecting pylance==0.22.0 (from lancedb)\n", + " Downloading pylance-0.22.0-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (7.2 kB)\n", + "Requirement already satisfied: tqdm>=4.27.0 in /usr/local/lib/python3.10/dist-packages (from lancedb) (4.67.1)\n", + "Requirement already satisfied: pydantic>=1.10 in /usr/local/lib/python3.10/dist-packages (from lancedb) (2.10.4)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from lancedb) (24.2)\n", + "Collecting overrides>=0.7 (from lancedb)\n", + " Downloading overrides-7.7.0-py3-none-any.whl.metadata (5.8 kB)\n", + "Requirement already satisfied: pyarrow>=14 in /usr/local/lib/python3.10/dist-packages (from pylance==0.22.0->lancedb) (17.0.0)\n", + "Requirement already satisfied: numpy>=1.22 in /usr/local/lib/python3.10/dist-packages (from pylance==0.22.0->lancedb) (1.26.4)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (2.27.2)\n", + "Requirement already satisfied: typing-extensions>=4.12.2 in /usr/local/lib/python3.10/dist-packages (from pydantic>=1.10->lancedb) (4.12.2)\n", + "Downloading lancedb-0.18.0-cp39-abi3-manylinux_2_28_x86_64.whl (32.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m32.2/32.2 MB\u001b[0m \u001b[31m29.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pylance-0.22.0-cp39-abi3-manylinux_2_28_x86_64.whl (38.3 MB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m38.3/38.3 MB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading overrides-7.7.0-py3-none-any.whl (17 kB)\n", + "Downloading deprecation-2.1.0-py2.py3-none-any.whl (11 kB)\n", + "Installing collected packages: overrides, deprecation, pylance, lancedb\n", + "Successfully installed deprecation-2.1.0 lancedb-0.18.0 overrides-7.7.0 pylance-0.22.0\n" + ] + } ], - "text/plain": [ - " Unnamed: 0 User_ID Food_ID Rating Name C_Type \\\n", - "0 0 1.0 88.0 4.0 peri peri chicken satay Snack \n", - "1 1 1.0 46.0 3.0 steam bunny chicken bao Japanese \n", - "2 2 1.0 24.0 5.0 green lentil dessert fudge Dessert \n", - "3 3 1.0 25.0 4.0 cashew nut cookies Dessert \n", - "4 4 2.0 49.0 1.0 christmas tree pizza Italian \n", - "\n", - " Veg_Non Describe \n", - "0 non-veg boneless skinless chicken thigh (trimmed), sal... \n", - "1 non-veg buns, all purpose white flour, dry yeast, suga... \n", - "2 veg whole moong beans, cow ghee, raisins, whole mi... \n", - "3 veg cashew paste, ghee, khaand (a sweetening agent... \n", - "4 veg pizza dough (2 boules), red pepper, red onion,... 
" + "source": [ + "# install packages\n", + "!pip install pandas\n", + "!pip install lancedb" ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Data Preprocessing" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# We are adding all important columns into the text column to enhance full-text search (FTS) and overall search performance.\n", - "df[\"text\"] = df.apply(\n", - " lambda row: f\"{row['Name']} {row['C_Type']} {row['Veg_Non']}: {row['Describe']}\",\n", - " axis=1,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/plain": [ - "'peri peri chicken satay Snack non-veg: boneless skinless chicken thigh (trimmed), salt and pepper, yogurt, chilli powder, ginger garlic paste, coriander leaves, oil to fry, peri peri sauce, potato fries'" + "cell_type": "code", + "source": [ + "!pip install tantivy rerankers" + ], + "metadata": { + "id": "oUjPxx272u-X", + "outputId": "d45c2a95-38e6-493e-89e1-24a61318ddfb", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: tantivy in /usr/local/lib/python3.10/dist-packages (0.22.0)\n", + "Collecting rerankers\n", + " Downloading rerankers-0.6.1-py3-none-any.whl.metadata (29 kB)\n", + "Requirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (from rerankers) (2.10.4)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from rerankers) (4.67.1)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic->rerankers) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in 
/usr/local/lib/python3.10/dist-packages (from pydantic->rerankers) (2.27.2)\n", + "Requirement already satisfied: typing-extensions>=4.12.2 in /usr/local/lib/python3.10/dist-packages (from pydantic->rerankers) (4.12.2)\n", + "Downloading rerankers-0.6.1-py3-none-any.whl (41 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.5/41.5 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: rerankers\n", + "Successfully installed rerankers-0.6.1\n" + ] + } ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# just chcking our text data\n", - "df[\"text\"][0]" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oeV7mp2R0jHN" + }, + "source": [ + "### Download Data" + ] + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0User_IDFood_IDRatingNameC_TypeVeg_NonDescribetext
001.088.04.0peri peri chicken sataySnacknon-vegboneless skinless chicken thigh (trimmed), sal...peri peri chicken satay Snack non-veg: boneles...
111.046.03.0steam bunny chicken baoJapanesenon-vegbuns, all purpose white flour, dry yeast, suga...steam bunny chicken bao Japanese non-veg: buns...
221.024.05.0green lentil dessert fudgeDessertvegwhole moong beans, cow ghee, raisins, whole mi...green lentil dessert fudge Dessert veg: whole ...
331.025.04.0cashew nut cookiesDessertvegcashew paste, ghee, khaand (a sweetening agent...cashew nut cookies Dessert veg: cashew paste, ...
442.049.01.0christmas tree pizzaItalianvegpizza dough (2 boules), red pepper, red onion,...christmas tree pizza Italian veg: pizza dough ...
\n", - "
" + "cell_type": "markdown", + "metadata": { + "id": "ZENpOEvS0jHN" + }, + "source": [ + "For this notebook walkthrough, we will use food recommendation data from Kaggle. You can download the dataset from the following link:\n", + "\n", + "Download the food recommendation data from Kaggle\n", + "\n", + "https://www.kaggle.com/datasets/schemersays/food-recommendation-system" + ] + }, + { + "cell_type": "code", + "source": [ + "# Download data\n", + "!wget https://raw.githubusercontent.com/lancedb/vectordb-recipes/tree/main/examples/archived_examples/Food_recommendation/main_food.csv\n", + "!wget https://raw.githubusercontent.com/lancedb/vectordb-recipes/tree/main/examples/archived_examples/Food_recommendation/ratings.csv" ], - "text/plain": [ - " Unnamed: 0 User_ID Food_ID Rating Name C_Type \\\n", - "0 0 1.0 88.0 4.0 peri peri chicken satay Snack \n", - "1 1 1.0 46.0 3.0 steam bunny chicken bao Japanese \n", - "2 2 1.0 24.0 5.0 green lentil dessert fudge Dessert \n", - "3 3 1.0 25.0 4.0 cashew nut cookies Dessert \n", - "4 4 2.0 49.0 1.0 christmas tree pizza Italian \n", - "\n", - " Veg_Non Describe \\\n", - "0 non-veg boneless skinless chicken thigh (trimmed), sal... \n", - "1 non-veg buns, all purpose white flour, dry yeast, suga... \n", - "2 veg whole moong beans, cow ghee, raisins, whole mi... \n", - "3 veg cashew paste, ghee, khaand (a sweetening agent... \n", - "4 veg pizza dough (2 boules), red pepper, red onion,... \n", - "\n", - " text \n", - "0 peri peri chicken satay Snack non-veg: boneles... \n", - "1 steam bunny chicken bao Japanese non-veg: buns... \n", - "2 green lentil dessert fudge Dessert veg: whole ... \n", - "3 cashew nut cookies Dessert veg: cashew paste, ... \n", - "4 christmas tree pizza Italian veg: pizza dough ... 
" + "metadata": { + "id": "gWQI2h923JBJ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "FGlscoxl0jHN" + }, + "outputs": [], + "source": [ + "# Loading and Merging Data into a Single File\n", + "import pandas as pd\n", + "\n", + "df = pd.read_csv(\"main_food.csv\")\n", + "df_rating = pd.read_csv(\"ratings.csv\")" ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "\n", - "To improve accuracy, we should include both numerical and string representations of ratings. First, add a new column, rating_str, containing the string values for each rating. Then, append both the numerical and string ratings to the text column. This approach increases the chances of achieving better accuracy.\n", - "this kind of trick exp you need to do for improving your accuracy\n" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a mapping from numbers to strings\n", - "num_to_string = {\n", - " 0.0: \"zero\",\n", - " 1.0: \"one\",\n", - " 2.0: \"two\",\n", - " 3.0: \"three\",\n", - " 4.0: \"four\",\n", - " 5.0: \"five\",\n", - " 6.0: \"six\",\n", - " 7.0: \"seven\",\n", - " 8.0: \"eight\",\n", - " 9.0: \"nine\",\n", - " 10.0: \"ten\",\n", - "}\n", - "# Replace numerical ratings with their string equivalents\n", - "df[\"Rating_str\"] = df[\"Rating\"].map(num_to_string)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "df[\"Rating\"] = df[\"Rating\"].astype(int)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "rCjy46W00jHN" + }, + "outputs": [], + "source": [ + "main_df = pd.merge(df_rating, df, 
on=\"Food_ID\", how=\"inner\")\n", + "main_df.to_csv(\"main_df.csv\")" + ] + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0User_IDFood_IDRatingNameC_TypeVeg_NonDescribetextRating_str
001.088.04peri peri chicken sataySnacknon-vegboneless skinless chicken thigh (trimmed), sal...peri peri chicken satay Snack non-veg: boneles...four
111.046.03steam bunny chicken baoJapanesenon-vegbuns, all purpose white flour, dry yeast, suga...steam bunny chicken bao Japanese non-veg: buns...three
221.024.05green lentil dessert fudgeDessertvegwhole moong beans, cow ghee, raisins, whole mi...green lentil dessert fudge Dessert veg: whole ...five
331.025.04cashew nut cookiesDessertvegcashew paste, ghee, khaand (a sweetening agent...cashew nut cookies Dessert veg: cashew paste, ...four
442.049.01christmas tree pizzaItalianvegpizza dough (2 boules), red pepper, red onion,...christmas tree pizza Italian veg: pizza dough ...one
\n", - "
" + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "zzFbt4g40jHO" + }, + "outputs": [], + "source": [ + "# Now, open the main file which contains both merged datasets.\n", + "df = pd.read_csv(\"main_df.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "XiWQ-KYi0jHO", + "outputId": "e1033c39-1672-45f6-d665-2a9efcda6a02", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 User_ID Food_ID Rating Name C_Type \\\n", + "0 0 1.0 88.0 4.0 peri peri chicken satay Snack \n", + "1 1 1.0 46.0 3.0 steam bunny chicken bao Japanese \n", + "2 2 1.0 24.0 5.0 green lentil dessert fudge Dessert \n", + "3 3 1.0 25.0 4.0 cashew nut cookies Dessert \n", + "4 4 2.0 49.0 1.0 christmas tree pizza Italian \n", + "\n", + " Veg_Non Describe \n", + "0 non-veg boneless skinless chicken thigh (trimmed), sal... \n", + "1 non-veg buns, all purpose white flour, dry yeast, suga... \n", + "2 veg whole moong beans, cow ghee, raisins, whole mi... \n", + "3 veg cashew paste, ghee, khaand (a sweetening agent... \n", + "4 veg pizza dough (2 boules), red pepper, red onion,... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0User_IDFood_IDRatingNameC_TypeVeg_NonDescribe
001.088.04.0peri peri chicken sataySnacknon-vegboneless skinless chicken thigh (trimmed), sal...
111.046.03.0steam bunny chicken baoJapanesenon-vegbuns, all purpose white flour, dry yeast, suga...
221.024.05.0green lentil dessert fudgeDessertvegwhole moong beans, cow ghee, raisins, whole mi...
331.025.04.0cashew nut cookiesDessertvegcashew paste, ghee, khaand (a sweetening agent...
442.049.01.0christmas tree pizzaItalianvegpizza dough (2 boules), red pepper, red onion,...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 511,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 147,\n \"min\": 0,\n \"max\": 510,\n \"num_unique_values\": 511,\n \"samples\": [\n 124,\n 84,\n 433\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"User_ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28.73921290350848,\n \"min\": 1.0,\n \"max\": 100.0,\n \"num_unique_values\": 100,\n \"samples\": [\n 84.0,\n 54.0,\n 71.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Food_ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 91.29262932706344,\n \"min\": 1.0,\n \"max\": 309.0,\n \"num_unique_values\": 309,\n \"samples\": [\n 210.0,\n 110.0,\n 154.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.8662362487559436,\n \"min\": 1.0,\n \"max\": 10.0,\n \"num_unique_values\": 10,\n \"samples\": [\n 2.0,\n 3.0,\n 9.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 309,\n \"samples\": [\n \"quinoa coconut crumble custard\",\n \"chicken and mushroom lasagna\",\n \"fish with white sauce\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"C_Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"Beverage\",\n \"Snack\",\n \"Thai\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Veg_Non\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"veg\",\n \"non-veg\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n 
\"column\": \"Describe\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 306,\n \"samples\": [\n \"dahi, cumin powder, garlic paste, garam masala, turmeric powder, red chilli powder, salt, boneless chicken, oil, green chilli, onion, tomato\",\n \"chicken, onion, green chilli, garlic, ginger, salt, aromatic powder, soya sauce, oyster sauce, spring onion, filo sheets\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 5 + } ], - "text/plain": [ - " Unnamed: 0 User_ID Food_ID Rating Name C_Type \\\n", - "0 0 1.0 88.0 4 peri peri chicken satay Snack \n", - "1 1 1.0 46.0 3 steam bunny chicken bao Japanese \n", - "2 2 1.0 24.0 5 green lentil dessert fudge Dessert \n", - "3 3 1.0 25.0 4 cashew nut cookies Dessert \n", - "4 4 2.0 49.0 1 christmas tree pizza Italian \n", - "\n", - " Veg_Non Describe \\\n", - "0 non-veg boneless skinless chicken thigh (trimmed), sal... \n", - "1 non-veg buns, all purpose white flour, dry yeast, suga... \n", - "2 veg whole moong beans, cow ghee, raisins, whole mi... \n", - "3 veg cashew paste, ghee, khaand (a sweetening agent... \n", - "4 veg pizza dough (2 boules), red pepper, red onion,... \n", - "\n", - " text Rating_str \n", - "0 peri peri chicken satay Snack non-veg: boneles... four \n", - "1 steam bunny chicken bao Japanese non-veg: buns... three \n", - "2 green lentil dessert fudge Dessert veg: whole ... five \n", - "3 cashew nut cookies Dessert veg: cashew paste, ... four \n", - "4 christmas tree pizza Italian veg: pizza dough ... 
one " + "source": [ + "df.head()" ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "df[\"text\"] = df.apply(\n", - " lambda row: f\"{row['text']} rating: {row['Rating']} {row['Rating_str']}\", axis=1\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tzhDVWuP0jHO" + }, + "source": [ + "### Data Preprocessing" + ] + }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Unnamed: 0User_IDFood_IDRatingNameC_TypeVeg_NonDescribetextRating_str
001.088.04peri peri chicken sataySnacknon-vegboneless skinless chicken thigh (trimmed), sal...peri peri chicken satay Snack non-veg: boneles...four
111.046.03steam bunny chicken baoJapanesenon-vegbuns, all purpose white flour, dry yeast, suga...steam bunny chicken bao Japanese non-veg: buns...three
221.024.05green lentil dessert fudgeDessertvegwhole moong beans, cow ghee, raisins, whole mi...green lentil dessert fudge Dessert veg: whole ...five
331.025.04cashew nut cookiesDessertvegcashew paste, ghee, khaand (a sweetening agent...cashew nut cookies Dessert veg: cashew paste, ...four
442.049.01christmas tree pizzaItalianvegpizza dough (2 boules), red pepper, red onion,...christmas tree pizza Italian veg: pizza dough ...one
\n", - "
" + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "p41002yW0jHP" + }, + "outputs": [], + "source": [ + "# We are adding all important columns into the text column to enhance full-text search (FTS) and overall search performance.\n", + "df[\"text\"] = df.apply(\n", + " lambda row: f\"{row['Name']} {row['C_Type']} {row['Veg_Non']}: {row['Describe']}\",\n", + " axis=1,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "ExJVsE-o0jHP", + "outputId": "8d2b826e-214f-44cd-c9ac-2b8c0416b75c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 54 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'peri peri chicken satay Snack non-veg: boneless skinless chicken thigh (trimmed), salt and pepper, yogurt, chilli powder, ginger garlic paste, coriander leaves, oil to fry, peri peri sauce, potato fries'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 7 + } ], - "text/plain": [ - " Unnamed: 0 User_ID Food_ID Rating Name C_Type \\\n", - "0 0 1.0 88.0 4 peri peri chicken satay Snack \n", - "1 1 1.0 46.0 3 steam bunny chicken bao Japanese \n", - "2 2 1.0 24.0 5 green lentil dessert fudge Dessert \n", - "3 3 1.0 25.0 4 cashew nut cookies Dessert \n", - "4 4 2.0 49.0 1 christmas tree pizza Italian \n", - "\n", - " Veg_Non Describe \\\n", - "0 non-veg boneless skinless chicken thigh (trimmed), sal... \n", - "1 non-veg buns, all purpose white flour, dry yeast, suga... \n", - "2 veg whole moong beans, cow ghee, raisins, whole mi... \n", - "3 veg cashew paste, ghee, khaand (a sweetening agent... \n", - "4 veg pizza dough (2 boules), red pepper, red onion,... \n", - "\n", - " text Rating_str \n", - "0 peri peri chicken satay Snack non-veg: boneles... four \n", - "1 steam bunny chicken bao Japanese non-veg: buns... three \n", - "2 green lentil dessert fudge Dessert veg: whole ... 
five \n", - "3 cashew nut cookies Dessert veg: cashew paste, ... four \n", - "4 christmas tree pizza Italian veg: pizza dough ... one " + "source": [ + "# just checking our text data\n", + "df[\"text\"][0]" ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "df = df.drop([\"User_ID\", \"Describe\", \"Unnamed: 0\", \"Rating_str\"], axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ + }, { - "data": { - "text/html": [ - "<div>
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Food_IDRatingNameC_TypeVeg_Nontext
088.04peri peri chicken sataySnacknon-vegperi peri chicken satay Snack non-veg: boneles...
146.03steam bunny chicken baoJapanesenon-vegsteam bunny chicken bao Japanese non-veg: buns...
224.05green lentil dessert fudgeDessertveggreen lentil dessert fudge Dessert veg: whole ...
325.04cashew nut cookiesDessertvegcashew nut cookies Dessert veg: cashew paste, ...
449.01christmas tree pizzaItalianvegchristmas tree pizza Italian veg: pizza dough ...
\n", - "
" + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "Gpbu9Cw60jHP", + "outputId": "dd1e794c-1ec6-4fad-cf82-956fe9048076", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 310 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 User_ID Food_ID Rating Name C_Type \\\n", + "0 0 1.0 88.0 4.0 peri peri chicken satay Snack \n", + "1 1 1.0 46.0 3.0 steam bunny chicken bao Japanese \n", + "2 2 1.0 24.0 5.0 green lentil dessert fudge Dessert \n", + "3 3 1.0 25.0 4.0 cashew nut cookies Dessert \n", + "4 4 2.0 49.0 1.0 christmas tree pizza Italian \n", + "\n", + " Veg_Non Describe \\\n", + "0 non-veg boneless skinless chicken thigh (trimmed), sal... \n", + "1 non-veg buns, all purpose white flour, dry yeast, suga... \n", + "2 veg whole moong beans, cow ghee, raisins, whole mi... \n", + "3 veg cashew paste, ghee, khaand (a sweetening agent... \n", + "4 veg pizza dough (2 boules), red pepper, red onion,... \n", + "\n", + " text \n", + "0 peri peri chicken satay Snack non-veg: boneles... \n", + "1 steam bunny chicken bao Japanese non-veg: buns... \n", + "2 green lentil dessert fudge Dessert veg: whole ... \n", + "3 cashew nut cookies Dessert veg: cashew paste, ... \n", + "4 christmas tree pizza Italian veg: pizza dough ... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0User_IDFood_IDRatingNameC_TypeVeg_NonDescribetext
001.088.04.0peri peri chicken sataySnacknon-vegboneless skinless chicken thigh (trimmed), sal...peri peri chicken satay Snack non-veg: boneles...
111.046.03.0steam bunny chicken baoJapanesenon-vegbuns, all purpose white flour, dry yeast, suga...steam bunny chicken bao Japanese non-veg: buns...
221.024.05.0green lentil dessert fudgeDessertvegwhole moong beans, cow ghee, raisins, whole mi...green lentil dessert fudge Dessert veg: whole ...
331.025.04.0cashew nut cookiesDessertvegcashew paste, ghee, khaand (a sweetening agent...cashew nut cookies Dessert veg: cashew paste, ...
442.049.01.0christmas tree pizzaItalianvegpizza dough (2 boules), red pepper, red onion,...christmas tree pizza Italian veg: pizza dough ...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 511,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 147,\n \"min\": 0,\n \"max\": 510,\n \"num_unique_values\": 511,\n \"samples\": [\n 124,\n 84,\n 433\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"User_ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28.73921290350848,\n \"min\": 1.0,\n \"max\": 100.0,\n \"num_unique_values\": 100,\n \"samples\": [\n 84.0,\n 54.0,\n 71.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Food_ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 91.29262932706344,\n \"min\": 1.0,\n \"max\": 309.0,\n \"num_unique_values\": 309,\n \"samples\": [\n 210.0,\n 110.0,\n 154.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.8662362487559436,\n \"min\": 1.0,\n \"max\": 10.0,\n \"num_unique_values\": 10,\n \"samples\": [\n 2.0,\n 3.0,\n 9.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 309,\n \"samples\": [\n \"quinoa coconut crumble custard\",\n \"chicken and mushroom lasagna\",\n \"fish with white sauce\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"C_Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"Beverage\",\n \"Snack\",\n \"Thai\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Veg_Non\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"veg\",\n \"non-veg\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n 
\"column\": \"Describe\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 306,\n \"samples\": [\n \"dahi, cumin powder, garlic paste, garam masala, turmeric powder, red chilli powder, salt, boneless chicken, oil, green chilli, onion, tomato\",\n \"chicken, onion, green chilli, garlic, ginger, salt, aromatic powder, soya sauce, oyster sauce, spring onion, filo sheets\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 309,\n \"samples\": [\n \"quinoa coconut crumble custard Dessert veg: Knoia (cooked), oats, cinnamon powder, salt, brown sugar or jaggery, nuts, coconut nuts, eggs, kinoia, coconut milk, maple syrup, vanilla extract, cinnamon powder, salt, honey\",\n \"chicken and mushroom lasagna Italian non-veg: chicken, salt, crush black pepper, garlic cloves (minced), olive oil, fresh thyme, button mushroom, onion, low fat milk, basil, basil-tomato sauce\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 8 + } ], - "text/plain": [ - " Food_ID Rating Name C_Type Veg_Non \\\n", - "0 88.0 4 peri peri chicken satay Snack non-veg \n", - "1 46.0 3 steam bunny chicken bao Japanese non-veg \n", - "2 24.0 5 green lentil dessert fudge Dessert veg \n", - "3 25.0 4 cashew nut cookies Dessert veg \n", - "4 49.0 1 christmas tree pizza Italian veg \n", - "\n", - " text \n", - "0 peri peri chicken satay Snack non-veg: boneles... \n", - "1 steam bunny chicken bao Japanese non-veg: buns... \n", - "2 green lentil dessert fudge Dessert veg: whole ... \n", - "3 cashew nut cookies Dessert veg: cashew paste, ... \n", - "4 christmas tree pizza Italian veg: pizza dough ... 
" + "source": [ + "df.head()" ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "# Saving our data\n", - "df.to_csv(\"final_food_recom_data.csv\")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "# your openai api key for embedding model\n", - "import os\n", - "\n", - "os.environ[\"OPENAI_API_KEY\"] = \"sk-proj-\"" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YaRgiOgw0jHP" + }, + "source": [ + "\n", + "\n", + "To improve accuracy, we should include both numerical and string representations of ratings. First, add a new column, Rating_str, containing the string values for each rating. Then, append both the numerical and string ratings to the text column. This approach increases the chances of achieving better accuracy.\n", + "Small tricks like this are worth experimenting with when you want to improve search accuracy.\n" + ] + }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/akashdesai/anaconda3/envs/qdrant_music/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "2024-07-28 12:06:59.058090: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. 
To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2024-07-28 12:06:59.065225: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", - "2024-07-28 12:06:59.074678: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", - "2024-07-28 12:06:59.077317: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", - "2024-07-28 12:06:59.083950: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", - "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2024-07-28 12:06:59.721092: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" - ] + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "Qe95BUYu0jHP" + }, + "outputs": [], + "source": [ + "# Create a mapping from numbers to strings\n", + "num_to_string = {\n", + " 0.0: \"zero\",\n", + " 1.0: \"one\",\n", + " 2.0: \"two\",\n", + " 3.0: \"three\",\n", + " 4.0: \"four\",\n", + " 5.0: \"five\",\n", + " 6.0: \"six\",\n", + " 7.0: \"seven\",\n", + " 8.0: \"eight\",\n", + " 9.0: \"nine\",\n", + " 10.0: \"ten\",\n", + "}\n", + "# Replace numerical ratings with their string equivalents\n", + "df[\"Rating_str\"] = df[\"Rating\"].map(num_to_string)" + ] }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Food_IDNameRatingC_TypeVeg_Nonvectortext_relevance_score
098chicken potli6Chinesenon-veg[-0.04389098, 0.009811659, -0.026069013, 0.008...chicken potli Chinese non-veg: chicken, onion,...0.694098
1132coffee marinated mutton chops6Thainon-veg[-0.04389098, 0.009811659, -0.026069013, 0.008...coffee marinated mutton chops Thai non-veg: mu...0.670877
2136malabari fish curry6Indiannon-veg[-0.04389098, 0.009811659, -0.026069013, 0.008...malabari fish curry Indian non-veg: sear fish,...0.670778
3128thai lamb balls6Thainon-veg[-0.04389098, 0.009811659, -0.026069013, 0.008...thai lamb balls Thai non-veg: lamb (minced), c...0.668333
\n", - "
" + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "OOrYVHH70jHP" + }, + "outputs": [], + "source": [ + "df[\"Rating\"] = df[\"Rating\"].astype(int)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "ZpdL_9Ki0jHP", + "outputId": "6892b5ee-e6ab-4ea3-a2c5-4c611ea97feb", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 310 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 User_ID Food_ID Rating Name C_Type \\\n", + "0 0 1.0 88.0 4 peri peri chicken satay Snack \n", + "1 1 1.0 46.0 3 steam bunny chicken bao Japanese \n", + "2 2 1.0 24.0 5 green lentil dessert fudge Dessert \n", + "3 3 1.0 25.0 4 cashew nut cookies Dessert \n", + "4 4 2.0 49.0 1 christmas tree pizza Italian \n", + "\n", + " Veg_Non Describe \\\n", + "0 non-veg boneless skinless chicken thigh (trimmed), sal... \n", + "1 non-veg buns, all purpose white flour, dry yeast, suga... \n", + "2 veg whole moong beans, cow ghee, raisins, whole mi... \n", + "3 veg cashew paste, ghee, khaand (a sweetening agent... \n", + "4 veg pizza dough (2 boules), red pepper, red onion,... \n", + "\n", + " text Rating_str \n", + "0 peri peri chicken satay Snack non-veg: boneles... four \n", + "1 steam bunny chicken bao Japanese non-veg: buns... three \n", + "2 green lentil dessert fudge Dessert veg: whole ... five \n", + "3 cashew nut cookies Dessert veg: cashew paste, ... four \n", + "4 christmas tree pizza Italian veg: pizza dough ... one " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0User_IDFood_IDRatingNameC_TypeVeg_NonDescribetextRating_str
001.088.04peri peri chicken sataySnacknon-vegboneless skinless chicken thigh (trimmed), sal...peri peri chicken satay Snack non-veg: boneles...four
111.046.03steam bunny chicken baoJapanesenon-vegbuns, all purpose white flour, dry yeast, suga...steam bunny chicken bao Japanese non-veg: buns...three
221.024.05green lentil dessert fudgeDessertvegwhole moong beans, cow ghee, raisins, whole mi...green lentil dessert fudge Dessert veg: whole ...five
331.025.04cashew nut cookiesDessertvegcashew paste, ghee, khaand (a sweetening agent...cashew nut cookies Dessert veg: cashew paste, ...four
442.049.01christmas tree pizzaItalianvegpizza dough (2 boules), red pepper, red onion,...christmas tree pizza Italian veg: pizza dough ...one
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 511,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 147,\n \"min\": 0,\n \"max\": 510,\n \"num_unique_values\": 511,\n \"samples\": [\n 124,\n 84,\n 433\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"User_ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28.73921290350848,\n \"min\": 1.0,\n \"max\": 100.0,\n \"num_unique_values\": 100,\n \"samples\": [\n 84.0,\n 54.0,\n 71.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Food_ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 91.29262932706344,\n \"min\": 1.0,\n \"max\": 309.0,\n \"num_unique_values\": 309,\n \"samples\": [\n 210.0,\n 110.0,\n 154.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 10,\n \"num_unique_values\": 10,\n \"samples\": [\n 2,\n 3,\n 9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 309,\n \"samples\": [\n \"quinoa coconut crumble custard\",\n \"chicken and mushroom lasagna\",\n \"fish with white sauce\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"C_Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"Beverage\",\n \"Snack\",\n \"Thai\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Veg_Non\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"veg\",\n \"non-veg\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Describe\",\n 
\"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 306,\n \"samples\": [\n \"dahi, cumin powder, garlic paste, garam masala, turmeric powder, red chilli powder, salt, boneless chicken, oil, green chilli, onion, tomato\",\n \"chicken, onion, green chilli, garlic, ginger, salt, aromatic powder, soya sauce, oyster sauce, spring onion, filo sheets\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 309,\n \"samples\": [\n \"quinoa coconut crumble custard Dessert veg: Knoia (cooked), oats, cinnamon powder, salt, brown sugar or jaggery, nuts, coconut nuts, eggs, kinoia, coconut milk, maple syrup, vanilla extract, cinnamon powder, salt, honey\",\n \"chicken and mushroom lasagna Italian non-veg: chicken, salt, crush black pepper, garlic cloves (minced), olive oil, fresh thyme, button mushroom, onion, low fat milk, basil, basil-tomato sauce\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Rating_str\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"two\",\n \"three\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 11 + } ], - "text/plain": [ - " Food_ID Name Rating C_Type Veg_Non \\\n", - "0 98 chicken potli 6 Chinese non-veg \n", - "1 132 coffee marinated mutton chops 6 Thai non-veg \n", - "2 136 malabari fish curry 6 Indian non-veg \n", - "3 128 thai lamb balls 6 Thai non-veg \n", - "\n", - " vector \\\n", - "0 [-0.04389098, 0.009811659, -0.026069013, 0.008... \n", - "1 [-0.04389098, 0.009811659, -0.026069013, 0.008... \n", - "2 [-0.04389098, 0.009811659, -0.026069013, 0.008... \n", - "3 [-0.04389098, 0.009811659, -0.026069013, 0.008... \n", - "\n", - " text _relevance_score \n", - "0 chicken potli Chinese non-veg: chicken, onion,... 
0.694098 \n", - "1 coffee marinated mutton chops Thai non-veg: mu... 0.670877 \n", - "2 malabari fish curry Indian non-veg: sear fish,... 0.670778 \n", - "3 thai lamb balls Thai non-veg: lamb (minced), c... 0.668333 " + "source": [ + "df.head()" ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "\n", - "import lancedb\n", - "from lancedb.embeddings import EmbeddingFunctionRegistry, get_registry\n", - "from lancedb.pydantic import LanceModel, Vector\n", - "from lancedb.rerankers import (\n", - " ColbertReranker,\n", - " JinaReranker,\n", - " CohereReranker,\n", - " LinearCombinationReranker,\n", - ")\n", - "\n", - "\n", - "db = lancedb.connect(\"/tmp/foods\")\n", - "\n", - "# HF sentence transformer embeddings\n", - "registry = EmbeddingFunctionRegistry.get_instance()\n", - "func = registry.get(\"sentence-transformers\").create(device=\"cpu\")\n", - "\n", - "# uncomment below things for openai embeddings\n", - "# openai embeddings\n", - "# func = get_registry().get(\"openai\").create(name=\"text-embedding-ada-002\")\n", - "\n", - "\n", - "class Words(LanceModel):\n", - " text: str = func.SourceField() # Text column is combinations of all columns\n", - " Food_ID: str = func.SourceField() # food id is food store name\n", - " Name: str = func.SourceField() # Name of menu\n", - " Rating: str = func.SourceField() # Rating given by users\n", - " C_Type: str = func.SourceField() # category type of food\n", - " Veg_Non: str = func.SourceField() # type of food its veg or non-veg\n", - " vector: Vector(func.ndims()) = func.VectorField()\n", - "\n", - "\n", - "table = db.create_table(\"food_recommandations\", schema=Words, mode=\"overwrite\")\n", - "table.add(data=df)\n", - "\n", - "# Full text search support\n", - "table.create_fts_index(\"text\", replace=True)\n", - "\n", - "# check our guidance for othe for reranker models https://lancedb.github.io/lancedb/reranking/\n", - "# reranker = 
JinaReranker(api_key=\"key\")\n", - "reranker = ColbertReranker()\n", - "\n", - "query = \" 6 rating non-veg meal \"\n", - "\n", - "# lance_reranker_hybrid = table.search(query, query_type=\"hybrid\").rerank(reranker=reranker).limit(5).to_pandas() # use Hybrid search also\n", - "lance_reranker_fts = (\n", - " table.search(query, query_type=\"fts\").rerank(reranker=reranker).limit(4).to_pandas()\n", - ")\n", - "\n", - "lance_reranker_fts" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " Food_ID Name C_Type Veg_Non Rating\n", - "0 303 red rice Healthy Food veg 6\n", - "1 10 broccoli and almond soup Healthy Food veg 6\n", - "2 10 broccoli and almond soup Healthy Food veg 6\n", - "3 36 spicy watermelon soup Healthy Food veg 6\n" - ] - } - ], - "source": [ - "# recommendations\n", - "def get_recommendations(query):\n", - " results = (\n", - " table.search(query, query_type=\"fts\")\n", - " .rerank(reranker=reranker)\n", - " .limit(4)\n", - " .to_pandas()\n", - " )\n", - " return results[[\"Food_ID\", \"Name\", \"C_Type\", \"Veg_Non\", \"Rating\"]]\n", - "\n", - "\n", - "# Example usage\n", - "query = \"give me rating 6 non-veg food \"\n", - "recommendations = get_recommendations(query)\n", - "print(recommendations)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 12, + "metadata": { + "id": "KFL67joG0jHP" + }, + "outputs": [], + "source": [ + "df[\"text\"] = df.apply(\n", + " lambda row: f\"{row['text']} rating: {row['Rating']} {row['Rating_str']}\", axis=1\n", + ")" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " Food_ID Name C_Type Veg_Non \\\n", - "0 247 microwave chicken steak Healthy Food non-veg \n", - "1 87 roasted spring chicken with root veggies Healthy Food non-veg \n", - "2 86 roast turkey with cranberry sauce Healthy Food non-veg 
\n", - "3 86 roast turkey with cranberry sauce Healthy Food non-veg \n", - "\n", - " Rating \n", - "0 5 \n", - "1 8 \n", - "2 4 \n", - "3 4 \n" - ] - } - ], - "source": [ - "# Example usage\n", - "query = \"Non veg food near me \"\n", - "recommendations = get_recommendations(query)\n", - "print(recommendations)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "og--7JRE0jHP", + "outputId": "38f3ef28-2bb3-43bc-a43f-de1e60f3d869", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 310 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 User_ID Food_ID Rating Name C_Type \\\n", + "0 0 1.0 88.0 4 peri peri chicken satay Snack \n", + "1 1 1.0 46.0 3 steam bunny chicken bao Japanese \n", + "2 2 1.0 24.0 5 green lentil dessert fudge Dessert \n", + "3 3 1.0 25.0 4 cashew nut cookies Dessert \n", + "4 4 2.0 49.0 1 christmas tree pizza Italian \n", + "\n", + " Veg_Non Describe \\\n", + "0 non-veg boneless skinless chicken thigh (trimmed), sal... \n", + "1 non-veg buns, all purpose white flour, dry yeast, suga... \n", + "2 veg whole moong beans, cow ghee, raisins, whole mi... \n", + "3 veg cashew paste, ghee, khaand (a sweetening agent... \n", + "4 veg pizza dough (2 boules), red pepper, red onion,... \n", + "\n", + " text Rating_str \n", + "0 peri peri chicken satay Snack non-veg: boneles... four \n", + "1 steam bunny chicken bao Japanese non-veg: buns... three \n", + "2 green lentil dessert fudge Dessert veg: whole ... five \n", + "3 cashew nut cookies Dessert veg: cashew paste, ... four \n", + "4 christmas tree pizza Italian veg: pizza dough ... one " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0User_IDFood_IDRatingNameC_TypeVeg_NonDescribetextRating_str
001.088.04peri peri chicken sataySnacknon-vegboneless skinless chicken thigh (trimmed), sal...peri peri chicken satay Snack non-veg: boneles...four
111.046.03steam bunny chicken baoJapanesenon-vegbuns, all purpose white flour, dry yeast, suga...steam bunny chicken bao Japanese non-veg: buns...three
221.024.05green lentil dessert fudgeDessertvegwhole moong beans, cow ghee, raisins, whole mi...green lentil dessert fudge Dessert veg: whole ...five
331.025.04cashew nut cookiesDessertvegcashew paste, ghee, khaand (a sweetening agent...cashew nut cookies Dessert veg: cashew paste, ...four
442.049.01christmas tree pizzaItalianvegpizza dough (2 boules), red pepper, red onion,...christmas tree pizza Italian veg: pizza dough ...one
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 511,\n \"fields\": [\n {\n \"column\": \"Unnamed: 0\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 147,\n \"min\": 0,\n \"max\": 510,\n \"num_unique_values\": 511,\n \"samples\": [\n 124,\n 84,\n 433\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"User_ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28.73921290350848,\n \"min\": 1.0,\n \"max\": 100.0,\n \"num_unique_values\": 100,\n \"samples\": [\n 84.0,\n 54.0,\n 71.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Food_ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 91.29262932706344,\n \"min\": 1.0,\n \"max\": 309.0,\n \"num_unique_values\": 309,\n \"samples\": [\n 210.0,\n 110.0,\n 154.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 10,\n \"num_unique_values\": 10,\n \"samples\": [\n 2,\n 3,\n 9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 309,\n \"samples\": [\n \"quinoa coconut crumble custard\",\n \"chicken and mushroom lasagna\",\n \"fish with white sauce\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"C_Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"Beverage\",\n \"Snack\",\n \"Thai\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Veg_Non\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"veg\",\n \"non-veg\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Describe\",\n 
\"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 306,\n \"samples\": [\n \"dahi, cumin powder, garlic paste, garam masala, turmeric powder, red chilli powder, salt, boneless chicken, oil, green chilli, onion, tomato\",\n \"chicken, onion, green chilli, garlic, ginger, salt, aromatic powder, soya sauce, oyster sauce, spring onion, filo sheets\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 476,\n \"samples\": [\n \"egg and cheddar cheese sandwich Mexican non-veg: egg, salt, pepper, ham slices, basil leaves rating: 2 two\",\n \"balti meat Mexican non-veg: refined oil, black cardamoms, green cardamoms, mace, clove, cinnamon stick, black pepper corn, ginger garlic paste, ginger, green chilies, mutton curry cut, brown onion paste, salt, kashmiri red chili powder, tomato puree, garam masala powder, coriander powder, cumin powder rating: 5 five\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Rating_str\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"two\",\n \"three\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 13 + } + ], + "source": [ + "df.head()" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " Food_ID Name C_Type Veg_Non Rating\n", - "0 81 fruit infused tea Beverage veg 8\n", - "1 232 apple and walnut cake Dessert veg 8\n", - "2 292 chicken tikka Indian non-veg 8\n", - "3 69 banana and maple ice lollies Dessert veg 8\n" - ] - } - ], - "source": [ - "query = \" rating 8 \"\n", - "recommendations = get_recommendations(query)\n", - "print(recommendations)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "vJw2H6IR0jHP" + }, + 
"outputs": [], + "source": [ + "df = df.drop([\"User_ID\", \"Describe\", \"Unnamed: 0\", \"Rating_str\"], axis=1)" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " Food_ID Name C_Type \\\n", - "0 185 red wine braised mushroom flatbread Italian \n", - "1 142 fish skewers with coriander and red wine vineg... Thai \n", - "2 85 garlic and pinenut soup with burnt butter essence French \n", - "3 85 garlic and pinenut soup with burnt butter essence French \n", - "\n", - " Veg_Non Rating \n", - "0 veg 7 \n", - "1 non-veg 6 \n", - "2 veg 10 \n", - "3 veg 3 \n" - ] - } - ], - "source": [ - "query = \"red wine with chicken\"\n", - "recommendations = get_recommendations(query)\n", - "print(recommendations)\n", - "# here we have only one non veg with rating 9 so getting" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "_nKH8yiS0jHQ", + "outputId": "02e40010-65ac-4630-9895-760c73c8ff81", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Food_ID Rating Name C_Type Veg_Non \\\n", + "0 88.0 4 peri peri chicken satay Snack non-veg \n", + "1 46.0 3 steam bunny chicken bao Japanese non-veg \n", + "2 24.0 5 green lentil dessert fudge Dessert veg \n", + "3 25.0 4 cashew nut cookies Dessert veg \n", + "4 49.0 1 christmas tree pizza Italian veg \n", + "\n", + " text \n", + "0 peri peri chicken satay Snack non-veg: boneles... \n", + "1 steam bunny chicken bao Japanese non-veg: buns... \n", + "2 green lentil dessert fudge Dessert veg: whole ... \n", + "3 cashew nut cookies Dessert veg: cashew paste, ... \n", + "4 christmas tree pizza Italian veg: pizza dough ... " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Food_IDRatingNameC_TypeVeg_Nontext
088.04peri peri chicken sataySnacknon-vegperi peri chicken satay Snack non-veg: boneles...
146.03steam bunny chicken baoJapanesenon-vegsteam bunny chicken bao Japanese non-veg: buns...
224.05green lentil dessert fudgeDessertveggreen lentil dessert fudge Dessert veg: whole ...
325.04cashew nut cookiesDessertvegcashew nut cookies Dessert veg: cashew paste, ...
449.01christmas tree pizzaItalianvegchristmas tree pizza Italian veg: pizza dough ...
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 511,\n \"fields\": [\n {\n \"column\": \"Food_ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 91.29262932706344,\n \"min\": 1.0,\n \"max\": 309.0,\n \"num_unique_values\": 309,\n \"samples\": [\n 210.0,\n 110.0,\n 154.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 10,\n \"num_unique_values\": 10,\n \"samples\": [\n 2,\n 3,\n 9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 309,\n \"samples\": [\n \"quinoa coconut crumble custard\",\n \"chicken and mushroom lasagna\",\n \"fish with white sauce\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"C_Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"Beverage\",\n \"Snack\",\n \"Thai\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Veg_Non\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"veg\",\n \"non-veg\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 476,\n \"samples\": [\n \"egg and cheddar cheese sandwich Mexican non-veg: egg, salt, pepper, ham slices, basil leaves rating: 2 two\",\n \"balti meat Mexican non-veg: refined oil, black cardamoms, green cardamoms, mace, clove, cinnamon stick, black pepper corn, ginger garlic paste, ginger, green chilies, mutton curry cut, brown onion paste, salt, kashmiri red chili powder, tomato puree, garam masala powder, coriander powder, cumin powder rating: 5 five\"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "df.head()" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " Food_ID Name C_Type \\\n", - "0 303 red rice Healthy Food \n", - "1 10 broccoli and almond soup Healthy Food \n", - "2 36 spicy watermelon soup Healthy Food \n", - "3 221 amaranthus granola with lemon yogurt, berries ... Healthy Food \n", - "\n", - " Veg_Non Rating \n", - "0 veg 6 \n", - "1 veg 6 \n", - "2 veg 6 \n", - "3 veg 6 \n" - ] - } - ], - "source": [ - "query = \"veg food with rating 6\"\n", - "recommendations = get_recommendations(query)\n", - "print(recommendations)\n", - "# here we have only one non veg with rating 9 so getting" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "0WYyBCxO0jHQ" + }, + "outputs": [], + "source": [ + "# Saving our data\n", + "df.to_csv(\"final_food_recom_data.csv\")" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " Food_ID Name C_Type Veg_Non Rating\n", - "0 301 brown rice Healthy Food veg 1\n", - "1 300 black rice Healthy Food veg 9\n", - "2 270 jalapeno cheese fingers Mexican veg 3\n", - "3 270 jalapeno cheese fingers Mexican veg 5\n" - ] - } - ], - "source": [ - "query = \" veg food menu only\"\n", - "recommendations = get_recommendations(query)\n", - "print(recommendations)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "VPe57Cdi0jHQ" + }, + "outputs": [], + "source": [ + "# your openai api key for embedding model\n", + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"sk-proj-...\"" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " Food_ID Name C_Type Veg_Non Rating\n", - "0 100 spicy chicken curry Indian 
non-veg 3\n", - "1 100 spicy chicken curry Indian non-veg 4\n", - "2 100 spicy chicken curry Indian non-veg 1\n", - "3 93 buldak (hot and spicy chicken) Japanese non-veg 7\n" - ] - } - ], - "source": [ - "# Example usage\n", - "query = \"rice with chicken spicy \"\n", - "recommendations = get_recommendations(query)\n", - "print(recommendations)" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 22, + "metadata": { + "id": "0A70NyNi0jHQ", + "outputId": "02c25ade-1eb9-4bd6-9d4c-819f3c72865c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 564, + "referenced_widgets": [ + "5617a03f618a49efb8908f44b57afbc0", + "dcfc290e92394b3da06e40f123f9a709", + "7d817d9597ec4df38d6cf9c178ebc16a", + "93d2c5708f82459a9d7a05572cb70a9a", + "72de8d5ccbef4ff0a9ee81bb60c50f1f", + "d2a6af0bb3d343bcb21495e434021aa5", + "e64498dd07a54bb99948486823ade8c2", + "388808de14d64303942fe90efcafacdd", + "8568417b14b24ebda58c348389350a22", + "ac8bce8a1c234c33b0354106de2ead2f", + "51bbe2e653ae4950927023bf7a4e8042", + "e0c8ea4ec1844377acbf319014da8687", + "159e1a00f6c846a7b7a35902282320ac", + "f721c2e5389a495b9547aad9f2ea76aa", + "5afd1d45d8b24ac1a06252f167c0f88d", + "952b6d7076fe444cabddcf6dfc6c7697", + "2fbaca795fc24e3baafd01028a2b8986", + "e4d47f5c89ef43128c948f0084ba5798", + "52971bfe5f8e4280a22dd814999c27eb", + "2ed79e95f7f44d73928d379de4b5e1e5", + "33e193c74644441295c99d9018abe9f1", + "ae3e9859738a4de98622a3b3b8ce7ac9", + "36fa727aa62048c7915c8de34c088366", + "7075eb30430742e0ac1f9c76359f40fe", + "ae771d2fff5048f7a8d37aa19de411e8", + "2c382893b9464bd884484bc262cdea73", + "21aab2c82852484ea58455d1cf7e4122", + "953c8404e8a74cc49c1d3d135f48da32", + "e2f8969c6539412bb69015231b832e64", + "e6728748eb5b4672b629edd0205c259b", + "dda9c0032c1b4946902eeff5d8bb76b8", + "412997ac3a4b4815933ef9195857dcf8", + "f7d6657280fe492795ea11fc79814952", + "ab63bfda6513499a9e71b161eb70b27f", + 
"751501adf88c4106acf7c6eebe2a70f7", + "979da24eabb74c98a6f0595988332e8a", + "c5fe871c7c07405ca9b3e0811f3617db", + "cc1038b3bc5e4be693dd9f005bd9ea20", + "3b82f46a3483417393e4f82d25a81dfa", + "ae6e2314867342ba9e97a62cb68d0f50", + "d2b272ba8fcc4eeeac15b581efd961b2", + "8ab35c240f2740468f678979038a3049", + "0f76d6d9f35c4fd2b7469c5c83db3ff8", + "9b72e7c5c8aa4347a4505926c1aad1e9", + "baddae2826b34b4c9c217c0d51f406c2", + "0a3ccc0df0af47078440367e03f55c70", + "f1a38aa3d4e6497e9c8c2e0acd95df2c", + "bc16c3b2cd44403d848c8027ce760e09", + "0d7fab4c6a3c45ab96681e1e6039ee11", + "a6f4872f1cf64130afdf158892431893", + "f47877116de8448e934f30203153ca1c", + "43412797c5fd4884b5f3f282d0d53223", + "57987d85833b467aa567a7302e327408", + "5013e74af88a468daac9801bb6dab438", + "cf90e1e7f7d44002b8c0169ea7acbaa0", + "7dbe2cbb8f8f417dbfeb3e23365f888a", + "6bbdc3ced13f4ed0bdc82427c1149044", + "6d88a569e9b64803a19dbfa6205779a4", + "69364eb2855d4735a736cbb47d6de6bc", + "8c5ab668e16946d898bd356c3cc9a431", + "12c2fde4bc64423582bfcaee1f668721", + "288ba00f9d684d5d99b740a945672582", + "b9b5139683d14cbf9ccaf4c5d71f65c7", + "4defb67a0d964fe5ac5af836d5301099", + "29d2b10708d340a480d1c692fdee8f5f", + "32a463a1967f4027bfdbf7ba8e44b99b" + ] + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Loading ColBERTRanker model colbert-ir/colbertv2.0 (this message can be suppressed by setting verbose=0)\n", + "No device set\n", + "Using device cpu\n", + "No dtype set\n", + "Using dtype torch.float32\n", + "Loading model colbert-ir/colbertv2.0, this might take a while...\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "tokenizer_config.json: 0%| | 0.00/405 [00:00\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
textFood_IDNameRatingC_TypeVeg_Nonvector_relevance_score
0chicken potli Chinese non-veg: chicken, onion,...98chicken potli6Chinesenon-veg[-0.04389097, 0.009811673, -0.026068995, 0.008...1.053606
1coffee marinated mutton chops Thai non-veg: mu...132coffee marinated mutton chops6Thainon-veg[-0.04389097, 0.009811673, -0.026068995, 0.008...1.046465
2malabari fish curry Indian non-veg: sear fish,...136malabari fish curry6Indiannon-veg[-0.04389097, 0.009811673, -0.026068995, 0.008...1.000582
3thai lamb balls Thai non-veg: lamb (minced), c...128thai lamb balls6Thainon-veg[-0.04389097, 0.009811673, -0.026068995, 0.008...0.973492
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "
\n", + " \n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "lance_reranker_fts", + "summary": "{\n \"name\": \"lance_reranker_fts\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"coffee marinated mutton chops Thai non-veg: mutton chops, espresso, honey, balsamic vinegar, rosemary, pink peppercorns (crushed), olive oil, salt rating: 6 six\",\n \"thai lamb balls Thai non-veg: lamb (minced), couscous, scallion, garlic, egg, parsley, olive oil, mint, ao nori herb, salt, five spice, cinnamon powder rating: 6 six\",\n \"chicken potli Chinese non-veg: chicken, onion, green chilli, garlic, ginger, salt, aromatic powder, soya sauce, oyster sauce, spring onion, filo sheets rating: 6 six\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Food_ID\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"132\",\n \"128\",\n \"98\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Name\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"coffee marinated mutton chops\",\n \"thai lamb balls\",\n \"chicken potli\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Rating\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"6\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"C_Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Chinese\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Veg_Non\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"non-veg\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n 
{\n \"column\": \"vector\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"_relevance_score\",\n \"properties\": {\n \"dtype\": \"float32\",\n \"num_unique_values\": 4,\n \"samples\": [\n 1.0464649200439453\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 22 + } + ], + "source": [ + "import numpy as np\n", + "\n", + "import lancedb\n", + "from lancedb.embeddings import EmbeddingFunctionRegistry, get_registry\n", + "from lancedb.pydantic import LanceModel, Vector\n", + "from lancedb.rerankers import (\n", + " ColbertReranker,\n", + " JinaReranker,\n", + " CohereReranker,\n", + " LinearCombinationReranker,\n", + ")\n", + "\n", + "\n", + "db = lancedb.connect(\"/tmp/foods\")\n", + "\n", + "# HF sentence transformer embeddings\n", + "registry = EmbeddingFunctionRegistry.get_instance()\n", + "func = registry.get(\"sentence-transformers\").create(device=\"cpu\")\n", + "\n", + "# uncomment below things for openai embeddings\n", + "# openai embeddings\n", + "# func = get_registry().get(\"openai\").create(name=\"text-embedding-ada-002\")\n", + "\n", + "\n", + "class Words(LanceModel):\n", + " text: str = func.SourceField() # Text column is combinations of all columns\n", + " Food_ID: str = func.SourceField() # food id is food store name\n", + " Name: str = func.SourceField() # Name of menu\n", + " Rating: str = func.SourceField() # Rating given by users\n", + " C_Type: str = func.SourceField() # category type of food\n", + " Veg_Non: str = func.SourceField() # type of food its veg or non-veg\n", + " vector: Vector(func.ndims()) = func.VectorField()\n", + "\n", + "\n", + "table = db.create_table(\"food_recommandations\", schema=Words, mode=\"overwrite\")\n", + "table.add(data=df)\n", + "\n", + "# Full text search support\n", + "table.create_fts_index(\"text\", replace=True)\n", + "\n", + "# check our guidance for othe for 
reranker models https://lancedb.github.io/lancedb/reranking/\n", + "# reranker = JinaReranker(api_key=\"key\")\n", + "reranker = ColbertReranker()\n", + "\n", + "query = \" 6 rating non-veg meal \"\n", + "\n", + "# lance_reranker_hybrid = table.search(query, query_type=\"hybrid\").rerank(reranker=reranker).limit(5).to_pandas() # use Hybrid search also\n", + "lance_reranker_fts = (\n", + " table.search(query, query_type=\"fts\").rerank(reranker=reranker).limit(4).to_pandas()\n", + ")\n", + "\n", + "lance_reranker_fts" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " Food_ID Name C_Type Veg_Non Rating\n", - "0 83 spiced coffee Beverage veg 9\n", - "1 84 filter coffee Beverage veg 10\n", - "2 84 filter coffee Beverage veg 10\n", - "3 84 filter coffee Beverage veg 2\n" - ] - } - ], - "source": [ - "# Example usage\n", - "query = \"coffee \"\n", - "recommendations = get_recommendations(query)\n", - "print(recommendations)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ + "cell_type": "code", + "execution_count": 23, + "metadata": { + "id": "fAGmRvYb0jHQ", + "outputId": "90e975ed-7f52-426e-cb47-26245d415c74", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Food_ID Name C_Type Veg_Non Rating\n", + "0 303 red rice Healthy Food veg 6\n", + "1 10 broccoli and almond soup Healthy Food veg 6\n", + "2 10 broccoli and almond soup Healthy Food veg 6\n", + "3 36 spicy watermelon soup Healthy Food veg 6\n" + ] + } + ], + "source": [ + "# recommendations\n", + "def get_recommendations(query):\n", + " results = (\n", + " table.search(query, query_type=\"fts\")\n", + " .rerank(reranker=reranker)\n", + " .limit(4)\n", + " .to_pandas()\n", + " )\n", + " return results[[\"Food_ID\", \"Name\", \"C_Type\", \"Veg_Non\", \"Rating\"]]\n", + "\n", + "\n", + "# Example usage\n", + "query = \"give me rating 6 non-veg food \"\n", + 
"recommendations = get_recommendations(query)\n", + "print(recommendations)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "id": "6nfC8__40jHQ", + "outputId": "c7e042ab-2b03-4616-c034-931b1ab33146", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Food_ID Name C_Type Veg_Non \\\n", + "0 87 roasted spring chicken with root veggies Healthy Food non-veg \n", + "1 247 microwave chicken steak Healthy Food non-veg \n", + "2 86 roast turkey with cranberry sauce Healthy Food non-veg \n", + "3 86 roast turkey with cranberry sauce Healthy Food non-veg \n", + "\n", + " Rating \n", + "0 8 \n", + "1 5 \n", + "2 4 \n", + "3 4 \n" + ] + } + ], + "source": [ + "# Example usage\n", + "query = \"Non veg food near me \"\n", + "recommendations = get_recommendations(query)\n", + "print(recommendations)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "id": "RthlD1oN0jHQ", + "outputId": "7505e8a8-8501-4226-9dad-2a6d68ffa9e4", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Food_ID Name C_Type Veg_Non Rating\n", + "0 292 chicken tikka Indian non-veg 8\n", + "1 69 banana and maple ice lollies Dessert veg 8\n", + "2 232 apple and walnut cake Dessert veg 8\n", + "3 81 fruit infused tea Beverage veg 8\n" + ] + } + ], + "source": [ + "query = \" rating 8 \"\n", + "recommendations = get_recommendations(query)\n", + "print(recommendations)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "id": "ztmZTBHx0jHQ", + "outputId": "daed433f-eaf3-4b20-b503-50cb5f9bd18f", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Food_ID Name C_Type \\\n", + "0 142 fish skewers with coriander and red wine vineg... 
Thai \n", + "1 185 red wine braised mushroom flatbread Italian \n", + "2 85 garlic and pinenut soup with burnt butter essence French \n", + "3 85 garlic and pinenut soup with burnt butter essence French \n", + "\n", + " Veg_Non Rating \n", + "0 non-veg 6 \n", + "1 veg 7 \n", + "2 veg 3 \n", + "3 veg 10 \n" + ] + } + ], + "source": [ + "query = \"red wine with chicken\"\n", + "recommendations = get_recommendations(query)\n", + "print(recommendations)\n", + "# here we have only one non veg with rating 9 so getting" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "id": "uxs1K-7G0jHQ", + "outputId": "ae252eff-da5d-4ca3-9b22-270926cd868b", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Food_ID Name C_Type \\\n", + "0 303 red rice Healthy Food \n", + "1 10 broccoli and almond soup Healthy Food \n", + "2 36 spicy watermelon soup Healthy Food \n", + "3 221 amaranthus granola with lemon yogurt, berries ... 
Healthy Food \n", + "\n", + " Veg_Non Rating \n", + "0 veg 6 \n", + "1 veg 6 \n", + "2 veg 6 \n", + "3 veg 6 \n" + ] + } + ], + "source": [ + "query = \"veg food with rating 6\"\n", + "recommendations = get_recommendations(query)\n", + "print(recommendations)\n", + "# here we have only one non veg with rating 9 so getting" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "dV36C2-T0jHQ", + "outputId": "45e58d29-fa6c-4189-ec6f-52c70582471e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Food_ID Name C_Type Veg_Non Rating\n", + "0 270 jalapeno cheese fingers Mexican veg 3\n", + "1 270 jalapeno cheese fingers Mexican veg 5\n", + "2 301 brown rice Healthy Food veg 1\n", + "3 300 black rice Healthy Food veg 9\n" + ] + } + ], + "source": [ + "query = \" veg food menu only\"\n", + "recommendations = get_recommendations(query)\n", + "print(recommendations)" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - " Food_ID Name C_Type Veg_Non Rating\n", - "0 162 prawn potato soup Thai veg 9\n", - "1 79 beetroot and green apple soup Healthy Food veg 1\n", - "2 302 koldil chicken Chinese non-veg 5\n", - "3 298 chicken 65 Chinese non-veg 4\n" - ] + "cell_type": "code", + "execution_count": 29, + "metadata": { + "id": "EPv5ww2t0jHR", + "outputId": "ca0afb2c-91f3-4949-9816-21897da9b0f1", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Food_ID Name C_Type Veg_Non Rating\n", + "0 93 buldak (hot and spicy chicken) Japanese non-veg 7\n", + "1 100 spicy chicken curry Indian non-veg 3\n", + "2 100 spicy chicken curry Indian non-veg 4\n", + "3 100 spicy chicken curry Indian non-veg 1\n" + ] + } + ], + "source": [ + "# Example usage\n", + "query = \"rice with chicken spicy \"\n", + "recommendations = get_recommendations(query)\n", + 
"print(recommendations)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "id": "_VocfLe30jHR", + "outputId": "77eeef96-24f8-4612-f3ad-00a7c6080ddf", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Food_ID Name C_Type Veg_Non Rating\n", + "0 83 spiced coffee Beverage veg 9\n", + "1 84 filter coffee Beverage veg 10\n", + "2 84 filter coffee Beverage veg 10\n", + "3 84 filter coffee Beverage veg 2\n" + ] + } + ], + "source": [ + "# Example usage\n", + "query = \"coffee \"\n", + "recommendations = get_recommendations(query)\n", + "print(recommendations)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "id": "fJ76-Qen0jHR", + "outputId": "6df1ed2f-65c3-4e2e-ef6e-e670c0e13afb", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Food_ID Name C_Type Veg_Non Rating\n", + "0 162 prawn potato soup Thai veg 9\n", + "1 79 beetroot and green apple soup Healthy Food veg 1\n", + "2 302 koldil chicken Chinese non-veg 5\n", + "3 298 chicken 65 Chinese non-veg 4\n" + ] + } + ], + "source": [ + "# Example usage\n", + "query = \"soup chinese please\"\n", + "recommendations = get_recommendations(query)\n", + "print(recommendations)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BiUusCHU0jHR" + }, + "source": [ + "\n", + "---\n", + "\n", + "Due to limited data, there may be instances where mixed results are returned, especially with a recommendation limit set to 4. The key to achieving better results lies in how you prepare your text data and optimize various hyperparameters, such as query types (hybrid, FTS, vector search). Additionally, experiment with different reranker methods. 
For further improvements, refer to our vector recipe repository for enhancing RAG methods and consult the LanceDB documentation for more details.\n", + "docs: https://lancedb.github.io/lancedb/search/\n", + "more such genai projects:https://github.com/lancedb/vectordb-recipes\n", + "\n", + "---" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "qdrant_music", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + }, + "colab": { + "provenance": [] + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "5617a03f618a49efb8908f44b57afbc0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_dcfc290e92394b3da06e40f123f9a709", + "IPY_MODEL_7d817d9597ec4df38d6cf9c178ebc16a", + "IPY_MODEL_93d2c5708f82459a9d7a05572cb70a9a" + ], + "layout": "IPY_MODEL_72de8d5ccbef4ff0a9ee81bb60c50f1f" + } + }, + "dcfc290e92394b3da06e40f123f9a709": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_d2a6af0bb3d343bcb21495e434021aa5", + "placeholder": "​", + "style": "IPY_MODEL_e64498dd07a54bb99948486823ade8c2", + "value": "tokenizer_config.json: 100%" + } + }, + "7d817d9597ec4df38d6cf9c178ebc16a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_388808de14d64303942fe90efcafacdd", + "max": 405, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8568417b14b24ebda58c348389350a22", + "value": 405 + } + }, + "93d2c5708f82459a9d7a05572cb70a9a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ac8bce8a1c234c33b0354106de2ead2f", + "placeholder": "​", + "style": "IPY_MODEL_51bbe2e653ae4950927023bf7a4e8042", + "value": " 405/405 [00:00<00:00, 21.4kB/s]" + } + }, + "72de8d5ccbef4ff0a9ee81bb60c50f1f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + 
"align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d2a6af0bb3d343bcb21495e434021aa5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": 
null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e64498dd07a54bb99948486823ade8c2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "388808de14d64303942fe90efcafacdd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8568417b14b24ebda58c348389350a22": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ac8bce8a1c234c33b0354106de2ead2f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "51bbe2e653ae4950927023bf7a4e8042": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + 
}, + "e0c8ea4ec1844377acbf319014da8687": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_159e1a00f6c846a7b7a35902282320ac", + "IPY_MODEL_f721c2e5389a495b9547aad9f2ea76aa", + "IPY_MODEL_5afd1d45d8b24ac1a06252f167c0f88d" + ], + "layout": "IPY_MODEL_952b6d7076fe444cabddcf6dfc6c7697" + } + }, + "159e1a00f6c846a7b7a35902282320ac": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_2fbaca795fc24e3baafd01028a2b8986", + "placeholder": "​", + "style": "IPY_MODEL_e4d47f5c89ef43128c948f0084ba5798", + "value": "vocab.txt: 100%" + } + }, + "f721c2e5389a495b9547aad9f2ea76aa": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_52971bfe5f8e4280a22dd814999c27eb", + "max": 231508, + "min": 0, + "orientation": "horizontal", + 
"style": "IPY_MODEL_2ed79e95f7f44d73928d379de4b5e1e5", + "value": 231508 + } + }, + "5afd1d45d8b24ac1a06252f167c0f88d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_33e193c74644441295c99d9018abe9f1", + "placeholder": "​", + "style": "IPY_MODEL_ae3e9859738a4de98622a3b3b8ce7ac9", + "value": " 232k/232k [00:00<00:00, 9.10MB/s]" + } + }, + "952b6d7076fe444cabddcf6dfc6c7697": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } 
+ }, + "2fbaca795fc24e3baafd01028a2b8986": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e4d47f5c89ef43128c948f0084ba5798": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "52971bfe5f8e4280a22dd814999c27eb": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + 
"_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "2ed79e95f7f44d73928d379de4b5e1e5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "33e193c74644441295c99d9018abe9f1": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + 
"flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae3e9859738a4de98622a3b3b8ce7ac9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "36fa727aa62048c7915c8de34c088366": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_7075eb30430742e0ac1f9c76359f40fe", + "IPY_MODEL_ae771d2fff5048f7a8d37aa19de411e8", + "IPY_MODEL_2c382893b9464bd884484bc262cdea73" + ], + "layout": "IPY_MODEL_21aab2c82852484ea58455d1cf7e4122" + } + }, + "7075eb30430742e0ac1f9c76359f40fe": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + 
"state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_953c8404e8a74cc49c1d3d135f48da32", + "placeholder": "​", + "style": "IPY_MODEL_e2f8969c6539412bb69015231b832e64", + "value": "tokenizer.json: 100%" + } + }, + "ae771d2fff5048f7a8d37aa19de411e8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_e6728748eb5b4672b629edd0205c259b", + "max": 466081, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_dda9c0032c1b4946902eeff5d8bb76b8", + "value": 466081 + } + }, + "2c382893b9464bd884484bc262cdea73": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_412997ac3a4b4815933ef9195857dcf8", + "placeholder": "​", + "style": "IPY_MODEL_f7d6657280fe492795ea11fc79814952", + "value": " 466k/466k [00:00<00:00, 7.40MB/s]" + } + }, + "21aab2c82852484ea58455d1cf7e4122": { + "model_module": 
"@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "953c8404e8a74cc49c1d3d135f48da32": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + 
"grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "e2f8969c6539412bb69015231b832e64": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "e6728748eb5b4672b629edd0205c259b": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + 
"overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "dda9c0032c1b4946902eeff5d8bb76b8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "412997ac3a4b4815933ef9195857dcf8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f7d6657280fe492795ea11fc79814952": { + "model_module": "@jupyter-widgets/controls", + "model_name": 
"DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ab63bfda6513499a9e71b161eb70b27f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_751501adf88c4106acf7c6eebe2a70f7", + "IPY_MODEL_979da24eabb74c98a6f0595988332e8a", + "IPY_MODEL_c5fe871c7c07405ca9b3e0811f3617db" + ], + "layout": "IPY_MODEL_cc1038b3bc5e4be693dd9f005bd9ea20" + } + }, + "751501adf88c4106acf7c6eebe2a70f7": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3b82f46a3483417393e4f82d25a81dfa", + "placeholder": "​", + "style": "IPY_MODEL_ae6e2314867342ba9e97a62cb68d0f50", + "value": "special_tokens_map.json: 100%" + } + }, + "979da24eabb74c98a6f0595988332e8a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", 
+ "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d2b272ba8fcc4eeeac15b581efd961b2", + "max": 112, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_8ab35c240f2740468f678979038a3049", + "value": 112 + } + }, + "c5fe871c7c07405ca9b3e0811f3617db": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0f76d6d9f35c4fd2b7469c5c83db3ff8", + "placeholder": "​", + "style": "IPY_MODEL_9b72e7c5c8aa4347a4505926c1aad1e9", + "value": " 112/112 [00:00<00:00, 4.14kB/s]" + } + }, + "cc1038b3bc5e4be693dd9f005bd9ea20": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3b82f46a3483417393e4f82d25a81dfa": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ae6e2314867342ba9e97a62cb68d0f50": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "d2b272ba8fcc4eeeac15b581efd961b2": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8ab35c240f2740468f678979038a3049": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0f76d6d9f35c4fd2b7469c5c83db3ff8": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9b72e7c5c8aa4347a4505926c1aad1e9": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "baddae2826b34b4c9c217c0d51f406c2": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", 
+ "children": [ + "IPY_MODEL_0a3ccc0df0af47078440367e03f55c70", + "IPY_MODEL_f1a38aa3d4e6497e9c8c2e0acd95df2c", + "IPY_MODEL_bc16c3b2cd44403d848c8027ce760e09" + ], + "layout": "IPY_MODEL_0d7fab4c6a3c45ab96681e1e6039ee11" + } + }, + "0a3ccc0df0af47078440367e03f55c70": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a6f4872f1cf64130afdf158892431893", + "placeholder": "​", + "style": "IPY_MODEL_f47877116de8448e934f30203153ca1c", + "value": "config.json: 100%" + } + }, + "f1a38aa3d4e6497e9c8c2e0acd95df2c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_43412797c5fd4884b5f3f282d0d53223", + "max": 743, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_57987d85833b467aa567a7302e327408", + "value": 743 + } + }, + "bc16c3b2cd44403d848c8027ce760e09": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5013e74af88a468daac9801bb6dab438", + "placeholder": "​", + "style": "IPY_MODEL_cf90e1e7f7d44002b8c0169ea7acbaa0", + "value": " 743/743 [00:00<00:00, 23.5kB/s]" + } + }, + "0d7fab4c6a3c45ab96681e1e6039ee11": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a6f4872f1cf64130afdf158892431893": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": 
null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f47877116de8448e934f30203153ca1c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "43412797c5fd4884b5f3f282d0d53223": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + 
"grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "57987d85833b467aa567a7302e327408": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "5013e74af88a468daac9801bb6dab438": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + 
"min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "cf90e1e7f7d44002b8c0169ea7acbaa0": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "7dbe2cbb8f8f417dbfeb3e23365f888a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_6bbdc3ced13f4ed0bdc82427c1149044", + "IPY_MODEL_6d88a569e9b64803a19dbfa6205779a4", + "IPY_MODEL_69364eb2855d4735a736cbb47d6de6bc" + ], + "layout": "IPY_MODEL_8c5ab668e16946d898bd356c3cc9a431" + } + }, + "6bbdc3ced13f4ed0bdc82427c1149044": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_12c2fde4bc64423582bfcaee1f668721", + "placeholder": "​", + "style": 
"IPY_MODEL_288ba00f9d684d5d99b740a945672582", + "value": "model.safetensors: 100%" + } + }, + "6d88a569e9b64803a19dbfa6205779a4": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b9b5139683d14cbf9ccaf4c5d71f65c7", + "max": 438349816, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4defb67a0d964fe5ac5af836d5301099", + "value": 438349816 + } + }, + "69364eb2855d4735a736cbb47d6de6bc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_29d2b10708d340a480d1c692fdee8f5f", + "placeholder": "​", + "style": "IPY_MODEL_32a463a1967f4027bfdbf7ba8e44b99b", + "value": " 438M/438M [00:06<00:00, 41.7MB/s]" + } + }, + "8c5ab668e16946d898bd356c3cc9a431": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": 
null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "12c2fde4bc64423582bfcaee1f668721": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + 
}, + "288ba00f9d684d5d99b740a945672582": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b9b5139683d14cbf9ccaf4c5d71f65c7": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4defb67a0d964fe5ac5af836d5301099": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "29d2b10708d340a480d1c692fdee8f5f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "32a463a1967f4027bfdbf7ba8e44b99b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } } - ], - "source": [ - "# Example usage\n", - "query = \"soup chinese 
please\"\n", - "recommendations = get_recommendations(query)\n", - "print(recommendations)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "---\n", - "\n", - "Due to limited data, there may be instances where mixed results are returned, especially with a recommendation limit set to 4. The key to achieving better results lies in how you prepare your text data and optimize various hyperparameters, such as query types (hybrid, FTS, vector search). Additionally, experiment with different reranker methods. For further improvements, refer to our vector recipe repository for enhancing RAG methods and consult the LanceDB documentation for more details.\n", - "docs: https://lancedb.github.io/lancedb/search/\n", - "more such genai projects:https://github.com/lancedb/vectordb-recipes\n", - "\n", - "---" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "qdrant_music", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.0" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/examples/archived_examples/Food_recommendation/main_food.csv b/examples/archived_examples/Food_recommendation/main_food.csv new file mode 100644 index 0000000..1031d25 --- /dev/null +++ b/examples/archived_examples/Food_recommendation/main_food.csv @@ -0,0 +1,401 @@ +Food_ID,Name,C_Type,Veg_Non,Describe +1,summer squash salad,Healthy Food,veg,"white balsamic vinegar, lemon juice, lemon rind, red chillies, garlic cloves (crushed), olive oil, summer squash (zucchini), sea salt, black pepper, basil leaves" +2,chicken minced salad,Healthy Food,non-veg,"olive oil, chicken mince, garlic (minced), onion, salt, black pepper, carrot, cabbage, green 
onions, sweet chilli sauce, peanut butter, ginger, soy sauce, fresh cilantro, red pepper flakes (crushed), tarts" +3,sweet chilli almonds,Snack,veg,"almonds whole, egg white, curry leaves, salt, sugar (fine grain), red chilli powder" +4,tricolour salad,Healthy Food,veg,"vinegar, honey/sugar, soy sauce, salt, garlic cloves (minced), chilli pepper (sliced), green papaya, carrot (peeled), cucumbers, mint leaves, toasted peanuts" +5,christmas cake,Dessert,veg,"christmas dry fruits (pre-soaked), orange zest, lemon zest, jaggery syrup, almond flour, apple, butter (softened), eggs" +6,japanese curry arancini with barley salsa,Japanese,veg,"japanese curry, sticky rice, cheese inside rice, barley salsa, wasabi mayo, red capsicum cube (cut), yellow capsicum cube (cut), green capsicum cube (cut), green chili, barley, butter, white pepper, light soya, salt" +7,chocolate nero cookies,Dessert,veg,"almonds, eggs, granulated sugar, bittersweet chocolate, unsalted butter, flour, baking powder, castor sugar, icing sugar" +8,lamb and chargrilled bell pepper soup,Healthy Food,non-veg,"lamb bones (preferably shank and shoulder), onions, celery, ginger, garlic, carrot, chargrilled red/yellow/green bell peppers (quartered), whole spices mix (black pepper, cinnamon, cardamom, clove, bay leaf), salt, water (warm), oil (sunflower or olive" +9,cream of almond soup,Healthy Food,veg,"vegetable stock, skimmed milk, toasted almonds (powdered), butter, flour, salt and pepper, nutmeg, almond essence, toasted almond flakes" +10,broccoli and almond soup,Healthy Food,veg,"vegetable stock, broccoli, ground almonds (toasted), skimmed milk, salt, freshly ground black pepper" +11,coconut lime quinoa salad,Healthy Food,veg,"uncooked quinoa, water, red onion, cucumber (diced), purple cabbage, avocado (ripened and diced), orange, shelled edamame (defrosted), unsweetened toasted coconut flakes, almonds, few shakes of black pepper, for the dressing:, orange juice, lime (juiced), apple cider vinegar, olive 
oil" +12,lemon honey glazed sous vide corn on the cob,Snack,non-veg,"young corn on the cob, honey, lemon juice, garlic cloves (smashed), celery stalk, chives, carrot, salt, paprika powder, parsley, plastic bag, food thermometer" +13,watermelon and strawberry smoothie,Healthy Food,veg,"fresh strawberries, honey, low fat yogurt, watermelon, chia seeds" +14,"peach, raspberry and nuts smoothie",Healthy Food,veg,"fresh raspberries, ripe banana, almond, fresh peach slices, low fat yogurt, fresh raspberry, peach fruit slices, almonds" +15,almond and cranberry poha,Indian,veg,"almond flakes, onion, poha, cranberries (frozen/ dried), salt, oil, curry leaves, green chilies, fresh coconut" +16,almond and raw banana galawat,Indian,veg,"almond slivers, raw banana (boiled), almond paste, cooking cream, refined oil, mace powder, cardamom powder , ginger garlic paste, garam masala powder, red chilli powder, salt, tempura batter" +17,baked namakpara with roasted almond dip,Snack,veg,"almonds (crushed), tomato, garlic cloves, basil sprig, lemon, salt, pepper, for namak para:, refined flour, sugar, salt, olive oil, water" +18,grilled almond barfi,Dessert,veg,"khoya, sweetener (optional), almonds (crushed)" +19,baked shankarpali ,Snack,veg,"whole wheat flour (atta), refined flour (maida), garlic cloves (crushed), salt, red chilli powder, chaat masala, cumin powder, tomato puree, ghee" +20,baked multigrain murukku,Snack,veg,"oats, ragi flour (bhakri atta), wheat flour, rice flour, urad dal flour (dry roast and grind the dal to a fine powder), cumin seeds, green chillies, ginger, salt, oil" +21,apple rabdi,Dessert,veg,"apples, milk, sugar, green cardamoms, almonds (blanched), pistachios (blanched)" +22,baked namak para,Snack,veg,"whole wheat flour (atta), refined flour (maida), baking powder, ghee, salt, carom seeds (ajwain)" +23,dates and nuts ladoo,Dessert,veg,"dates (pitted), mixed nuts (almonds, cashews, walnuts, pistachios, peanuts), dessicated coconut (optional)" +24,green lentil 
dessert fudge,Dessert,veg,"whole moong beans, cow ghee, raisins, whole milk, jaggery (organic), ground cardamom, almonds (halved)" +25,cashew nut cookies,Dessert,veg,"cashew paste, ghee, khaand (a sweetening agent and a healthier substitute of sugar), flax or chia seeds, plain yogurt, baking soda, baking powder, vanilla, oats, organic all-purpose flour" +26,almond pearls,Snack,veg,"toasted almonds, blueberries, oats, corn flakes, olive oil, salt, curry leaves, mustard seeds, cumin seeds, red chilli powder, turmeric powder, black pepper powder, ajwain, lemon juice" +27,hawaiin papaya salad,Healthy Food,veg,"papaya, fresh lime (juiced), watermelon balls or small squares (seedless), fresh pineapple chunks, coconut (unsweetened), vanilla flavoured yogurt (low fat)" +28,vegetable som tam salad,Healthy Food,veg,"raw papaya, carrot, french bean diamond, cherry tomato, garlic, crush mix chilli, somtam dressing, peanuts (crushed), peanuts" +29,spinach and feta crepes,French,veg,"milk (whole fat or skimmed), flour, water, butter, honey, salt, vegetable oil, extra butter (for crisping the crepe), spinach, feta cheese (crumbled), greek yogurt (whisked), honey" +30,couscous with ratatouille - tangy tomato sauce,French,veg,"for the cous cous:, plain couscous, extra virgin olive oil, vegetable stock, herbs (basil, parsley, thyme, cilantro work best), for the ratatouille:, olive oil (regular not extra virgin), red onions, aubergines (cut in to 3 cm cubes), zucchini (cut in to 3 cm cubes), garlic cloves, ground cumin, sweet paprika, tomato paste, salt" +31,baked almond kofta,Snack,veg,"potato (boiled), nutmeg, milk, almonds (crushed), green onions, refined flour, egg, salt, pepper, eggs, refined flour (for rolling), dried breadcrumbs (for rolling)" +32,almond and amaranth ladoo,Dessert,veg,"popped amaranth seeds, jaggery, almonds (slivered, unpeeled)" +33,moong dal kiwi coconut soup,Indian,veg,"green gram (dhuli moong dal), kiwis, coconut cream, oil, bay leaves, cumin seeds, 
coriander seeds, black peppercorns, garlic cloves, medium onion, carrot, turmeric powder, curry powder, salt, fresh coriander sprigs" +34,mixed berry & banana smoothie,Healthy Food,veg,"Frozen mixed berries, ripped banana,fresh orange juice, low fat curd" +35,banana walnut smoothie,Healthy Food,veg,"Low Fat Yogurt, Banana, Walnuts, Seeds (Facseeds and Chia Seeds), Honey" +36,spicy watermelon soup,Healthy Food,veg,"Watermelon, ginger-garlic paste, peppermint, chili flakes, olive oil (to cook)" +37,red rice poha,Indian,veg,"Onion, Pepper, Button Mushroom, Wild Mushroom, White Wine, Tomato, Sage Leaf, Red Poha, Truffle Oil, Rosemary" +38,mixed salad with lotus root,Healthy Food,veg,"Iceberg Lattoos, Lolo Rosso, Endive Lettuce, Red Cabbage, Lotus Root, Kaddu, Yam Beans, Small Onions, Pomegranate, Chalet Oil, Pomelo, Lemongrass Sauce" +39,sweet potato and quinoa bowl,Healthy Food,veg,"Sweet Potato Cubes, Coconut, Onion, Knoia (Ripe), Raisins, Cashew, Pepper Powder, Caen, Cassia, Salt" +40,corn and raw mango salad,Healthy Food,veg,"Corn kernels, onions, green onions, paprika, raw mango (pieces), celery stock (pieces), cherry tomatoes, pineapple (pieces), pasarley, green coriander, badge leaves, black olive, lemon juice, olive Oil, black salt, white pepper powder, taco shell" +41,khichdi,Indian,veg,"Quick cooking oats, peanuts, cumin, turmeric powder, red chilli powder, onion, tomatoes, carrots, green peas, ginger, green chillies, salt, extra virgin olive oil, water, coriander leaves" +42,sugar free modak,Japanese,veg,"seedless dates, almonds, cashew nuts, walnuts, raisins, dry coconut, poppy seeds, ghee" +43,beetroot modak,Japanese,veg,"gram flour (besan), semolina (rava/sooji), beetroot colour or two oven cooked beetroot paste , water, oil (for frying), for sugar syrup:, sugar, water, cardamom powder, lemon juice, cashews (kaju)" +44,andhra pan fried pomfret,Indian,veg,"white pomfret fish, sunflower refined ooil, red chilli powder, salt, turmeric powder, ginger-garlic 
paste, lemon (juiced), cumin powder, coriander powder, garam masala, green chilli, curry leaves, coriander" +45,ghee roast chicken dosa quesadilla,Mexican,non-veg,"dry red chillies, coriander seeds, cumin seeds, fenugreek seeds, black pepper, clove, garlic, turmeric powder, boneless chicken, curd, lemon (juiced), for cooking chicken:, tamarind pulp, ginger (1 inch), garlic cloves, onion, tomato, ghee, red chilli powder, salt, crushed jaggery, dosa batter (for 4 dosas), processed cheese, ghee or oil (to crisp the dosa), coriander leaves, curry leaves (deep fried), curd, salt, zeera powder, sugar, red chilli powder" +46,steam bunny chicken bao,Japanese,non-veg,"buns, all purpose white flour, dry yeast, sugar, salt, warm water, chicken mince, eggs, soy sauce, sugar, sesame oil, worcestershire sauce, chives, aromat powder" +47,meat lovers pizza,Italian,non-veg,"millet flour, tapioca flour, soy milk, garlic powder, baking powder, salt, tomato sauce, mozzarella cheese, prosciutto, pepperoni, chicken salami, salami milano, rosemary, olive oil" +48,almond and chicken momos (without shell),Chinese,non-veg,"chicken mince, garlic, carrots, spring onion, ginger, soya sauce, oyster sauce, sesame oil, pepper powder, egg, almonds (blanched), oil (for greasing)" +49,christmas tree pizza,Italian,veg,"pizza dough (2 boules), red pepper, red onion, basil pesto (purchased, homemade or vegan pesto), mozzarella cheese, kosher salt" +50,french pork chop,French,veg,"pork chop, pink pepper corn, green pepper corn, aromatic sauce, salt, black pepper, salted butter, refined oil, red cabbage, bell pepper chop, apricot, onion, red wine, ancho chilli, cayenne pepper, microgreen" +51,christmas chocolate fudge cookies,Dessert,veg,"unsalted butter, brown sugar, chocolate, chocolate chips, eggs, flour, cocoa powder, baking powder" +52,chicken parmigiana with tomato sauce,Italian,non-veg,"for chicken parmigiana:, chicken breast, egg whole frozen, flour, planko bread crumbs, fresh italian parsley, 
vegetable oil, solid butter unsalted, tomato sauce, mozzarella cheese, italian parmesan cheese, thyme, for pepperonata:, red pepper, green pepper, yellow pepper, red onion, fresh tomato, butternut squash, garlic, pure olive oil, vegetable oil, black pepper corn (crushed), salt, parsley, for garnishing:, parsley sprig, spinach" +53,chocolate appe,Snack,veg,"rice, coconut, baking powder, vanilla extract, cinnamon powder, brown sugar, unsweetened cocoa powder, salt, chocolate chips, butter" +54,sous-vide salmon tikka,French,non-veg,"norwegian salmon, black garlic pickle, butter, butchery bag, smoked yogurt, home made prawn crisp, edible flower, salt" +55,filo pizza,Italian,veg,"filo pastry, himalayan salt, black pepper, salted butter, tomato sauce, broccoli, button mushroom, green zucchini, yellow zucchini, capsicum yellow, capsicum green, capsicum red, pizza spice mix, mozzarella cheese" +56,chocolate samosa,Snack,veg,"refined flour, desi ghee, black cardamom seeds, chocolate block, almonds, cashew nuts, pistachio, sugar, garam masala powder, oil" +57,fish with jamun sauce,Indian,non-veg,"jamun, sugar, chilli, garlic cloves (minced), cumin (powdered), fish fillet (pomfret (or any white fish), semolina, salt, wilted spinach" +58,dahi lasooni chicken,Indian,non-veg,"chicken (boneless, cube size), chesse, garlic, ginger garlic paste, coriander, green chilli paste, cream (fresh), curd, kaju paste, salt, black salt , garam masala, chaat masala, butter" +59,pesto fish kebabs,Indian,non-veg,"King fish cut into one inch pieces, paprika cut into one inch pieces, bezel pasto, white wine vinegar, salt, cooking spray" +60,caramelized sesame smoked almonds,Snack,veg,"red lentils or masoor dal (half-boiled), potato (grated), carrot, french beans, bread slices, ground chickpea flour (sattu), green chillies, ginger, onions, garlic (minced), salt, sugar, chaat masala, red chilli powder, garam masala, corn flour, besan, bread crumbs for crunchyness, coriander, refined oil (for 
fryingp)" +61,crunchy vegetable dal sattu croquettes,Italian,veg,"red lentils or masoor dal (half-boiled), potato (grated), carrot, french beans, bread slices, ground chickpea flour (sattu), green chillies, ginger, onions, garlic (minced), salt, sugar, chaat masala, red chilli powder, garam masala, corn flour, besan, bread crumbs for crunchyness, coriander, refined oil (for fryingp)" +62,active charcoal modak,Japanese,veg,"for modak:, gram flour (besan), semolina (rava/suji), active charcoal, water, oil (for frying), sugar, water, cardamom powder, lemon juice, cashews (kaju)" +63,flax seed and beetroot modak,Japanese,veg,"rice flour, salt, oil (for greasing), beetroot puree, tossed flax seeds (for topping), fresh coconut, jaggery, poppy seeds, green cardamom powder, nutmeg powder" +64,chocolate prunes gujiyas,Japanese,veg,"for gujiya pastry:, refined flour, ghee, water, salt, for gujiya stuffing:, dark chocolate, prunes, mawa, sugar, cardamom" +65,almond white chocolate gujiya,Japanese,veg,"all-purpose flour, clarified butter (ghee), water, white chocolate, dessicated coconut, green cardamom powder, almond, jaggery" +66,betel nut popsicle,Dessert,veg,"full cream milk, cream, mascarpone, sugar, paan (crushed), cookie crumble, dark chocolate (melted), caramel sauce" +67,clotted cottage cheese croquettes,Italian,veg,"black pepper, black salt, bread crumb, corn flour, refined oil, jalapeno, jeera powder, maida, cottage cheese, parsley, salt, for thousand island dressing:, mayonnaise, tomato ketchup, minced garlic, celery, black pepper" +68,gajar tart,French,veg,"white butter, breakfast sugar, milk full fat, refined flour, winter carrots, ghee, sugar, khoya, cardamom powder, cashew nuts (crushed), almonds (crushed), full fat milk, full fat milk, grain sugar, green cardamom powder" +69,banana and maple ice lollies,Dessert,veg,"Banana, Greek yogurt, heavy cream, pure maple syrup, coconut" +70,shepherds salad (tamatar-kheera salaad),Healthy Food,veg,"1 cucumber peeled and 
chopped, onion, tomato, green chillies, garlic buds, pasarley, olive oil, lemon juice, salt and pepper" +71,carrot ginger soup,Healthy Food,veg,"Carrots, Olive Oil, Salt, Vegetable Stock, Ginger, Thyme, Onion, Garlic Buds, Pepper (Freshly Picked)" +72,dark night,Beverage,veg,"whisky, honey, cinnamon, star anise, cloves, green apple" +73,hot chocolate,Beverage,veg,"milk, chocolate, cocoa powder, powdered sugar, cinnamon, vanilla" +74,slow cooked spiced sangria,Mexican,veg,"red wine (merlot / zinfandel), apple cider, honey, orange (zested and juiced), cloves (whole), green cardamom, cinnamon sticks, anise (whole), brandy" +75,detox haldi tea,Beverage,veg,"haldi, ginger, black pepper, honey, water" +76,holi special ice tea thandai,Beverage,veg,"ground white pepper (kali mirch), assam tea bags, almonds (badam) - coarsley crushed, poppy seeds (khus-khus), fennel seeds (saunf) (crushed coarsely), cardamom (elaichi) powder, ground sugar, saffron (kesar) strands" +77,grilled lemon margarita,Beverage,veg,"vanilla infused tequila, vanilla liqueur, lemon juice, lemon, lemon" +78,spanish artichoke and spinach dip,Mexican,veg,"spinach, onion, cream, garlic, nutmeg, salt, lime juice, artichoke hearts (cubed), polenta, refined flour, salt, water, butter, flour, salt, pepper (freshly ground)" +79,beetroot and green apple soup,Healthy Food,veg,"beetroots, green apple, onion, olive oil, salt, black peppercorns (crushed), butter, vegetable stock, mixed dried herbs, fresh cream, lemon juice, parsley sprigs, red chilli flakes, soup sticks as required" +80,baba budan no. 
7,Beverage,veg,"rum, espresso, sugar syrup/gur (jaggery) syrup, green cardamom pod," +81,fruit infused tea,Beverage,veg,"chamomile tea bags, orange, berries, blueberries, ginger, mint leaves, boiling water" +82,soya milk,Beverage,veg,"Soybean, hot water" +83,spiced coffee,Beverage,veg,"Freshly ground coffee, cardamom powder, ginger, milk, sugar, cream, chocolate" +84,filter coffee,Beverage,veg,"Coffee powder, water, milk, sugar" +85,garlic and pinenut soup with burnt butter essence,French,veg,"garlic cloves, almonds (whole), parsley, bayleaf, chicken broth, nutmeg, egg yolk, heavy cream, white bread slices, butter, salt, pepper, white wine" +86,roast turkey with cranberry sauce,Healthy Food,non-veg,"whole turkey, butter, onion, celery, crumbled sage and thyme , salt and pepper, garlic minced, balsamic vinegar, sugar, cranberry" +87,roasted spring chicken with root veggies,Healthy Food,non-veg,"whole chicken, thyme, garlic, lemon, orange, salt, black pepper, butter (to rub), extra olive oil, carrot, turnip, beetroot, chipotle powder, parsley" +88,peri peri chicken satay,Snack,non-veg,"boneless skinless chicken thigh (trimmed), salt and pepper, yogurt, chilli powder, ginger garlic paste, coriander leaves, oil to fry, peri peri sauce, potato fries" +89,chicken popcorn,Chinese,non-veg,"chicken (boneless), corn flour or all purpose flour, egg, bread crumbs, salt, ginger garlic paste, black pepper, onion powder (optional), red chilli powder or paprika or cayenne pepper, oil for deep frying" +90,half roast chicken,Healthy Food,non-veg,"cooking oil, whole chicken (skin on- rinsed and dressed), onions (quartered- skin on), garlic (halved - skin on), carrots (roughly quartered), celery , thyme, dunkleweizen (pour into a glass and keep open at room temperature for an hour or two to make flat), chicken stock, butter, sea salt" +91,chicken biryani,Indian,non-veg,"malabar peppercorn, cinnamon stick, green cardamom pods, star anise, cloves, fennel seeds, vermicelli, desi ghee, 
shahi jeera, bay leaves, green chilli, mint leaves, onion, saffron, garlic paste, salt, chicken (dark meat), ginger paste, turmeric powder, coriander powder, red chilli powder, roma tomatoes (de-seedeed), water" +92,chicken farcha,Indian,non-veg,"boneless chicken thigh (without skin), lemon juice, garlic paste, ginger paste, red chilli powder, coriander powder, garam masala, black pepper (crushed), salt, bread crumbs / semolina, eggs, red chilli powder, oil" +93,buldak (hot and spicy chicken),Japanese,non-veg,"chicken drum sticks/ chicken breast, soy sauce, sugar, traditional corn syrup (substitution honey), cheong ju (clear rice wine similar to japanese sake), green onion, black pepper, olive oil, sesame seeds (optional), for marinating sauce:, gochugaru (red chili pepper flakes), alapenos, korean pear (substitution asian pear), onion, garlic, soy sauce, spicy yellow mustard, sesame oil, sugar, mul yut (substitution honey)" +94,chicken sukka,Indian,non-veg,"chicken (small pieces), onion (thin slices), cardamom, salt, oil, coriander, cumin, mustard, pepper, cloves, cinnamon, bedki chillies, kashmiri chilies, turmeric, onion, garlic, ginger, salt" +95,steamed chicken roulade,French,non-veg,"lamb mince, garlic , salt, paprika powder, pomodaro tomatoes, olive oil, celery, shallots, carrot, black pepper, bay leaf, yellow chilli powder, cilantro, chicken thigh, salt, white pepper powder, garlic paste, white wine vinegar, olive oil, yellow chilli powder, refined oil, fenugreek seeds, white pepper powder, garlic, fenugreek leaves, onion, broken cashew, salt, fresh cream, kasoori methi" +96,methi chicken masala,Indian,non-veg,"boneless chicken, lemon juice, red chilli powder, salt, ginger garlic paste, curd, red chilli powder, salt, coriander powder, oil, onion, oil, bay leaf, green cardamom, black cardamom, cinnamon, ginger garlic paste, tomato, kasoori methi, green chilli, water" +97,restaurant style fried chicken,Chinese,non-veg,"egg, salt, white pepper, maida, salt, 
red chilli powder, garlic powder, ginger powder, onion powder, oregano, chillil flakes, white pepper, basil, chicken drumsticks, salt, white pepper, bread crumbs (as required)" +98,chicken potli,Chinese,non-veg,"chicken, onion, green chilli, garlic, ginger, salt, aromatic powder, soya sauce, oyster sauce, spring onion, filo sheets" +99,spicy chicken masala,Indian,non-veg,"mustard oil, curry leaves, kalonjee, saunf, garlic, onion, tomatoes, ginger, green chillies, boneless chicken, salt, tomato puree, red chilli powder, cumin powder, black salt, peppercorn (crushed), water, mint, coriander" +100,spicy chicken curry,Indian,non-veg,"oil, ghee, onion paste, garlic paste, ginger paste, turmeric powder, salt, tomato puree, coriander powder, red chilli powder, cumin powder, garam masala, chicken leg pieces, coriander leaves, water" +101,crispy herb chicken,Italian,non-veg,"fresh breadcrumbs, parmesan cheese, lemon rind, fresh parsley leaves, garlic powder, plain flour, egg, milk, chicken breast supremes, vegetable oil, mayonnaise, sour cream, fresh chives (dried if fresh not available), fresh parsley (dried if fresh not available), garlic, red onion (optional), ground black pepper, salt" +102,dahi chicken,Indian,non-veg,"dahi, cumin powder, garlic paste, garam masala, turmeric powder, red chilli powder, salt, boneless chicken, oil, green chilli, onion, tomato" +103,amritsari chicken masala,Indian,non-veg,"chicken, ginger garlic paste, curd, lemon juice, vinegar, coriander powder, cumin powder, red chilli powder, salt, onion, butter, red chilli powder, coriander powder, cumin powder, ginger, water, salt, green chilli, tomatoes, sugar, butter , cream" +104,chilli chicken,Chinese,non-veg,"boneless chicken, salt, cornflour, black pepper, egg, soy sauce, red chilli sauce, garlic, green chillies, green chilli sauce, vinegar, onion, capsicum, black pepper, salt" +105,chicken tenders,Snack,non-veg,"chicken breast (cut into strips), salt, garlic paste, ginger paste, green chilli 
paste, oats, refined flour, salt, freshly ground black pepper, fried mixed herbs (italian herb), garlic powder" +106,chicken nimbu dhaniya shorba,Beverage,non-veg,"water, chicken (diced)), ginger garlic paste, coriander, lemon, cream, butter, turmeric powder, green chilli paste, cornflour, salt" +107,garlic soya chicken,Healthy Food,non-veg,"chicken thigh/breast (cut crosswise into 1/2-inch-thin strips), sesame oil (toasted), white pepper (finely ground), ginger juice, peanut oil or vegetable oil (divided), garlic cloves (minced), ginger, red chilli flakes, red onion , snow peas (trimmed), red bell pepper (cut into thin strips), rice vinegar, soy sauce, dark soy sauce, chinese rice wine, brown sugar, cornflour" +108,cauliflower and chicken biryani,Indian,non-veg,"boneless chicken, biryani masala, yogurt, onions, coconut oil, cauliflower, coconut (diced), onion (diced), peppercorns, cardamoms, cumin seeds, bay leaves, cinnamon, cloves, coconut oil, garam masala, dry chilli flakes, turmeric powder, black peppercorns, cardamom, cinnamon, garlic (crushed), tomato puree, green chillies, fresh ginger (peeled), dry red chilli, salt" +109,chicken quinoa biryani,Healthy Food,non-veg,"onions, tomato, green chillies(slit open), ginger garlic paste, mint leaves, coriander leaves/cilantro, fresh yogurt/curd(beaten), turmeric powder, chilli powder, salt, chicken (cut into pieces), garam masala powder, ginger garlic paste, yogurt/curd, chilli powder, coriander powder, salt, oil, cloves, bay leaf , cardamom" +110,chicken and mushroom lasagna,Italian,non-veg,"chicken, salt, crush black pepper, garlic cloves (minced), olive oil, fresh thyme, button mushroom, onion, low fat milk, basil, basil-tomato sauce" +111,plum and cherry roasted chicken,Healthy Food,non-veg,"carrot (finely diced), onion (finely diced), celery (finely diced), fresh thyme sprigs, whole free range chicken , pepper (freshly ground), parsley stems, celery leaves, lemon slices (1/8 inch thick), onion, carrot, fresh 
lemon juice, chicken stock/broth, plums (cut), fresh cherries" +112,chicken roulade,French,non-veg,"chicken breasts, olives, jalapenos, bell peppers(all 3), thyme dried, white wine, salt, black pepper (crushed to taste), egg, olive oil, mozerella cheese, processed cheese" +113,sticky rum chicken wings,Snack,non-veg,"chicken wings without skin, hung curd, chilli powder, sweet paprika powder, english mustard paste, garlic paste, salt, garlic, rum, bbq sauce" +114,chicken dragon,Chinese,non-veg,"Chicken (boneless and cut into small pieces), eggs (lightly whipped), maida, garlic paste, ginger paste, salt, water, oil" +115,chicken palwal,Indian,non-veg,"Chicken, Onion, Tomato, Green Chilli, Cumin Powder, Coriander Powder, Green Coriander, Curd, Ginger Garlic, Red Chilli Powder, Salt, Oil" +116,pan seared thigh of chicken,Mexican,non-veg,"Chicken Thai, Salt, Pepper, Lemon, Fresh Thyme, Barley, Brockley, Mushroom, Extra Virgin Olive Oil, Cherry Tomato" +117,chicken in mahalak sauce,Indian,non-veg,"Salt, sugar, seasonings, oil, corn flour, chicken leg, fresh chilli, garlic, ginger, onion, red chili paste, tomato catchup, paprika, yellow capsicum, vegetable stock" +118,grilled kasundi honey chicken with sweet potato mash,Indian,non-veg,"Chicken breast, kasundi (Bengali mustard sauce), sweet potato, red and yellow capsicum, broccoli, garlic buds, refined oil, salt, butter, cream, green onion, honey, ginger and garlic paste" +119,chicken dong style,Chinese,non-veg,"Oil, chicken breast, garlic, ginger, tomato catchup, oyster sauce, shitake mushrooms, bomb shoots (boiled), salt, sugar, broth powder, shocking wine, potato starch, sesame oil, green onions" +120,microwave tandoori chicken,Indian,non-veg,"Chicken (sliced), garlic paste, ginger paste, Hung curd, salt, garam masala, coriander powder, pepper powder, cream, oil, for garnishing:, lemon pieces, onion rings" +121,methi malai cranberry chicken,Indian,non-veg,"Chicken (sliced ??into large pieces), salt, cranberry puree, red 
chilli powder, garam masala, coriander powder, cumin powder, chaat masala, corn flour, fenugreek leaves, gram flour, ginger, garlic, cream, Greek yogurt, butter" +122,southern fried chicken tenders,Snack,non-veg,"Chicken breast (cut 20 strips, 20 grams), all purpose flour, salt, hot red chillies, eggs, bread crumbles, white cabbage slaw, pasarley sprig, honey-mustard dip, green bell pepper" +123,thai style chicken tikka,Thai,non-veg,"Chicken Thais, Thai Ginger, Lemon Leaves, Lemongrass, Coconut Milk Powder, Refined Oil, Red Curry Paste, Peanut Butter, Bezel Leaves, Lotus Stem, Yellow Butter, Chaat Masala" +124,chicken gilafi kebab,Indian,non-veg,"Chicken Mince, Onion, Tomato, Green Capsicum, Green Coriander, Green Chilli, Salt, Oil, Cashew, Almond, Mint, Red Chilli Powder, Ginger Garlic Paste, Cumin Powder, Lemon Juice, Fresh Cream, Kewda Water" +125,cheese chicken kebabs,Indian,non-veg,"Chicken Thais, Garlic Paste, Garlic Paste, Yellow Chilli Powder, Cheese, Curd, Gram Flour, Green Cardamom Powder, Yellow Chilli Powder, Mace Powder, Nutmeg Powder, Rock Salt, Green Coriander, Oil" +126,andhra crab meat masala,Indian,non-veg,"processed crab meat, refined oil, curry leaves, garlic, green chilli, onion, ginger-garlic paste, coriander, coriander powder, cumin powder, turmeric powder, garam masala powder, kashmiri chilli powder, salt, lemon, chop masala, tomato" +127,cajun spiced turkey wrapped with bacon,Mexican,non-veg,"turkey breast, cajun spice, spinach leaves (cooked and drained), garlic pods, salted butter, feta cheese, bacon strips, ground black pepper, for cajun spice:, onion powder, garlic powder, seasoning salt, paprika, ground black pepper, cayenne pepper, oregano, thyme, red pepper flakes (if you like it spicy))" +128,thai lamb balls,Thai,non-veg,"lamb (minced), couscous, scallion, garlic, egg, parsley, olive oil, mint, ao nori herb, salt, five spice, cinnamon powder" +129,oyster lamb,Thai,non-veg,"vegetable oil, garlic, ginger, lamb, stock, oyster sauce, 
dry sherry, sugar, celery, salt, bokchoy, shitake mushroom" +130,chicken shami kebab,Indian,non-veg,"oil, cumin seeds, cloves, black peppercorns, cinnamon, coriander seeds, ajwain, red chilli whole, chilli flakes, boneless chicken, salt, water, ginger, green chilli, garlic, coriander leaves , mint leaves , egg" +131,balti meat,Mexican,non-veg,"refined oil, black cardamoms, green cardamoms, mace, clove, cinnamon stick, black pepper corn, ginger garlic paste, ginger, green chilies, mutton curry cut, brown onion paste, salt, kashmiri red chili powder, tomato puree, garam masala powder, coriander powder, cumin powder" +132,coffee marinated mutton chops,Thai,non-veg,"mutton chops, espresso, honey, balsamic vinegar, rosemary, pink peppercorns (crushed), olive oil, salt" +133,sali boti (parsi meat dish),Indian,non-veg,"mutton (wash and cut into very small cubes), ghee or oil, tomatoes, onions, green chillies, coriander leaves, chilli powder, turmeric powder, ginger-garlic paste, water, vinegar, jaggery, salt, potatoes, oil" +134,braised lamb shanks,Thai,non-veg,"Lang Shanks, Olive Oil, Onion, Carrots, Celery Stick, Garlic, White Wine, Lamb Stock, White Wine, Rosemary, Tomata Puree" +135,bengali lamb curry,Indian,non-veg,"Lamb pieces, yogurt, turmeric powder, castor sugar, red chili powder, mustard oil, onion, ginger paste, garlic paste, green chillies, mustard seeds, almonds" +136,malabari fish curry,Indian,non-veg,"sear fish, coconut, ginger, pureed tamarind, salt, powdered turmeric, green chillies, red chilli powder, shallots" +137,japanese fish stew,Japanese,non-veg,"sole fillet (you can also do 2/3 types of fish), large shrimps (optional), potatoes (peeled), onions (cut into slices), garlic cloves, red capsicum (can use green also), parsley or coriander (choose the herb that most suits your taste buds), few dashes of hot sauce, paprika, olive oil, white wine, rock salt" +138,malabar fish curry,Indian,non-veg,"whole coriander seeds, whole red chilli, coconut oil, 
mustard seeds, onion, curry leaf, coconut milk, tamarind pulp, coriander, seabass fish curry cuts" +139,surmai curry with lobster butter rice,Thai,veg,"tamarind, red chilli powder, turmeric powder, salt, fresh coconut, onion, garlic cloves, coriander seeds, fenugreek seeds, red chilli powder, turmeric powder, tamarind (lemon sized), butter, lobster (de-shell and devein and make a small cube), salt and lime, surmai" +140,seared salmon in tabasco butter,Thai,non-veg,"butter, tabasco, chives, salt, salmon fillet, olive oil, sea salt" +141,"risotto lobster with parmesan egg pancake, confit tomatoes and coral tuille",Italian,non-veg,"lobster shell, carrot, leeks, garlic cloves, tomato paste, bay leaf, peppercorn, water, lobster meat, arborio rice, onion, leeks, lobster stock, parmesan cheese, cream, salt, olive oil, egg, parmesan, flour, milk, parsley, salt, cherry tomatoes, garlic pods, olive oil, water, oil, flour, salt" +142,fish skewers with coriander and red wine vinegar dressing,Thai,non-veg,"sea bass fillets, olive oil (for grilling), red wine vinegar, sugar, extra virgin olive oil, garlic clove, coriander, bamboo skewers" +143,seafood rock filler,French,veg,"baked tart of mixed sea food served with phyllo fruit bowl, savory tart shell (semi baked), mixed sea food, mornay sauce, assorted herb and seasoning, phyllo pastry, assorted seasonal fruit" +144,shrimp & cilantro ceviche,French,veg,"prawns, gherkin, onion, cilantro, mix bell pepper, tiger milk, sweet corn, sea salt, black pepper, green lemon juice, cherry tomato, edible flower, coriander stems, celery stalks, garlic, ginger, red onion, lemon juice, sea salt, black pepper" +145,saewoo bokumbop (shrimp fried rice),Japanese,veg,"cooked rice, shrimp (de-veined, onion, cooked green peas, green onion, egg (scrambled), vegetable oil/butter, soy sauce, sesame oil, salt, pepper" +146,thai prawn curry & baked rice ,Thai,veg,"jeera whole, coriander seeds, kashmiri chilly, garlic, ginger, green chillies, kokum, 
coconut, onion, tomatoes, coriander, prawns, salt, turmeric powder, garlic paste, asafoetida, refined oil, ginger paste, goan rice, coconut milk, green chillies, fresh coconut, hing, refined oil, coriander leaves, basmati rice, coconut milk, green chillies, coconut milk, coconut, salt, green chillies, curry leaf, lecite" +147,bihari fish curry,Indian,non-veg,"rohu fish, salt, turmeric powder, chilli powder, oil, garlic , green chillies, mustard seeds, black peppercorns, cumin seeds, whole red chillies, fenugreek seeds, tomatoes, mustard oil, bay leaves, water, garam masala, coriander leaves" +148,curry fish fingers,Thai,non-veg,"river sole fish (cut in thin strips), garlic paste, salt , lime juice, beer, flour, cornflour, white pepper, eggs, mustard" +149,prawn and litchi salad,Healthy Food,veg,"prawns (shelled and cleaned), spring onions, mango flesh, litchis (deseeded), chilli flakes, lemon (for lemon juice), olive oil, peanuts (to garnish), salt" +150,kerala fish curry,Indian,non-veg,"white fish (cut into cubes), onion, tomato, garlic cloves, fresh green chillies (deseeded), oil, fresh coconut paste, red chilli paste, coriander powder, turmeric powder, salt, whole dry red chillies, black mustard seeds, curry leaves, tamarind extract, water" +151,fish andlouse,French,non-veg,"white wine and water mix to cover, onion, salt, bay leaf, black pepper corns, olive oil, onion, garlic, tomatoes (peeled and seeded), basil leaves, spring fresh thyme - optional 1 bay leaf, salt and pepper, olive oil, wine vinegar, prepared mustard, salt and pepper, assorted garden herbs ( parsley, basil etc." 
+152,prawn fried rice,Thai,veg,"Oil, Bacon, Prawns, Chicken, Carrots, Garlic Buds, Ginger, Spring Onion, Sweet Chili Sauce, Soy Sauce, Salt, Pepper, Sweet Corn, Vinegar, Baked Rice, Green Coriander" +153,damdama fish curry,Indian,non-veg,"Fish, onion, tomato, reshagi red chilli powder, cumin powder, coriander powder, coriander, cumin, salt, oil" +154,fish with white sauce,Italian,non-veg,"Fillet fish, oil, milk, flour, butter, salt, ground black pepper" +155,chilli fish,Chinese,non-veg,"For fish pieces (Boneless), Flour, Cornflour, Baking Powder, Soy Sauce, Celery, Pepper, Salt, Oil, Green Onion, Sauce:, Ginger, Garlic, Green Chili, Soy Sauce, Tomato Sauce, Chili Sauce, Cornflour" +156,fish ambultiyal,Indian,non-veg,"Tuna, onion, fenugreek, whole chilli, pandan leaf, salt, water, cardamom, cinnamon, coriander powder, black pepper, red chilli, curry powder, garlic (crushed), chili flakes, gorca, salt" +157,chettinad fish fry,Indian,non-veg,"Surmai fish, oil, garlic buds, ginger (mash), cumin, fennel, whole coriander, black pepper, mustard seeds, curry leaves, salt, oil, water, tomatoes, red pepper powder, turmeric powder, tamarind pulp, cornflour, lemon pieces of" +158,fish moilee,Indian,non-veg,"Basa fish, onion, ginger, garlic buds, green chillies, turmeric, red chilli powder, coriander powder, lemon juice, curry leaves, refined oil, refined oil, mustard seeds, black pepper, fennel, green cardamom, coconut milk, Cherry tomatoes" +159,batter fish,Mexican,non-veg,"Fresh fish pieces (river sol or sea bass), maida, passerley, oil, baking powder, maida, egg (optional), soda water, salt and pepper" +160,fish salan,Mexican,non-veg,"King fish, ripe tomatoes, desi ghee, ginger-garlic paste, onion, turmeric powder, red chilli powder, black pepper powder, garam masala, ginger, turmeric powder, fennel powder, refined oil, salt, water" +161,spanish fish fry,Mexican,non-veg,"Sol fish, tomato, onion, lemon juice, olive oil, ginger and garlic paste, bay leaves, cinnamon stick, 
green chillies, vinegar, salt, water" +162,prawn potato soup,Thai,veg,"Onion, potato, prawns, eggs, milk, butter, coriander, salt, water" +163,red rice vermicelli kheer,Indian,veg,"red rice vermicelli / broken sooji / semolina vermicelli, butter, almonds, whole milk, cardamom powder, saffron strands / thread, sugar" +164,green cucumber shots,Healthy Food,veg,"english cucumbers, garlic cloves (smashed), romaine lettuce, basil, parsley, cilantro, big lemon, sea salt, olive oil" +165,thai pineapple rice,Thai,veg,"rice, onion, thai ginger , fresh turmeric, curry leaf, lemon grass, coconut milk, salt, hot curry powder, pineapple chunks, oil, water, turmeric powder, fresh pineapple" +166,green asparagus risotto,Italian,veg,"carnaroli rice, vegetable broth, butter, extra virgin olive oil, parmigiano cheese, onion (minced), white wine, salt & pepper" +167,veg fried rice,Chinese,veg,"oil, ginger-garlic paste, spring onion, carrot, cabbage, capsicum, salt, soy sauce, vinegar, rice" +168,egg and garlic fried rice,Chinese,non-veg,"oil, garlic, spring onion , ginger, red chilli , egg, cooked rice, salt, black pepper powder, soya sauce" +169,curd rice,Indian,veg,"rice, water, curd, milk, carrot, green chilli, ginger, salt, coriander leaves, oil, mustard seeds, chana dal, urad dal, curry leaves, red chilli, hing" +170,fried rice with soya chunks,Chinese,veg,"basmati rice, carrot, capsicum, beans, green peas, ginger, garlic, green chillies, bay leaf, cinnamon sticks, green cardamoms, clove, soya chunks, salt," +171,corn pulao,Indian,veg,"basmati rice, american corn kernels, olive oil, onion, ginger garlic paste, salt, green chillies, cumin seed, bay leaf, pepper corn, cloves, hot water, coriander leaves, lime juice, bell pepper (saut?ed and diced), coconut" +172,zucchini methi pulao,Indian,veg,"zucchini, basmati rice, fenugreek (methi), clarified butter (desi ghee), clarified butter (desi ghee), cumin seeds (jeera), asafoetida (heeng), green chilles, ginger, salt" +173,lemon 
rice,Indian,veg,"cooked basmati rice, oil, asafoetida (heeng), mustard seeds (sarson), curry leaves (kadhi patta), whole red chilli (sabut laal mirch), turmeric powder(haldi), salt, lemon juice, peanuts, urad dal, chana dal, ginger" +174,kale channe ki biryani,Indian,veg,"black gram (kala chana), basmati rice (soaked for 2 hours), cinnamon, green cardamoms, cloves, ghee, black cardamoms, black peppercorns, bay leaves, onions, green chillies (slit), ginger-garlic paste, coriander powder, turmeric powder, yogurt, fresh mint leaves, fresh coriander leaves, garam masala powder, red chilli powder, salt, cashew nuts, almonds, milk, fresh cream, saffron, ginger, screw pine essence (kewra), browned onions, whole wheat flour dough" +175,chicken paella,Mexican,non-veg,"chicken, oil, salt and pepper, paprika powder, chilli flakes, garlic paste, onions, bell peppers, rice, vegetable stock, saffron, peas, olives, parsley, white wine" +176,thai fish curry,Thai,non-veg,"fish (cubed), thai green curry paste, oil, onions, garlic, ginger/galangal, coconut milk, coriander, lemon juice, palm sugar, basil leaves, salt & pepper" +177,vegetable pulao,Indian,veg,"water, basmati rice, ghee, paneer (cooked), carrot, beans, peas, elaichi, cardamoms, cinnamon, bay leaves, cumin seeds, chilli powder, turmeric, salt, coriander leaves" +178,oats shallots pulao,Healthy Food,veg,"Rice, Green Coriander, Green Chillies, Onion, Cinnamon, Cardamom, Cloves, Red Chilli Powder, Salt, Garlic Flakes, Ginger, Chalet, Olive Oil" +179,shiitake fried rice with water chestnuts,Chinese,veg,"Shitake Mushrooms, Vegetable Oil, Garlic Buds, Green Chillies, Water chestnuts, Onions, Leaks, Celery, Ginger, Sesame Oil, Rice Baked, White Chillies, Salt, Rice Wine Vinegar, Green Onions, Small Bunch Pasarley, Sesame Oil" +180,lotus leaf wrapped fried rice,Chinese,veg,"Jasmine Rice (Baked), Edamame Beans, Mock Meat, Shitake Mushroom, Spring Onion, Dark Soy Sauce, Sunflower Oil, Salt" +181,vegetable 
biryani,Indian,veg,"Cumin, Onion, Ginger Garlic, Mix Vegetable, Coriander Powder, Garam Masala, Turmeric Powder, Salt, Red Chilli Powder, Green Chillies, Lemon Juice, Steamed Rice, Green Coriander" +182,avial with red rice,Indian,veg,"Red rice, water, potatoes, carrots, raw banana, drumstick, small raw mango, sour curd, bean stick, onion, salt, turmeric, water, coconut oil, green chillies, mustard seeds (crushed)" +183,rice in lamb stock,Thai,non-veg,"Rice, desi ghee, big cardamom, bay leaf, cinnamon stick, fennel, onion, lamb bonz, royal cumin, cashew, cream, green cardamom powder, salt, water" +184,vegetable bruschetta,Italian,veg,"baguette (grilled slices), black olive tapenade, artichoke hearts, lettuce arugula (trummed), tomato confit, fresh basil leaves, mint leaves, zucchini, goat cheese, parmesan cheese shavings, mozzarella buffalo cheese" +185,red wine braised mushroom flatbread,Italian,veg,"olive oil, fresh buffalo mozzarella cheese, canned pelati tomatoes (cooked), pizza/flatbread base, mushrooms, red wine, parsley, garlic cloves (halved), salt" +186,strawberry & pistachio breton tart,Dessert,veg,"plain flour, baking powder, sea salt, unsalted butter, egg yoks, castor sugar, double cream, yolk, sugar, gelatine, butter, pistachio paste, strawberry" +187,tricolour pizza,Italian,veg,"pizza base , pizza sauce, mozzarella cheese, black olive, green capsicum, carrots, olive oil" +188,instant rava dosa,Indian,veg,"rava/suji/semolina, rice flour, all purpose flour, fresh coconut (pieces), jeera, green chilli, dhaniya, onion (diced), salt, water, oil, ghee/clarified butter" +189,easy bread poha,Indian,veg,"oil, hing, mustard seeds, curry leaves, whole red chillies, cookes peas, peanuts, turmeric powder, salt, bread, green chillies, lemon juice, coriander leaves, dessicated coconut" +190,bread chana basket,Indian,veg,"bread (white/brown), butter, for chana chaat:, chana, onion, tomatoes, cumin seeds, ginger garlic paste, chilli powder, chana masala, turmeric 
powder , oil, salt" +191,spaghetti with clams & crispy bread crumbs,Italian,veg,"panko, 1/4 cup plus 1 tablespoon extra-virgin olive oil, plus more for drizzling, kosher salt, freshly ground pepper, garlic cloves, manila clams or cockles, scrubbed, dry white wine, spaghetti, lemon zest, lemon juice, mullet bottarga (optional; see note), red pepper, thyme, rosemary, parsley" +192,kasha bread,French,veg,"kasha, boiling water, buckwheat flour, all-purpose gluten free flour, brown sugar, baking powder, baking soda, xanthum gum, salt, buttermilk, cream, egg, oil, walnuts" +193,egg paratha,Indian,non-veg,"whole wheat flour, salt, oil, eggs, onions, green chilli, coriander leaves, garam masala" +194,egg and cheddar cheese sandwich,Mexican,non-veg,"egg, salt, pepper, ham slices, basil leaves" +195,egg in a blanket,French,non-veg,"eggs, brown bread slices, butter, chilli flakes, oregano, salt" +196,bread dahi vada,Indian,veg,"bread slices, curd, oil, salt and black pepper , red chilli powder, aamchoor powder, mint leaves, zeera powder, anardana" +197,cheese and avocado parantha,Mexican,veg,"Wheat flour, Kasuri methi, water, olive oil, avocado, mozzarella cheese, pizza tasting, ghee, salt" +198,bread with tomatoes and olives,Italian,veg,"French Bread Loaf, Tomato, Extra Virgin Olive Oil, Salt, Black Pepper, Passerley, Stuffed Green Olive" +199,lemon poppy seed cake,Dessert,veg,"plain flour, baking powder, salt, castor sugar, baking soda, eggs, butter, vanilla essence, lemon juice, poppy seed, lemon peel, to serve:, lemon syrup, whip cream, fresh berry" +200,chocolate kaju katli,Dessert,veg,"cashew nuts, sugar, water, milk chocolate or dark chocolate" +201,mix fruit laccha rabri tortilla crunch,Mexican,veg,"milk, sugar, cardamom powder, saffron, almonds and pistachios, grapes, apple, kiwi, orange, pomegranate, mint leaves, tortilla sheet, oil" +202,pista chocolate & mandarin,Dessert,veg,"Pistachios, milk, sugar, broken rice, green cardamom, white chocolate, milk, egg yolks, 
whipped cream, vanilla pod, mandarin, sugar, water" +203,banana and chia tea cake,Dessert,veg,"Banana, Castor Sugar, Flour, Oil, Milk, Eggs, Baking Soda, Flax Seeds, Almond Flakes, Chia Seeds" +204,chocolate and almond rum ball ,Dessert,veg,"Chocolate sponge eggless, dark chocolate, single cream, almonds, castor sugar, instant coffee powder, dark rum" +205,lemon sushi cake,Dessert,veg,"Vanilla pre mix, gel, oil, water, egg yolks, lemon juice, sugar, butter, white chocolate, cooking cream, milk" +206,chocolate doughnut,Dessert,veg,"Sugar, egg yolks, egg, butter, yeast, milk" +207,spiced almond banana jaggery cake,Dessert,veg,"Butter, cinnamon molasses powder, nutmeg (powdered), almonds, sugar, eggs, orange peel, banana, flour, baking soda, baking powder, salt, buttermilk, powdered cinnamon" +208,fennel scented sweet banana fritters,Dessert,veg,"Wheat flour, banana, jaggery, milk or water, ghee or oil" +209,camel milk cake tart,Dessert,veg,"Camel milk, sugar, vinegar, butter, brown sugar, maida, eggs" +210,quinoa coconut crumble custard,Dessert,veg,"Knoia (cooked), oats, cinnamon powder, salt, brown sugar or jaggery, nuts, coconut nuts, eggs, kinoia, coconut milk, maple syrup, vanilla extract, cinnamon powder, salt, honey" +211,lamb barley pot,Healthy Food,non-veg,"pot barley, onions, worcestershire sauce, chilli flakes, mustard seeds, thick mutton chunks, thick neck of lamb chops, water or stock (to cover), salt, black pepper (freshly ground)" +212,al hachi chicken,Indian,non-veg,"shallow fried chicken, bottle gourd (boiled and sun dried), onion, garlic, whole spices:, green cardamom, black cardamom, cinnamon sticks, turmeric, deggi mirch, fennel seeds powder, dry ginger powder, coriander powder, salt, mustard oil, desi ghee" +213,berry parfait hazelnut white chocolate sable,Dessert,veg,"for berry parfait:, egg yolk, caster sugar, berry puree, cream cheese, double cream, for hazelnut streusel:, ground hazelnut, flour, caster sugar, butter, for hazelnut white 
chocolate pressed sable:, hazelnut streusel, cocoa butter, puffed rice, clarified butter, melted white chocolate, for flexy berry:, raspberry puree, sugar, liquid glucose, pectin" +214,badam papite ke kebab with pineapple salsa,Indian,non-veg,"Raw papaya, raw potato, almonds, salt, asafoetida, turmeric, celery, coriander, ginger, green chillies, gram flour, green coriander, oil, pineapple, onion, green chillies, green coriander, lemon juice, salt, pepper" +215,mixed vegetable soup,Healthy Food,veg,"Mix vegetable (tomatoes, carrots, peas and French beans), salt, cumin powder, oil, curry leaves" +216,duo of chocolate and strawberry,Dessert,veg,"dark chocolate, white chocolate, strawberries" +217,mustard-parmesan whole roasted cauliflower,Healthy Food,veg,"cauliflowers, garlic, halved, olive oil , dijon mustard , kosher salt , freshly ground black pepper , fresh parsley leaves, parmesan , lemon wedges" +218,hassel back sweet potatoes,Healthy Food,veg,"sweet potatoes, butter, brown sugar, pure vanilla extract, ground cinnamon, himalayan pink salt / rock salt" +219,mother christmas cake,Dessert,veg,"tart apples (2 large), sugar, apple juice, eggs, vegetable oil, vanilla extracts, all-purpose flour, apple pie spice (cinnamon), salt, pecans, candied red cherries (halved), candied green cherried (halved), candied pineapple (diced), cashews (optional)" +220,matcha tea macarons,Dessert,veg,"egg whites, breakfast sugar, icing sugar, almond powder, matcha powder, heavy cream, white chocolate" +221,"amaranthus granola with lemon yogurt, berries and marigold",Healthy Food,veg,"popped amaranthus (cholai), oats, almonds, cinnamon powder, sunflower seeds, sesame seeds, honey, brown sugar, salt, olive oil, lemon rind, plain yogurt (whipped with lemon rind), blueberries" +222,chocolate fudge cookies,Dessert,veg,"dark chocolate coverture, butter, sugar, eggs, flour, baking powder, salt, cocoa powder, vanilla essence" +223,veg summer rolls,Thai,veg,"rice paper sheets, iceberg lettuce, 
carrot, bean sprouts, cucumber, tofu, basil, mint leaves, coriander, rice noodles (soaked in warm water for 20-25 minutes), peanuts, hoisin sauce, peanuts, garlic, oil, red chillies, water" +224,eggless vanilla cake,Dessert,veg,"maida, baking powder, castor sugar, butter, milk, vanilla essence, vinegar" +225,sweet potato pie,Dessert,veg,"yams (red skinned), condensed milk, sugar, egg , cinnamon , marshmallows ," +226,wok tossed asparagus in mild garlic sauce,Healthy Food,veg,"asparagus, 1 fried onion (medium), 1/2 tsp breakfast sugar, 10 ml potato starch, 20 ml stock vegetarian, 20 gm porcini, salt to taste, 50 ml oil" +227,cinnamon oatmeal pancakes,Healthy Food,veg,"rolled oats, buttermilk (divided), whole wheat flour or oat flour, baking powder, baking soda, cinnamon, salt, eggs, canola oil , honey or maple syrup," +228,chocolate chip cheesecake,Dessert,veg,"butter biscuits (broken into pieces), butter (softened), cream cheese (softened), sugar, egg, rose essence or vanilla essence, chocolate chips" +229,chocolate lava cake,Dessert,veg,"dark chocolate, butter, icing sugar, egg yolks + whole eggs, flour" +230,eggless coffee cupcakes,Dessert,veg,"maida/ flour, baking powder, sugar, cocoa powder, vanilla essence, butter (softened, salted. in case you are using unsalted butter, add 1/4 tsp salt), milk (to make the batter smooth. 
adjust the milk quantity according to your batter's smoothness), coffee powder + 1 tsp water, to make a paste (you can increase or decrease according to your coffee tolerance)" +231,chicken in white wine,Italian,non-veg,"black pepper, plain flour, olive or sunflower oil, rashers lean bacon, onions or shallots (cut in half), mushrooms, butter, boneless and skinless chicken breasts (chopped into 3cm pieces), dry white wine, chicken or vegetable stock, garlic cloves, bay leaves, fresh thyme sprigs (washed or 1/2 tsp dried thyme)" +232,apple and walnut cake,Dessert,veg,"Apple, Eggs, Walnuts, Sugar, Walnuts, Oil, Flour, Baking Powder, Cinnamon Powder" +233,gluten free almond cake,Healthy Food,veg,"Almond Powder, Egg, Honey, Baking Soda, Vanilla Essence, Salt, Honey, Almond" +234,cinnamon star cookies,Dessert,veg,"Butter, Castor Sugar, Christmas Mix Spicy, Cinnamon Powder, Honey, Glucose, Cream, Poultry Flour, Baking Soda, Bread Flour" +235,whole wheat cake,Healthy Food,veg,"Oven temperature, wheat flour, jaggery sugar, baking powder, oil, eggs, water, almond essence, walnuts" +236,plum cake,Dessert,veg,"oven temp: 150 c-300 f, butter, sugar, eggs, almonds, vanilla essence, mixed fruits (sultanas, raisins, candied peels and cherries), refined flour," +237,double chocolate easter cookies,Dessert,veg,"Butter, Brown Sugar, Castor Sugar, Vanilla Essence, Dark Chocolate, Refined Flour, Cocoa Powder, Baking Soda, Salt, Milk, Cadbury Gems" +238,holi special malai kofta,Indian,veg,"potatoes, paneer (cottage cheese), maida, coriander leaves (chopped), onion, ginger-garlic paste, tomatoes, malai or cream, raisins and cashew nuts, cashew nuts paste, haldi, red chilli powder, kitchen king masala, kasuri methi (dry fenugreek), salt, sugar" +239,homemade gulab jamun,Dessert,veg,"sugar, water, milk, cardamom seeds, saffron, cardamom powder, khoya, baking soda, maida, milk" +240,lamb rogan josh,French,non-veg,"lamb chops or stewing lamb, vegetable oil, cassia bark or cinnamon stick, bay 
leaves, green cardamoms, onions, garlic cloves, butter, turmeric, chilli powder, ground cumin, ground coriander, tomato puree, salt, garam masala, lemon juice" +241,fish curry,Thai,non-veg,"fresh sole fish, black pepper powder, lemon (juiced), onion, coriander seeds, black pepper, raw rice, garlic cloves, coconut, coriander leaves, ginger (large), cinnamon powder, clove powder, ground nut oil, tamarind paste, cooking oil, salt," +242,rice kheer,Indian,veg,"milk, rice (washed), sugar, raisins, green cardamoms, almonds (shredded)" +243,assorted rice kheer sushi,Japanese,veg,"basmati rice, milk, sugar, pistachio, almonds, green cardamoms(powdered), saffron, rose water, rose petals (dried), dark chocolate" +244,jalebi with fennel yogurt pudding,Dessert,veg,"all purpose flour, yogurt, oil, sugar, water, saffron, green cardamom, yogurt (strained), milk (warm), sugar, nutmeg, cardamom powder" +245,broccoli souffle,Italian,veg,"broccoli, butter, extra virgin olive oil, all-purpose flour, low fat milk, salt, black pepper, cheddar cheese, eggs (separated), egg whites, cream of tarter" +246,christmas dry fruit cake,Dessert,veg,"butter (at room temperature), dark brown sugar, eggs, flour, salt, all spice powder, cinnamon, nutmeg, currants; golden raisins; dark raisins (each), soft dried figs, dates (pitted), prunes (stoned dried), apricots (dried), almonds (chopped), brandy, instant espresso (mixed with 1 tbsp water)" +247,microwave chicken steak,Healthy Food,non-veg,"chicken breasts (boneless), eggs (slightly whisked), ginger paste, garlic paste, onions, coriander leaves, green chillies, black pepper powder, flour, vinegar, salt, oil" +248,cheese and ham roll,Snack,veg,"hung curd, butter, cream, ground pimento, lemon juice, vodka, salt and pepper, asparagus spears(cooked), ham, pineapple slices" +249,vegetable manchurian,Chinese,veg,"mixed vegetables - chopped fine or grated, eggs (slightly beaten), refined flour, garlic paste, ginger paste, water, oil , garlic , onions , 
capsicum, cornflour (blended with water), vinegar, salt, soya sauce, tomato puree, celery, ajinomoto (optional), water" +250,jerk chicken,Indian,non-veg,"chicken legs, lime (halved), jerk seasoning powder (bottled), jerk seasoning paste (bottled), olive oil" +251,lemon poppy seed cake ,Dessert,veg,"Flour, Baking Powder, Salt, Castor Sugar, Baking Soda, Eggs, Butter, Vanilla Essence, Lemon Juice, Poppy, Lemon Peel, Lemon Syrup, Wiped Cream, Fresh Berry" +252,steam bunny chicken bao ,Japanese,non-veg,"Buns, Flour, Dry Yeast, Sugar, Salt, Hot Water, Chicken Mince, Eggs, Soy Sauce, Sugar, Sesame Oil, Worcestershire Sauce, Chives, Aromat Powder" +253,double chocolate easter cookies ,Dessert,veg,"Butter, Brown Sugar, Castor Sugar, Vanilla Essence, Dark Chocolate, Refined Flour, Cocoa Powder, Baking Soda, Salt, Milk, Cadbury Gems" +254,orange quinoa sevaiyan,Healthy Food,veg,"sevaiyan, quinoa, orange juice , dried figs, sugar, almond milk, desi ghee, jaggery, walnuts, melon seeds, peanuts" +255,spicy creamy kadai chicken,Indian,non-veg,"chicken, ginger-garlic paste, pepper powder, lime juice, oil, salt, tomatoes, green chillies, ginger-garlic paste, chilli powder, black cardamoms, cloves, water, onion, ginger, green chillies, chilli powder, turmeric powder, garam masala, kasturi methi, cream" +256,apple kheer,Dessert,veg,"apples, basmati rice, nuscovado sugar (you can also use normal sugar), cashew nuts and almonds, cassia bark or cinnamon stick, red grapes" +257,ragi oats ladoo (laddu),Dessert,veg,"ragi flour, oats flour, dates (ripe), milk, honey, ghee, green cardamom powder, white sesame seeds, coconut powder, cashew nuts" +258,lamb korma,Indian,non-veg,"onions, almond paste, ghee, cinnamon sticks, green cardamom, cloves, mace, bay leaves, garlic paste, ginger paste, lamb, salt, rose water (infused with 4-5 strands of saffron), yellow chilli powder, yogurt, yellow chilli powder, black pepper powder, coriander seed powder, cumin powder, red chilli powder, turmeric 
powder, clove powder, green cardamom powder, rose petal powder, nutmeg powder, black cardamom powder, fennel powder, cinnamon powder, mace powder, onions, cream/malai" +259,ragi coconut ladoo (laddu),Dessert,veg,"finger millet flour (ragi), jaggery, peanuts, coconut, salt" +260,quick chicken curry,Indian,non-veg,"Chicken, onion paste, tomato, garlic paste, ginger paste, coriander powder, cumin powder, turmeric powder, red chilli powder, garam masala powder, oil, salt, green coriander" +261,chicken shaami kebab,Indian,non-veg,"Chana Dal, Chicken Thai, Salt, Whole Red Chillies, Cumin, Whole Coriander, Cloves, Pepper Whole, Cinnamon Stick, Celery, Eggs, Green Coriander, Mint, Green Chillies, Ginger, Garlic Buds, Oil" +262,chicken masala,Indian,non-veg,"Chicken, ginger, garlic, onion, tomato, garam masala, bay leaves, salt, turmeric, coriander powder, red chilli powder, oil, green coriander, cream" +263,holi special bhang pakode,Indian,veg,"besan, potato, fresh bhang leaves, spinach, bhang seed powder, ajwain, fresh green chilly , salt, tamarind paste, red chilly powder, mustard seeds, whole red chilli, soda" +264,kuttu atta pizza,Italian,veg,"kuttu atta, salt, sugar, yeast, mozzarella cheese fresh, tomato, basil, cottage cheese, green chillies, olive oil, salt, black pepper (crushed), oregano" +265,arbi kofta with mint yogurt dip,Snack,veg,"arbi/colocasia roots, water chestnut flour (kuttu ka aata), green chili, ginger, carom seeds, rock salt, oil, mint, curd, cucumber, pomegranate" +266,puffed rice squares,Snack,veg,"puffed rice, nuts, honey, jaggery, butter, kewda to flavor, cardamom powder" +267,red velvet banana pudding,Dessert,veg,"one can of sweetened condensed milk, ice cold water, instant vanilla pudding mix, cream cheese (softened and cut into 8 pieces), heavy cream, one 9 x 13 inch layer of red velvet cakcake, ripe bananas, mini chocolate chips, sugar, cocoa, cake flour, baking soda, salt, butter (room temp cut into 1 inch pieces), eggs, milk, sour cream, 
cider vinegar, vanilla extract, red food coloring, all-purpose flour, milk, unsalted butter (room temperature), sugar, vanilla extract" +268,baked wild berry cheesecake,Dessert,veg,"butter, digestive biscuits, berries, cream cheese, castor sugar, vanilla extract, egg (lightly beaten), icing sugar" +269,spiced orange valencia cake,Dessert,veg,"egg whites, egg white powder, sugar, almond powder, hazelnut powder, sugar, hazelnut (toasted)" +270,jalapeno cheese fingers,Mexican,veg,"yellow cornmeal, sugar, baking soda, salt, buttermilk (well-shaken), egg, cheddar (extra sharp), scallion (white and pale green parts only), pickled jalapenos (drained), unsalted butter," +271,californian breakfast benedict,Snack,veg,"brioche loaf, avocado paste, eggs, tomato, spinach, nutmeg powder, phyllo pastry sheet, assorted seasonal fruits, hollandaise sauce" +272,chocolate marquise,Dessert,veg,"dark chocolate (melted), castor sugar, egg yolk, egg, cocoa powder, coffee, cream, berries, dark chocolate, fresh cream" +273,corn & jalapeno poppers,Mexican,veg,"fresh corn kernels, corn flour, whole egg, cheddar cheese, jalapeno poppers, smoked paprika, coriander (toasted & ground), green onions, fresh cilantro, lemon (zest and juice), cooking oil" +274,banana phirni tartlets with fresh strawberries,Snack,veg,"basmati rice (soaked in water), milk, cardamom powder, milk, saffron, sugar, banana, fresh strawberries, plain flour, butter (chilled), castor sugar, egg yolk, chilled water" +275,mexican pizza,Mexican,veg,"Dough tortia, refried beans, bell paper, spring onion, lettuce, mozzarella cheese, orange cheddar cheese, chitpole dressing, for pico de chelo, tomato, onion, lemon juice, salt, green chilli, coriander, black beans (boiled ), Tomato, onion, vine paper, vegetable oil" +276,apple and pear cake,Healthy Food,veg,"Core, Chopped and Sliced ??Apple, Core, Chopped and Sliced ??Pears, Castor Sugar, Vanilla Essence, Sunflower Oil, Eggs, Flour, Milk, Baking Powder, Milk, Cinnamon Stick, Egg 
yolks, Castor Sugar" +277,microwave chocolate cake,Dessert,veg,"Flour, castor or powdered sugar, oil, butter or margarine, cocoa powder, water, vanilla essence, baking soda, baking powder, salt, egg" +278,white chocolate and lemon pastry,Dessert,veg,"White chocolate, fresh cream, VIP cream, vanilla bean, vanilla extract, cream cheese, gelatin, lemon, egg yolks, butter, castor sugar" +279,mixed beans salad,Healthy Food,veg,"mixed boiled beans (choose from rajma, chawli, chick peas, hara chana), spring onions, tomatoes (diced), oil, lemon juice, basil, garlic, salt and pepper, for garnish:, coriander" +280,baked raw banana samosa,Snack,veg,"onion, ginger, curry powder, fresh coriander, green chilli, raw banana paste, refined oil, mustard seeds, phyllo sheets, salt" +281,coconut mango oatmeal with cinnamon hint,Healthy Food,veg,"coconut (tender), coconut milk, oats, ripe mango (diced), castor sugar, dry fruits, honey, cardamom powder" +282,fruit cube salad,Healthy Food,veg,"watermelon, cantaloupe, kiwifruiit, pineapple, marshmallow, mint leaf (crushed nuts, sesame seeds or cinnamon)" +283,veg hakka noodles,Chinese,veg,"noodles, salt, oil, garlic paste, ginger paste, beans, cabbage, carrot, spring onion, capsicum, soy sauce, green chilli sauce, tomato sauce" +284,strawberry quinoa pancakes,Healthy Food,veg,"quinoa, milk, olive oil, egg (slightly beaten), baking powder, orange essence, castor sugar, maple syrup, strawberries (to garnish)" +285,spinach & banana pancakes,Healthy Food,veg,"Rolled Oats, Milk, Spinach, Banana, Egg, Cinnamon Powder, Vanilla Extract, Baking Powder" +286,french onion grilled cheese,French,veg,"Brown slice bread, onion, oil, emmental cheese" +287,pasta in cheese sauce,Italian,veg,"Milk, Flour, Butter, Pepper Powder, Nutmeg, Cheese, Pasta (of your choice)" +288,deviled scotch egg,French,non-veg,"Lamb Keema, Rosemary, Thyme, Eggs (hard boiled peel), Flour, Eggs (Lightly Whipped), Panco, Peanut Oil, Salt, Pepper, Mayonnaise, Apple Cedar Vinegar, 
English Mustard, Salt, Pepper, Paprika and Olive Powder" +289,amritsari fish,Indian,non-veg,"fish with curry, ginger and garlic" +290,butter chicken,Indian,non-veg,mughalai delecacy +291,chicken razala,Indian,non-veg,chicken cooked in a rich gravy with mint +292,chicken tikka,Indian,non-veg, served on a skewer +293,chicken tikka masala,Indian,non-veg,chicken roasted in a yogurt tomato sauce. creamy texture. +294,mushroom matar,Indian,veg,mushroom and peas in a masala/chili sauce +295,tandoori chicken,Indian,non-veg,as a dish originated in the punjab before the independence of india and pakistan. +296,tandoori fish tikka,Indian,non-veg,chickenn lime and ginger and cooked over an open fire. +297,chettinadu chicken,Indian,non-veg,chicken and spices +298,chicken 65,Chinese,non-veg,"ed chicken preparation. chicken, onion, ginger" +299,kolim / jawla,Indian,veg,dried fish named kolim or jawla found in coastal maharashtra with onion and spices. usually eaten with bhakri or chapati +300,black rice,Healthy Food,veg,riety of rice +301,brown rice,Healthy Food,veg,riety of rice. +302,koldil chicken,Chinese,non-veg,made with banana flower; an assamese specialty +303,red rice,Healthy Food,veg,riety of rice. +304,rice,Indian,veg,boiled rice +305,sunga pork,Japanese,veg,curry +306,banana chips,Snack,veg,"dried slices of bananas (fruits of herbaceous plants of the genus musa of the soft, sweet ""dessert banana"" variety), they can be covered with sugar or honey and have a sweet taste, or they can be fried in oil and spices and have a salty and/or spicy taste.[2]" +307,bhurji- egg,Indian,non-veg,"made using indian spices, onion, tomatoes, green chilli, and had with bread, or parathas." +308,flattened rice / poha,Indian,veg,"dehusked rice which is flattened into flat light dry flakes, these flakes of rice swell when added to liquid, whether hot or cold, as they absorb water, milk, or any other liquids. 
the thicknesses of these flakes vary between almost translucently thin (the more expensive varieties) to nearly four times thicker than a normal rice grain." +309,puffed rice,Snack,veg,"grain made from rice; usually made by heating rice kernels under high pressure in the presence of steam, though the method of manufacture varies widely. pori (puffed rice) has been mentioned in various tamil literatures as an offering to hindu deities. offerings of pori and jaggery made to vinayagar (lord ganesh) are mentioned in the tiruppugazh, a 15th-century anthology of tamil religious songs, written by tamil poet arunagirinathar. pori is offered to hindu gods and goddesses in all poojas in the south indian states of kerala and tamil nadu." +310,Miso-Butter Roast Chicken With Acorn Squash Panzanella,Japanese,non-veg,"chicken, acorn squash,sage, rosemary, butter" +311,Honeydew Salad with Ginger Dressing and Peanuts,Healthy Food,veg,"peeled ginger, lightbrown sugar,chilli, honeydew melon,cucumber, avacado" +312,Kimchi and Miso Noodle Soup,Korean,veg,"scallions, cloves of garlic,cabbage kimchi, tofu " +313,Spicy Korean Steak,Korean,non-veg," boneless steak, olive oil, black pepper, oyster sauce, ginger, chilli flakes, cooking wine, kimchi,cilantro" +314,French Spiced Bread,French,veg,"all purpose flour, butter, dried appricots, honey, milk dried plumps, sugar,salt" +315,Quinoa Bowl and Berries,Healthy Food,veg,"quinoa, black berries,strawberries, blueberries,,chia seeds, almonds" +316,Shawarma-Spiced Braised Leg of Lamb,Indian,non-veg,"cumin seed, garlic, chilli powder, lamb leg, corriender, onion, turmeric powder, lemon, tomato" +317,Roast Pork Tenderloin with Carrot Romesco, Korean,non-veg,"carrots, Olive Oil, Salt , pork tenderloin, Garlic, red pepper flake, red wine vineger, " +318,"Ricotta Gnocchi with Asparagus, Peas, and Morels",Italian,veg," ricotta,Asparagus, morel mushrooms, Olive Oil, Parmesan, fresh peas, butter, black pepper, shallot, " +319,Crispy 
Pakora,Indian,veg,"Cabbage, carrot, onion, gram flour" +320,Lamb Tikka,Indian,non-veg,"Boneless Lamb, Turmeric powder, corriender powder, cumin powder, yogurt, garlic, giner" +321,Grilled Sweet Prawn,Chinese,non-veg,"Prawn, teriyaki sauce, salmon, avacado, cucumber" +322,Pho Tai rare beef,Vietnames,non-veg,"Bean Sprouts, lemon, Thai basils, Rice noodles, beef" +323,Summer Rolls,Vietnames,non-veg,"Mint, Lettuce, Noodles, Prawn, Rice Paper,Peanut Sauce" +324,Spice Chicken Baugette,Vietnames,non-veg,"Spice Chicken, cucumber, coriander, pickled carrort, mayo" +325,Bean Curd Rolls,Vietnames,veg,"bean curd,mix herbs, lettuce, cucuber, rice paper" +326,Pho Chay Soup,Vietnames,veg,"Rice noodle,vegatable soup, bean curd, mushroom" +327,Pho Ga Chicken,Vietnames,non-veg,"Rice noodle, chicken, lemon, bean sprouts, Thai basils, chicken" +328,Chicken Sweet Corn Soup,Chinese,non-veg,"Chicken,Chicken brouth, Sweet corn, garlic, ginger" +329,Thai Spareribs,Thai,non-veg," lemongrass, ginger, meaty sparerib, Sesame-Cilantro Rice,peanut sauce,sesame oil,garlic, soy sauce " +330,Frenched Green Beans,French,veg," green beans, Olive Oil, black paper, vineger," +331,Lemony Crab Salad with Baby Greens,Healthy Food,non-veg,"Lemon juice, olive oil, crab, lecttus, paper,salt, vineger" +332,Mushroom Manchruian,Indian,veg,"Mushroom, Panner, Soy sauce, ketchup, Viniger,onion, corriender, Indian Spices" +333,Biryani,Indian,non-veg,"Chicken, Rice, Cinamon, Clove, ginger, garlic, yougurt, turmeric powder, cumin powder, corriender powder" +334,Tandoori Chicken,Indian,non-veg,"Cicken, yougurt, tandori spices, ginger, coriander, turmeric, lime" +335,Shrimp Olivier,French,non-veg,"Shrimp, egg, Potatoes, carrot, onion, pasley, mayo" +336,Potato Casserole,Italian,veg,"potatoes, butter, salt, pepper" +337,Thyme-Roasted Sweet Potatoes,Healthy Food,veg,"sweet potatoes, thyme leaves, olive oil, red papper flakes," +338,Noodle Curry,Vietnames,non-veg,"Vegetable stock curry, coconut, lime leaves, raddish, 
carrot" +339,Grill Lemon grass Pork baguette,Vietnames,non-veg,"Lemon grass, pork, coriender,cucumber, pickeled carrot, chilli" +340,Sukuti Chatpate,Nepalese,non-veg,"crunchy noodles, onion, cucumber,tomatoes, lemon juice, puffed rice, dried meat" +341,Cheese Naan,Indian,veg,"Aall purpose flour, yougurt, cheese" +342,Mushroom Rice,Nepalese,veg,"Mushroom, Rice, Cumin Seeds" +343,Bringle Alo,Nepalese,veg,"aubergine,potato, garlic, ginger, cumin seeds, spices" +344,Mutar Paneer,Indian,veg,"Green pea, cottage cheese, garlic, indian spices" +345,Cucumber and Radish Salad,Healthy Food,veg,"cucumber,raddish,vinegar, coriander,olive, salt papper" +346,Channa Masala,Indian,veg,"chickpeas, turmeric powder, onion, ginger, garlic, curry powder" +347,Saag Alo ,Nepalese,veg,"Spinach, onion, potato, cumin seed, turmeric powder, nepali spices" +348,Alo Tama Bodi,Nepalese,veg,"bamboo shoot, black eye bean, potato" +349,Tarka Daal,Nepalese,veg,"yello lentils,garlic, cumin seed, butter" +350,Jeera Alu,Nepalese,veg,"boiled potatoes, cumin seeds, lemon juice" +351,Nepali Chicken Curry,Nepalese,non-veg,"boneless chicken, onion, tomato, coriender powder,cumin powder,butter,turmeric, chilli powder, garlic,ginger" +352,Lamb Shashlik,Indian,non-veg,"minced lamb marinated with indian spices, garlic, ginger, coriander, yogurt, turmeric, cloves" +353,Hyakula,Nepalese,non-veg,"lamb ribs, cumin powder,turmeric powder,nepali spices, mustard oil " +354,Alo Achar,Nepalese,veg,"boiled potatoes, peas, cucumber, mustard oil, turmeric powder, cumin powder, carrot, salt" +355,Chicken Momo,Nepalese,non-veg,"minced chicken meat, coriender, cumin powder,garlic,ginger,onion,flour wrapping" +356,Black-Bean Burgers,Chinese,veg,"cumin, black bean, cilantro, lettuce,sour cream, oregano, buns" +357,Parmesan Toasts,Italian,veg,"Bread, garlic, olive oil, Parmesan cheese, salt" +358,Rice with Soy-Glazed Bonito Flakes and Sesame Seeds,Japanese,non-veg,"sake, sesame seeds, soy sauce, rice, japanese sauces" 
+359,Shirazi Salad,Healthy Food,veg,"Spring onion, cheese, lemon juice, cucumber, onion, carrot, parsley, cilantro" +360,Sesame Noodles with Chili Oil and Scallions,Chinese,veg,"Noodles, Salt,garlic,chilli oil, sesame oil, chilli flakes, scallions, sichuan pepper, vinegar, soy sauce" +361,Thai Green Curry ,Thai,veg,"cumin seeds, coriander seeds, chillies, garlic, ginger onion, thai sauces,lemon grass, penut " +362,Ground Pork Menudo,Spanish,non-veg,"onion, garlic, olive oil, minced pork, fish sauce, green peas, black peper" +363,Bao Bun ,Chinese,non-veg,"sesame seed, all purpose flour, spring onion, teriyaki sauce" +364,Garlic Naan,Indian,veg,"garlic, all purpose flour, yougurt, cumin seed" +365,Egg Curry with Tomatoes and Cilantro,Indian,non-veg,"boiled egg, cilantro, garlic,cucumber,tumeric powder,tomatoes," +366,Kimchi Bokumbab,Korean,non-veg,"Cabbage Kimchi, Stir fried eggs, rice, seaweed, sesame seeds, spring onion" +367,Korean fried Chicken,Korean,non-veg," korean sauce, boneless chicken,sesame seeds" +368,Prawn kastu Curry,Japanese,non-veg,"prawn, rice, japanes curry sauce, bread crumbs, rice" +369,Beef Bibimbab,Korean,non-veg,"Beef,Cucumber,carrot, mushrooms,Spinich,Rice, Gochujang, Soy sauce" +370,Sweet and Sour Chicken Fried Rice,Chinese,non-veg," Rice, carrot, peas, chicken, soy sauce, sugar, garlic, capcicum" +371,Sea Food Soup,Chinese,non-veg,"Prawn,Squid, fish ball, fish cake, mussels, crab sticks" +372,Pad Thai,Thai,non-veg,"noodles, vegetable oil, garlic, eggs, lime juice , brown sugar, fish sauce, ginger,red pepper flakes, onion, cilantro, penuts" +373,Spicy Kimchi Tofu Stew,Korean,veg,"tofu, kimchi, gochujang,black pepper, sesame seeds, soy sauce, scallions" +374,Slow-Roasted Pork ,Korean,non-veg,"fresh sage,garlic cloves, bone-in pork shoulder,ground pepper,Dijon mustard" +375,Pico de Gallo Verde,Mexican,veg,"avacado, tomato, cucumber,celery, garlic, mint, lemon juice, chillies, onion, cilantro" +376,Pineapple-Coconut Rice,Thai,veg,"brown rice, 
coconut milk, onion, carrot, cashew, pineapple, vegetable oil, ginger, curry powder" +377,Lamb and Green Squash Dumplings,Chinese,non-veg,"Squash, zucchini, scallions, soya sauce, ginger, cooking wine, vineger, all purpose flour, sichuan peppercorns,ground lamb," +378,Crispy Tofu Balls,Japanese,veg," tofu, scallions, mushrooms,Spicy aioli,corn, white pepper, vegetable oil" +379,Grilled Chicken with Almond and Garlic Sauce,Healthy Food,non-veg,"lemon juice, olive oil, mushrooms lemon zest, chicken breast, roasted almonds, garlic" +380,Parmesan Cauliflower and Parsley Salad,Italian,veg,",olives, parmesan cheese, pasley,pepper,oil" +381,Vietnamese Chicken Salad,Vietnames,non-veg,"roasted peanuts,basil leaves, mint leaves, chicken, cucumber,carrot" +382,Eggplant and Beef Stir-Fry,Thai,non-veg,"soya sauce,mint,thai chillies,vermicelli noodles,beef, asaian eggplat" +383,Stir-Fried Lettuces With Crispy Shallots,Chinese,non-veg,"sliced shallots,pepper,salt,garlic, ginger, lettuce,brown rice" +384,Chicken and Dumplings,Chinese,non-veg,"purpose flour,egg,nutmeg, chicken broth, carrot,salt,chives, pepper, chicken thighs" +385,Asian Salmon Bowl with Lime Drizzle,Thai,non-veg," jasmine rice, garlic, butter, soy sauce, salmon fillet,spinach,black sesame seeds" +386,Pasta with Garlic-Scape Pesto,Italian,veg,"pistachios, Parmigiano-Reggiano cheese,salt and black pepper, spaghetti,olive oil" +387,Pico de Gallo,Mexican,veg,"onion,tomatoes, oil, cilantro, jalapeao" +388,Basmati Rice with Summer Vegetable Salad,Indian,veg,"basmati rice,shallot,radishes,greens, sprouts, tomatoes, peas, summer squash,carrot, oil,salt" +389,Fresh Corn Tortillas,Mexican,veg,"corn tortilla mix,corn,salt" +390,Quinoa Tabbouleh,Healthy Food,veg," quinoa, olive oil, tomato, cucumber, parsley, mint, scallions, thinly sliced" +391,Grilled Clams With Herb Butter,French,non-veg," parsley, pepper, salt, scallion, lemon juice, littleneck clams" +392,Rajas Poblanas,Mexican,veg,"poblano pepper, cream, salt, cream 
cheese" +393,Braised Beef Short Ribs,Korean,non-veg,"Beef, Scallion, Raddish, Spanich " +394,Fig and Sesame Tart with Cardamom Orange Cream,Dessert,non-veg," all-purpose flour,sugar,salt,unsalted butter, egg yolk,heavy cream, cinnamon,orange zest, fig,sesame seeds, mild honey" +395,Rouille,French,veg," bread crumbs,cayenne, extra-virgin olive oil,coarse sea salt, garlic" +396,Kimchi Toast,Korean,veg," cream cheese, chopped kimchi, scallions,country-style bread, sesame seeds, cilantro leaves" +397,"Tacos de Gobernador (Shrimp, Poblano, and Cheese Tacos)",Mexican,non-veg,"poblano chiles, bacon, shrips, red salsa, garlic, corn tortillas, lime juice" +398,Melted Broccoli Pasta With Capers and Anchovies,French,non-veg,"broccoli,Bread Crumbs, anchovy fillets, garlic cloves, red pepper flakes, penne pasta, olive oil" +399,Lemon-Ginger Cake with Pistachios,Dessert,non-veg,"egg yolks,lemon juice, unsalted butter, all purpose flour, sugar, ginger, milk" +400,Rosemary Roasted Vegetables,Healthy Food,veg,"kosher salt, rosemary, garlic, potato, olive oil, carrot, walnut, cheese, ground pepper" diff --git a/examples/archived_examples/Food_recommendation/ratings.csv b/examples/archived_examples/Food_recommendation/ratings.csv new file mode 100644 index 0000000..db9c4d0 --- /dev/null +++ b/examples/archived_examples/Food_recommendation/ratings.csv @@ -0,0 +1,513 @@ +User_ID,Food_ID,Rating +1,88,4 +1,46,3 +1,24,5 +1,25,4 +2,49,1 +2,33,8 +2,106,9 +2,71,8 +3,73,9 +3,110,10 +3,168,1 +3,201,8 +3,209,6 +3,46,2 +3,65,3 +3,292,8 +3,299,1 +4,14,5 +4,141,5 +4,170,1 +4,212,10 +4,128,6 +4,21,1 +5,52,9 +5,64,2 +5,198,10 +5,241,9 +5,8,6 +5,27,6 +6,22,5 +6,185,7 +6,207,2 +6,213,6 +7,131,5 +7,109,10 +7,116,8 +7,220,3 +7,21,9 +7,31,10 +8,101,8 +8,138,9 +8,140,4 +8,201,3 +8,242,3 +8,224,9 +8,128,4 +8,163,5 +9,15,1 +9,60,5 +9,91,5 +9,2,3 +9,180,2 +9,182,4 +9,184,6 +9,244,3 +10,70,5 +10,44,10 +10,227,2 +10,100,4 +10,250,2 +11,35,4 +11,154,3 +11,258,10 +11,200,10 +11,274,2 +11,48,5 +12,38,4 +12,169,4 
+13,74,8 +13,127,10 +14,17,3 +14,51,9 +14,87,8 +14,134,7 +14,267,5 +14,272,1 +15,29,7 +15,116,6 +15,130,5 +15,149,1 +15,43,3 +15,214,7 +15,238,6 +15,29,9 +16,243,3 +16,10,4 +16,270,3 +16,282,10 +16,291,1 +17,23,4 +17,58,7 +17,114,1 +17,8,5 +17,251,5 +17,278,8 +17,7,4 +18,18,5 +18,133,8 +18,153,7 +18,37,2 +18,31,7 +18,17,6 +19,9,9 +19,11,4 +19,211,1 +19,249,10 +19,84,10 +20,46,6 +20,261,8 +21,13,9 +21,42,9 +21,126,1 +21,230,3 +21,185,2 +21,280,5 +22,165,2 +22,2,5 +22,127,6 +22,53,3 +22,275,4 +23,26,6 +23,223,5 +24,23,2 +24,193,8 +24,51,5 +25,55,5 +25,63,9 +25,57,10 +25,178,3 +25,287,3 +26,15,9 +26,104,6 +26,55,5 +26,45,5 +27,161,2 +27,213,3 +27,273,10 +27,207,4 +27,86,4 +27,54,1 +27,279,9 +28,54,7 +28,162,9 +28,50,10 +28,5,10 +29,34,5 +29,200,9 +29,281,1 +29,300,9 +30,53,6 +30,304,1 +31,80,1 +31,129,5 +31,150,9 +31,188,10 +31,74,5 +31,232,8 +31,240,3 +31,230,8 +31,264,6 +31,285,9 +32,32,4 +32,39,7 +32,71,1 +32,94,10 +32,127,9 +32,91,2 +32,244,8 +32,263,1 +32,143,1 +32,290,7 +32,309,5 +33,166,4 +33,248,10 +34,163,6 +34,65,4 +34,12,2 +35,28,5 +35,147,3 +35,68,3 +35,15,1 +36,25,8 +36,45,6 +36,96,7 +36,155,10 +36,49,2 +36,212,3 +36,16,7 +36,23,1 +36,184,10 +36,19,5 +37,98,6 +37,191,1 +37,218,7 +37,283,3 +38,42,7 +38,160,2 +38,130,3 +38,243,4 +38,186,8 +39,2,10 +39,28,2 +39,30,6 +39,86,4 +39,175,5 +39,195,4 +39,256,5 +39,50,7 +39,5,10 +40,111,2 +41,4,6 +41,100,3 +41,51,3 +41,259,2 +42,84,2 +42,196,9 +42,284,4 +42,294,4 +42,296,10 +43,34,8 +43,6,10 +43,72,5 +43,85,10 +43,174,6 +43,213,8 +43,32,6 +43,11,4 +44,245,10 +44,295,10 +45,47,2 +45,57,3 +45,10,6 +45,108,3 +45,181,3 +45,208,5 +45,236,2 +46,102,6 +46,59,3 +46,136,10 +46,236,4 +46,68,4 +46,184,7 +47,19,1 +47,62,8 +47,225,3 +47,199,2 +47,49,10 +48,7,5 +48,58,1 +48,119,8 +48,172,8 +48,85,3 +48,9,3 +49,1,5 +49,21,3 +49,77,10 +49,139,10 +49,171,8 +49,205,3 +49,44,3 +49,276,10 +49,6,6 +50,19,9 +50,81,8 +50,95,2 +50,57,7 +50,158,6 +50,167,2 +50,225,3 +51,136,6 +51,151,10 +51,53,4 +52,44,10 +52,187,8 +52,69,6 +52,254,2 
+52,268,3 +52,211,1 +53,128,2 +53,189,7 +53,190,10 +53,226,8 +53,22,3 +54,65,7 +54,109,10 +54,113,6 +54,146,4 +54,25,3 +54,204,2 +54,100,1 +54,259,9 +54,255,1 +54,301,1 +55,52,5 +55,90,2 +55,121,3 +55,117,7 +55,163,2 +55,13,1 +55,288,3 +56,289,9 +56,298,4 +56,305,9 +57,33,3 +57,78,8 +57,75,7 +57,164,6 +57,67,1 +57,271,1 +58,137,1 +58,84,10 +59,43,7 +59,43,3 +59,99,9 +59,145,3 +59,171,10 +59,239,10 +59,270,5 +60,107,1 +60,221,7 +60,144,2 +61,177,6 +61,45,3 +61,22,3 +62,58,4 +62,59,8 +62,9,7 +62,74,7 +62,108,4 +62,118,7 +62,124,7 +62,157,2 +62,183,1 +62,265,4 +62,215,10 +63,17,6 +63,61,10 +63,136,9 +63,186,8 +63,7,9 +63,269,3 +64,18,5 +64,23,2 +64,303,6 +65,48,7 +65,165,5 +66,92,7 +66,192,7 +67,146,7 +67,156,2 +67,203,10 +67,16,9 +67,252,4 +67,47,4 +68,22,10 +68,76,4 +68,47,5 +68,142,6 +68,179,5 +68,186,10 +68,199,8 +68,219,9 +69,46,9 +69,126,4 +69,23,10 +69,25,10 +70,7,8 +70,16,2 +70,68,4 +70,83,9 +70,122,3 +70,262,5 +70,18,7 +70,56,5 +70,277,7 +71,50,4 +71,1,10 +71,137,1 +71,206,5 +71,7,8 +71,53,3 +71,307,1 +72,221,6 +72,222,1 +73,56,1 +73,135,7 +73,234,2 +73,5,7 +74,56,3 +74,132,6 +74,44,6 +75,37,2 +75,12,3 +76,115,5 +76,23,1 +76,216,5 +77,3,1 +77,20,4 +77,6,6 +77,123,10 +77,204,9 +77,228,4 +77,163,2 +78,32,10 +78,246,9 +78,21,4 +78,302,5 +79,36,6 +79,173,3 +79,163,4 +79,15,8 +80,66,5 +80,103,9 +80,112,4 +80,49,8 +80,56,7 +80,21,10 +80,257,5 +80,49,9 +80,306,8 +81,197,5 +81,63,3 +81,65,3 +81,286,6 +82,8,10 +82,12,4 +82,16,4 +82,27,4 +82,229,5 +82,235,7 +82,257,2 +82,87,7 +83,41,5 +83,79,1 +84,18,10 +84,253,8 +85,47,9 +85,202,3 +85,238,7 +86,24,1 +86,159,2 +86,224,5 +86,237,4 +86,31,6 +86,260,7 +87,29,1 +87,200,2 +87,137,4 +88,5,3 +88,10,6 +88,210,2 +88,48,9 +89,120,6 +89,143,4 +89,148,7 +89,163,5 +89,3,7 +90,4,6 +90,75,6 +90,126,10 +91,176,5 +91,163,1 +91,231,5 +91,247,5 +92,6,1 +92,53,5 +92,105,9 +92,62,6 +92,64,3 +93,18,9 +93,125,10 +93,130,5 +93,266,10 +94,117,4 +94,5,2 +94,48,3 +94,243,5 +94,255,8 +95,67,1 +95,228,3 +95,28,10 +95,297,5 +96,89,8 +96,97,7 
+96,222,6 +96,51,10 +96,271,7 +96,293,5 +97,20,1 +97,31,3 +97,46,7 +97,144,6 +97,64,4 +97,215,9 +97,308,3 +98,35,4 +98,40,10 +98,5,7 +98,82,2 +98,47,2 +98,194,2 +98,217,3 +99,49,3 +99,69,8 +99,93,7 +99,152,2 +99,65,7 +99,22,1 +100,24,10 +100,233,10 +100,29,7 +,, diff --git a/tutorials/GraphRAG_with_cognee/cognee_multimedia_demo.ipynb b/tutorials/GraphRAG_with_cognee/cognee_multimedia_demo.ipynb index 660c0ce..376ac7d 100644 --- a/tutorials/GraphRAG_with_cognee/cognee_multimedia_demo.ipynb +++ b/tutorials/GraphRAG_with_cognee/cognee_multimedia_demo.ipynb @@ -292,8 +292,8 @@ "metadata": {}, "outputs": [], "source": [ - "mp3_file_path = '/content/text_to_speech.mp3'\n", - "png_file_path = '/content/example.png'" + "mp3_file_path = \"/content/text_to_speech.mp3\"\n", + "png_file_path = \"/content/example.png\"" ] }, { @@ -314,10 +314,10 @@ "import os\n", "\n", "# Setting environment variables\n", - "if \"GRAPHISTRY_USERNAME\" not in os.environ: \n", + "if \"GRAPHISTRY_USERNAME\" not in os.environ:\n", " os.environ[\"GRAPHISTRY_USERNAME\"] = \"\"\n", "\n", - "if \"GRAPHISTRY_PASSWORD\" not in os.environ: \n", + "if \"GRAPHISTRY_PASSWORD\" not in os.environ:\n", " os.environ[\"GRAPHISTRY_PASSWORD\"] = \"\"\n", "\n", "if \"LLM_API_KEY\" not in os.environ:\n", @@ -325,23 +325,23 @@ "\n", "\n", "# \"neo4j\" or \"networkx\"\n", - "os.environ[\"GRAPH_DATABASE_PROVIDER\"]=\"networkx\" \n", + "os.environ[\"GRAPH_DATABASE_PROVIDER\"] = \"networkx\"\n", "# Not needed if using networkx\n", - "#os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n", - "#os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n", - "#os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n", + "# os.environ[\"GRAPH_DATABASE_URL\"]=\"\"\n", + "# os.environ[\"GRAPH_DATABASE_USERNAME\"]=\"\"\n", + "# os.environ[\"GRAPH_DATABASE_PASSWORD\"]=\"\"\n", "\n", "# \"pgvector\", \"qdrant\", \"weaviate\" or \"lancedb\"\n", - "os.environ[\"VECTOR_DB_PROVIDER\"]=\"lancedb\" \n", + "os.environ[\"VECTOR_DB_PROVIDER\"] = \"lancedb\"\n", "# Not 
needed if using \"lancedb\" or \"pgvector\"\n", "# os.environ[\"VECTOR_DB_URL\"]=\"\"\n", "# os.environ[\"VECTOR_DB_KEY\"]=\"\"\n", "\n", "# Relational Database provider \"sqlite\" or \"postgres\"\n", - "os.environ[\"DB_PROVIDER\"]=\"sqlite\"\n", + "os.environ[\"DB_PROVIDER\"] = \"sqlite\"\n", "\n", "# Database name\n", - "os.environ[\"DB_NAME\"]=\"cognee_db\"\n", + "os.environ[\"DB_NAME\"] = \"cognee_db\"\n", "\n", "# Postgres specific parameters (Only if Postgres or PGVector is used)\n", "# os.environ[\"DB_HOST\"]=\"127.0.0.1\"\n", @@ -451,7 +451,9 @@ "from cognee.infrastructure.databases.graph import get_graph_engine\n", "import graphistry\n", "\n", - "graphistry.login(username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\"))\n", + "graphistry.login(\n", + " username=os.getenv(\"GRAPHISTRY_USERNAME\"), password=os.getenv(\"GRAPHISTRY_PASSWORD\")\n", + ")\n", "\n", "graph_engine = await get_graph_engine()\n", "\n",