diff --git a/examples/imagebind_demo/README.md b/examples/imagebind_demo/README.md
new file mode 100644
index 00000000..341206c3
--- /dev/null
+++ b/examples/imagebind_demo/README.md
@@ -0,0 +1,16 @@
+# ImageBind demo
+
+A Gradio app showcasing the multi-modal capabilities of ImageBind, supported via the LanceDB API.
+
+## Usage
+You can run it locally by cloning the project as shown below, or access it via Colab - <a href="https://colab.research.google.com/github/lancedb/vectordb-recipes/blob/main/examples/imagebind_demo/main.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
+
+```bash
+git clone https://github.com/lancedb/vectordb-recipes.git
+cd vectordb-recipes/examples/imagebind_demo
+```
+## Install dependencies and run the app
+```bash
+pip install -r requirements.txt
+python3 app.py
+```
diff --git a/examples/imagebind_demo/app.py b/examples/imagebind_demo/app.py
new file mode 100644
index 00000000..53cae54e
--- /dev/null
+++ b/examples/imagebind_demo/app.py
@@ -0,0 +1,115 @@
+"""Gradio demo of cross-modal retrieval with ImageBind embeddings in LanceDB."""
+import lancedb
+import requests
+import lancedb.embeddings.imagebind
+from lancedb.embeddings import get_registry
+from lancedb.pydantic import LanceModel, Vector
+import pandas as pd
+import os
+import gradio as gr
+from downloader import MEDIA_DIR, dowload_and_save_audio, dowload_and_save_image
+
+model = get_registry().get("imagebind").create()
+
+
+class TextModel(LanceModel):
+    """One table row: a caption, its image and audio files, and an embedding."""
+
+    text: str
+    image_uri: str = model.SourceField()
+    audio_path: str
+    vector: Vector(model.ndims()) = model.VectorField()
+
+
+text_list = ["A bird", "A dragon", "A car"]
+image_paths = dowload_and_save_image()
+audio_paths = dowload_and_save_audio()
+
+# Load data: the i-th caption, audio clip and image are stored in the same row.
+inputs = [
+    {"text": text, "audio_path": audio_path, "image_uri": image_uri}
+    for text, audio_path, image_uri in zip(text_list, audio_paths, image_paths)
+]
+
+db = lancedb.connect("/tmp/lancedb")
+# Overwrite any table left by a previous run; a plain create would fail on it.
+table = db.create_table("img_bind", schema=TextModel, mode="overwrite")
+table.add(inputs)
+
+
+def _nearest(query) -> TextModel:
+    """Return the single row whose embedding is closest to ``query``."""
+    return (
+        table.search(query, vector_column_name="vector")
+        .limit(1)
+        .to_pydantic(TextModel)[0]
+    )
+
+
+def process_image(inp_img) -> tuple[str, str]:
+    """Return the (text, audio path) of the row nearest to an image."""
+    hit = _nearest(inp_img)
+    return hit.text, hit.audio_path
+
+
+def process_text(inp_text) -> tuple[str, str]:
+    """Return the (image path, audio path) of the row nearest to a prompt."""
+    hit = _nearest(inp_text)
+    return hit.image_uri, hit.audio_path
+
+
+def process_audio(inp_audio) -> tuple[str, str]:
+    """Return the (image path, text) of the row nearest to an audio clip."""
+    hit = _nearest(inp_audio)
+    return hit.image_uri, hit.text
+
+
+# NOTE(review): the first rule uses type selectors (no "#"), so it presumably
+# matches nothing; adding "#" would hide those components - confirm the intent.
+css = """
+output-audio, output-text {
+display: None
+}
+img {
+/* width: 500px; */
+/* height: 450px; */
+margin-left: auto;
+margin-right: auto;
+object-fit: cover;
+}
+"""
+
+with gr.Blocks(css=css) as app:
+    with gr.Tab("Image to Text and Audio"):
+        with gr.Row():
+            # One column per sample image: the query image and its retrieved outputs.
+            for image_path in image_paths:
+                with gr.Column():
+                    inp_img = gr.Image(value=image_path, type='filepath', elem_id='img', interactive=False)
+                    out_audio = gr.Audio(label="Output Audio", elem_id="output-audio")
+                    out_text = gr.Textbox(label="Output Text", elem_id="output-text")
+                    btn_img = gr.Button("Retrieve")
+                    btn_img.click(process_image, inputs=[inp_img], outputs=[out_text, out_audio])
+
+    with gr.Tab("Text to Image and Audio"):
+        with gr.Row():
+            with gr.Column():
+                input_txt1 = gr.Textbox(label="Enter a prompt:", elem_id="output-text")
+                output_audio4 = gr.Audio(label="Output Audio", elem_id="output-audio")
+                output_img1 = gr.Image(type='filepath', elem_id='img')
+        input_txt1.submit(process_text, inputs=[input_txt1], outputs=[output_img1, output_audio4])
+
+    with gr.Tab("Audio to Image and Text"):
+        with gr.Row():
+            # One column per sample clip: the query audio and its retrieved outputs.
+            for audio_path in audio_paths:
+                with gr.Column():
+                    inp_audio = gr.Audio(value=audio_path, type='filepath', interactive=False)
+                    out_img = gr.Image(type='filepath', elem_id='img')
+                    out_text = gr.Textbox(label="Output Text", elem_id="output-text")
+                    btn_audio = gr.Button("Retrieve")
+                    btn_audio.click(process_audio, inputs=[inp_audio], outputs=[out_img, out_text])
+
+if __name__ == "__main__":
+    # Serve the downloaded media by absolute path so the app works from any cwd.
+    app.launch(share=True, allowed_paths=[str(MEDIA_DIR)])
diff --git a/examples/imagebind_demo/downloader.py b/examples/imagebind_demo/downloader.py
new file mode 100644
index 00000000..1b6b58e9
--- /dev/null
+++ b/examples/imagebind_demo/downloader.py
@@ -0,0 +1,54 @@
+"""Download the sample audio and image files used by the ImageBind demo."""
+import requests
+import os
+from pathlib import Path
+
+# URLs of the raw audio files on GitHub
+audio_file_urls = [
+    'https://github.com/raghavdixit99/assets/raw/main/bird_audio.wav',
+    'https://github.com/raghavdixit99/assets/raw/main/dragon-growl-37570.wav',
+    'https://github.com/raghavdixit99/assets/raw/main/car_audio.wav',
+]
+image_urls = [
+    'https://github.com/raghavdixit99/assets/assets/34462078/abf47cc4-d979-4aaa-83be-53a2115bf318',
+    'https://github.com/raghavdixit99/assets/assets/34462078/93be928e-522b-4e37-889d-d4efd54b2112',
+    'https://github.com/raghavdixit99/assets/assets/34462078/025deaff-632a-4829-a86c-3de6e326402f',
+]
+
+base_path = os.path.dirname(os.path.abspath(__file__))
+# Local directory where the downloaded files are saved
+MEDIA_DIR = Path(base_path) / "test_inputs"
+# Seconds to wait on the server before giving up on a download
+REQUEST_TIMEOUT = 30
+
+
+def _download_all(urls, kind, suffix=""):
+    """Download every URL into MEDIA_DIR and return the local paths in order.
+
+    The local file name is the last URL segment plus ``suffix``; ``kind``
+    ("Audio" or "Image") only appears in the progress message.
+
+    Raises:
+        requests.RequestException: if a download fails. app.py pairs the
+            returned paths by position, so skipping a file would misalign them.
+    """
+    MEDIA_DIR.mkdir(parents=True, exist_ok=True)
+    local_paths = []
+    for url in urls:
+        local_file_path = MEDIA_DIR / f"{url.split('/')[-1]}{suffix}"
+        response = requests.get(url, timeout=REQUEST_TIMEOUT)
+        response.raise_for_status()
+        local_file_path.write_bytes(response.content)
+        local_paths.append(str(local_file_path))
+        print(f"{kind} file downloaded successfully and saved as '{local_file_path}'.")
+    return local_paths
+
+
+def dowload_and_save_audio():
+    """Download the sample audio clips and return their local paths."""
+    return _download_all(audio_file_urls, "Audio")
+
+
+def dowload_and_save_image():
+    """Download the sample images, saved with a .jpeg suffix, and return their paths."""
+    return _download_all(image_urls, "Image", suffix=".jpeg")
diff --git a/examples/imagebind_demo/main.ipynb b/examples/imagebind_demo/main.ipynb
new file mode 100644
index 00000000..c8e87ada
--- /dev/null
+++ b/examples/imagebind_demo/main.ipynb
@@ -0,0 +1,73 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## ImageBind demo\n",
+    "\n",
+    "This notebook runs the Gradio interface for the demo app"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "# On Colab only this notebook is present, so fetch the rest of the example first.\n",
+    "if not os.path.exists(\"requirements.txt\"):\n",
+    "    !git clone https://github.com/lancedb/vectordb-recipes.git\n",
+    "    %cd vectordb-recipes/examples/imagebind_demo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! pip install -r requirements.txt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! python3 ./app.py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/imagebind_demo/requirements.txt b/examples/imagebind_demo/requirements.txt
new file mode 100644
index 00000000..a9d25227
--- /dev/null
+++ b/examples/imagebind_demo/requirements.txt
@@ -0,0 +1,5 @@
+lancedb
+gradio
+pandas
+requests
+imagebind@git+https://github.com/raghavdixit99/ImageBind.git