# ImageBind demo

A Gradio app showcasing the multi-modal capabilities of ImageBind, supported via the LanceDB API.

## Usage
You can run it locally by cloning the project as shown below, or access it via Colab - Open In Colab

```bash
git clone https://github.com/lancedb/vectordb-recipes.git
cd vectordb-recipes/examples/imagebind_demo
```
## Install dependencies and run the app
```bash
pip install -r requirements.txt
python3 app.py
```
def _nearest_match(query) -> TextModel:
    """Return the row of the module-level ``table`` closest to *query*.

    The query is passed straight to ``table.search`` against the ``vector``
    column and limited to a single hit.

    Args:
        query: Whatever the Gradio input component produced: a file path for
            the image/audio components (they are created with
            ``type="filepath"``) or the raw prompt string for the textbox.

    Returns:
        The best-matching row, materialised as a ``TextModel``.

    Raises:
        gr.Error: If the search returns no rows (e.g. the table is empty), so
            the handler fails with a readable message instead of an
            ``IndexError``.
    """
    matches = (
        table.search(query, vector_column_name="vector")
        .limit(1)
        .to_pydantic(TextModel)
    )
    if not matches:
        raise gr.Error("No matching entry found in the table.")
    return matches[0]


def process_image(inp_img) -> tuple[str, str]:
    """Look up the text and audio that best match an input image.

    Args:
        inp_img: Path of the query image.

    Returns:
        ``(text, audio_path)`` of the closest row, in the order expected by
        the ``outputs=[output_text, output_audio]`` wiring of the buttons.
    """
    match = _nearest_match(inp_img)
    return match.text, match.audio_path


def process_text(inp_text) -> tuple[str, str]:
    """Look up the image and audio that best match a text prompt.

    Args:
        inp_text: Prompt entered in the textbox.

    Returns:
        ``(image_uri, audio_path)`` of the closest row.
    """
    match = _nearest_match(inp_text)
    return match.image_uri, match.audio_path


def process_audio(inp_audio) -> tuple[str, str]:
    """Look up the image and text that best match an input audio clip.

    Args:
        inp_audio: Path of the query audio file.

    Returns:
        ``(image_uri, text)`` of the closest row.
    """
    match = _nearest_match(inp_audio)
    return match.image_uri, match.text
gr.Button("Retrieve") + + with gr.Column(): + inp3 = gr.Image( + value=image_paths[2], + type="filepath", + elem_id="img", + interactive=False, + ) + output_audio3 = gr.Audio(label="Output Audio", elem_id="output-audio") + output_text3 = gr.Textbox(label="Output Text", elem_id="output-text") + btn_img3 = gr.Button("Retrieve") + + with gr.Tab("Text to Image and Audio"): + with gr.Row(): + with gr.Column(): + input_txt1 = gr.Textbox(label="Enter a prompt:", elem_id="output-text") + output_audio4 = gr.Audio(label="Output Audio", elem_id="output-audio") + output_img1 = gr.Image(type="filepath", elem_id="img") + + with gr.Tab("Audio to Image and Text"): + with gr.Row(): + with gr.Column(): + inp_audio1 = gr.Audio( + value=audio_paths[0], type="filepath", interactive=False + ) + output_img7 = gr.Image(type="filepath", elem_id="img") + output_text7 = gr.Textbox(label="Output Text", elem_id="output-text") + btn_audio1 = gr.Button("Retrieve") + + with gr.Column(): + inp_audio2 = gr.Audio( + value=audio_paths[1], type="filepath", interactive=False + ) + output_img8 = gr.Image(type="filepath", elem_id="img") + output_text8 = gr.Textbox(label="Output Text", elem_id="output-text") + btn_audio2 = gr.Button("Retrieve") + + with gr.Column(): + inp_audio3 = gr.Audio( + value=audio_paths[2], type="filepath", interactive=False + ) + output_img9 = gr.Image(type="filepath", elem_id="img") + output_text9 = gr.Textbox(label="Output Text", elem_id="output-text") + btn_audio3 = gr.Button("Retrieve") + + # Click actions for buttons/Textboxes + btn_img1.click(process_image, inputs=[inp1], outputs=[output_text1, output_audio1]) + btn_img2.click(process_image, inputs=[inp2], outputs=[output_text2, output_audio2]) + btn_img3.click(process_image, inputs=[inp3], outputs=[output_text3, output_audio3]) + + input_txt1.submit( + process_text, inputs=[input_txt1], outputs=[output_img1, output_audio4] + ) + + btn_audio1.click( + process_audio, inputs=[inp_audio1], outputs=[output_img7, output_text7] + 
def _download_files(urls, label, suffix="", timeout=30):
    """Download every URL into ``<base_path>/test_inputs`` and return the saved paths.

    Downloads are best-effort: a URL that cannot be fetched (network error or
    a non-200 status) is reported on stdout and skipped, so the returned list
    can be shorter than *urls*.

    Args:
        urls: URLs to fetch; the last path segment of each becomes the file name.
        label: Word used in the success message ("Audio" or "Image").
        suffix: Text appended to the file name (e.g. ``".jpeg"`` for URLs whose
            last segment has no extension).
        timeout: Seconds to wait for the server before giving up on a URL;
            without it ``requests.get`` may block indefinitely.

    Returns:
        List of local file paths (as strings) for the successful downloads,
        in the same order as *urls*.
    """
    target_dir = Path(base_path) / "test_inputs"
    target_dir.mkdir(parents=True, exist_ok=True)

    saved_paths = []
    for url in urls:
        filename = url.split("/")[-1] + suffix
        local_file_path = target_dir / filename

        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            # Treat connection problems like a bad status code: report and
            # move on instead of aborting the remaining downloads.
            print(f"Failed to download file. Error: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to download file. Status code: {response.status_code}")
            continue

        local_file_path.write_bytes(response.content)
        saved_paths.append(str(local_file_path))
        print(
            f"{label} file downloaded successfully and saved as '{local_file_path}'."
        )
    return saved_paths


def dowload_and_save_audio():
    """Fetch the sample ``.wav`` files listed in ``audio_file_urls``.

    Returns:
        Local paths of the audio files that were downloaded successfully;
        failed downloads are omitted.
    """
    return _download_files(audio_file_urls, "Audio")


def dowload_and_save_image():
    """Fetch the sample images listed in ``image_urls``.

    The image URLs end in an extension-less asset id, so ``.jpeg`` is appended
    to each local file name.

    Returns:
        Local paths of the image files that were downloaded successfully;
        failed downloads are omitted.
    """
    return _download_files(image_urls, "Image", suffix=".jpeg")
rm -rf /tmp/lancedb" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Image file downloaded successfully and saved as '/Users/raghavdixit/Desktop/open_source/vectordb-recipes_rd/examples/imagebind_demo/test_inputs/abf47cc4-d979-4aaa-83be-53a2115bf318.jpeg'.\n", + "Image file downloaded successfully and saved as '/Users/raghavdixit/Desktop/open_source/vectordb-recipes_rd/examples/imagebind_demo/test_inputs/93be928e-522b-4e37-889d-d4efd54b2112.jpeg'.\n", + "Image file downloaded successfully and saved as '/Users/raghavdixit/Desktop/open_source/vectordb-recipes_rd/examples/imagebind_demo/test_inputs/025deaff-632a-4829-a86c-3de6e326402f.jpeg'.\n", + "Audio file downloaded successfully and saved as '/Users/raghavdixit/Desktop/open_source/vectordb-recipes_rd/examples/imagebind_demo/test_inputs/bird_audio.wav'.\n", + "Audio file downloaded successfully and saved as '/Users/raghavdixit/Desktop/open_source/vectordb-recipes_rd/examples/imagebind_demo/test_inputs/dragon-growl-37570.wav'.\n", + "Audio file downloaded successfully and saved as '/Users/raghavdixit/Desktop/open_source/vectordb-recipes_rd/examples/imagebind_demo/test_inputs/car_audio.wav'.\n", + "/Users/raghavdixit/Desktop/open_source/vectordb-recipes_rd/.env/lib/python3.10/site-packages/torchvision/transforms/_functional_video.py:6: UserWarning: The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms.functional' module instead.\n", + " warnings.warn(\n", + "/Users/raghavdixit/Desktop/open_source/vectordb-recipes_rd/.env/lib/python3.10/site-packages/torchvision/transforms/_transforms_video.py:22: UserWarning: The 'torchvision.transforms._transforms_video' module is deprecated since 0.12 and will be removed in the future. 
Please use the 'torchvision.transforms' module instead.\n", + " warnings.warn(\n", + "Running on local URL: http://127.0.0.1:7860\n", + "Running on public URL: https://fa004aa278890e4e7a.gradio.live\n", + "\n", + "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n", + "^C\n", + "Keyboard interruption in main thread... closing server.\n", + "Killing tunnel 127.0.0.1:7860 <> https://fa004aa278890e4e7a.gradio.live\n" + ] + } + ], + "source": [ + "! python3 ./app.py" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/imagebind_demo/requirements.txt b/examples/imagebind_demo/requirements.txt new file mode 100644 index 0000000..a9d2522 --- /dev/null +++ b/examples/imagebind_demo/requirements.txt @@ -0,0 +1,4 @@ +lancedb +gradio +pandas +imagebind@git+https://github.com/raghavdixit99/ImageBind.git \ No newline at end of file