diff --git a/README.md b/README.md index e455738..882f49b 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,11 @@ ______________________________________________________________________ + + ## Description -This project entails the development and optimization of a depth estimation model based on a UNET architecture enhanced with **Bi-directional Feature Pyramid Network** (BIFPN) and **EfficientNet** components. This project was implemented within the scope of the "Neural Network Compression with Applications" subject. +This project entails the development and optimization of a depth estimation model based on a UNET architecture enhanced with **Bi-directional Feature Pyramid Network** (BIFPN) and **EfficientNet** components. The model is trained on the NYU Depth V2 dataset and evaluated on the Structural Similarity Index (SSIM) metric. ## Installation diff --git a/notebooks/data_analysis.ipynb b/notebooks/data_analysis.ipynb index 66c883c..84f40cc 100644 --- a/notebooks/data_analysis.ipynb +++ b/notebooks/data_analysis.ipynb @@ -53,15 +53,6 @@ "df_train.shape, df_test.shape" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "50688 + 654" - ] - }, { "cell_type": "code", "execution_count": null, @@ -118,8 +109,8 @@ "metadata": {}, "outputs": [], "source": [ - "img_path = df_train.iloc[0][\"img\"]\n", - "depth_path = df_train.iloc[0][\"depth\"]\n", + "img_path = df_train.iloc[1150][\"img\"]\n", + "depth_path = df_train.iloc[1150][\"depth\"]\n", "\n", "visualize_example(img_path, depth_path)" ] diff --git a/notebooks/example_model_results.ipynb b/notebooks/example_model_results.ipynb new file mode 100644 index 0000000..821f971 --- /dev/null +++ b/notebooks/example_model_results.ipynb @@ -0,0 +1,147 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from torchvision.transforms import transforms\n", + "\n", + "from src.data.components.custom_transforms import BilinearInterpolation, NormalizeData\n", + "from src.data.components.nyu_dataset import NYUDataset\n", + "from src.models.unet_module import UNETLitModule" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_ckpt = \"./logs/train/runs/2024-04-06_18-37-38/checkpoints/epoch_015.ckpt\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model = UNETLitModule.load_from_checkpoint(model_ckpt)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model.eval()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "transforms_img = transforms.Compose([transforms.PILToTensor(), transforms.Resize((224, 224))])\n", + "\n", + "transforms_mask = transforms.Compose(\n", + " [\n", + " transforms.PILToTensor(),\n", + " NormalizeData(10_000 * (1 / 255)),\n", + " BilinearInterpolation((56, 56)),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "test_dataset = NYUDataset(\"nyu2_test.csv\", \"data/\", transforms_img, transforms_mask)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "outputs = []\n", + "\n", + "for i in range(10):\n", + " img, mask = test_dataset[i]\n", + " img = img.unsqueeze(0)\n", + " mask = mask.unsqueeze(0)\n", + " img = img.to(model.device)\n", + " out = model(img)\n", + " outputs.append(out)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def visualize_result(img, mask, out):\n", + " _, axs = plt.subplots(1, 3)\n", + " axs[0].imshow(img.squeeze().permute(1, 2, 0))\n", + " axs[0].set_title(\"Input Image\")\n", + " axs[1].imshow(mask.squeeze())\n", + " axs[1].set_title(\"Ground Truth\")\n", + " axs[2].imshow(out.squeeze().detach().cpu())\n", + " axs[2].set_title(\"Predicted Mask\")\n", + "\n", + " for ax in axs:\n", + " ax.axis(\"off\")\n", + "\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for i in range(5):\n", + " visualize_result(test_dataset[i][0], test_dataset[i][1], outputs[i])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/data/depth_datamodule.py b/src/data/depth_datamodule.py index d510c12..f61c74d 100644 --- a/src/data/depth_datamodule.py +++ b/src/data/depth_datamodule.py @@ -156,7 +156,7 @@ def train_dataloader(self) -> DataLoader[Any]: num_workers=self.hparams.num_workers, pin_memory=self.hparams.pin_memory, shuffle=True, - persistent_workers=True, + # persistent_workers=True, ) def val_dataloader(self) -> DataLoader[Any]: @@ -170,7 +170,7 @@ def val_dataloader(self) -> DataLoader[Any]: num_workers=self.hparams.num_workers, pin_memory=self.hparams.pin_memory, shuffle=False, - persistent_workers=True, + # persistent_workers=True, ) def test_dataloader(self) -> DataLoader[Any]: @@ -184,7 +184,7 @@ def test_dataloader(self) -> DataLoader[Any]: num_workers=self.hparams.num_workers, pin_memory=self.hparams.pin_memory, shuffle=False, - persistent_workers=True, + # persistent_workers=True, ) def teardown(self, stage: Optional[str] = None) -> None: diff --git a/src/models/components/bifpn_decoder.py b/src/models/components/bifpn_decoder.py index 14bf09a..e17282d 100644 --- a/src/models/components/bifpn_decoder.py +++ b/src/models/components/bifpn_decoder.py @@ -142,7 +142,6 @@ def __init__(self, fpn_sizes: List[int]) -> None: self.p7_out_w2 = torch.tensor(1, dtype=torch.float, requires_grad=True) def forward(self, inputs: List[torch.Tensor]) -> List[torch.Tensor]: - print(type(inputs)) epsilon = 0.0001 P4, P5, P6 = inputs