From e6b7cd0d44512a87e2c01738ab4bb4ed0a5ad8c5 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 15:53:28 -0800 Subject: [PATCH 01/44] Attempt 1 --- Code/main.py | 14 +++++++++++--- Code/training.py | 18 ++++++++++++------ 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/Code/main.py b/Code/main.py index 79e7ea7..88ea65e 100644 --- a/Code/main.py +++ b/Code/main.py @@ -15,16 +15,23 @@ import os import traceback import torch +import torch.multiprocessing as mp +import torch.distributed as dist # Define Working Directories grayscale_dir = '../Dataset/Greyscale' rgb_dir = '../Dataset/RGB' # Define Universal Parameters -image_height = 400 -image_width = 600 +image_height = 4000 +image_width = 6000 batch_size = 2 +def main_worker(rank, world_size): + # Initialize the distributed environment. + dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) + main() # Call the existing main function. + dist.destroy_process_group() # Cleanup after finishing. def main(): # Initialize Dataset Object (PyTorch Tensors) @@ -150,4 +157,5 @@ def main(): if __name__ == '__main__': - main() + world_size = torch.cuda.device_count() # Number of available GPUs + mp.spawn(main_worker, args=(world_size,), nprocs=world_size, join=True) diff --git a/Code/training.py b/Code/training.py index 0639c45..960288d 100644 --- a/Code/training.py +++ b/Code/training.py @@ -11,21 +11,27 @@ # Import Necessary Libraries import torch import torch.nn as nn +from torch.nn.parallel import DistributedDataParallel as DDP +import torch.distributed as dist # Define Training Class class Trainer(): - def __init__(self, model, loss_function, optimizer=None, model_save_path=None): - # Use All Available CUDA GPUs for Training (if Available) - self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - if torch.cuda.device_count() > 1: - model = nn.DataParallel(model) + def __init__(self, model, loss_function, optimizer=None, model_save_path=None, rank=None): + self.rank = rank # Rank of the current process + self.device = torch.device(f'cuda:{rank}' if torch.cuda.is_available() else 'cpu') self.model = model.to(self.device) # Define the loss function self.loss_function = loss_function # Define the optimizer self.optimizer = optimizer if optimizer is not None else torch.optim.Adam(self.model.parameters(), lr=0.001) + # Wrap model with DDP + if torch.cuda.device_count() > 1 and rank is not None: + self.model = DDP(self.model, device_ids=[rank]) # Define the path to save the model - self.model_save_path = model_save_path + self.model_save_path = model_save_path if rank == 0 else None # Only save on master process + + def cleanup_ddp(self): + dist.destroy_process_group() def save_model(self): # Save the model From 3120b72960e664e18038853a4567c285932f36d6 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 15:58:18 -0800 Subject: [PATCH 02/44] Added Debugging Statements --- Code/training.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Code/training.py b/Code/training.py index 960288d..781245a 100644 --- a/Code/training.py +++ b/Code/training.py @@ -29,6 +29,7 @@ def __init__(self, model, loss_function, optimizer=None, model_save_path=None, r self.model = DDP(self.model, device_ids=[rank]) # Define the path to save the model self.model_save_path = model_save_path if rank == 0 else None # Only save on master process + print(f'Process {self.rank} is using {self.device}') def cleanup_ddp(self): dist.destroy_process_group() From 
c61efb4e4991854c29331307f3c37fd2eef7e0b5 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 15:59:58 -0800 Subject: [PATCH 03/44] Added Shell Script to Run Code with Environment Variables --- run_code.sh | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 run_code.sh diff --git a/run_code.sh b/run_code.sh new file mode 100644 index 0000000..7dcb873 --- /dev/null +++ b/run_code.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# Set environment variables +export MASTER_ADDR=localhost +export MASTER_PORT=12345 + +# Run the Python script +python Code/main.py \ No newline at end of file From a6911cd284522d892c636256c1aae4f6924110aa Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 16:02:25 -0800 Subject: [PATCH 04/44] Changes Shell Script to Set Env Variables --- Code/set_env_var.sh | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 Code/set_env_var.sh diff --git a/Code/set_env_var.sh b/Code/set_env_var.sh new file mode 100644 index 0000000..b7dcf76 --- /dev/null +++ b/Code/set_env_var.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +# Set environment variables +export MASTER_ADDR=localhost +export MASTER_PORT=12345 \ No newline at end of file From e668d494b0419fcfc5488c9d9a45daa05ce2700e Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 16:09:50 -0800 Subject: [PATCH 05/44] Added 'rank' as parameter in Main --- Code/main.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Code/main.py b/Code/main.py index 88ea65e..9211402 100644 --- a/Code/main.py +++ b/Code/main.py @@ -30,10 +30,10 @@ def main_worker(rank, world_size): # Initialize the distributed environment. dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) - main() # Call the existing main function. - dist.destroy_process_group() # Cleanup after finishing. + print(f"Initialized process group for rank {rank}, world size {world_size}") + main(rank) # Call the existing main function. 
-def main(): +def main(rank): # Initialize Dataset Object (PyTorch Tensors) try: dataset = CustomDataset(grayscale_dir, rgb_dir, (image_height, image_width), batch_size) @@ -70,17 +70,17 @@ def main(): # Method 1 : Baseline : Mean Squared Error Loss for AutoEncoder and LSTM os.makedirs('../Models/Method1', exist_ok=True) # Creating Directory for Model Saving model_save_path_ae = '../Models/Method1/model_autoencoder_m1.pth' - trainer_autoencoder_baseline = Trainer(model_autoencoder, loss_mse, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae) + trainer_autoencoder_baseline = Trainer(model_autoencoder, loss_mse, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae, rank=rank) print('Method-1 AutoEncoder Trainer Initialized.') model_save_path_lstm = '../Models/Method1/model_lstm_m1.pth' - trainer_lstm_baseline = Trainer(model_lstm, loss_mse, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm) + trainer_lstm_baseline = Trainer(model_lstm, loss_mse, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm, rank=rank) print('Method-1 LSTM Trainer Initialized.') print('-'*10) # Makes Output Readable # Method 2 : Composite Loss (MSE + MaxEnt) for AutoEncoder and Mean Squared Error Loss for LSTM os.makedirs('../Models/Method2', exist_ok=True) # Creating Directory for Model Saving model_save_path_ae = '../Models/Method2/model_autoencoder_m2.pth' - trainer_autoencoder_m2 = Trainer(model=model_autoencoder, loss_function=loss_mep, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae) + trainer_autoencoder_m2 = Trainer(model=model_autoencoder, loss_function=loss_mep, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae, rank=rank) print('Method-2 AutoEncoder Trainer Initialized.') print('Method-2 LSTM == Method-1 LSTM') print('-'*10) # Makes Output Readable @@ -89,7 +89,7 @@ def main(): os.makedirs('../Models/Method3', exist_ok=True) # Creating Directory for Model Saving print('Method-3 AutoEncoder == Method-1 AutoEncoder') model_save_path_lstm = '../Models/Method3/model_lstm_m3.pth' - trainer_lstm_m3 = Trainer(model_lstm, loss_ssim, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm) + trainer_lstm_m3 = Trainer(model_lstm, loss_ssim, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm, rank=rank) print('Method-3 LSTM Trainer Initialized.') print('-'*10) # Makes Output Readable From 6a6045c3c1a7d6718285bcc048e98f1b08953482 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 16:10:09 -0800 Subject: [PATCH 06/44] Updated Model Saving for Device Rank=0 --- Code/training.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Code/training.py b/Code/training.py index 781245a..1405a95 100644 --- a/Code/training.py +++ b/Code/training.py @@ -35,8 +35,9 @@ def cleanup_ddp(self): dist.destroy_process_group() def save_model(self): - # Save the model - torch.save(self.model.state_dict(), self.model_save_path) + if self.rank == 0: + # Save the model + torch.save(self.model.state_dict(), self.model_save_path) def train_autoencoder(self, epochs, train_loader, val_loader): # Print Names of All Available GPUs (if any) to Train the Model From ba830154f9ae1cfb1d079eb67a64931460b64d65 Mon Sep 17 
00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 16:10:28 -0800 Subject: [PATCH 07/44] Removed Unnecessary Script --- run_code.sh | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 run_code.sh diff --git a/run_code.sh b/run_code.sh deleted file mode 100644 index 7dcb873..0000000 --- a/run_code.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -# Set environment variables -export MASTER_ADDR=localhost -export MASTER_PORT=12345 - -# Run the Python script -python Code/main.py \ No newline at end of file From 606e297e9f76aa9c4e1741f0f0401b60f5ff49c7 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 16:15:19 -0800 Subject: [PATCH 08/44] Reduced File Size for Testing --- Code/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Code/main.py b/Code/main.py index 9211402..0e3cd98 100644 --- a/Code/main.py +++ b/Code/main.py @@ -23,8 +23,8 @@ rgb_dir = '../Dataset/RGB' # Define Universal Parameters -image_height = 4000 -image_width = 6000 +image_height = 400 +image_width = 600 batch_size = 2 def main_worker(rank, world_size): From adb2e18a8eff8f3f8d112cd38700b65c0bb2d899 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 16:18:12 -0800 Subject: [PATCH 09/44] Attempt 2 --- Code/main.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Code/main.py b/Code/main.py index 0e3cd98..91f3d0f 100644 --- a/Code/main.py +++ b/Code/main.py @@ -112,6 +112,8 @@ def main(rank): except Exception as e: print(f"Method-1 AutoEncoder Training Error : \n{e}") traceback.print_exc() + finally: + trainer_autoencoder_baseline.cleanup_ddp() print('-'*10) # Makes Output Readable try: epochs = 1 @@ -121,6 +123,8 @@ def main(rank): except Exception as e: print(f"Method-1 LSTM Training Error : \n{e}") traceback.print_exc() + finally: + trainer_lstm_baseline.cleanup_ddp() print('-'*20) # Makes Output Readable # Method-2 @@ -132,6 +136,8 @@ def main(rank): except Exception as e: print(f"Method-2 AutoEncoder Training Error : \n{e}") traceback.print_exc() + finally: + trainer_autoencoder_m2.cleanup_ddp() print('-'*10) # Makes Output Readable print("Method-2 LSTM == Method-1 LSTM, No Need To Train Again.") print('-'*20) # Makes Output Readable @@ -147,6 +153,8 @@ def main(rank): except Exception as e: print(f"Method-3 LSTM Training Error : \n{e}") traceback.print_exc() + finally: + trainer_lstm_m3.cleanup_ddp() print('-'*20) # Makes Output Readable # Method-4 From 92c7c2796788d53c2443d84edfd7a3c71e21ae84 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 16:25:17 -0800 Subject: [PATCH 10/44] Attempt 3 --- Code/main.py | 3 +++ Code/training.py | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Code/main.py b/Code/main.py index 91f3d0f..863fccf 100644 --- a/Code/main.py +++ b/Code/main.py @@ -29,6 +29,9 @@ def main_worker(rank, world_size): # Initialize the distributed environment. + torch.manual_seed(0) + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) print(f"Initialized process group for rank {rank}, world size {world_size}") main(rank) # Call the existing main function. 
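The patches up to this point converge on the standard single-node DDP bootstrap: spawn one process per GPU with mp.spawn, point every process at the same rendezvous address, initialize an NCCL process group, checkpoint only from rank 0, and tear the group down when training ends. The sketch below is a minimal, self-contained version of that pattern, not code from this repository: the toy Net model, the checkpoint path, and the hard-coded address/port are placeholder assumptions, and it presumes at least one CUDA GPU.

    # Minimal single-node DDP sketch (assumptions: one process per GPU, toy model,
    # MASTER_ADDR/MASTER_PORT values mirroring the scripts in the patches above).
    import os
    import torch
    import torch.distributed as dist
    import torch.multiprocessing as mp
    from torch.nn.parallel import DistributedDataParallel as DDP

    class Net(torch.nn.Module):          # placeholder model, not the project's AutoEncoder/LSTM
        def __init__(self):
            super().__init__()
            self.fc = torch.nn.Linear(8, 8)
        def forward(self, x):
            return self.fc(x)

    def worker(rank, world_size):
        os.environ.setdefault('MASTER_ADDR', 'localhost')
        os.environ.setdefault('MASTER_PORT', '12345')
        dist.init_process_group('nccl', init_method='env://', world_size=world_size, rank=rank)
        torch.cuda.set_device(rank)                    # bind this process to its own GPU
        model = Net().to(rank)
        model = DDP(model, device_ids=[rank])          # gradients are all-reduced across ranks
        # ... training loop would go here ...
        if rank == 0:                                  # checkpoint once, from the master process
            torch.save(model.module.state_dict(), 'checkpoint.pth')  # .module strips the DDP wrapper
        dist.barrier()                                 # keep other ranks alive until the save finishes
        dist.destroy_process_group()

    if __name__ == '__main__':
        world_size = torch.cuda.device_count()
        mp.spawn(worker, args=(world_size,), nprocs=world_size, join=True)

Saving model.module.state_dict() rather than the wrapped model's state_dict keeps the checkpoint free of the "module."-prefixed keys that DDP otherwise adds, so it can later be loaded into a plain, single-process model.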
diff --git a/Code/training.py b/Code/training.py index 1405a95..90873e3 100644 --- a/Code/training.py +++ b/Code/training.py @@ -26,13 +26,14 @@ def __init__(self, model, loss_function, optimizer=None, model_save_path=None, r self.optimizer = optimizer if optimizer is not None else torch.optim.Adam(self.model.parameters(), lr=0.001) # Wrap model with DDP if torch.cuda.device_count() > 1 and rank is not None: - self.model = DDP(self.model, device_ids=[rank]) + self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=True) # Define the path to save the model self.model_save_path = model_save_path if rank == 0 else None # Only save on master process print(f'Process {self.rank} is using {self.device}') def cleanup_ddp(self): - dist.destroy_process_group() + if dist.is_initialized(): + dist.destroy_process_group() def save_model(self): if self.rank == 0: From 49a673f3c36c424441907b2f2a9f67cee852c054 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 16:50:21 -0800 Subject: [PATCH 11/44] Removed Redundant Print Statements --- Code/main.py | 136 ++++++++++++++++++++++++++++++----------------- Code/training.py | 10 ++-- 2 files changed, 92 insertions(+), 54 deletions(-) diff --git a/Code/main.py b/Code/main.py index 863fccf..a137ac2 100644 --- a/Code/main.py +++ b/Code/main.py @@ -40,32 +40,43 @@ def main(rank): # Initialize Dataset Object (PyTorch Tensors) try: dataset = CustomDataset(grayscale_dir, rgb_dir, (image_height, image_width), batch_size) - print('Importing Dataset Complete.') + if rank == 0: + print('Importing Dataset Complete.') except Exception as e: - print(f"Importing Dataset In-Complete : \n{e}") + if rank == 0: + print(f"Importing Dataset In-Complete : \n{e}") + if rank == 0: + print('-'*20) # Makes Output Readable # Import Loss Functions try: loss_mse = LossMSE() # Mean Squared Error Loss loss_mep = LossMEP(alpha=0.4) # Maximum Entropy Loss loss_ssim = SSIMLoss() # Structural Similarity Index Measure Loss - print('Importing Loss Functions Complete.') + if rank == 0: + print('Importing Loss Functions Complete.') except Exception as e: - print(f"Importing Loss Functions In-Complete : \n{e}") - print('-'*20) # Makes Output Readable + if rank == 0: + print(f"Importing Loss Functions In-Complete : \n{e}") + if rank == 0: + print('-'*20) # Makes Output Readable # Initialize AutoEncoder Model and Import Dataloader (Training, Validation) data_autoencoder_train, data_autoencoder_val = dataset.get_autoencoder_batches(val_split=0.2) - print('AutoEncoder Model Data Imported.') + if rank == 0: + print('AutoEncoder Model Data Imported.') model_autoencoder = Grey2RGBAutoEncoder() - print('AutoEncoder Model Initialized.') - print('-'*20) # Makes Output Readable + if rank == 0: + print('AutoEncoder Model Initialized.') + print('-'*20) # Makes Output Readable # Initialize LSTM Model and Import Dataloader (Training, Validation) data_lstm_train, data_lstm_val = dataset.get_lstm_batches(val_split=0.25, sequence_length=2) - print('LSTM Model Data Imported.') + if rank == 0: + print('LSTM Model Data Imported.') model_lstm = ConvLSTM(input_dim=1, hidden_dims=[1,1,1], kernel_size=(3, 3), num_layers=3, alpha=0.5) - print('LSTM Model Initialized.') - print('-'*20) # Makes Output Readable + if rank == 0: + print('LSTM Model Initialized.') + print('-'*20) # Makes Output Readable ''' Initialize Trainer Objects @@ -74,33 +85,38 @@ def main(rank): os.makedirs('../Models/Method1', exist_ok=True) # Creating Directory for Model Saving model_save_path_ae = 
'../Models/Method1/model_autoencoder_m1.pth' trainer_autoencoder_baseline = Trainer(model_autoencoder, loss_mse, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae, rank=rank) - print('Method-1 AutoEncoder Trainer Initialized.') + if rank == 0: + print('Method-1 AutoEncoder Trainer Initialized.') model_save_path_lstm = '../Models/Method1/model_lstm_m1.pth' trainer_lstm_baseline = Trainer(model_lstm, loss_mse, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm, rank=rank) - print('Method-1 LSTM Trainer Initialized.') - print('-'*10) # Makes Output Readable + if rank == 0: + print('Method-1 LSTM Trainer Initialized.') + print('-'*10) # Makes Output Readable # Method 2 : Composite Loss (MSE + MaxEnt) for AutoEncoder and Mean Squared Error Loss for LSTM os.makedirs('../Models/Method2', exist_ok=True) # Creating Directory for Model Saving model_save_path_ae = '../Models/Method2/model_autoencoder_m2.pth' trainer_autoencoder_m2 = Trainer(model=model_autoencoder, loss_function=loss_mep, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae, rank=rank) - print('Method-2 AutoEncoder Trainer Initialized.') - print('Method-2 LSTM == Method-1 LSTM') - print('-'*10) # Makes Output Readable + if rank == 0: + print('Method-2 AutoEncoder Trainer Initialized.') + print('Method-2 LSTM == Method-1 LSTM') + print('-'*10) # Makes Output Readable # Method 3 : Mean Squared Error Loss for AutoEncoder and SSIM Loss for LSTM os.makedirs('../Models/Method3', exist_ok=True) # Creating Directory for Model Saving - print('Method-3 AutoEncoder == Method-1 AutoEncoder') + if rank == 0: + print('Method-3 AutoEncoder == Method-1 AutoEncoder') model_save_path_lstm = '../Models/Method3/model_lstm_m3.pth' trainer_lstm_m3 = Trainer(model_lstm, loss_ssim, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm, rank=rank) - print('Method-3 LSTM Trainer Initialized.') - print('-'*10) # Makes Output Readable + if rank == 0: + print('Method-3 LSTM Trainer Initialized.') + print('-'*10) # Makes Output Readable # Method 4 : Proposed Method : Composite Loss (MSE + MaxEnt) for AutoEncoder and SSIM Loss for LSTM - print('Method-4 AutoEncoder == Method-2 AutoEncoder') - print('Method-4 LSTM == Method-3 LSTM') - - print('-'*20) # Makes Output Readable + if rank == 0: + print('Method-4 AutoEncoder == Method-2 AutoEncoder') + print('Method-4 LSTM == Method-3 LSTM') + print('-'*20) # Makes Output Readable ''' @@ -109,62 +125,82 @@ def main(rank): # Method-1 try: epochs = 1 - print('Method-1 AutoEncoder Training Start') + if rank == 0: + print('Method-1 AutoEncoder Training Start') model_autoencoder_m1 = trainer_autoencoder_baseline.train_autoencoder(epochs, data_autoencoder_train, data_autoencoder_val) - print('Method-1 AutoEncoder Training Complete.') + if rank == 0: + print('Method-1 AutoEncoder Training Complete.') except Exception as e: - print(f"Method-1 AutoEncoder Training Error : \n{e}") + if rank == 0: + print(f"Method-1 AutoEncoder Training Error : \n{e}") traceback.print_exc() finally: - trainer_autoencoder_baseline.cleanup_ddp() - print('-'*10) # Makes Output Readable + if rank == 0: + trainer_autoencoder_baseline.cleanup_ddp() + if rank == 0: + print('-'*10) # Makes Output Readable try: epochs = 1 - print('Method-1 LSTM Training Start') + if rank == 0: + print('Method-1 LSTM Training Start') model_lstm_m1 = 
trainer_lstm_baseline.train_lstm(epochs, data_lstm_train, data_lstm_val) - print('Method-1 LSTM Training Complete.') + if rank == 0: + print('Method-1 LSTM Training Complete.') except Exception as e: - print(f"Method-1 LSTM Training Error : \n{e}") + if rank == 0: + print(f"Method-1 LSTM Training Error : \n{e}") traceback.print_exc() finally: - trainer_lstm_baseline.cleanup_ddp() - print('-'*20) # Makes Output Readable + if rank == 0: + trainer_lstm_baseline.cleanup_ddp() + if rank == 0: + print('-'*20) # Makes Output Readable # Method-2 try: epochs = 1 - print('Method-2 AutoEncoder Training Start') + if rank == 0: + print('Method-2 AutoEncoder Training Start') model_autoencoder_m2 = trainer_autoencoder_m2.train_autoencoder(epochs, data_autoencoder_train, data_autoencoder_val) - print('Method-2 AutoEncoder Training Complete.') + if rank == 0: + print('Method-2 AutoEncoder Training Complete.') except Exception as e: - print(f"Method-2 AutoEncoder Training Error : \n{e}") + if rank == 0: + print(f"Method-2 AutoEncoder Training Error : \n{e}") traceback.print_exc() finally: trainer_autoencoder_m2.cleanup_ddp() - print('-'*10) # Makes Output Readable - print("Method-2 LSTM == Method-1 LSTM, No Need To Train Again.") - print('-'*20) # Makes Output Readable + if rank == 0: + print('-'*10) # Makes Output Readable + print("Method-2 LSTM == Method-1 LSTM, No Need To Train Again.") + print('-'*20) # Makes Output Readable # Method-3 - print("Method-3 AutoEncoder == Method-1 AutoEncoder, No Need To Train Again.") - print('-'*10) # Makes Output Readable + if rank == 0: + print("Method-3 AutoEncoder == Method-1 AutoEncoder, No Need To Train Again.") + print('-'*10) # Makes Output Readable try: epochs = 1 - print('Method-3 LSTM Training Start.') + if rank == 0: + print('Method-3 LSTM Training Start.') model_lstm_m3 = trainer_lstm_m3.train_lstm(epochs, data_lstm_train, data_lstm_val) - print('Method-3 LSTM Training Complete.') + if rank == 0: + print('Method-3 LSTM Training Complete.') except Exception as e: - print(f"Method-3 LSTM Training Error : \n{e}") + if rank == 0: + print(f"Method-3 LSTM Training Error : \n{e}") traceback.print_exc() finally: trainer_lstm_m3.cleanup_ddp() - print('-'*20) # Makes Output Readable + if rank == 0: + print('-'*20) # Makes Output Readable # Method-4 - print("Method-4 AutoEncoder == Method-2 AutoEncoder, No Need To Train Again.") - print('-'*10) # Makes Output Readable - print("Method-4 LSTM == Method-3 LSTM, No Need To Train Again.") - print('-'*20) # Makes Output Readable + if rank == 0: + print("Method-4 AutoEncoder == Method-2 AutoEncoder, No Need To Train Again.") + print('-'*10) # Makes Output Readable + print("Method-4 LSTM == Method-3 LSTM, No Need To Train Again.") + print('-'*20) # Makes Output Readable if __name__ == '__main__': diff --git a/Code/training.py b/Code/training.py index 90873e3..3f75fc2 100644 --- a/Code/training.py +++ b/Code/training.py @@ -42,7 +42,7 @@ def save_model(self): def train_autoencoder(self, epochs, train_loader, val_loader): # Print Names of All Available GPUs (if any) to Train the Model - if torch.cuda.device_count() > 0: + if torch.cuda.device_count() > 0 and self.rank == 0: gpu_names = ', '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]) print("\tGPUs being used for Training : ",gpu_names) best_val_loss = float('inf') @@ -63,7 +63,8 @@ def train_autoencoder(self, epochs, train_loader, val_loader): val_loss = sum(self.loss_function(self.model(input.to(self.device)), target.to(self.device)).item() 
for input, target in val_loader) # Compute Total Validation Loss val_loss /= len(val_loader) # Compute Average Validation Loss # Print epochs and losses - print(f'\tAutoEncoder Epoch {epoch+1}/{epochs} --- Training Loss: {loss.item()} --- Validation Loss: {val_loss}') + if self.rank == 0: + print(f'\tAutoEncoder Epoch {epoch+1}/{epochs} --- Training Loss: {loss.item()} --- Validation Loss: {val_loss}') # If the current validation loss is lower than the best validation loss, save the model if val_loss < best_val_loss: best_val_loss = val_loss # Update the best validation loss @@ -73,7 +74,7 @@ def train_autoencoder(self, epochs, train_loader, val_loader): def train_lstm(self, epochs, train_loader, val_loader): # Print Names of All Available GPUs (if any) to Train the Model - if torch.cuda.device_count() > 0: + if torch.cuda.device_count() > 0 and self.rank == 0: gpu_names = ', '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]) print("\tGPUs being used for Training : ",gpu_names) best_val_loss = float('inf') @@ -97,7 +98,8 @@ def train_lstm(self, epochs, train_loader, val_loader): val_loss += self.loss_function(output_sequence, target_sequence).item() # Accumulate loss val_loss /= len(val_loader) # Average validation loss # Print epochs and losses - print(f'\tLSTM Epoch {epoch+1}/{epochs} --- Training Loss: {loss.item()} --- Validation Loss: {val_loss}') + if self.rank == 0: + print(f'\tLSTM Epoch {epoch+1}/{epochs} --- Training Loss: {loss.item()} --- Validation Loss: {val_loss}') # Model saving based on validation loss if val_loss < best_val_loss: best_val_loss = val_loss From ff6fc090e635dc828bd70293a4b4f683004afa94 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 16:56:34 -0800 Subject: [PATCH 12/44] Removed Debugging Statements --- Code/main.py | 1 - Code/training.py | 6 ++++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Code/main.py b/Code/main.py index a137ac2..f12b2e3 100644 --- a/Code/main.py +++ b/Code/main.py @@ -33,7 +33,6 @@ def main_worker(rank, world_size): torch.backends.cudnn.enabled = True torch.backends.cudnn.benchmark = True dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) - print(f"Initialized process group for rank {rank}, world size {world_size}") main(rank) # Call the existing main function. 
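The `if rank == 0:` guards introduced in this patch keep each message from being printed once per spawned process. If that guard starts to dominate the code, one common alternative (illustrative only, not part of these patches; the helper name is hypothetical) is a logger that is only verbose on the master rank:

    # Illustrative rank-aware logging helper.
    import logging
    import torch.distributed as dist

    def get_rank_logger(name='train'):
        rank = dist.get_rank() if dist.is_initialized() else 0
        logger = logging.getLogger(name)
        if not logger.handlers:
            logger.addHandler(logging.StreamHandler())
        # Only rank 0 emits INFO-level progress messages; other ranks stay quiet
        # unless something at WARNING level or above happens.
        logger.setLevel(logging.INFO if rank == 0 else logging.WARNING)
        return logger

    # usage inside main(rank):
    #   log = get_rank_logger()
    #   log.info('Importing Dataset Complete.')   # printed once, by rank 0 only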
def main(rank): diff --git a/Code/training.py b/Code/training.py index 3f75fc2..cf35cf2 100644 --- a/Code/training.py +++ b/Code/training.py @@ -13,6 +13,7 @@ import torch.nn as nn from torch.nn.parallel import DistributedDataParallel as DDP import torch.distributed as dist +import warnings # Define Training Class class Trainer(): @@ -26,10 +27,11 @@ def __init__(self, model, loss_function, optimizer=None, model_save_path=None, r self.optimizer = optimizer if optimizer is not None else torch.optim.Adam(self.model.parameters(), lr=0.001) # Wrap model with DDP if torch.cuda.device_count() > 1 and rank is not None: - self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=True) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) # Remove Warnings Shown because of 'find_unused_parameters=True' + self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=True) # Define the path to save the model self.model_save_path = model_save_path if rank == 0 else None # Only save on master process - print(f'Process {self.rank} is using {self.device}') def cleanup_ddp(self): if dist.is_initialized(): From d7ddb3a713e8e70ad9ce02c9ef8e614e1bf97058 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 16:59:19 -0800 Subject: [PATCH 13/44] Removed Debugging Statement --- Code/training.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/Code/training.py b/Code/training.py index cf35cf2..1ea4d76 100644 --- a/Code/training.py +++ b/Code/training.py @@ -13,7 +13,6 @@ import torch.nn as nn from torch.nn.parallel import DistributedDataParallel as DDP import torch.distributed as dist -import warnings # Define Training Class class Trainer(): @@ -27,9 +26,7 @@ def __init__(self, model, loss_function, optimizer=None, model_save_path=None, r self.optimizer = optimizer if optimizer is not None else torch.optim.Adam(self.model.parameters(), lr=0.001) # Wrap model with DDP if torch.cuda.device_count() > 1 and rank is not None: - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=UserWarning) # Remove Warnings Shown because of 'find_unused_parameters=True' - self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=True) + self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=False) # Define the path to save the model self.model_save_path = model_save_path if rank == 0 else None # Only save on master process From 4100196a6747d17902feaae34175a7c1a36ea018 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 17:06:28 -0800 Subject: [PATCH 14/44] Fized Issue with find_unused_parameters in DDIP --- Code/main.py | 28 ++++++++++++++++++++++++---- Code/training.py | 6 +++--- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/Code/main.py b/Code/main.py index f12b2e3..44e7693 100644 --- a/Code/main.py +++ b/Code/main.py @@ -83,11 +83,21 @@ def main(rank): # Method 1 : Baseline : Mean Squared Error Loss for AutoEncoder and LSTM os.makedirs('../Models/Method1', exist_ok=True) # Creating Directory for Model Saving model_save_path_ae = '../Models/Method1/model_autoencoder_m1.pth' - trainer_autoencoder_baseline = Trainer(model_autoencoder, loss_mse, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae, rank=rank) + trainer_autoencoder_baseline = Trainer(model_autoencoder, + loss_mse, + optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), + model_save_path=model_save_path_ae, + rank=rank, + 
find_unused_parameters=False) if rank == 0: print('Method-1 AutoEncoder Trainer Initialized.') model_save_path_lstm = '../Models/Method1/model_lstm_m1.pth' - trainer_lstm_baseline = Trainer(model_lstm, loss_mse, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm, rank=rank) + trainer_lstm_baseline = Trainer(model_lstm, + loss_mse, + optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), + model_save_path=model_save_path_lstm, + rank=rank, + find_unused_parameters=True) if rank == 0: print('Method-1 LSTM Trainer Initialized.') print('-'*10) # Makes Output Readable @@ -95,7 +105,12 @@ def main(rank): # Method 2 : Composite Loss (MSE + MaxEnt) for AutoEncoder and Mean Squared Error Loss for LSTM os.makedirs('../Models/Method2', exist_ok=True) # Creating Directory for Model Saving model_save_path_ae = '../Models/Method2/model_autoencoder_m2.pth' - trainer_autoencoder_m2 = Trainer(model=model_autoencoder, loss_function=loss_mep, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae, rank=rank) + trainer_autoencoder_m2 = Trainer(model=model_autoencoder, + loss_function=loss_mep, + optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), + model_save_path=model_save_path_ae, + rank=rank, + find_unused_parameters=False) if rank == 0: print('Method-2 AutoEncoder Trainer Initialized.') print('Method-2 LSTM == Method-1 LSTM') @@ -106,7 +121,12 @@ def main(rank): if rank == 0: print('Method-3 AutoEncoder == Method-1 AutoEncoder') model_save_path_lstm = '../Models/Method3/model_lstm_m3.pth' - trainer_lstm_m3 = Trainer(model_lstm, loss_ssim, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm, rank=rank) + trainer_lstm_m3 = Trainer(model_lstm, + loss_ssim, + optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), + model_save_path=model_save_path_lstm, + rank=rank, + find_unused_parameters=True) if rank == 0: print('Method-3 LSTM Trainer Initialized.') print('-'*10) # Makes Output Readable diff --git a/Code/training.py b/Code/training.py index 1ea4d76..9f24db3 100644 --- a/Code/training.py +++ b/Code/training.py @@ -16,7 +16,7 @@ # Define Training Class class Trainer(): - def __init__(self, model, loss_function, optimizer=None, model_save_path=None, rank=None): + def __init__(self, model, loss_function, optimizer=None, model_save_path=None, rank=None, find_unused_parameters=True): self.rank = rank # Rank of the current process self.device = torch.device(f'cuda:{rank}' if torch.cuda.is_available() else 'cpu') self.model = model.to(self.device) @@ -26,10 +26,10 @@ def __init__(self, model, loss_function, optimizer=None, model_save_path=None, r self.optimizer = optimizer if optimizer is not None else torch.optim.Adam(self.model.parameters(), lr=0.001) # Wrap model with DDP if torch.cuda.device_count() > 1 and rank is not None: - self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=False) + self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=find_unused_parameters) # Define the path to save the model self.model_save_path = model_save_path if rank == 0 else None # Only save on master process - + def cleanup_ddp(self): if dist.is_initialized(): dist.destroy_process_group() From 9a0b5a0eb9d690f83a1a88d9535d1e9fab388191 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 17:09:29 -0800 Subject: [PATCH 15/44] Removed Debugging Statements --- Code/main.py | 12 ++++-------- Code/training.py 
| 4 ++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/Code/main.py b/Code/main.py index 44e7693..6cf173d 100644 --- a/Code/main.py +++ b/Code/main.py @@ -87,8 +87,7 @@ def main(rank): loss_mse, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae, - rank=rank, - find_unused_parameters=False) + rank=rank) if rank == 0: print('Method-1 AutoEncoder Trainer Initialized.') model_save_path_lstm = '../Models/Method1/model_lstm_m1.pth' @@ -96,8 +95,7 @@ def main(rank): loss_mse, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm, - rank=rank, - find_unused_parameters=True) + rank=rank) if rank == 0: print('Method-1 LSTM Trainer Initialized.') print('-'*10) # Makes Output Readable @@ -109,8 +107,7 @@ def main(rank): loss_function=loss_mep, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae, - rank=rank, - find_unused_parameters=False) + rank=rank) if rank == 0: print('Method-2 AutoEncoder Trainer Initialized.') print('Method-2 LSTM == Method-1 LSTM') @@ -125,8 +122,7 @@ def main(rank): loss_ssim, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm, - rank=rank, - find_unused_parameters=True) + rank=rank) if rank == 0: print('Method-3 LSTM Trainer Initialized.') print('-'*10) # Makes Output Readable diff --git a/Code/training.py b/Code/training.py index 9f24db3..86428bc 100644 --- a/Code/training.py +++ b/Code/training.py @@ -16,7 +16,7 @@ # Define Training Class class Trainer(): - def __init__(self, model, loss_function, optimizer=None, model_save_path=None, rank=None, find_unused_parameters=True): + def __init__(self, model, loss_function, optimizer=None, model_save_path=None, rank=None): self.rank = rank # Rank of the current process self.device = torch.device(f'cuda:{rank}' if torch.cuda.is_available() else 'cpu') self.model = model.to(self.device) @@ -26,7 +26,7 @@ def __init__(self, model, loss_function, optimizer=None, model_save_path=None, r self.optimizer = optimizer if optimizer is not None else torch.optim.Adam(self.model.parameters(), lr=0.001) # Wrap model with DDP if torch.cuda.device_count() > 1 and rank is not None: - self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=find_unused_parameters) + self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=True) # Define the path to save the model self.model_save_path = model_save_path if rank == 0 else None # Only save on master process From 47ec6ed8a8736504a67b4a29c73a555647bea9f0 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sat, 30 Dec 2023 17:15:53 -0800 Subject: [PATCH 16/44] Removed Warnings from being printed on Terminal --- Code/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Code/main.py b/Code/main.py index 6cf173d..58abe93 100644 --- a/Code/main.py +++ b/Code/main.py @@ -17,6 +17,7 @@ import torch import torch.multiprocessing as mp import torch.distributed as dist +import warnings # Define Working Directories grayscale_dir = '../Dataset/Greyscale' @@ -219,5 +220,8 @@ def main(rank): if __name__ == '__main__': + if dist.get_rank() == 0: + # Remove Warnings + warnings.filterwarnings("ignore", category=UserWarning, module='torch.nn.parallel.distributed') world_size = torch.cuda.device_count() # Number of available GPUs mp.spawn(main_worker, args=(world_size,), nprocs=world_size, join=True) From 06f9df777d92e1a1657d4145b053ee60b501b94e Mon Sep 17 00:00:00 2001 
From: iSiddharth20 Date: Sat, 30 Dec 2023 17:18:55 -0800 Subject: [PATCH 17/44] Changed Warning Printing Mechanish --- Code/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Code/main.py b/Code/main.py index 58abe93..c0362b2 100644 --- a/Code/main.py +++ b/Code/main.py @@ -34,6 +34,9 @@ def main_worker(rank, world_size): torch.backends.cudnn.enabled = True torch.backends.cudnn.benchmark = True dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) + if dist.get_rank() == 0: + # Remove Warnings + warnings.filterwarnings("ignore", category=UserWarning, module='torch.nn.parallel') main(rank) # Call the existing main function. def main(rank): @@ -220,8 +223,5 @@ def main(rank): if __name__ == '__main__': - if dist.get_rank() == 0: - # Remove Warnings - warnings.filterwarnings("ignore", category=UserWarning, module='torch.nn.parallel.distributed') world_size = torch.cuda.device_count() # Number of available GPUs - mp.spawn(main_worker, args=(world_size,), nprocs=world_size, join=True) + mp.spawn(main_worker, args=(world_size,), nprocs=world_size, join=True) \ No newline at end of file From d82c40016aebd5548341434597e65011592e8e7d Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 11:28:27 -0800 Subject: [PATCH 18/44] Atempt to Hide UserWarnings from Console --- Code/main.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Code/main.py b/Code/main.py index c0362b2..ae1b2b7 100644 --- a/Code/main.py +++ b/Code/main.py @@ -223,5 +223,8 @@ def main(rank): if __name__ == '__main__': + if dist.get_rank() == 0: + warnings.filterwarnings("ignore", category=UserWarning, module='torch.nn.parallel.distributed') + world_size = torch.cuda.device_count() # Number of available GPUs mp.spawn(main_worker, args=(world_size,), nprocs=world_size, join=True) \ No newline at end of file From 83f9a26c0bc26471a807a123b6f5d28c030b77b7 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 11:32:29 -0800 Subject: [PATCH 19/44] Fixed Parameters in Trainer Initializations --- Code/main.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Code/main.py b/Code/main.py index ae1b2b7..8963824 100644 --- a/Code/main.py +++ b/Code/main.py @@ -87,16 +87,16 @@ def main(rank): # Method 1 : Baseline : Mean Squared Error Loss for AutoEncoder and LSTM os.makedirs('../Models/Method1', exist_ok=True) # Creating Directory for Model Saving model_save_path_ae = '../Models/Method1/model_autoencoder_m1.pth' - trainer_autoencoder_baseline = Trainer(model_autoencoder, - loss_mse, + trainer_autoencoder_baseline = Trainer(model=model_autoencoder, + loss_function=loss_mse, optimizer=torch.optim.Adam(model_autoencoder.parameters(), lr=0.001), model_save_path=model_save_path_ae, rank=rank) if rank == 0: print('Method-1 AutoEncoder Trainer Initialized.') model_save_path_lstm = '../Models/Method1/model_lstm_m1.pth' - trainer_lstm_baseline = Trainer(model_lstm, - loss_mse, + trainer_lstm_baseline = Trainer(model=model_lstm, + loss_function=loss_mse, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), model_save_path=model_save_path_lstm, rank=rank) @@ -122,8 +122,8 @@ def main(rank): if rank == 0: print('Method-3 AutoEncoder == Method-1 AutoEncoder') model_save_path_lstm = '../Models/Method3/model_lstm_m3.pth' - trainer_lstm_m3 = Trainer(model_lstm, - loss_ssim, + trainer_lstm_m3 = Trainer(model=model_lstm, + loss_function=loss_ssim, optimizer=torch.optim.Adam(model_lstm.parameters(), lr=0.001), 
model_save_path=model_save_path_lstm, rank=rank) From 54e8e38123ded0c55deab807498b6179ecf461ab Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 11:34:59 -0800 Subject: [PATCH 20/44] Fixed UserWarning for --- Code/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Code/main.py b/Code/main.py index 8963824..cb5f5d5 100644 --- a/Code/main.py +++ b/Code/main.py @@ -34,8 +34,8 @@ def main_worker(rank, world_size): torch.backends.cudnn.enabled = True torch.backends.cudnn.benchmark = True dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) - if dist.get_rank() == 0: - # Remove Warnings + # Filter out the warnings after the process group has been initialized. + if rank == 0: warnings.filterwarnings("ignore", category=UserWarning, module='torch.nn.parallel') main(rank) # Call the existing main function. From fa8e299b17fceb73db2b8f2942fb5fce32c4f2c0 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 11:38:55 -0800 Subject: [PATCH 21/44] Added Env Variables in Script --- Code/main.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Code/main.py b/Code/main.py index cb5f5d5..2e8290c 100644 --- a/Code/main.py +++ b/Code/main.py @@ -29,14 +29,16 @@ batch_size = 2 def main_worker(rank, world_size): + # Set environment variables + os.environ['MASTER_ADDR'] = 'localhost' + os.environ['MASTER_PORT'] = '12345' # Initialize the distributed environment. torch.manual_seed(0) torch.backends.cudnn.enabled = True torch.backends.cudnn.benchmark = True dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) - # Filter out the warnings after the process group has been initialized. - if rank == 0: - warnings.filterwarnings("ignore", category=UserWarning, module='torch.nn.parallel') + # Suppress warnings after initializing the process group. + warnings.filterwarnings("ignore", category=UserWarning, module='torch.nn.parallel') main(rank) # Call the existing main function. def main(rank): @@ -223,8 +225,5 @@ def main(rank): if __name__ == '__main__': - if dist.get_rank() == 0: - warnings.filterwarnings("ignore", category=UserWarning, module='torch.nn.parallel.distributed') - world_size = torch.cuda.device_count() # Number of available GPUs mp.spawn(main_worker, args=(world_size,), nprocs=world_size, join=True) \ No newline at end of file From f91660201c8201ef8a621a68358a0d9019a36a9e Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 11:43:45 -0800 Subject: [PATCH 22/44] Updated UserWarnings --- Code/main.py | 4 ++-- Code/training.py | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Code/main.py b/Code/main.py index 2e8290c..146c7e3 100644 --- a/Code/main.py +++ b/Code/main.py @@ -37,8 +37,8 @@ def main_worker(rank, world_size): torch.backends.cudnn.enabled = True torch.backends.cudnn.benchmark = True dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) - # Suppress warnings after initializing the process group. - warnings.filterwarnings("ignore", category=UserWarning, module='torch.nn.parallel') + # Suppress warnings about unused parameters specifically. + warnings.filterwarnings("ignore", message="*find_unused_parameters=True*", category=UserWarning, module='torch.nn.parallel') main(rank) # Call the existing main function. 
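For context on the flag these warnings are about: find_unused_parameters=True makes DDP walk the autograd graph after every forward pass to find parameters that received no gradient. It is only needed when forward() can skip some registered parameters; when every parameter is always used, the flag just adds per-iteration overhead, and PyTorch emits exactly the UserWarning that the surrounding patches are trying to silence. A hypothetical illustration (the Branchy model below is not the project's ConvLSTM or autoencoder, and the DDP call is left commented because it needs an initialized process group):

    # Illustrative only: a forward pass that skips a registered parameter on some
    # inputs, which is the situation find_unused_parameters=True exists for.
    import torch
    import torch.nn as nn
    from torch.nn.parallel import DistributedDataParallel as DDP

    class Branchy(nn.Module):
        def __init__(self):
            super().__init__()
            self.a = nn.Linear(4, 4)
            self.b = nn.Linear(4, 4)      # receives no gradient whenever use_b is False
        def forward(self, x, use_b=False):
            return self.b(self.a(x)) if use_b else self.a(x)

    # model = DDP(Branchy().to(rank), device_ids=[rank], find_unused_parameters=True)
    # With use_b=False, self.b gets no gradient; without find_unused_parameters=True,
    # DDP would error (or stall) waiting for that gradient. When every parameter *is*
    # used, the flag only adds overhead and PyTorch warns that it can be dropped.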
def main(rank): diff --git a/Code/training.py b/Code/training.py index 86428bc..9733db0 100644 --- a/Code/training.py +++ b/Code/training.py @@ -13,6 +13,7 @@ import torch.nn as nn from torch.nn.parallel import DistributedDataParallel as DDP import torch.distributed as dist +import warnings # Define Training Class class Trainer(): @@ -26,6 +27,8 @@ def __init__(self, model, loss_function, optimizer=None, model_save_path=None, r self.optimizer = optimizer if optimizer is not None else torch.optim.Adam(self.model.parameters(), lr=0.001) # Wrap model with DDP if torch.cuda.device_count() > 1 and rank is not None: + # Suppress warnings about unused parameters specifically. + warnings.filterwarnings("ignore", message="*find_unused_parameters=True*", category=UserWarning, module='torch.nn.parallel') self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=True) # Define the path to save the model self.model_save_path = model_save_path if rank == 0 else None # Only save on master process From aad8403a9473b7cb474b107fde3add6bc59c8750 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 11:47:23 -0800 Subject: [PATCH 23/44] Fixed UserWarnings for find_unused_parameters --- Code/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Code/main.py b/Code/main.py index 146c7e3..80be4dd 100644 --- a/Code/main.py +++ b/Code/main.py @@ -38,7 +38,8 @@ def main_worker(rank, world_size): torch.backends.cudnn.benchmark = True dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) # Suppress warnings about unused parameters specifically. - warnings.filterwarnings("ignore", message="*find_unused_parameters=True*", category=UserWarning, module='torch.nn.parallel') + if rank == 0: + warnings.filterwarnings("ignore", message=".*find_unused_parameters=True.*") main(rank) # Call the existing main function. def main(rank): From fa5c7f2af09ca6bc4c11a45ed65872a7ee3d152a Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 11:50:21 -0800 Subject: [PATCH 24/44] Fixed UserWarnings for find_unused_parameters --- Code/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Code/main.py b/Code/main.py index 80be4dd..786806b 100644 --- a/Code/main.py +++ b/Code/main.py @@ -39,7 +39,7 @@ def main_worker(rank, world_size): dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) # Suppress warnings about unused parameters specifically. if rank == 0: - warnings.filterwarnings("ignore", message=".*find_unused_parameters=True.*") + warnings.filterwarnings("ignore", message=".*find_unused_parameters=True.*", category=UserWarning, module='torch.nn.parallel') main(rank) # Call the existing main function. def main(rank): From 10fd12b62a65936c7c301ae8884e79189bb59395 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 11:51:32 -0800 Subject: [PATCH 25/44] Fixed UserWarnings for find_unused_parameters --- Code/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Code/training.py b/Code/training.py index 9733db0..109bf33 100644 --- a/Code/training.py +++ b/Code/training.py @@ -28,7 +28,7 @@ def __init__(self, model, loss_function, optimizer=None, model_save_path=None, r # Wrap model with DDP if torch.cuda.device_count() > 1 and rank is not None: # Suppress warnings about unused parameters specifically. 
- warnings.filterwarnings("ignore", message="*find_unused_parameters=True*", category=UserWarning, module='torch.nn.parallel') + warnings.filterwarnings("ignore", message=".*find_unused_parameters=True.*", category=UserWarning, module='torch.nn.parallel') self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=True) # Define the path to save the model self.model_save_path = model_save_path if rank == 0 else None # Only save on master process From fc0fa76339b484df84cd9860db45083e93062d64 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 11:56:43 -0800 Subject: [PATCH 26/44] Fixed UserWarnings for find_unused_parameters --- Code/main.py | 2 +- Code/training.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Code/main.py b/Code/main.py index 786806b..de78b72 100644 --- a/Code/main.py +++ b/Code/main.py @@ -39,7 +39,7 @@ def main_worker(rank, world_size): dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) # Suppress warnings about unused parameters specifically. if rank == 0: - warnings.filterwarnings("ignore", message=".*find_unused_parameters=True.*", category=UserWarning, module='torch.nn.parallel') + warnings.filterwarnings("ignore", message=".*find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass.*", category=UserWarning, module='torch.nn.parallel') main(rank) # Call the existing main function. def main(rank): diff --git a/Code/training.py b/Code/training.py index 109bf33..fd9ed55 100644 --- a/Code/training.py +++ b/Code/training.py @@ -28,7 +28,7 @@ def __init__(self, model, loss_function, optimizer=None, model_save_path=None, r # Wrap model with DDP if torch.cuda.device_count() > 1 and rank is not None: # Suppress warnings about unused parameters specifically. - warnings.filterwarnings("ignore", message=".*find_unused_parameters=True.*", category=UserWarning, module='torch.nn.parallel') + warnings.filterwarnings("ignore", message=".*find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass.*", category=UserWarning, module='torch.nn.parallel') self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=True) # Define the path to save the model self.model_save_path = model_save_path if rank == 0 else None # Only save on master process From 455b6ee5b638f7ad7a67fd43d567047073444b7d Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 11:57:55 -0800 Subject: [PATCH 27/44] Fixed UserWarnings for find_unused_parameters --- Code/main.py | 2 +- Code/training.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Code/main.py b/Code/main.py index de78b72..057b96c 100644 --- a/Code/main.py +++ b/Code/main.py @@ -39,7 +39,7 @@ def main_worker(rank, world_size): dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) # Suppress warnings about unused parameters specifically. if rank == 0: - warnings.filterwarnings("ignore", message=".*find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass.*", category=UserWarning, module='torch.nn.parallel') + warnings.filterwarnings("ignore", message=".*find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass.*", category=Warning, module='torch.nn.parallel') main(rank) # Call the existing main function. 
def main(rank): diff --git a/Code/training.py b/Code/training.py index fd9ed55..305061c 100644 --- a/Code/training.py +++ b/Code/training.py @@ -28,7 +28,7 @@ def __init__(self, model, loss_function, optimizer=None, model_save_path=None, r # Wrap model with DDP if torch.cuda.device_count() > 1 and rank is not None: # Suppress warnings about unused parameters specifically. - warnings.filterwarnings("ignore", message=".*find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass.*", category=UserWarning, module='torch.nn.parallel') + warnings.filterwarnings("ignore", message=".*find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass.*", category=Warning, module='torch.nn.parallel') self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=True) # Define the path to save the model self.model_save_path = model_save_path if rank == 0 else None # Only save on master process From b86d3a42675fb585a9cf551b15ce08b3c66f1853 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 11:59:40 -0800 Subject: [PATCH 28/44] Fixed Warnings for find_unused_parameters --- Code/main.py | 3 +-- Code/training.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Code/main.py b/Code/main.py index 057b96c..f02530d 100644 --- a/Code/main.py +++ b/Code/main.py @@ -38,8 +38,7 @@ def main_worker(rank, world_size): torch.backends.cudnn.benchmark = True dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) # Suppress warnings about unused parameters specifically. - if rank == 0: - warnings.filterwarnings("ignore", message=".*find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass.*", category=Warning, module='torch.nn.parallel') + warnings.filterwarnings("ignore", message="* find_unused_parameters=True *", category=Warning) main(rank) # Call the existing main function. def main(rank): diff --git a/Code/training.py b/Code/training.py index 305061c..f0f8a35 100644 --- a/Code/training.py +++ b/Code/training.py @@ -28,7 +28,7 @@ def __init__(self, model, loss_function, optimizer=None, model_save_path=None, r # Wrap model with DDP if torch.cuda.device_count() > 1 and rank is not None: # Suppress warnings about unused parameters specifically. 
- warnings.filterwarnings("ignore", message=".*find_unused_parameters=True was specified in DDP constructor, but did not find any unused parameters in the forward pass.*", category=Warning, module='torch.nn.parallel') + warnings.filterwarnings("ignore", message="* find_unused_parameters=True *", category=Warning) self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=True) # Define the path to save the model self.model_save_path = model_save_path if rank == 0 else None # Only save on master process From 6015b6f67e0a1414d6e1ae51c7112a238692005b Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:00:49 -0800 Subject: [PATCH 29/44] Fixed Warnings for find_unused_parameters --- Code/main.py | 2 +- Code/set_env_var.sh | 5 ----- Code/training.py | 2 -- 3 files changed, 1 insertion(+), 8 deletions(-) delete mode 100644 Code/set_env_var.sh diff --git a/Code/main.py b/Code/main.py index f02530d..59cab03 100644 --- a/Code/main.py +++ b/Code/main.py @@ -38,7 +38,7 @@ def main_worker(rank, world_size): torch.backends.cudnn.benchmark = True dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) # Suppress warnings about unused parameters specifically. - warnings.filterwarnings("ignore", message="* find_unused_parameters=True *", category=Warning) + warnings.filterwarnings("ignore", message="*find_unused_parameters=True*", module="reducer.cpp") main(rank) # Call the existing main function. def main(rank): diff --git a/Code/set_env_var.sh b/Code/set_env_var.sh deleted file mode 100644 index b7dcf76..0000000 --- a/Code/set_env_var.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -# Set environment variables -export MASTER_ADDR=localhost -export MASTER_PORT=12345 \ No newline at end of file diff --git a/Code/training.py b/Code/training.py index f0f8a35..5058d8f 100644 --- a/Code/training.py +++ b/Code/training.py @@ -27,8 +27,6 @@ def __init__(self, model, loss_function, optimizer=None, model_save_path=None, r self.optimizer = optimizer if optimizer is not None else torch.optim.Adam(self.model.parameters(), lr=0.001) # Wrap model with DDP if torch.cuda.device_count() > 1 and rank is not None: - # Suppress warnings about unused parameters specifically. - warnings.filterwarnings("ignore", message="* find_unused_parameters=True *", category=Warning) self.model = DDP(self.model, device_ids=[rank], find_unused_parameters=True) # Define the path to save the model self.model_save_path = model_save_path if rank == 0 else None # Only save on master process From c9e4d9d8af0c466c50f6209a0a067031efb0d2f2 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:02:03 -0800 Subject: [PATCH 30/44] Fixed Warnings for find_unused_parameters --- Code/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Code/main.py b/Code/main.py index 59cab03..65908db 100644 --- a/Code/main.py +++ b/Code/main.py @@ -38,7 +38,7 @@ def main_worker(rank, world_size): torch.backends.cudnn.benchmark = True dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) # Suppress warnings about unused parameters specifically. - warnings.filterwarnings("ignore", message="*find_unused_parameters=True*", module="reducer.cpp") + warnings.filterwarnings("ignore", message="*find_unused_parameters=True*") main(rank) # Call the existing main function. 
def main(rank): From 1034b2320b73445d250613cc3f5ab7a59d22bae9 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:02:35 -0800 Subject: [PATCH 31/44] Fixed Warnings for find_unused_parameters --- Code/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Code/main.py b/Code/main.py index 65908db..f433694 100644 --- a/Code/main.py +++ b/Code/main.py @@ -38,7 +38,7 @@ def main_worker(rank, world_size): torch.backends.cudnn.benchmark = True dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) # Suppress warnings about unused parameters specifically. - warnings.filterwarnings("ignore", message="*find_unused_parameters=True*") + # warnings.filterwarnings("ignore", message="*find_unused_parameters=True*") main(rank) # Call the existing main function. def main(rank): From cb5b55d789860866df04437852d2d8ea02c5fad7 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:06:12 -0800 Subject: [PATCH 32/44] Fixed Warnings for find_unused_parameters --- Code/main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Code/main.py b/Code/main.py index f433694..648c20e 100644 --- a/Code/main.py +++ b/Code/main.py @@ -17,7 +17,10 @@ import torch import torch.multiprocessing as mp import torch.distributed as dist + +# Suppress warnings about find_unused_parameters=True import warnings +warnings.filterwarnings("ignore", category=UserWarning, module="torch.distributed") # Define Working Directories grayscale_dir = '../Dataset/Greyscale' @@ -37,8 +40,6 @@ def main_worker(rank, world_size): torch.backends.cudnn.enabled = True torch.backends.cudnn.benchmark = True dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) - # Suppress warnings about unused parameters specifically. - # warnings.filterwarnings("ignore", message="*find_unused_parameters=True*") main(rank) # Call the existing main function. 
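Several of the attempts above pass shell-style wildcards to warnings.filterwarnings, but the message and module arguments are regular expressions: message must match the start of the warning text, module is matched against the Python module name that raised the warning (so a C++ file name like reducer.cpp will not match), and both are compiled when the filter is installed, so a pattern beginning with * raises re.error instead of installing a filter. A hedged sketch of a filter that does match the DDP warning (the message wording is taken from the patches above and can vary across PyTorch versions):

    # Suppress only the DDP "find_unused_parameters=True ... did not find any unused
    # parameters" UserWarning. The message argument is a regex anchored at the start
    # of the warning text, so use '.*' rather than a shell-style '*'.
    import warnings

    warnings.filterwarnings(
        "ignore",
        message=r".*find_unused_parameters=True.*",   # valid regex; "*..." would raise re.error
        category=UserWarning,
    )
    # Warning filters are per-process: with mp.spawn, install this in code that every
    # worker imports (e.g., at module level in training.py), not only in the parent.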
def main(rank): From e0a707c08f7fa24ee44fcedb33c4c4adefacde54 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:07:05 -0800 Subject: [PATCH 33/44] Fixed Warnings for find_unused_parameters --- Code/training.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Code/training.py b/Code/training.py index 5058d8f..81c4535 100644 --- a/Code/training.py +++ b/Code/training.py @@ -13,7 +13,10 @@ import torch.nn as nn from torch.nn.parallel import DistributedDataParallel as DDP import torch.distributed as dist + +# Suppress warnings about find_unused_parameters=True import warnings +warnings.filterwarnings("ignore", category=UserWarning, module="torch.distributed") # Define Training Class class Trainer(): From 6cb35b4bcd84e39f71108c6158fd9a326c82140e Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:08:27 -0800 Subject: [PATCH 34/44] Fixed Warnings for find_unused_parameters --- Code/main.py | 2 +- Code/training.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Code/main.py b/Code/main.py index 648c20e..f34ed58 100644 --- a/Code/main.py +++ b/Code/main.py @@ -20,7 +20,7 @@ # Suppress warnings about find_unused_parameters=True import warnings -warnings.filterwarnings("ignore", category=UserWarning, module="torch.distributed") +warnings.filterwarnings("ignore", category=UserWarning, module="torch") # Define Working Directories grayscale_dir = '../Dataset/Greyscale' diff --git a/Code/training.py b/Code/training.py index 81c4535..1b45e96 100644 --- a/Code/training.py +++ b/Code/training.py @@ -10,13 +10,12 @@ # Import Necessary Libraries import torch -import torch.nn as nn from torch.nn.parallel import DistributedDataParallel as DDP import torch.distributed as dist # Suppress warnings about find_unused_parameters=True import warnings -warnings.filterwarnings("ignore", category=UserWarning, module="torch.distributed") +warnings.filterwarnings("ignore", category=UserWarning, module="torch") # Define Training Class class Trainer(): From ea3bc396e1549d76c2a50b3e513f5ebd169bc295 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:15:07 -0800 Subject: [PATCH 35/44] Fixed Warnings for find_unused_parameters --- Code/main.py | 8 ++++---- Code/training.py | 4 ---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Code/main.py b/Code/main.py index f34ed58..fd351f6 100644 --- a/Code/main.py +++ b/Code/main.py @@ -10,7 +10,6 @@ from losses import LossMSE, LossMEP, SSIMLoss from training import Trainer - # Import Necessary Libraries import os import traceback @@ -18,9 +17,10 @@ import torch.multiprocessing as mp import torch.distributed as dist -# Suppress warnings about find_unused_parameters=True -import warnings -warnings.filterwarnings("ignore", category=UserWarning, module="torch") +# Disable below 3 Lines if you want Detailed Errors and Warnings Printed on Terminal +import os +import sys +sys.stderr = open(os.devnull, 'w') # Define Working Directories grayscale_dir = '../Dataset/Greyscale' diff --git a/Code/training.py b/Code/training.py index 1b45e96..373eb6e 100644 --- a/Code/training.py +++ b/Code/training.py @@ -13,10 +13,6 @@ from torch.nn.parallel import DistributedDataParallel as DDP import torch.distributed as dist -# Suppress warnings about find_unused_parameters=True -import warnings -warnings.filterwarnings("ignore", category=UserWarning, module="torch") - # Define Training Class class Trainer(): def __init__(self, model, loss_function, optimizer=None, model_save_path=None, rank=None): From 
254c1269a22d3a9a63617192857766f7debf3b63 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:17:08 -0800 Subject: [PATCH 36/44] Fixed Warnings for find_unused_parameters --- Code/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Code/main.py b/Code/main.py index fd351f6..0da4390 100644 --- a/Code/main.py +++ b/Code/main.py @@ -10,6 +10,7 @@ from losses import LossMSE, LossMEP, SSIMLoss from training import Trainer + # Import Necessary Libraries import os import traceback @@ -17,10 +18,12 @@ import torch.multiprocessing as mp import torch.distributed as dist -# Disable below 3 Lines if you want Detailed Errors and Warnings Printed on Terminal +# Disable below 5 Lines if you want Detailed Errors and Warnings Printed on Terminal +import warnings import os import sys sys.stderr = open(os.devnull, 'w') +warnings.filterwarnings("ignore", category=UserWarning, module="torch") # Define Working Directories grayscale_dir = '../Dataset/Greyscale' From 598e364b8aca25ca08ddf6b68f4ad40d92a9ff18 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:18:35 -0800 Subject: [PATCH 37/44] Fixed Warnings for find_unused_parameters --- Code/main.py | 7 ------- Code/training.py | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Code/main.py b/Code/main.py index 0da4390..778287c 100644 --- a/Code/main.py +++ b/Code/main.py @@ -18,13 +18,6 @@ import torch.multiprocessing as mp import torch.distributed as dist -# Disable below 5 Lines if you want Detailed Errors and Warnings Printed on Terminal -import warnings -import os -import sys -sys.stderr = open(os.devnull, 'w') -warnings.filterwarnings("ignore", category=UserWarning, module="torch") - # Define Working Directories grayscale_dir = '../Dataset/Greyscale' rgb_dir = '../Dataset/RGB' diff --git a/Code/training.py b/Code/training.py index 373eb6e..d816501 100644 --- a/Code/training.py +++ b/Code/training.py @@ -13,6 +13,13 @@ from torch.nn.parallel import DistributedDataParallel as DDP import torch.distributed as dist +# Disable below 5 Lines if you want Detailed Errors and Warnings Printed on Terminal +# import warnings +import os +import sys +sys.stderr = open(os.devnull, 'w') +# warnings.filterwarnings("ignore", category=UserWarning, module="torch") + # Define Training Class class Trainer(): def __init__(self, model, loss_function, optimizer=None, model_save_path=None, rank=None): From 8f1156dee40c86c957c12befc17597a57536231c Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:19:36 -0800 Subject: [PATCH 38/44] Fixed Warnings for find_unused_parameters --- Code/training.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Code/training.py b/Code/training.py index d816501..a773198 100644 --- a/Code/training.py +++ b/Code/training.py @@ -14,11 +14,11 @@ import torch.distributed as dist # Disable below 5 Lines if you want Detailed Errors and Warnings Printed on Terminal -# import warnings +import warnings import os import sys sys.stderr = open(os.devnull, 'w') -# warnings.filterwarnings("ignore", category=UserWarning, module="torch") +warnings.filterwarnings("ignore", category=UserWarning, module="torch") # Define Training Class class Trainer(): From cac3c481995161c5d14dd4e2556ebf2804311f55 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:28:37 -0800 Subject: [PATCH 39/44] Fixed Warnings for find_unused_parameters --- Code/main.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Code/main.py 
b/Code/main.py index 778287c..43be0eb 100644 --- a/Code/main.py +++ b/Code/main.py @@ -3,6 +3,13 @@ -------------------------------------------------------------------------------- ''' +# Disable below 5 Lines if you want Detailed Errors and Warnings Printed on Terminal +import warnings +warnings.filterwarnings("ignore", category=UserWarning, module="torch") +import os +import sys +sys.stderr = open(os.devnull, 'w') + # Importing Custom Modules from data import CustomDataset from autoencoder_model import Grey2RGBAutoEncoder @@ -10,7 +17,6 @@ from losses import LossMSE, LossMEP, SSIMLoss from training import Trainer - # Import Necessary Libraries import os import traceback From 5529513cd25a0fe1b9eb72dcf333e2ccaabdbbcc Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:29:51 -0800 Subject: [PATCH 40/44] Fixed Warnings for find_unused_parameters --- Code/main.py | 2 +- Code/training.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/Code/main.py b/Code/main.py index 43be0eb..1601427 100644 --- a/Code/main.py +++ b/Code/main.py @@ -5,9 +5,9 @@ # Disable below 5 Lines if you want Detailed Errors and Warnings Printed on Terminal import warnings -warnings.filterwarnings("ignore", category=UserWarning, module="torch") import os import sys +warnings.filterwarnings("ignore", category=UserWarning, module="torch") sys.stderr = open(os.devnull, 'w') # Importing Custom Modules diff --git a/Code/training.py b/Code/training.py index a773198..1fd555a 100644 --- a/Code/training.py +++ b/Code/training.py @@ -8,6 +8,13 @@ Initialize Best Validation Loss to Infinity as we will save model with lowest validation loss ''' +# Disable below 5 Lines if you want Detailed Errors and Warnings Printed on Terminal +import warnings +import os +import sys +warnings.filterwarnings("ignore", category=UserWarning, module="torch") + +sys.stderr = open(os.devnull, 'w') # Import Necessary Libraries import torch from torch.nn.parallel import DistributedDataParallel as DDP From 290180496a5ad4111582e43af20070fb0eb08bc1 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:32:43 -0800 Subject: [PATCH 41/44] Fixed Warnings for find_unused_parameters --- Code/main.py | 7 +++---- Code/training.py | 7 ------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/Code/main.py b/Code/main.py index 1601427..751f918 100644 --- a/Code/main.py +++ b/Code/main.py @@ -3,12 +3,11 @@ -------------------------------------------------------------------------------- ''' -# Disable below 5 Lines if you want Detailed Errors and Warnings Printed on Terminal -import warnings +# Disable below 4 Lines if you want Detailed Errors and Warnings Printed on Terminal import os import sys -warnings.filterwarnings("ignore", category=UserWarning, module="torch") -sys.stderr = open(os.devnull, 'w') +sys.stdout = open('/dev/null', 'w') +sys.stderr = open('/dev/null', 'w') # Importing Custom Modules from data import CustomDataset diff --git a/Code/training.py b/Code/training.py index 1fd555a..a773198 100644 --- a/Code/training.py +++ b/Code/training.py @@ -8,13 +8,6 @@ Initialize Best Validation Loss to Infinity as we will save model with lowest validation loss ''' -# Disable below 5 Lines if you want Detailed Errors and Warnings Printed on Terminal -import warnings -import os -import sys -warnings.filterwarnings("ignore", category=UserWarning, module="torch") - -sys.stderr = open(os.devnull, 'w') # Import Necessary Libraries import torch from torch.nn.parallel import DistributedDataParallel as DDP 
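[Editor's note — not part of the patch series] Patches 29 through 41 keep adjusting warnings.filterwarnings() without the DDP message ever being suppressed, because the message and module arguments are regular expressions, not shell globs: message is matched against the start of the warning text, so "*find_unused_parameters=True*" can never match (a leading * is not even a valid regular expression, so that call raises re.error), and module is matched against the Python module name issuing the warning, so "reducer.cpp" cannot match either. Redirecting sys.stderr to /dev/null, as patch 41 does and patch 42 removes, hides the warning but also hides every traceback. A minimal sketch of a filter that would match, using a hypothetical helper name and assuming the DDP reducer surfaces the message as a Python UserWarning:

    import warnings

    def silence_unused_params_warning():
        # `message` is a regex anchored at the start of the warning text,
        # hence the leading/trailing ".*" instead of shell-style "*".
        warnings.filterwarnings(
            "ignore",
            message=r".*find_unused_parameters=True.*",
            category=UserWarning,  # assumption: the reducer warning is a UserWarning
        )

Calling such a helper at the top of main_worker() would install the filter in every spawned rank, without discarding the rest of stderr.
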
From 5f28ffaf986326bdcb0e8393de8500c5298a587f Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Sun, 31 Dec 2023 13:36:41 -0800 Subject: [PATCH 42/44] Run : python main.py 2>/dev/null --- Code/main.py | 6 ------ Code/training.py | 7 ------- 2 files changed, 13 deletions(-) diff --git a/Code/main.py b/Code/main.py index 751f918..5dc44bd 100644 --- a/Code/main.py +++ b/Code/main.py @@ -3,12 +3,6 @@ -------------------------------------------------------------------------------- ''' -# Disable below 4 Lines if you want Detailed Errors and Warnings Printed on Terminal -import os -import sys -sys.stdout = open('/dev/null', 'w') -sys.stderr = open('/dev/null', 'w') - # Importing Custom Modules from data import CustomDataset from autoencoder_model import Grey2RGBAutoEncoder diff --git a/Code/training.py b/Code/training.py index a773198..373eb6e 100644 --- a/Code/training.py +++ b/Code/training.py @@ -13,13 +13,6 @@ from torch.nn.parallel import DistributedDataParallel as DDP import torch.distributed as dist -# Disable below 5 Lines if you want Detailed Errors and Warnings Printed on Terminal -import warnings -import os -import sys -sys.stderr = open(os.devnull, 'w') -warnings.filterwarnings("ignore", category=UserWarning, module="torch") - # Define Training Class class Trainer(): def __init__(self, model, loss_function, optimizer=None, model_save_path=None, rank=None): From 70742a1d8b9f12c4faf25e4ddb60d0ab7921df77 Mon Sep 17 00:00:00 2001 From: Siddharth Kekre Date: Tue, 2 Jan 2024 00:10:38 +0000 Subject: [PATCH 43/44] Changes AutoEncoder Hidden Layers --- Code/autoencoder_model.py | 4 ++-- Code/main.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Code/autoencoder_model.py b/Code/autoencoder_model.py index 975d07e..c4e34d2 100644 --- a/Code/autoencoder_model.py +++ b/Code/autoencoder_model.py @@ -16,9 +16,9 @@ class Grey2RGBAutoEncoder(nn.Module): def __init__(self): super(Grey2RGBAutoEncoder, self).__init__() # Define the Encoder - self.encoder = self._make_layers([1, 64, 128, 256]) + self.encoder = self._make_layers([1, 8, 16, 32]) # Define the Decoder - self.decoder = self._make_layers([256, 128, 64, 3], decoder=True) + self.decoder = self._make_layers([32, 16, 8, 3], decoder=True) # Helper function to create the encoder or decoder layers. 
def _make_layers(self, channels, decoder=False): diff --git a/Code/main.py b/Code/main.py index 5dc44bd..e09b6aa 100644 --- a/Code/main.py +++ b/Code/main.py @@ -22,8 +22,8 @@ rgb_dir = '../Dataset/RGB' # Define Universal Parameters -image_height = 400 -image_width = 600 +image_height = 3000 +image_width = 4500 batch_size = 2 def main_worker(rank, world_size): From 290f3a7e93a5bbc50a77fc53f85b255cbdc01a92 Mon Sep 17 00:00:00 2001 From: iSiddharth20 Date: Mon, 1 Jan 2024 18:15:07 -0800 Subject: [PATCH 44/44] Added Platform Check for init_process_group backend parameter initialization --- Code/main.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Code/main.py b/Code/main.py index e09b6aa..883d85b 100644 --- a/Code/main.py +++ b/Code/main.py @@ -16,16 +16,24 @@ import torch import torch.multiprocessing as mp import torch.distributed as dist +import platform # Define Working Directories grayscale_dir = '../Dataset/Greyscale' rgb_dir = '../Dataset/RGB' # Define Universal Parameters -image_height = 3000 -image_width = 4500 +image_height = 4000 +image_width = 6000 batch_size = 2 +def get_backend(): + system_type = platform.system() + if system_type == "Linux": + return "nccl" + else: + return "gloo" + def main_worker(rank, world_size): # Set environment variables os.environ['MASTER_ADDR'] = 'localhost' @@ -34,7 +42,7 @@ def main_worker(rank, world_size): torch.manual_seed(0) torch.backends.cudnn.enabled = True torch.backends.cudnn.benchmark = True - dist.init_process_group(backend="nccl", init_method="env://", world_size=world_size, rank=rank) + dist.init_process_group(backend=get_backend(), init_method="env://", world_size=world_size, rank=rank) main(rank) # Call the existing main function. def main(rank):
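
[Editor's note — not part of the patch series] The platform check in patch 44 selects NCCL on any Linux host, but NCCL also requires CUDA devices: on a CPU-only Linux machine init_process_group(backend="nccl") fails, and the entry point shown earlier in the series sets world_size = torch.cuda.device_count(), which is 0 there, so mp.spawn() would launch no workers at all. A minimal sketch that also guards on CUDA availability — an assumption about the intended behaviour, not the author's code:

    import platform
    import torch

    def get_backend():
        # NCCL is the fastest choice for multi-GPU training, but only on Linux
        # *with* CUDA devices; Gloo works on CPU and on Windows/macOS.
        if platform.system() == "Linux" and torch.cuda.is_available():
            return "nccl"
        return "gloo"

Restoring 4000x6000 inputs in the same patch also makes activation memory the dominant cost even with the slimmer 8/16/32 encoder from patch 43, so if out-of-memory errors reappear, the image size is the first parameter to revisit.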