First Stable Release

iSiddharth20 · Dec 28, 2023 · ad55b63 · ad55b63
2 parents 90e3b04 + 60c3f67
commit ad55b63
Show file tree

Hide file tree

Showing 9 changed files with 164 additions and 307 deletions.
diff --git a/Code/RequiredResults.py b/Code/RequiredResults.py
diff --git a/Code/autoencoder_model.py b/Code/autoencoder_model.py
@@ -42,7 +42,7 @@ def _make_layers(self, channels, decoder=False):
                 layers += [nn.Conv2d(channels[i], channels[i+1], kernel_size=3, stride=1, padding=1),
                            nn.ReLU(inplace=True)]
         if decoder:
-            layers[-1] = nn.Sigmoid()  # Replace last ReLU with Sigmoid for decoder
+            layers[-1] = nn.Sigmoid() 
         return nn.Sequential(*layers)
 
     # The forward pass takes an input image, passes it through the encoder and decoder, and returns the output image

diff --git a/Code/data.py b/Code/data.py
@@ -60,62 +60,48 @@ def get_autoencoder_batches(self, val_split):
         train_dataset, val_dataset = random_split(self, [train_size, val_size])
         # Create dataloaders for the training and validation sets
         train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
-        print("Sample from autoencoder training data:")
-        for sample in train_loader:
-            print(f'Input shape: {sample[0].shape}, Target shape: {sample[1].shape}')
-            break  # Just print the first sample and break
         val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=True)
         # Return the training and validation dataloaders
         return train_loader, val_loader
 
+    # Transform a sequence of images to tensors (Functionality for LSTM)
+    def transform_sequence(self, filenames):
+        images = [self.transform(Image.open(f)) for f in filenames]
+        return torch.stack(images) # Stack to form a sequence tensor
+
     # Get batches for LSTM training
-    def get_lstm_batches(self, val_split, n=1):
-        # Calculate the number of samples to include in the validation set
-        val_size = int(val_split * (len(self) // 2))  # Half of sequences because we use every second image.
-        train_size = (len(self) // 2) - val_size
-
-        # Get indices for the odd (input) and even (target) frames.
-        odd_indices = list(range(0, len(self), 2))
-        even_indices = list(range(1, len(self), 2))
-
-        # Split the dataset indices into training and validation subsets
-        train_odd_indices = odd_indices[:train_size]
-        val_odd_indices = odd_indices[train_size:]
-
-        train_even_indices = even_indices[:train_size]
-        val_even_indices = even_indices[train_size:]
-
-        # Define a helper function to extract sequences by indices
-        def extract_sequences(indices):
-            return [self[i][0] for i in indices]  # Only return the grayscale images, not the tuples
-
-        # Use the helper function to create training and validation sets
-        train_input_seqs = torch.stack(extract_sequences(train_odd_indices))
-        train_target_seqs = torch.stack(extract_sequences(train_even_indices))
-
-        val_input_seqs = torch.stack(extract_sequences(val_odd_indices))
-        val_target_seqs = torch.stack(extract_sequences(val_even_indices))
-
-        # Create custom Dataset for the LSTM sequences
-        class LSTMDataset(Dataset):
-            def __init__(self, input_seqs, target_seqs):
-                self.input_seqs = input_seqs
-                self.target_seqs = target_seqs
-
-            def __len__(self):
-                return len(self.input_seqs)
-
-            def __getitem__(self, idx):
-                return self.input_seqs[idx], self.target_seqs[idx]
-
-        # Instantiate the custom Dataset objects
-        train_dataset = LSTMDataset(train_input_seqs, train_target_seqs)
-        val_dataset = LSTMDataset(val_input_seqs, val_target_seqs)
-
-        # Create DataLoaders for the LSTM datasets
-        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
-        val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=True)
+    def get_lstm_batches(self, val_split, sequence_length, sequence_stride=2):
+        assert sequence_length % 2 == 0, "The sequence length must be even."
+
+        # Compute the total number of sequences that can be formed, given the stride and length
+        sequence_indices = range(0, len(self.filenames) - sequence_length + 1, sequence_stride)
+        total_sequences = len(sequence_indices)
+
+        # Divide the sequences into training and validation
+        train_size = int((1.0 - val_split) * total_sequences)
+        train_indices = sequence_indices[:train_size]
+        val_indices = sequence_indices[train_size:]
+
+        # Create dataset with valid sequences only
+        train_dataset = self.create_sequence_pairs(train_indices, sequence_length)
+        val_dataset = self.create_sequence_pairs(val_indices, sequence_length)
+
+        # Create the data loaders for training and validation datasets
+        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=False)
+        val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False)
 
-        # Return the training and validation DataLoaders
         return train_loader, val_loader
 
+    def create_sequence_pairs(self, indices, sequence_length):
+        sequence_pairs = []
+        for start in indices:
+            end = start + sequence_length
+            # Make sure we don't go out of bounds
+            if end < len(self.filenames):
+                sequence_input = self.transform_sequence(self.filenames[start:end])
+                sequence_target = self.transform_sequence(self.filenames[start + 1:end + 1])
+                sequence_pairs.append((sequence_input, sequence_target))
+            else:
+                # No complete next-frame target exists for this trailing window,
+                pass  # so it is discarded (no padding is applied)
+        return sequence_pairs
diff --git a/Code/evaluation.py b/Code/evaluation.py
diff --git a/Code/losses.py b/Code/losses.py
@@ -21,13 +21,14 @@ def __init__(self, alpha=0.5):
         self.alpha = alpha  # Weighting factor for the loss
 
     def forward(self, output, target):
-        mse_loss = F.mse_loss(output, target)  # Compute MSE Loss using functional API
-        # Normalize the output tensor along the last dimension to represent probabilities
-        output_normalized = torch.softmax(output, dim=-1)
-        # Compute Entropy
-        entropy = -torch.sum(target * torch.log(output_normalized + 1e-8), dim=-1).mean()
-        # Compute Composite Loss
-        composite_loss = self.alpha * mse_loss + (1 - self.alpha) * entropy
+        mse_loss = F.mse_loss(output, target)
+        # Assume output to be raw logits: calculate log_probs and use it to compute entropy
+        log_probs = F.log_softmax(output, dim=1)  # dim 1 is the channel dimension
+        probs = torch.exp(log_probs)
+        entropy_loss = -torch.sum(probs * log_probs, dim=1).mean()
+
+        # Weighted sum: MSE is scaled by (1 - alpha), the entropy term by alpha
+        composite_loss = (1 - self.alpha) * mse_loss + self.alpha * entropy_loss
         return composite_loss
 
 '''
@@ -45,11 +46,18 @@ def forward(self, output, target):
     - In PyTorch, loss is minimized, by doing 1 - SSIM, minimizing the loss function will lead to maximization of SSIM
 '''
 class SSIMLoss(nn.Module):
-    def __init__(self, data_range=1, size_average=True):
-        super(SSIMLoss, self).__init__()
-        # Initialize SSIM module
-        self.ssim_module = SSIM(data_range=data_range, size_average=size_average)
+    def __init__(self):
+        super().__init__()
+        self.ssim_module = SSIM(data_range=1, size_average=True, channel=1)
 
-    def forward(self, img1, img2):
-        ssim_value = self.ssim_module(img1, img2)  # Compute SSIM
-        return 1 - ssim_value  # Return loss
+    def forward(self, seq1, seq2):
+        N, T = seq1.shape[:2]
+        ssim_values = []
+        for i in range(N):
+           for t in range(T):
+            seq1_slice = seq1[i, t:t+1, ...] 
+            seq2_slice = seq2[i, t:t+1, ...]
+            ssim_val = self.ssim_module(seq1_slice, seq2_slice)
+            ssim_values.append(ssim_val) # Compute SSIM for each frame in the sequence
+        avg_ssim = torch.stack(ssim_values).mean() # Average SSIM across all frames
+        return 1 - avg_ssim