Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hc-leipzig-7t-mp2rage graphs #76

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
Draft
257 changes: 257 additions & 0 deletions training/hc-leipzig-7t-mp2rage/hc-leipzig-7t-mp2rage_fold_splits.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
[
{
"train": [
"SCDATA_18_inv-1_part-mag_MP2RAGE",
"SCDATA_18_inv-2_part-mag_MP2RAGE",
"SCDATA_18_UNIT1",
"SCDATA_19_inv-1_part-mag_MP2RAGE",
"SCDATA_19_inv-2_part-mag_MP2RAGE",
"SCDATA_19_UNIT1",
"SCDATA_20_inv-1_part-mag_MP2RAGE",
"SCDATA_20_inv-2_part-mag_MP2RAGE",
"SCDATA_20_UNIT1",
"SCDATA_22_inv-1_part-mag_MP2RAGE",
"SCDATA_22_inv-2_part-mag_MP2RAGE",
"SCDATA_22_UNIT1",
"SCDATA_23_inv-1_part-mag_MP2RAGE",
"SCDATA_23_inv-2_part-mag_MP2RAGE",
"SCDATA_23_UNIT1",
"SCDATA_27_inv-1_part-mag_MP2RAGE",
"SCDATA_27_inv-2_part-mag_MP2RAGE",
"SCDATA_27_UNIT1",
"SCDATA_28_inv-1_part-mag_MP2RAGE",
"SCDATA_28_inv-2_part-mag_MP2RAGE",
"SCDATA_28_UNIT1",
"SCDATA_30_inv-1_part-mag_MP2RAGE",
"SCDATA_30_inv-2_part-mag_MP2RAGE",
"SCDATA_30_UNIT1",
"SCDATA_38_inv-1_part-mag_MP2RAGE",
"SCDATA_38_inv-2_part-mag_MP2RAGE",
"SCDATA_38_UNIT1",
"SCDATA_39_inv-1_part-mag_MP2RAGE",
"SCDATA_39_inv-2_part-mag_MP2RAGE",
"SCDATA_39_UNIT1",
"SCDATA_40_inv-1_part-mag_MP2RAGE",
"SCDATA_40_inv-2_part-mag_MP2RAGE",
"SCDATA_40_UNIT1",
"SCDATA_41_inv-1_part-mag_MP2RAGE",
"SCDATA_41_inv-2_part-mag_MP2RAGE",
"SCDATA_41_UNIT1"
],
"val": [
"SCDATA_25_inv-1_part-mag_MP2RAGE",
"SCDATA_25_inv-2_part-mag_MP2RAGE",
"SCDATA_25_UNIT1",
"SCDATA_29_inv-1_part-mag_MP2RAGE",
"SCDATA_29_inv-2_part-mag_MP2RAGE",
"SCDATA_29_UNIT1",
"SCDATA_36_inv-1_part-mag_MP2RAGE",
"SCDATA_36_inv-2_part-mag_MP2RAGE",
"SCDATA_36_UNIT1"
]
},
{
"train": [
"SCDATA_19_inv-1_part-mag_MP2RAGE",
"SCDATA_19_inv-2_part-mag_MP2RAGE",
"SCDATA_19_UNIT1",
"SCDATA_22_inv-1_part-mag_MP2RAGE",
"SCDATA_22_inv-2_part-mag_MP2RAGE",
"SCDATA_22_UNIT1",
"SCDATA_23_inv-1_part-mag_MP2RAGE",
"SCDATA_23_inv-2_part-mag_MP2RAGE",
"SCDATA_23_UNIT1",
"SCDATA_25_inv-1_part-mag_MP2RAGE",
"SCDATA_25_inv-2_part-mag_MP2RAGE",
"SCDATA_25_UNIT1",
"SCDATA_27_inv-1_part-mag_MP2RAGE",
"SCDATA_27_inv-2_part-mag_MP2RAGE",
"SCDATA_27_UNIT1",
"SCDATA_28_inv-1_part-mag_MP2RAGE",
"SCDATA_28_inv-2_part-mag_MP2RAGE",
"SCDATA_28_UNIT1",
"SCDATA_29_inv-1_part-mag_MP2RAGE",
"SCDATA_29_inv-2_part-mag_MP2RAGE",
"SCDATA_29_UNIT1",
"SCDATA_30_inv-1_part-mag_MP2RAGE",
"SCDATA_30_inv-2_part-mag_MP2RAGE",
"SCDATA_30_UNIT1",
"SCDATA_36_inv-1_part-mag_MP2RAGE",
"SCDATA_36_inv-2_part-mag_MP2RAGE",
"SCDATA_36_UNIT1",
"SCDATA_38_inv-1_part-mag_MP2RAGE",
"SCDATA_38_inv-2_part-mag_MP2RAGE",
"SCDATA_38_UNIT1",
"SCDATA_40_inv-1_part-mag_MP2RAGE",
"SCDATA_40_inv-2_part-mag_MP2RAGE",
"SCDATA_40_UNIT1",
"SCDATA_41_inv-1_part-mag_MP2RAGE",
"SCDATA_41_inv-2_part-mag_MP2RAGE",
"SCDATA_41_UNIT1"
],
"val": [
"SCDATA_18_inv-1_part-mag_MP2RAGE",
"SCDATA_18_inv-2_part-mag_MP2RAGE",
"SCDATA_18_UNIT1",
"SCDATA_20_inv-1_part-mag_MP2RAGE",
"SCDATA_20_inv-2_part-mag_MP2RAGE",
"SCDATA_20_UNIT1",
"SCDATA_39_inv-1_part-mag_MP2RAGE",
"SCDATA_39_inv-2_part-mag_MP2RAGE",
"SCDATA_39_UNIT1"
]
},
{
"train": [
"SCDATA_18_inv-1_part-mag_MP2RAGE",
"SCDATA_18_inv-2_part-mag_MP2RAGE",
"SCDATA_18_UNIT1",
"SCDATA_20_inv-1_part-mag_MP2RAGE",
"SCDATA_20_inv-2_part-mag_MP2RAGE",
"SCDATA_20_UNIT1",
"SCDATA_22_inv-1_part-mag_MP2RAGE",
"SCDATA_22_inv-2_part-mag_MP2RAGE",
"SCDATA_22_UNIT1",
"SCDATA_23_inv-1_part-mag_MP2RAGE",
"SCDATA_23_inv-2_part-mag_MP2RAGE",
"SCDATA_23_UNIT1",
"SCDATA_25_inv-1_part-mag_MP2RAGE",
"SCDATA_25_inv-2_part-mag_MP2RAGE",
"SCDATA_25_UNIT1",
"SCDATA_27_inv-1_part-mag_MP2RAGE",
"SCDATA_27_inv-2_part-mag_MP2RAGE",
"SCDATA_27_UNIT1",
"SCDATA_28_inv-1_part-mag_MP2RAGE",
"SCDATA_28_inv-2_part-mag_MP2RAGE",
"SCDATA_28_UNIT1",
"SCDATA_29_inv-1_part-mag_MP2RAGE",
"SCDATA_29_inv-2_part-mag_MP2RAGE",
"SCDATA_29_UNIT1",
"SCDATA_30_inv-1_part-mag_MP2RAGE",
"SCDATA_30_inv-2_part-mag_MP2RAGE",
"SCDATA_30_UNIT1",
"SCDATA_36_inv-1_part-mag_MP2RAGE",
"SCDATA_36_inv-2_part-mag_MP2RAGE",
"SCDATA_36_UNIT1",
"SCDATA_38_inv-1_part-mag_MP2RAGE",
"SCDATA_38_inv-2_part-mag_MP2RAGE",
"SCDATA_38_UNIT1",
"SCDATA_39_inv-1_part-mag_MP2RAGE",
"SCDATA_39_inv-2_part-mag_MP2RAGE",
"SCDATA_39_UNIT1"
],
"val": [
"SCDATA_19_inv-1_part-mag_MP2RAGE",
"SCDATA_19_inv-2_part-mag_MP2RAGE",
"SCDATA_19_UNIT1",
"SCDATA_40_inv-1_part-mag_MP2RAGE",
"SCDATA_40_inv-2_part-mag_MP2RAGE",
"SCDATA_40_UNIT1",
"SCDATA_41_inv-1_part-mag_MP2RAGE",
"SCDATA_41_inv-2_part-mag_MP2RAGE",
"SCDATA_41_UNIT1"
]
},
{
"train": [
"SCDATA_18_inv-1_part-mag_MP2RAGE",
"SCDATA_18_inv-2_part-mag_MP2RAGE",
"SCDATA_18_UNIT1",
"SCDATA_19_inv-1_part-mag_MP2RAGE",
"SCDATA_19_inv-2_part-mag_MP2RAGE",
"SCDATA_19_UNIT1",
"SCDATA_20_inv-1_part-mag_MP2RAGE",
"SCDATA_20_inv-2_part-mag_MP2RAGE",
"SCDATA_20_UNIT1",
"SCDATA_23_inv-1_part-mag_MP2RAGE",
"SCDATA_23_inv-2_part-mag_MP2RAGE",
"SCDATA_23_UNIT1",
"SCDATA_25_inv-1_part-mag_MP2RAGE",
"SCDATA_25_inv-2_part-mag_MP2RAGE",
"SCDATA_25_UNIT1",
"SCDATA_27_inv-1_part-mag_MP2RAGE",
"SCDATA_27_inv-2_part-mag_MP2RAGE",
"SCDATA_27_UNIT1",
"SCDATA_29_inv-1_part-mag_MP2RAGE",
"SCDATA_29_inv-2_part-mag_MP2RAGE",
"SCDATA_29_UNIT1",
"SCDATA_30_inv-1_part-mag_MP2RAGE",
"SCDATA_30_inv-2_part-mag_MP2RAGE",
"SCDATA_30_UNIT1",
"SCDATA_36_inv-1_part-mag_MP2RAGE",
"SCDATA_36_inv-2_part-mag_MP2RAGE",
"SCDATA_36_UNIT1",
"SCDATA_39_inv-1_part-mag_MP2RAGE",
"SCDATA_39_inv-2_part-mag_MP2RAGE",
"SCDATA_39_UNIT1",
"SCDATA_40_inv-1_part-mag_MP2RAGE",
"SCDATA_40_inv-2_part-mag_MP2RAGE",
"SCDATA_40_UNIT1",
"SCDATA_41_inv-1_part-mag_MP2RAGE",
"SCDATA_41_inv-2_part-mag_MP2RAGE",
"SCDATA_41_UNIT1"
],
"val": [
"SCDATA_22_inv-1_part-mag_MP2RAGE",
"SCDATA_22_inv-2_part-mag_MP2RAGE",
"SCDATA_22_UNIT1",
"SCDATA_28_inv-1_part-mag_MP2RAGE",
"SCDATA_28_inv-2_part-mag_MP2RAGE",
"SCDATA_28_UNIT1",
"SCDATA_38_inv-1_part-mag_MP2RAGE",
"SCDATA_38_inv-2_part-mag_MP2RAGE",
"SCDATA_38_UNIT1"
]
},
{
"train": [
"SCDATA_18_inv-1_part-mag_MP2RAGE",
"SCDATA_18_inv-2_part-mag_MP2RAGE",
"SCDATA_18_UNIT1",
"SCDATA_19_inv-1_part-mag_MP2RAGE",
"SCDATA_19_inv-2_part-mag_MP2RAGE",
"SCDATA_19_UNIT1",
"SCDATA_20_inv-1_part-mag_MP2RAGE",
"SCDATA_20_inv-2_part-mag_MP2RAGE",
"SCDATA_20_UNIT1",
"SCDATA_22_inv-1_part-mag_MP2RAGE",
"SCDATA_22_inv-2_part-mag_MP2RAGE",
"SCDATA_22_UNIT1",
"SCDATA_25_inv-1_part-mag_MP2RAGE",
"SCDATA_25_inv-2_part-mag_MP2RAGE",
"SCDATA_25_UNIT1",
"SCDATA_28_inv-1_part-mag_MP2RAGE",
"SCDATA_28_inv-2_part-mag_MP2RAGE",
"SCDATA_28_UNIT1",
"SCDATA_29_inv-1_part-mag_MP2RAGE",
"SCDATA_29_inv-2_part-mag_MP2RAGE",
"SCDATA_29_UNIT1",
"SCDATA_36_inv-1_part-mag_MP2RAGE",
"SCDATA_36_inv-2_part-mag_MP2RAGE",
"SCDATA_36_UNIT1",
"SCDATA_38_inv-1_part-mag_MP2RAGE",
"SCDATA_38_inv-2_part-mag_MP2RAGE",
"SCDATA_38_UNIT1",
"SCDATA_39_inv-1_part-mag_MP2RAGE",
"SCDATA_39_inv-2_part-mag_MP2RAGE",
"SCDATA_39_UNIT1",
"SCDATA_40_inv-1_part-mag_MP2RAGE",
"SCDATA_40_inv-2_part-mag_MP2RAGE",
"SCDATA_40_UNIT1",
"SCDATA_41_inv-1_part-mag_MP2RAGE",
"SCDATA_41_inv-2_part-mag_MP2RAGE",
"SCDATA_41_UNIT1"
],
"val": [
"SCDATA_23_inv-1_part-mag_MP2RAGE",
"SCDATA_23_inv-2_part-mag_MP2RAGE",
"SCDATA_23_UNIT1",
"SCDATA_27_inv-1_part-mag_MP2RAGE",
"SCDATA_27_inv-2_part-mag_MP2RAGE",
"SCDATA_27_UNIT1",
"SCDATA_30_inv-1_part-mag_MP2RAGE",
"SCDATA_30_inv-2_part-mag_MP2RAGE",
"SCDATA_30_UNIT1"
]
}
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
Subject,Train/Test,Fold_1,Fold_2,Fold_3,Fold_4,Fold_5
sub-17,Test,Test,Test,Test,Test,Test
sub-18,Train,Train,Validation,Train,Train,Train
sub-19,Train,Train,Train,Validation,Train,Train
sub-20,Train,Train,Validation,Train,Train,Train
sub-21,Excluded,Excluded,Excluded,Excluded,Excluded,Excluded
sub-22,Train,Train,Train,Train,Validation,Train
sub-23,Train,Train,Train,Train,Train,Validation
sub-24,Test,Test,Test,Test,Test,Test
sub-25,Train,Validation,Train,Train,Train,Train
sub-26,Excluded,Excluded,Excluded,Excluded,Excluded,Excluded
sub-27,Train,Train,Train,Train,Train,Validation
sub-28,Train,Train,Train,Train,Validation,Train
sub-29,Train,Validation,Train,Train,Train,Train
sub-30,Train,Train,Train,Train,Train,Validation
sub-31,Test,Test,Test,Test,Test,Test
sub-32,Excluded,Excluded,Excluded,Excluded,Excluded,Excluded
sub-33,Excluded,Excluded,Excluded,Excluded,Excluded,Excluded
sub-34,Excluded,Excluded,Excluded,Excluded,Excluded,Excluded
sub-35,Excluded,Excluded,Excluded,Excluded,Excluded,Excluded
sub-36,Train,Validation,Train,Train,Train,Train
sub-37,Test,Test,Test,Test,Test,Test
sub-38,Train,Train,Train,Train,Validation,Train
sub-39,Train,Train,Validation,Train,Train,Train
sub-40,Train,Train,Train,Validation,Train,Train
sub-41,Train,Train,Train,Validation,Train,Train
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
import json

# Read the subject-level train/test assignment table
train_test_dataframe = pd.read_csv('hc-leipzig-7t-mp2rage_train-test_split.csv')

# Keep only the rows whose 'Train/Test' column is exactly 'Train'
is_train = train_test_dataframe['Train/Test'] == 'Train'
train_dataframe = train_test_dataframe[is_train]

# Collect the numeric part of every distinct subject label
# (the text after the first '-', e.g. 'sub-18' -> 18)
subjects = []
for subject_label in train_dataframe['Subject'].unique():
    subjects.append(int(subject_label.split('-')[1]))

# Build the SCDATA_<number> identifiers used in the fold-split JSON
renamed_subjects = ["SCDATA_" + str(number) for number in subjects]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to do this renaming? By doing it, the output CSV file contains `sub-` but the output JSON file contains `SCDATA`. This is confusing. I would stick with the original filenames: `sub-`.


# Define the possible contrasts for each subject
contrasts = ["inv-1_part-mag_MP2RAGE", "inv-2_part-mag_MP2RAGE", "UNIT1"]

# Expand subjects to include all contrasts: one "<subject>_<contrast>" entry
# per pair, stored as a numpy array
expanded_subjects = np.array(
    [f"{subject}_{contrast}" for subject in renamed_subjects for contrast in contrasts]
)

# Initialize 5-fold cross-validation; the fixed random_state keeps the
# shuffled assignment reproducible between runs
kf = KFold(n_splits=5, shuffle=True, random_state=7)

# Per-image fold-membership table and the list of splits dumped to JSON.
# NOTE(review): folded_dataframe is only printed below; nothing in this
# section writes it to disk - confirm whether a CSV export is intended.
folded_dataframe = pd.DataFrame({"Subject": expanded_subjects})
fold_splits = []

# The split is done on subjects (not on expanded image names), so all
# contrasts of a subject always land on the same side of a fold
for fold, (train_index, val_index) in enumerate(kf.split(renamed_subjects), start=1):
    # Get the train and validation subjects
    train_subjects = [renamed_subjects[idx] for idx in train_index]
    val_subjects = [renamed_subjects[idx] for idx in val_index]

    # Expand train and validation subjects with all contrasts
    train_expanded = [f"{subj}_{contrast}" for subj in train_subjects for contrast in contrasts]
    val_expanded = [f"{subj}_{contrast}" for subj in val_subjects for contrast in contrasts]

    # Save the split for JSON
    fold_splits.append({
        "train": train_expanded,
        "val": val_expanded
    })

    # Add a new column to the dataframe indicating fold membership
    fold_column = f'Fold_{fold}'
    folded_dataframe[fold_column] = 'Excluded'  # Initialize with 'Excluded'
    folded_dataframe.loc[folded_dataframe['Subject'].isin(train_expanded), fold_column] = 'Train'
    folded_dataframe.loc[folded_dataframe['Subject'].isin(val_expanded), fold_column] = 'Validation'

# Single source of truth for the output path, so the confirmation message
# below always names the file that was actually written
fold_splits_path = 'hc-leipzig-7t-mp2rage_fold_splits.json'

# Save the splits to a JSON file
with open(fold_splits_path, 'w') as json_file:
    json.dump(fold_splits, json_file, indent=4)

# Print confirmation message
print(f"Renamed and expanded fold splits saved to '{fold_splits_path}'")

# Display the first few rows of the updated dataframe
print(folded_dataframe.head())
Loading