Merge pull request #31 from DCAN-Labs/sessions-tsv

sessions.tsv Implementation
DCAN-Labs · Feb 22, 2023 · 2f4bc21 · 2f4bc21
2 parents 52ddc95 + 287334c
commit 2f4bc21
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 38 deletions.
diff --git a/docs/participants.md b/docs/participants.md
@@ -1,28 +1,28 @@
 ## Multiple Participant Requirements
 
-### `participants.tsv`
+### `sub-{}_sessions.tsv`
 
 #### Format Specification Example
 
-| participant_id | session | age |
-|:-:|:-:|:-:|
-| sub-123456 | ses-A | 1 |
+| session | age |
+|:-:|:-:|
+| ses-A | 1 |
 
-NOTE: `sub-` and `ses-` prefixes are currently required for `participant_id` and `session` values.
+NOTE: the `ses-` prefix is currently required for `session` values.
 
 #### Content
 
-When running multiple subjects and/or sessions, the `participants.tsv` file in the `bids_dir` must include an `age` column. In that column, each row has one positive integer, the participant's age in months at that session.
+When running multiple subjects and/or sessions, the `sub-{}_sessions.tsv` file in each subject's directory (at the session directory level) must include an `age` column. In that column, each row has one positive integer, the participant's age in months at that session.
 
 <br />
 <img src="https://user-images.githubusercontent.com/102316699/184005162-0b1ebb76-3e5a-4bd3-b258-a686272e2ecc.png" width=555em style="margin-left: auto; margin-right: auto; display: block" />
 <br />
 
-If the user wants to specify the brain height (shown above) for each subject session, then the user must also include an additional `"brain_z_size"` column. That column also must have a positive integer for each row: the size of the participant's brain along the z-axis in millimeters. The `participants.tsv` file should look like the example below:
+If the user wants to specify the brain height (shown above) for each subject session, then the user must also include an additional `"brain_z_size"` column. That column must also have a positive integer for each row: the size of the participant's brain along the z-axis in millimeters. The `sub-{}_sessions.tsv` file for a given subject should look like the example below:
 
-| participant_id | session | age | brain_z_size |
-|:-:|:-:|:-:|:-:|
-| sub-123456 | ses-X | 1 | 120 |
-| sub-234567 | ses-X | 6 | 145 |
+| session | age | brain_z_size |
+|:-:|:-:|:-:|
+| ses-X | 1 | 120 |
+| ses-Y | 6 | 145 |
 
 Without a `brain_z_size` column, `CABINET` will calculate the `brain_z_size` value based on a table with [BCP](https://babyconnectomeproject.org/) participants' average head radius per age. That table is called `age_to_avg_head_radius_BCP.csv` under the `data` directory.
diff --git a/docs/usage.md b/docs/usage.md
@@ -35,8 +35,8 @@ optional arguments:
                         Positive integer, the participant's age in months. For
                         example, -age 5 would mean the participant is 5 months
                         old. Include this argument unless the age in months is
-                        specified in the participants.tsv file inside the BIDS
-                        input directory.
+                        specified in the sub-{}_sessions.tsv file inside each
+                        subject's directory in the BIDS input directory.
   -end {prebibsnet,bibsnet,postbibsnet}, --ending-stage {prebibsnet,bibsnet,postbibsnet}
                         Name of the stage to run last. By default, this will
                         be the postbibsnet stage. Valid choices: prebibsnet,
@@ -71,7 +71,7 @@ optional arguments:
                         copied into the "bibsnet" derivatives directory. 
                         Example: /path/to/working/directory
   -z, --brain-z-size    Include this flag to infer participants' brain height
-                        (z) using the participants.tsv brain_z_size column.
+                        (z) using the sub-{}_sessions.tsv brain_z_size column.
                         Otherwise, CABINET will estimate the brain height from
                         the participant age and averages of a large sample of
                         infant brain heights.

diff --git a/run.py b/run.py
@@ -146,7 +146,7 @@ def get_params_from_JSON(stage_names, logger):
         help=("Positive integer, the participant's age in months. For "
               "example, -age 5 would mean the participant is 5 months old. "
               "Include this argument unless the age in months is specified in "
-              "the participants.tsv file inside the BIDS input directory.")
+              "each subject's sub-{}_sessions.tsv file inside its BIDS input directory.")
     )
     parser.add_argument(
         "-end", "--ending-stage", dest="end",
@@ -195,7 +195,7 @@ def get_params_from_JSON(stage_names, logger):
     parser.add_argument(
         "-z", "--brain-z-size", action="store_true",
         help=("Include this flag to infer participants' brain height (z) "
-              "using the participants.tsv brain_z_size column. Otherwise, "
+              "using the sub-{}_sessions.tsv brain_z_size column. Otherwise, "
               "CABINET will estimate the brain height from the participant "
               "age and averages of a large sample of infant brain heights.")  # TODO rephrase
     )
@@ -282,16 +282,16 @@ def validate_cli_args(cli_args, stage_names, parser, logger):
                          "that your participant_label and session are correct."
                          .format(sub_ses_dir))
 
-        # User only needs participants.tsv if they didn't specify age_months
+        # User only needs sessions.tsv if they didn't specify age_months
         if not j_args["common"].get("age_months"): 
-            sub_ses_IDs[ix]["age_months"] = read_from_participants_tsv(
+            sub_ses_IDs[ix]["age_months"] = read_from_sessions_tsv(
                 j_args, logger, "age", *sub_ses
             )
 
-        # Infer brain_z_size for this sub_ses using participants.tsv if the 
+        # Infer brain_z_size for this sub_ses using sessions.tsv if the 
         # user said to (by using --brain-z-size flag), otherwise infer it 
         # using age_months and the age-to-head-radius table .csv file
-        sub_ses_IDs[ix]["brain_z_size"] = read_from_participants_tsv(
+        sub_ses_IDs[ix]["brain_z_size"] = read_from_sessions_tsv(
                 j_args, logger, "brain_z_size", *sub_ses
             ) if cli_args["brain_z_size"] else get_brain_z_size(
                 sub_ses_IDs[ix]["age_months"], j_args, logger
@@ -493,36 +493,32 @@ def ensure_j_args_has_bids_subdirs(j_args, derivs, sub_ses, default_parent):
     return j_args
 
 
-def read_from_participants_tsv(j_args, logger, col_name, *sub_ses):
+def read_from_sessions_tsv(j_args, logger, col_name, *sub_ses):
     """
     :param j_args: Dictionary containing all args from parameter .JSON file
     :param logger: logging.Logger object to show messages and raise warnings
-    :param col_name: String naming the column of participants.tsv to return
+    :param col_name: String naming the column of sessions.tsv to return
                      a value from (for this subject or subject-session)
+    :param sub_ses: Tuple containing subject and session labels. 
     :return: Int, either the subject's age (in months) or the subject's
-             brain_z_size (depending on col_name) as listed in participants.tsv
+             brain_z_size (depending on col_name) as listed in sessions.tsv
     """
-    columns = {x: "str" for x in (col_name, "session", "participant_id")}
+    columns = {x: "str" for x in (col_name, "session")}
 
-    # Read in participants.tsv
-    part_tsv_df = pd.read_csv(
-        os.path.join(j_args["common"]["bids_dir"],
-                     "participants.tsv"), sep="\t", dtype=columns
+    # Read in sessions.tsv
+    ses_tsv_df = pd.read_csv(
+        os.path.join(j_args["common"]["bids_dir"], sub_ses[0],
+                     "{}_sessions.tsv".format(sub_ses[0])), sep="\t", dtype=columns
     )
-    # Subject and session column names in participants.tsv
-    sub_ID_col = "participant_id"
+    # Subject and session column names in sessions.tsv
     ses_ID_col = "session"
 
-    # Get and return the col_name value from participants.tsv
-    subj_row = part_tsv_df[
-        part_tsv_df[sub_ID_col] == ensure_prefixed(sub_ses[0], "sub-")  # TODO part_tsv_df[sub_ID_col] = part_tsv_df[sub_ID_col].apply(ensure_prefixed(...))
+    # Get and return the col_name value from sessions.tsv
+    subj_row = ses_tsv_df[
+        ses_tsv_df[ses_ID_col] == ensure_prefixed(sub_ses[1], "ses-")  # TODO part_tsv_df[sub_ID_col] = part_tsv_df[sub_ID_col].apply(ensure_prefixed(...))
     ]  # select where "participant_id" matches
-    if len(sub_ses) > 1:
-        subj_row = subj_row[
-            subj_row[ses_ID_col] == ensure_prefixed(sub_ses[1], "ses-")  # TODO part_tsv_df[ses_ID_col] = part_tsv_df[ses_ID_col].apply(ensure_prefixed(...))
-        ]  # select where "session" matches
     if j_args["common"]["verbose"]:
-        logger.info(f"Subject details from participants.tsv row:\n{subj_row}")
+        logger.info(f"Subject details from sessions.tsv row:\n{subj_row}")
     return int(subj_row[col_name])