
Commit

i #324 Deleted refresh function
- Reconfigured how the `create_file_directory` function obtains paths from the config file
crepesAlot committed Dec 11, 2024
1 parent a6e25cc commit 8e5f5d5
Showing 3 changed files with 50 additions and 186 deletions.
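The substantive change in `R/config.R` replaces `$` extraction with `[[ ]]` extraction. The distinction matters in R because `$` partial-matches list names and can silently return the wrong element for an abbreviated or misspelled config key, whereas `[[ ]]` with a character name matches exactly (its default is `exact = TRUE`) and returns `NULL` otherwise. A minimal sketch with a made-up config value, not taken from the kaiaulu configuration:

```r
# Hypothetical config list whose only key under version_control is "log_filter"
conf <- list(version_control = list(log_filter = "rawdata/git_log.txt"))

# `$` partial-matches "log" against "log_filter" and silently succeeds
conf$version_control$log
#> [1] "rawdata/git_log.txt"

# `[[ ]]` requires an exact name match, so the missing "log" key yields NULL
conf[["version_control"]][["log"]]
#> NULL
```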
92 changes: 44 additions & 48 deletions R/config.R
@@ -44,30 +44,30 @@ parse_config <- function(config_path) {
#' @export
create_file_directory <- function(conf, verbose= FALSE) {
# Create the git_repo folder
- create_file_path(conf$version_control$log)
+ create_file_path(conf[["version_control"]][["log"]])

# Create the mailing_list directory, if needed
- if (!is.null(conf$mailing_list)) {
+ if (!is.null(conf[["mailing_list"]])) {
# Check if there is mod_mbox
- if (!is.null(conf$mailing_list$mod_mbox)) {
+ if (!is.null(conf[["mailing_list"]][["mod_mbox"]])) {
# Create for each project key
- project_keys <- names(conf$mailing_list$mod_mbox)
+ project_keys <- names(conf[["mailing_list"]][["mod_mbox"]])
for (key in project_keys) {
- mailing_list <- conf$mailing_list$mod_mbox[[key]]
- create_file_path(mailing_list$save_folder_path)
+ mailing_list <- conf[["mailing_list"]][["mod_mbox"]][[key]]
+ create_file_path(mailing_list[["save_folder_path"]])
}
} else {
if (verbose) {
message("No mod_mbox found")
}
}
# Check if there is pipermail
- if (!is.null(conf$mailing_list$pipermail)) {
+ if (!is.null(conf[["mailing_list"]][["pipermail"]])) {
# Create for each project key
- project_keys <- names(conf$mailing_list$pipermail)
+ project_keys <- names(conf[["mailing_list"]][["pipermail"]])
for (key in project_keys) {
- mailing_list <- conf$mailing_list$pipermail[[key]]
- create_file_path(mailing_list$save_folder_path)
+ mailing_list <- conf[["mailing_list"]][["pipermail"]][[key]]
+ create_file_path(mailing_list[["save_folder_path"]])
}
} else {
if (verbose) {
@@ -81,48 +81,48 @@ create_file_directory <- function(conf, verbose= FALSE) {
}

# Create the issue_tracker directory, if needed
- if (!is.null(conf$issue_tracker)) {
+ if (!is.null(conf[["issue_tracker"]])) {
# Check for jira
- if (!is.null(conf$issue_tracker$jira)) {
+ if (!is.null(conf[["issue_tracker"]][["jira"]])) {
# Create for each project key
- project_keys <- names(conf$issue_tracker$jira)
+ project_keys <- names(conf[["issue_tracker"]][["jira"]])
for (key in project_keys) {
- issue_tracker <- conf$issue_tracker$jira[[key]]
- create_file_path(issue_tracker$issues)
- create_file_path(issue_tracker$issue_comments)
+ issue_tracker <- conf[["issue_tracker"]][["jira"]][[key]]
+ create_file_path(issue_tracker[["issues"]])
+ create_file_path(issue_tracker[["issue_comments"]])
}
} else {
if (verbose) {
message("No jira found")
}
}
# Check for github
- if (!is.null(conf$issue_tracker$github)) {
+ if (!is.null(conf[["issue_tracker"]][["github"]])) {
# Create for each project key
- project_keys <- names(conf$issue_tracker$github)
+ project_keys <- names(conf[["issue_tracker"]][["github"]])
for (key in project_keys) {
- issue_tracker <- conf$issue_tracker$github[[key]]
- create_file_path(issue_tracker$issue_or_pr_comment)
- create_file_path(issue_tracker$issue)
- create_file_path(issue_tracker$issue_search)
- create_file_path(issue_tracker$issue_event)
- create_file_path(issue_tracker$pull_request)
- create_file_path(issue_tracker$commit)
- create_file_path(issue_tracker$discussion)
+ issue_tracker <- conf[["issue_tracker"]][["github"]][[key]]
+ create_file_path(issue_tracker[["issue_or_pr_comment"]])
+ create_file_path(issue_tracker[["issue"]])
+ create_file_path(issue_tracker[["issue_search"]])
+ create_file_path(issue_tracker[["issue_event"]])
+ create_file_path(issue_tracker[["pull_request"]])
+ create_file_path(issue_tracker[["commit"]])
+ create_file_path(issue_tracker[["discussion"]])
}
} else {
if (verbose) {
message("No github found")
}
}
# Check for bugzilla
- if (!is.null(conf$issue_tracker$bugzilla)) {
+ if (!is.null(conf[["issue_tracker"]][["bugzilla"]])) {
# Create for each project key
- project_keys <- names(conf$issue_tracker$bugzilla)
+ project_keys <- names(conf[["issue_tracker"]][["bugzilla"]])
for (key in project_keys) {
- issue_tracker <- conf$issue_tracker$bugzilla[[key]]
- create_file_path(issue_tracker$issues)
- create_file_path(issue_tracker$issue_comments)
+ issue_tracker <- conf[["issue_tracker"]][["bugzilla"]][[key]]
+ create_file_path(issue_tracker[["issues"]])
+ create_file_path(issue_tracker[["issue_comments"]])
}
} else {
if (verbose) {
@@ -136,36 +136,36 @@ create_file_directory <- function(conf, verbose= FALSE) {
}

# Create the tools directory, if needed
- if (!is.null(conf$tool)) {
+ if (!is.null(conf[["tool"]])) {
# Check for dv8
- if (!is.null(conf$tool$dv8)) {
- create_file_path(conf$tool$dv8$folder_path)
+ if (!is.null(conf[["tool"]][["dv8"]])) {
+ create_file_path(conf[["tool"]][["dv8"]][["folder_path"]])
} else {
if (verbose) {
message("dv8 is unused")
}
}
# Check for srcml
- if (!is.null(conf$tool$srcml)) {
- create_file_path(conf$tool$srcml$srcml_path)
+ if (!is.null(conf[["tool"]][["srcml"]])) {
+ create_file_path(conf[["tool"]][["srcml"]][["srcml_path"]])
} else {
if (verbose) {
message("srcml is unused")
}
}
# Check for pattern4
- if (!is.null(conf$tool$pattern4)) {
- create_file_path(conf$tool$pattern4$class_folder_path)
- create_file_path(conf$tool$pattern4$output_filepath)
+ if (!is.null(conf[["tool"]][["pattern4"]])) {
+ create_file_path(conf[["tool"]][["pattern4"]][["class_folder_path"]])
+ create_file_path(conf[["tool"]][["pattern4"]][["output_filepath"]])
} else {
if (verbose) {
message("pattern4 is unused")
}
}
# Check for understand
- if (!is.null(conf$tool$understand)) {
- create_file_path(conf$tool$understand$project_path)
- create_file_path(conf$tool$understand$output_path)
+ if (!is.null(conf[["tool"]][["understand"]])) {
+ create_file_path(conf[["tool"]][["understand"]][["project_path"]])
+ create_file_path(conf[["tool"]][["understand"]][["output_path"]])
}
} else {
if (verbose) {
Expand All @@ -184,12 +184,8 @@ create_file_directory <- function(conf, verbose= FALSE) {
#' @export
create_file_path <- function(filepath, verbose= TRUE) {
if (!is.null(filepath)) {
- # Check if the filepath already exists
- if (dir.exists(filepath)) {
- if (verbose) {
- message("Filepath: ", filepath, " already exists.")
- }
- } else {
+ # Check if the filepath doesn't exist
+ if (!dir.exists(filepath)) {
# Create the filepath
dir.create(filepath, recursive= TRUE)
if (verbose) {
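With the deleted branch gone, the helper collapses to a single guard: create the directory only when it does not already exist. A sketch of the simplified function as reconstructed from this hunk; the diff is truncated after `if (verbose) {`, so the wording of the confirmation message is an assumption:

```r
create_file_path <- function(filepath, verbose = TRUE) {
  if (!is.null(filepath)) {
    # Check if the filepath doesn't exist
    if (!dir.exists(filepath)) {
      # Create the filepath, including any missing parent directories
      dir.create(filepath, recursive = TRUE)
      if (verbose) {
        # Confirmation message is an assumption; the diff cuts off here
        message("Created filepath: ", filepath)
      }
    }
  }
}
```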
138 changes: 3 additions & 135 deletions R/github.R
@@ -504,141 +504,6 @@ github_api_discussions <- function(token, owner, repo, save_folder_path, max_pag
}
}

- #' Refresh for Github Discussions downloader
- #'
- #' Download Discussions from GraphQL API endpoint.
- #' Uses a query to only obtain data defined by the user.
- #' Checks if the folder to download to is empty, and calls the regular downloader if it is.
- #' It then checks the downloaded JSON filenames to compare with the most recent discussion createdAt dates,
- #' formatted as POSIXct (%Y-%m-%dT%H:%M:%SZ, UTC).
- #' GitHub API endpoints return data in pages, each containing by default 100 entries.
- #' By default, this function iterates over the next page in order to download all the
- #' project's data available from the endpoint (up to the remaining
- #' requests available to the user's token).
- #' The user can also define the maximum number of pages to download.
- #'
- #' @param token Your Github API token
- #' @param owner Github's repository owner (e.g. sailuh)
- #' @param repo Github's repository name (e.g. kaiaulu)
- #' @param save_folder_path A folder path to save the downloaded json pages "as-is".
- #' @references For details, see \url{https://docs.github.com/en/graphql/guides/using-the-graphql-api-for-discussions}
- #' @export
- github_api_discussions_refresh <- function(token, owner, repo, save_folder_path) {
-   # List all json files within the save_path_folder
-   print("List files in save_folder_path: ", list.files(save_folder_path))
-   print(save_folder_path)
-   json_contents <- list.files(save_folder_path, pattern= "\\.json$", full.names = TRUE, recursive= TRUE)
-   contents <- list.files(save_folder_path, all.files = TRUE)
-   print(json_contents)
-   print(contents)
-   print(length(contents)==0)
-
-   # If there are no json files, download all discussions
-   if (length(contents) == 0 ) {
-     # Run the regular downloader
-     discussions <- github_api_discussions(token, owner, repo, save_folder_path)
-     return (discussions)
-   }
-
-   # Get the name of the file with the most recent date
-   latest_discussion <- contents[which.max(sapply(contents, function (filename) {
-     # Use regex to get timestamp
-     as.numeric(sub(".*_(\\d+)\\.json$", "\\1", basename(filename)))
-   }))]
-
-   # Read the JSON file
-   json_data <- fromJSON(file.path(save_folder_path, latest_discussion), simplifyVector = FALSE)
-   # Get the created_at values
-   created_at <- sapply(json_data[["data"]][["repository"]][["discussions"]][["edges"]],
-                        function (edge) edge[["node"]][["createdAt"]])
-   # Find the latest created_at
-   created_at <- max(created_at)
-   # Convert to a POSIXct object
-   created_at <- as.POSIXct(created_at, format="%Y-%m-%dT%H:%M:%SZ", tz="UTC")
-   # Add one second
-   created_at <- created_at + 1
-   # Convert back to original
-   created_at <- format(created_at, "%Y-%m-%dT%H:%M:%SZ")
-
-   page_number <- 1
-   cursor <- NULL
-   while (TRUE) {
-     # Form a new query
-     query <- paste0('query {
-       repository (owner:"', owner, '", name:"', repo, '") {
-         discussions (first: 100, orderBy: {field: CREATED_AT, direction: ASC}, after: "', created_at, '") {
-           pageInfo {
-             hasNextPage
-             endCursor
-           }
-           edges {
-             node {
-               title
-               bodyText
-               author { login }
-               createdAt
-               category { name }
-               id
-               answer { id }
-               comments(first: 100) {
-                 edges {
-                   node {
-                     discussion { id }
-                     bodyText
-                     author { login }
-                     id
-                     createdAt
-                   }
-                 }
-               }
-             }
-           }
-         }
-       }
-     }')
-     # Make a new API call with the query
-     gh_response <- gh::gh("POST /graphql", query=query, .token=token)
-
-     # Check if response has new discussions
-     if (length(gh_response[["data"]][["repository"]][["discussions"]][["edges"]]) == 0 && verbose) {
-       message("No new discussions")
-       break
-     }
-
-     # Make the list of all created_dates
-     created_dates <- sapply(gh_response[["data"]][["repository"]][["discussions"]][["edges"]],
-                             function(edge) edge[["node"]][["createdAt"]])
-     # Remove NULL entries from created_dates
-     created_dates <- Filter(Negate(is.null), created_dates)
-
-     # Convert to POSIXct date objects
-     date_objects <- as.POSIXct(created_dates, format="%Y-%m-%dT%H:%M:%SZ", tz="UTC")
-
-     # Find the greatest and smallest date
-     latest_date <- max(date_objects)
-     latest_date_unix <- as.numeric(latest_date)
-     oldest_date <- min(date_objects)
-     oldest_date_unix <- as.numeric(oldest_date)
-
-     # Construct the file_name
-     file_name <- paste0(save_folder_path,
-                         owner, "_", repo, "_", oldest_date_unix, "_", latest_date_unix,
-                         ".json")
-     # Save the json to the folder path
-     write_json(gh_response, file_name, pretty=TRUE, auto_unbox=TRUE)
-
-     has_next_page <- gh_response[["data"]][["repository"]][["discussions"]][["pageInfo"]][["hasNextPage"]]
-
-     if (has_next_page){
-       cursor <- gh_response[["data"]][["repository"]][["discussions"]][["pageInfo"]][["endCursor"]]
-       page_number <- page_number + 1
-     }
-     else {
-       break
-     }
-   }
- }
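For reference, the incremental step the removed refresher relied on — take the newest `createdAt` among the saved pages, bump it by one second, and use the result as the lower bound for the next request — is compact enough to show on its own. A standalone sketch with a made-up timestamp:

```r
# Newest createdAt observed in the previously saved JSON pages (made-up value)
created_at <- "2024-12-01T10:15:30Z"

# Parse the ISO-8601 timestamp as UTC
latest <- as.POSIXct(created_at, format = "%Y-%m-%dT%H:%M:%SZ", tz = "UTC")

# Add one second so the next query excludes the already-saved discussion
next_lower_bound <- format(latest + 1, "%Y-%m-%dT%H:%M:%SZ")
next_lower_bound
#> [1] "2024-12-01T10:15:31Z"
```

Worth noting: the removed code passed this timestamp to the GraphQL `after:` argument, which in the connection pattern GitHub's API uses expects an opaque page cursor (the `endCursor` from `pageInfo`) rather than a date.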

#' Parse Discussions JSON to Table
#'
#' @description This function parses through the JSON of Github Discussions
Expand Down Expand Up @@ -676,6 +541,9 @@ github_parse_discussions <- function(api_response) {
#' downloaded from the `github_api_discussions` function, and turns it into
#' a table. This function only parses through the Discussion comments within
#' the JSON, and does not parse the discussions themselves.
+ #' `parent_discussion_id` is a string value that will match with a `discussion_id`
+ #' from the `github_parse_discussions` function, indicating which discussion the comment
+ #' is a response to.
#'
#' @param api_response API response obtained from `github_api_discussions` function.
#' @return The parsed table of Discussion Comments.
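Since each comment row carries a `parent_discussion_id` that matches a `discussion_id` produced by `github_parse_discussions`, the two parsed tables can be joined back together. A minimal sketch using base R's `merge()`; the `discussions` and `comments` variable names are placeholders for the two parsers' outputs:

```r
# discussions: table returned by github_parse_discussions(api_response)
# comments: table returned by the discussion-comment parser documented above
# Attach each comment to its parent discussion by matching IDs
comments_joined <- merge(
  comments,
  discussions,
  by.x = "parent_discussion_id",
  by.y = "discussion_id",
  suffixes = c("_comment", "_discussion")
)
```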
6 changes: 3 additions & 3 deletions vignettes/download_github_comments.Rmd
@@ -47,14 +47,14 @@ This can be done manually, or they can be obtained from the parsed configuration
Naturally, the file paths used for the downloaders must also exist on your local device.

```{r warning=FALSE}
- # Parsed configuration file for required information
- conf <- parse_config("../conf/kaiaulu.yml")
owner <- get_github_owner(conf, "project_key_1") # Has to match github organization (e.g. github.com/sailuh)
repo <- get_github_repo(conf, "project_key_1") # Has to match github repository (e.g. github.com/sailuh/perceive)
# your file github_token (a text file) contains the GitHub API token
token <- scan("~/.ssh/github_token",what="character",quiet=TRUE)
+ # Parsed configuration file for required information
+ conf <- parse_config("../conf/kaiaulu.yml")
# Path you wish to save all raw data.
save_path_issue_refresh <- get_github_issue_search_path(conf, "project_key_1")
save_path_issue <- get_github_issue_path(conf, "project_key_1")
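Because the downloaders require these save paths to exist locally, one option is to create all of them in one step from the same parsed configuration, using the `create_file_directory()` function modified in this commit. A sketch, assuming the vignette's relative path to the configuration file:

```r
# Create every save folder declared in the configuration file;
# verbose = TRUE reports any sections that are not configured.
conf <- parse_config("../conf/kaiaulu.yml")
create_file_directory(conf, verbose = TRUE)
```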
