Deviance not calculated in cv_MRF_diag #31

shirasal · 2021-06-27T10:44:56Z

After encountering an odd problem with cv_MRF_diag with Poisson data, I found a simple solution (with the help of @hezibu). The problem I had was that NaNs were produced when dividing zero by zero while creating preds_log. Other numbers divided by zero were taken care of with is.infinite but NaNs weren't.
I used the same procedure already implemented in the function, to set infinite values to zero (when numbers were divided by 0), but with is.nan instead if is.infinite.

Here is my suggested fix [my additions in rows 299 and 324:

function (data, symmetrise, n_nodes, n_cores, sample_seed, n_folds, 
                            n_fold_runs, n_covariates, compare_null, family, plot = TRUE, 
                            cached_model, cached_predictions, mod_labels = NULL) 
{
  if (!(family %in% c("gaussian", "poisson", "binomial"))) 
    stop("Please select one of the three family options:\n         \"gaussian\", \"poisson\", \"binomial\"")
  if (missing(symmetrise)) {
    symmetrise <- "mean"
  }
  if (missing(compare_null)) {
    compare_null <- FALSE
  }
  if (missing(n_folds)) {
    n_folds <- 10
  }
  else {
    if (sign(n_folds) == 1) {
      n_folds <- ceiling(n_folds)
    }
    else {
      stop("Please provide a positive integer for n_folds")
    }
  }
  if (missing(n_fold_runs)) {
    n_fold_runs <- n_folds
  }
  else {
    if (sign(n_fold_runs) == 1) {
      n_fold_runs <- ceiling(n_fold_runs)
    }
    else {
      stop("Please provide a positive integer for n_fold_runs")
    }
  }
  if (missing(n_cores)) {
    n_cores <- 1
  }
  else {
    if (sign(n_cores) != 1) {
      stop("Please provide a positive integer for n_cores")
    }
    else {
      if (sfsmisc::is.whole(n_cores) == FALSE) {
        stop("Please provide a positive integer for n_cores")
      }
    }
  }
  if (missing(n_nodes)) {
    warning("n_nodes not specified. using ncol(data) as default, assuming no covariates", 
            call. = FALSE)
    n_nodes <- ncol(data)
    n_covariates <- 0
  }
  else {
    if (sign(n_nodes) != 1) {
      stop("Please provide a positive integer for n_nodes")
    }
    else {
      if (sfsmisc::is.whole(n_nodes) == FALSE) {
        stop("Please provide a positive integer for n_nodes")
      }
    }
  }
  if (missing(n_covariates)) {
    n_covariates <- ncol(data) - n_nodes
  }
  else {
    if (sign(n_covariates) != 1) {
      stop("Please provide a positive integer for n_covariates")
    }
    else {
      if (sfsmisc::is.whole(n_covariates) == FALSE) {
        stop("Please provide a positive integer for n_covariates")
      }
    }
  }
  if (missing(sample_seed)) {
    sample_seed <- ceiling(runif(1, 0, 1e+05))
  }
  if (missing(cached_model)) {
    cat("Generating node-optimised Conditional Random Fields model", 
        "\n", sep = "")
    if (family == "binomial") {
      mrf <- MRFcov(data = data, symmetrise = symmetrise, 
                    n_nodes = n_nodes, n_cores = n_cores, family = "binomial")
      if (compare_null) {
        cat("\nGenerating Markov Random Fields model (no covariates)", 
            "\n", sep = "")
        mrf_null <- MRFcov(data = data[, 1:n_nodes], 
                           symmetrise = symmetrise, n_nodes = n_nodes, 
                           n_cores = n_cores, family = "binomial")
      }
    }
    if (family == "poisson") {
      mrf <- MRFcov(data = data, symmetrise = symmetrise, 
                    n_nodes = n_nodes, n_cores = n_cores, family = "poisson")
      if (compare_null) {
        cat("\nGenerating Markov Random Fields model (no covariates)", 
            "\n", sep = "")
        mrf_null <- MRFcov(data = data[, 1:n_nodes], 
                           symmetrise = symmetrise, n_nodes = n_nodes, 
                           n_cores = n_cores, family = "poisson")
      }
    }
    if (family == "gaussian") {
      mrf <- MRFcov(data = data, symmetrise = symmetrise, 
                    n_nodes = n_nodes, n_cores = n_cores, family = "gaussian")
      if (compare_null) {
        cat("\nGenerating Markov Random Fields model (no covariates)", 
            "\n", sep = "")
        mrf_null <- MRFcov(data = data[, 1:n_nodes], 
                           symmetrise = symmetrise, n_nodes = n_nodes, 
                           n_cores = n_cores, family = "gaussian")
      }
    }
  }
  else {
    mrf <- cached_model$mrf
    if (compare_null) {
      mrf_null <- cached_model$mrf_null
    }
  }
  if (family == "binomial") {
    folds <- caret::createFolds(rownames(data), n_folds)
    all_predictions <- if (missing(cached_predictions)) {
      predict_MRF(data, mrf)
    }
    else {
      cached_predictions$predictions
    }
    cv_predictions <- lapply(seq_len(n_folds), function(k) {
      test_data <- data[folds[[k]], ]
      predictions <- all_predictions[[2]][folds[[k]], 
      ]
      true_pos <- (predictions == test_data[, c(1:n_nodes)])[test_data[, 
                                                                       c(1:n_nodes)] == 1]
      false_pos <- (predictions == 1)[predictions != test_data[, 
                                                               c(1:n_nodes)]]
      true_neg <- (predictions == test_data[, c(1:n_nodes)])[test_data[, 
                                                                       c(1:n_nodes)] == 0]
      false_neg <- (predictions == 0)[predictions != test_data[, 
                                                               c(1:n_nodes)]]
      pos_pred <- sum(true_pos, na.rm = TRUE)/(sum(true_pos, 
                                                   na.rm = TRUE) + sum(false_pos, na.rm = TRUE))
      neg_pred <- sum(true_neg, na.rm = TRUE)/(sum(true_neg, 
                                                   na.rm = TRUE) + sum(false_neg, na.rm = TRUE))
      sensitivity <- sum(true_pos, na.rm = TRUE)/(sum(true_pos, 
                                                      na.rm = TRUE) + sum(false_neg, na.rm = TRUE))
      specificity <- sum(true_neg, na.rm = TRUE)/(sum(true_neg, 
                                                      na.rm = TRUE) + sum(false_pos, na.rm = TRUE))
      tot_pred <- (sum(true_pos, na.rm = TRUE) + sum(true_neg, 
                                                     na.rm = TRUE))/(length(as.matrix(test_data[, 
                                                                                                c(1:n_nodes)])))
      list(mean_pos_pred = mean(pos_pred, na.rm = TRUE), 
           mean_neg_pred = mean(neg_pred, na.rm = TRUE), 
           mean_tot_pred = mean(tot_pred, na.rm = TRUE), 
           mean_sensitivity = mean(sensitivity, na.rm = TRUE), 
           mean_specificity = mean(specificity, na.rm = TRUE))
    })
    plot_dat <- purrr::map_df(cv_predictions, magrittr::extract, 
                              c("mean_pos_pred", "mean_tot_pred", "mean_sensitivity", 
                                "mean_specificity"))
    if (compare_null) {
      all_predictions <- if (missing(cached_predictions)) {
        predict_MRF(data[, 1:n_nodes], mrf_null)
      }
      else {
        cached_predictions$null_predictions
      }
      cv_predictions_null <- lapply(seq_len(n_folds), 
                                    function(k) {
                                      test_data <- data[folds[[k]], 1:n_nodes]
                                      predictions <- all_predictions[[2]][folds[[k]], 
                                      ]
                                      true_pos <- (predictions == test_data)[test_data == 
                                                                               1]
                                      false_pos <- (predictions == 1)[predictions != 
                                                                        test_data]
                                      true_neg <- (predictions == test_data)[test_data == 
                                                                               0]
                                      false_neg <- (predictions == 0)[predictions != 
                                                                        test_data]
                                      pos_pred <- sum(true_pos, na.rm = TRUE)/(sum(true_pos, 
                                                                                   na.rm = TRUE) + sum(false_pos, na.rm = TRUE))
                                      neg_pred <- sum(true_neg, na.rm = TRUE)/(sum(true_neg, 
                                                                                   na.rm = TRUE) + sum(false_neg, na.rm = TRUE))
                                      sensitivity <- sum(true_pos, na.rm = TRUE)/(sum(true_pos, 
                                                                                      na.rm = TRUE) + sum(false_neg, na.rm = TRUE))
                                      specificity <- sum(true_neg, na.rm = TRUE)/(sum(true_neg, 
                                                                                      na.rm = TRUE) + sum(false_pos, na.rm = TRUE))
                                      tot_pred <- (sum(true_pos, na.rm = TRUE) + 
                                                     sum(true_neg, na.rm = TRUE))/(length(as.matrix(test_data)))
                                      list(mean_pos_pred = mean(pos_pred, na.rm = TRUE), 
                                           mean_neg_pred = mean(neg_pred, na.rm = TRUE), 
                                           mean_tot_pred = mean(tot_pred, na.rm = TRUE), 
                                           mean_sensitivity = mean(sensitivity, na.rm = TRUE), 
                                           mean_specificity = mean(specificity, na.rm = TRUE))
                                    })
      plot_dat_null <- purrr::map_df(cv_predictions_null, 
                                     magrittr::extract, c("mean_pos_pred", "mean_tot_pred", 
                                                          "mean_sensitivity", "mean_specificity"))
      if (is.null(mod_labels)) {
        plot_dat$model <- "CRF"
        plot_dat_null$model <- "MRF (no covariates)"
      }
      else {
        plot_dat$model <- mod_labels[1]
        plot_dat_null$model <- mod_labels[2]
      }
      plot_dat <- rbind(plot_dat, plot_dat_null)
      if (plot) {
        plot_binom_cv_diag_optim(plot_dat, compare_null = TRUE)
      }
      else {
        return(plot_dat)
      }
    }
    else {
      if (plot) {
        plot_binom_cv_diag_optim(plot_dat, compare_null = FALSE)
      }
      else {
        return(plot_dat)
      }
    }
  }
  if (family == "gaussian") {
    folds <- caret::createFolds(rownames(data), n_folds)
    cv_predictions <- lapply(seq_len(n_folds), function(k) {
      test_data <- data[folds[[k]], ]
      predictions <- predict_MRF(test_data, mrf)
      Rsquared <- vector()
      MSE <- vector()
      for (i in seq_len(ncol(predictions))) {
        Rsquared[i] <- cor.test(test_data[, i], predictions[, 
                                                            i])[[4]]
        MSE[i] <- mean((test_data[, i] - predictions[, 
                                                     i])^2)
      }
      list(Rsquared = mean(Rsquared, na.rm = T), MSE = mean(MSE, 
                                                            na.rm = T))
    })
    plot_dat <- purrr::map_df(cv_predictions, magrittr::extract, 
                              c("Rsquared", "MSE"))
    if (compare_null) {
      cv_predictions_null <- lapply(seq_len(n_folds), 
                                    function(k) {
                                      test_data <- data[folds[[k]], 1:n_nodes]
                                      predictions <- predict_MRF(test_data, mrf_null)
                                      Rsquared <- vector()
                                      MSE <- vector()
                                      for (i in seq_len(ncol(predictions))) {
                                        Rsquared[i] <- cor.test(test_data[, i], 
                                                                predictions[, i])[[4]]
                                        MSE[i] <- mean((test_data[, i] - predictions[, 
                                                                                     i])^2)
                                      }
                                      list(Rsquared = mean(Rsquared, na.rm = T), 
                                           MSE = mean(MSE, na.rm = T))
                                    })
      plot_dat_null <- purrr::map_df(cv_predictions_null, 
                                     magrittr::extract, c("Rsquared", "MSE"))
      if (is.null(mod_labels)) {
        plot_dat$model <- "CRF"
        plot_dat_null$model <- "MRF (no covariates)"
      }
      else {
        plot_dat$model <- mod_labels[1]
        plot_dat_null$model <- mod_labels[2]
      }
      plot_dat <- rbind(plot_dat, plot_dat_null)
      if (plot) {
        plot_gauss_cv_diag_optim(plot_dat, compare_null = TRUE)
      }
      else {
        return(plot_dat)
      }
    }
    else {
      if (plot) {
        plot_gauss_cv_diag_optim(plot_dat, compare_null = FALSE)
      }
      else {
        return(plot_dat)
      }
    }
  }
  if (family == "poisson") {
    folds <- caret::createFolds(rownames(data), n_folds)
    cv_predictions <- lapply(seq_len(n_folds), function(k) {
      test_data <- data[folds[[k]], ]
      predictions <- predict_MRF(test_data, mrf)
      Deviance <- vector()
      MSE <- vector()
      for (i in seq_len(ncol(predictions))) {
        preds_log <- log(test_data[, i]/predictions[, 
                                                    i])
        preds_log[is.infinite(preds_log)] <- 0
        preds_log[is.nan(preds_log)] <- 0
        test_data_wzeros <- test_data
        test_data_wzeros[predictions[, i] == 0, i] <- 0
        Deviance[i] <- mean(2 * sum(test_data_wzeros[, 
                                                     i] * preds_log - (test_data_wzeros[, i] - 
                                                                         predictions[, i])))
        MSE[i] <- mean((test_data[, i] - predictions[, 
                                                     i])^2)
      }
      list(Deviance = mean(Deviance, na.rm = T), MSE = mean(MSE, 
                                                            na.rm = T))
    })
    plot_dat <- purrr::map_df(cv_predictions, magrittr::extract, 
                              c("Deviance", "MSE"))
    if (compare_null) {
      cv_predictions_null <- lapply(seq_len(n_folds), 
                                    function(k) {
                                      test_data <- data[folds[[k]], 1:n_nodes]
                                      predictions <- predict_MRF(test_data, mrf_null)
                                      Deviance <- vector()
                                      MSE <- vector()
                                      for (i in seq_len(ncol(predictions))) {
                                        preds_log <- log(test_data[, i]/predictions[, 
                                                                                    i])
                                        preds_log[is.infinite(preds_log)] <- 0
                                        preds_log[is.nan(preds_log)] <- 0
                                        test_data_wzeros <- test_data
                                        test_data_wzeros[predictions[, i] == 0, 
                                                         i] <- 0
                                        Deviance[i] <- mean(2 * sum(test_data_wzeros[, 
                                                                                     i] * preds_log - (test_data_wzeros[, i] - 
                                                                                                         predictions[, i])))
                                        MSE[i] <- mean((test_data[, i] - predictions[, 
                                                                                     i])^2)
                                      }
                                      list(Deviance = mean(Deviance, na.rm = T), 
                                           MSE = mean(MSE, na.rm = T))
                                    })
      plot_dat_null <- purrr::map_df(cv_predictions_null, 
                                     magrittr::extract, c("Deviance", "MSE"))
      if (is.null(mod_labels)) {
        plot_dat$model <- "CRF"
        plot_dat_null$model <- "MRF (no covariates)"
      }
      else {
        plot_dat$model <- mod_labels[1]
        plot_dat_null$model <- mod_labels[2]
      }
      plot_dat <- rbind(plot_dat, plot_dat_null)
      if (plot) {
        plot_poiss_cv_diag_optim(plot_dat, compare_null = TRUE)
      }
      else {
        return(plot_dat)
      }
    }
    else {
      if (plot) {
        plot_poiss_cv_diag_optim(plot_dat, compare_null = FALSE)
      }
      else {
        return(plot_dat)
      }
    }
  }
}

Does this look adequate?

The text was updated successfully, but these errors were encountered:

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Deviance not calculated in cv_MRF_diag #31

Deviance not calculated in cv_MRF_diag #31

shirasal commented Jun 27, 2021 •

edited

Loading

Deviance not calculated in cv_MRF_diag #31

Deviance not calculated in cv_MRF_diag #31

Comments

shirasal commented Jun 27, 2021 • edited Loading

shirasal commented Jun 27, 2021 •

edited

Loading