CCBR · bianjh-cloud · Mar 23, 2026 · Mar 23, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/CITATION.cff b/CITATION.cff
diff --git a/R/convert_human_gene_list.R b/R/convert_human_gene_list.R
@@ -8,29 +8,44 @@
 #' @export
 #'
 #' @return Character vector of mapped mouse genes
-convert_human_gene_list <- function(genes) {
-  # data variables must be initialized to silence the R CMD check note:
-  #    'no visible binding for global variable'
-  gns <- NULL
+convert_human_gene_list <- function(genes = NULL) {
-convert_human_gene_list <- function(genes = NULL) {
+convert_mouse_to_human_gene_list <- function(genes = NULL) {
-convert_human_gene_list <- function(genes = NULL) {
+convert_mouse_to_human_gene_list <- function(genes = NULL) {
+  # Validate input: must be character vector
+  if (!is.character(genes)) {
+    stop(
+      "Input 'genes' must be a character vector, not ",
+      paste(class(genes), collapse = " "),
+      call. = FALSE
+    )
+  }
 
-  # TODO: make this function generic enough to convert any genome to any other
+  # Handle empty character vector
+  if (length(genes) == 0L) {
+    return(character(0))
+  }
+
+  # Map human SYMBOL to ENTREZID
   egs <- AnnotationDbi::mapIds(
     org.Hs.eg.db::org.Hs.eg.db,
-    gns,
+    genes,
     "ENTREZID",
     "SYMBOL"
   )
+
+  # Map human ENTREZID to mouse ENTREZID
   mapped <- AnnotationDbi::select(
     Orthology.eg.db::Orthology.eg.db,
-    egs,
+    as.character(egs),
     "Mus.musculus",
     "Homo.sapiens"
   )
+
+  # Map mouse ENTREZID to SYMBOL
   mapped$MUS <- AnnotationDbi::mapIds(
     org.Mm.eg.db::org.Mm.eg.db,
     as.character(mapped$Mus.musculus),
     "SYMBOL",
     "ENTREZID"
   )
+
   return(as.character(unlist(mapped$MUS)))
 }
diff --git a/R/make_volcano_plot.R b/R/make_volcano_plot.R
@@ -5,7 +5,7 @@
 #' differential expression table generated by Seurat
 #'
 #' @param de_table Differential expression table generated by Seurat containing
-#' p_val, pct.1, pct.2 avg_log2FC and p_val_adj
+#' p_val, pct.1, pct.2 avg_log_2_fc and p_val_adj
 #' @param logfc Boolean to color genes meeting logfc threshold of 1.5
 #' @param pval Boolean to color genes meeting p-value threshold of 0.05
 #' @param significant Boolean to color genes that meet both logfc and p-value
@@ -26,33 +26,36 @@ make_volcano_plot <- function(
 ) {
   log10_p <- -log10(de_table$p_val_adj)
   log10_p[which(de_table$p_val_adj == 0)] <- 500
-  avg_log2FC <- de_table$avg_log2FC
+  avg_log_2_fc <- de_table$avg_log_2_fc
   gene <- rownames(de_table)
   significance <- vector(length = length(log10_p))
   significance[] <- "NotSignificant"
   if (logfc == TRUE) {
-    significance[which(abs(de_table$avg_log2FC) > 1.5)] <- "avg_log2FC > 1.5"
+    significance[which(
+      abs(de_table$avg_log_2_fc) > 1.5
+    )] <- "avg_log_2_fc > 1.5"
   }
   if (isTRUE(pval)) {
     significance[which(de_table$p_val_adj < 0.05)] <- "p_val_adj < 0.05"
   }
   if (isTRUE(significant)) {
     significance[which(
-      abs(de_table$avg_log2FC) > 1.5 &
+      abs(de_table$avg_log_2_fc) > 1.5 &
         de_table$p_val_adj < 0.05
-    )] <- "p_val_adj < 0.05 & avg_log2FC > 1.5"
+    )] <- "p_val_adj < 0.05 & avg_log_2_fc > 1.5"
   }
-  df <- as.data.frame(cbind(avg_log2FC, log10_p, significance, gene))
+  df <- as.data.frame(cbind(avg_log_2_fc, log10_p, significance, gene))
+  colnames(df) <- c("avg_log_2_fc", "log10_p", "significance", "gene")
   rownames(df) <- rownames(de_table)
   df[, 1] <- as.numeric(df[, 1])
   df[, 2] <- as.numeric(df[, 2])
   df[, 3] <- factor(
     df[, 3],
     levels = c(
       "NotSignificant",
-      "avg_log2FC > 1.5",
+      "avg_log_2_fc > 1.5",
       "p_val_adj < 0.05",
-      "p_val_adj < 0.05 & avg_log2FC > 1.5"
+      "p_val_adj < 0.05 & avg_log_2_fc > 1.5"
     )
   )
   data_subset <- NULL
@@ -68,13 +71,13 @@ make_volcano_plot <- function(
 
   volcano <- ggplot2::ggplot(
     df,
-    ggplot2::aes(x = avg_log2FC, y = log10_p, col = significance)
+    ggplot2::aes(x = avg_log_2_fc, y = log10_p, col = significance)
   ) +
     ggplot2::geom_point() +
     #      geom_vline(xintercept = 0, color = "black", linewidth = 1.5)+
     ggplot2::xlim(
-      -ceiling(max(abs(df$avg_log2FC))),
-      ceiling(max(abs(df$avg_log2FC)))
+      -ceiling(max(abs(df$avg_log_2_fc))),
+      ceiling(max(abs(df$avg_log_2_fc)))
     )
 
   if (!is.null(data_subset)) {

diff --git a/R/seurat_clustering.R b/R/seurat_clustering.R
@@ -15,6 +15,25 @@
 #' @return The updated Seurat single cell object with recalculated PCA,
 #' neighbors, UMAP and clusters
 seurat_clustering <- function(so_in, npcs_in, resolution = 0.8, algorithm = 3) {
+  # Validate npcs_in: must be a positive integer >= 3 (for UMAP n.components)
+  if (!is.numeric(npcs_in) || npcs_in != as.integer(npcs_in) || npcs_in <= 0) {
+    rlang::abort("npcs_in must be a positive integer")
+  }
+
+  if (npcs_in < 3) {
+    rlang::abort("npcs_in must be at least 3 for UMAP computation")
+  }
+
+  # Validate resolution: must be positive
+  if (!is.numeric(resolution) || resolution <= 0) {
+    rlang::abort("resolution must be a positive numeric value")
+  }
+
+  # Validate algorithm: must be 1, 2, or 3
+  if (!is.numeric(algorithm) || !(algorithm %in% c(1, 2, 3))) {
+    rlang::abort("algorithm must be 1 (Louvain), 2 (Leiden), or 3 (SLM)")
+  }
+
   so <- Seurat::RunPCA(
     object = so_in,
     features = Seurat::VariableFeatures(object = so_in),
@@ -24,8 +43,8 @@ seurat_clustering <- function(so_in, npcs_in, resolution = 0.8, algorithm = 3) {
   so <- Seurat::FindNeighbors(so, dims = 1:npcs_in)
   so <- Seurat::FindClusters(
     so,
-    resolution = 0.8,
-    algorithm = 3,
+    resolution = resolution,
+    algorithm = algorithm,
     verbose = TRUE
   )
   so <- Seurat::RunUMAP(so, dims = 1:npcs_in, n.components = 3)

diff --git a/man/convert_human_gene_list.Rd b/man/convert_human_gene_list.Rd
diff --git a/man/make_volcano_plot.Rd b/man/make_volcano_plot.Rd
diff --git a/tests/testthat/fixtures/BRCA_Combine_and_Renormalize_SO_downsample.rds b/tests/testthat/fixtures/BRCA_Combine_and_Renormalize_SO_downsample.rds
diff --git a/tests/testthat/helper-selectData.R b/tests/testthat/helper-selectData.R
@@ -0,0 +1,8 @@
+selectData <- function(dataset) {
+  if (dataset == "wu_et_al_BRCA") {
+    return(readRDS(testthat::test_path(
+      "fixtures/",
+      "BRCA_Combine_and_Renormalize_SO_downsample.rds"
+    )))
+  }
+}
diff --git a/tests/testthat/test-cluster_metrics.R b/tests/testthat/test-cluster_metrics.R
@@ -0,0 +1,121 @@
+brca.data <- selectData("wu_et_al_BRCA")
+brca.clustered <- seurat_clustering(so_in = brca.data, npcs_in = 30)
+
+test_that("clustering metric is consistent for (BRCA)", {
+  clusters <- c("SCT_snn_res.0.2", "SCT_snn_res.0.8")
+
+  res_sil <- cluster_metrics(
+    so = brca.clustered,
+    cluster_list = clusters,
+    dims = 1:20,
+    reduction = "pca",
+    silhouette = TRUE
+  )
+
+  expect_true(is.matrix(res_sil))
+  expect_equal(rownames(res_sil), clusters)
+  expect_equal(
+    colnames(res_sil),
+    c("CalinskiHarabasz", "DaviesBouldin", "Silhouette")
+  )
+
+  # explicit silhouette inclusion check
+  expect_true("Silhouette" %in% colnames(res_sil))
+
+  expect_true(all(is.finite(res_sil)))
+  expect_true(is.numeric(res_sil))
+})
+
+test_that("cluster_metrics returns 2 columns when silhouette is FALSE", {
+  res <- cluster_metrics(
+    so = brca.clustered,
+    cluster_list = c("SCT_snn_res.0.2", "SCT_snn_res.0.8"),
+    dims = 1:20,
+    reduction = "pca",
+    silhouette = FALSE
+  )
+
+  expect_true(is.matrix(res))
+  expect_equal(colnames(res), c("CalinskiHarabasz", "DaviesBouldin"))
+  expect_equal(ncol(res), 2)
+
+  # explicit silhouette omission check
+  expect_false("Silhouette" %in% colnames(res))
+})
+
+test_that("cluster label coercion works for numeric, character, and factor metadata columns", {
+  so_types <- brca.clustered
+  base_label <- "SCT_snn_res.0.2"
+  base_clusters <- as.numeric(unlist(so_types[[base_label]]))
+
+  so_types[["cluster_numeric_test"]] <- base_clusters
+  so_types[["cluster_character_test"]] <- as.character(base_clusters)
+  so_types[["cluster_factor_test"]] <- factor(base_clusters)
+
+  cluster_labels <- c(
+    "cluster_numeric_test",
+    "cluster_character_test",
+    "cluster_factor_test"
+  )
+
+  res_types <- cluster_metrics(
+    so = so_types,
+    cluster_list = cluster_labels,
+    dims = 1:20,
+    reduction = "pca",
+    silhouette = FALSE
+  )
+
+  expect_true(is.matrix(res_types))
+  expect_equal(rownames(res_types), cluster_labels)
+  expect_equal(colnames(res_types), c("CalinskiHarabasz", "DaviesBouldin"))
+  expect_true(all(is.finite(res_types)))
+
+  # all encodings represent the same partition, so scores should match
+  expect_equal(
+    res_types["cluster_numeric_test", ],
+    res_types["cluster_character_test", ],
+    tolerance = 1e-8
+  )
+  expect_equal(
+    res_types["cluster_numeric_test", ],
+    res_types["cluster_factor_test", ],
+    tolerance = 1e-8
+  )
+})
+
+test_that("cluster_metrics errors on missing cluster column", {
+  expect_error(
+    cluster_metrics(
+      so = brca.clustered,
+      cluster_list = c("SCT_snn_res.0.2", "not_a_real_cluster"),
+      dims = 1:20,
+      reduction = "pca",
+      silhouette = TRUE
+    )
+  )
+})
+
+test_that("cluster_metrics errors on invalid reduction", {
+  expect_error(
+    cluster_metrics(
+      so = brca.clustered,
+      cluster_list = c("SCT_snn_res.0.2"),
+      dims = 1:20,
+      reduction = "not_a_reduction",
+      silhouette = TRUE
+    )
+  )
+})
+
+test_that("cluster_metrics errors when dims exceed available PCs", {
+  expect_error(
+    cluster_metrics(
+      so = brca.clustered,
+      cluster_list = c("SCT_snn_res.0.2"),
+      dims = 1:200,
+      reduction = "pca",
+      silhouette = TRUE
+    )
+  )
+})