-
Notifications
You must be signed in to change notification settings - Fork 0
test: add Visualization Unit Tests #49
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 13 commits
ed645ff
747e403
88bde28
f097b13
f55dab0
50c328b
5abb667
b7898e7
b31562a
89bf1c5
0faf7b8
1de3773
7fde883
d6c1fbb
ca42d15
92d67ba
9ad9411
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,29 +8,44 @@ | |
| #' @export | ||
| #' | ||
| #' @return Character vector of mapped mouse genes | ||
| convert_human_gene_list <- function(genes) { | ||
| # data variables must be initialized to silence the R CMD check note: | ||
| # 'no visible binding for global variable' | ||
| gns <- NULL | ||
| convert_human_gene_list <- function(genes = NULL) { | ||
| # Validate input: must be character vector | ||
| if (!is.character(genes)) { | ||
| stop( | ||
| "Input 'genes' must be a character vector, not ", | ||
| paste(class(genes), collapse = " "), | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you'll need to wrap this message in paste0() or glue::glue() |
||
| call. = FALSE | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are you setting |
||
| ) | ||
| } | ||
|
|
||
| # TODO: make this function generic enough to convert any genome to any other | ||
| # Handle empty character vector | ||
| if (length(genes) == 0L) { | ||
| return(character(0)) | ||
| } | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not a fan of having multiple return statements in a function. Ideally you should have only one return statement at the very end. You can use an if-else block instead that sets a result variable depending on the length of the genes list, then returns that variable at the end. also perhaps in the case of empty genes, I think a warning should be emitted |
||
|
|
||
| # Map human SYMBOL to ENTREZID | ||
| egs <- AnnotationDbi::mapIds( | ||
| org.Hs.eg.db::org.Hs.eg.db, | ||
| gns, | ||
| genes, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch! |
||
| "ENTREZID", | ||
| "SYMBOL" | ||
| ) | ||
|
|
||
| # Map human ENTREZID to mouse ENTREZID | ||
| mapped <- AnnotationDbi::select( | ||
| Orthology.eg.db::Orthology.eg.db, | ||
| egs, | ||
| as.character(egs), | ||
| "Mus.musculus", | ||
| "Homo.sapiens" | ||
| ) | ||
|
|
||
| # Map mouse ENTREZID to SYMBOL | ||
| mapped$MUS <- AnnotationDbi::mapIds( | ||
| org.Mm.eg.db::org.Mm.eg.db, | ||
| as.character(mapped$Mus.musculus), | ||
| "SYMBOL", | ||
| "ENTREZID" | ||
| ) | ||
|
|
||
| return(as.character(unlist(mapped$MUS))) | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,25 @@ | |
| #' @return The updated Seurat single cell object with recalculated PCA, | ||
| #' neighbors, UMAP and clusters | ||
| seurat_clustering <- function(so_in, npcs_in, resolution = 0.8, algorithm = 3) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should consider renaming this function to a verb. see https://style.tidyverse.org/functions.html @wong-nw |
||
| # Validate npcs_in: must be a positive integer >= 3 (for UMAP n.components) | ||
| if (!is.numeric(npcs_in) || npcs_in != as.integer(npcs_in) || npcs_in <= 0) { | ||
| rlang::abort("npcs_in must be a positive integer") | ||
| } | ||
|
|
||
| if (npcs_in < 3) { | ||
| rlang::abort("npcs_in must be at least 3 for UMAP computation") | ||
| } | ||
|
|
||
| # Validate resolution: must be positive | ||
| if (!is.numeric(resolution) || resolution <= 0) { | ||
| rlang::abort("resolution must be a positive numeric value") | ||
| } | ||
|
|
||
| # Validate algorithm: must be 1, 2, or 3 | ||
| if (!is.numeric(algorithm) || !(algorithm %in% c(1, 2, 3))) { | ||
| rlang::abort("algorithm must be 1 (Louvain), 2 (Leiden), or 3 (SLM)") | ||
| } | ||
|
|
||
|
Comment on lines
+18
to
+36
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nice input validation! |
||
| so <- Seurat::RunPCA( | ||
| object = so_in, | ||
| features = Seurat::VariableFeatures(object = so_in), | ||
|
|
@@ -24,8 +43,8 @@ seurat_clustering <- function(so_in, npcs_in, resolution = 0.8, algorithm = 3) { | |
| so <- Seurat::FindNeighbors(so, dims = 1:npcs_in) | ||
| so <- Seurat::FindClusters( | ||
| so, | ||
| resolution = 0.8, | ||
| algorithm = 3, | ||
| resolution = resolution, | ||
| algorithm = algorithm, | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch! using parameters is important |
||
| verbose = TRUE | ||
| ) | ||
| so <- Seurat::RunUMAP(so, dims = 1:npcs_in, n.components = 3) | ||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| selectData <- function(dataset) { | ||
|
bianjh-cloud marked this conversation as resolved.
Outdated
|
||
| if (dataset == "wu_et_al_BRCA") { | ||
| return(readRDS(testthat::test_path( | ||
| "fixtures/", | ||
|
bianjh-cloud marked this conversation as resolved.
Outdated
|
||
| "BRCA_Combine_and_Renormalize_SO_downsample.rds" | ||
|
bianjh-cloud marked this conversation as resolved.
Outdated
|
||
| ))) | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| brca.data <- selectData("wu_et_al_BRCA") | ||
| brca.clustered <- seurat_clustering(so_in = brca.data, npcs_in = 30) | ||
|
bianjh-cloud marked this conversation as resolved.
Outdated
|
||
|
|
||
| test_that("clustering metric is consistent for (BRCA)", { | ||
| clusters <- c("SCT_snn_res.0.2", "SCT_snn_res.0.8") | ||
|
|
||
| res_sil <- cluster_metrics( | ||
| so = brca.clustered, | ||
| cluster_list = clusters, | ||
| dims = 1:20, | ||
| reduction = "pca", | ||
| silhouette = TRUE | ||
| ) | ||
|
|
||
| expect_true(is.matrix(res_sil)) | ||
| expect_equal(rownames(res_sil), clusters) | ||
| expect_equal( | ||
| colnames(res_sil), | ||
| c("CalinskiHarabasz", "DaviesBouldin", "Silhouette") | ||
| ) | ||
|
|
||
| # explicit silhouette inclusion check | ||
| expect_true("Silhouette" %in% colnames(res_sil)) | ||
|
|
||
| expect_true(all(is.finite(res_sil))) | ||
| expect_true(is.numeric(res_sil)) | ||
| }) | ||
|
|
||
| test_that("cluster_metrics returns 2 columns when silhouette is FALSE", { | ||
| res <- cluster_metrics( | ||
| so = brca.clustered, | ||
| cluster_list = c("SCT_snn_res.0.2", "SCT_snn_res.0.8"), | ||
| dims = 1:20, | ||
| reduction = "pca", | ||
| silhouette = FALSE | ||
| ) | ||
|
|
||
| expect_true(is.matrix(res)) | ||
| expect_equal(colnames(res), c("CalinskiHarabasz", "DaviesBouldin")) | ||
| expect_equal(ncol(res), 2) | ||
|
|
||
| # explicit silhouette omission check | ||
| expect_false("Silhouette" %in% colnames(res)) | ||
| }) | ||
|
|
||
| test_that("cluster label coercion works for numeric, character, and factor metadata columns", { | ||
| so_types <- brca.clustered | ||
| base_label <- "SCT_snn_res.0.2" | ||
| base_clusters <- as.numeric(unlist(so_types[[base_label]])) | ||
|
|
||
| so_types[["cluster_numeric_test"]] <- base_clusters | ||
| so_types[["cluster_character_test"]] <- as.character(base_clusters) | ||
| so_types[["cluster_factor_test"]] <- factor(base_clusters) | ||
|
|
||
| cluster_labels <- c( | ||
| "cluster_numeric_test", | ||
| "cluster_character_test", | ||
| "cluster_factor_test" | ||
| ) | ||
|
|
||
| res_types <- cluster_metrics( | ||
| so = so_types, | ||
| cluster_list = cluster_labels, | ||
| dims = 1:20, | ||
| reduction = "pca", | ||
| silhouette = FALSE | ||
| ) | ||
|
|
||
| expect_true(is.matrix(res_types)) | ||
| expect_equal(rownames(res_types), cluster_labels) | ||
| expect_equal(colnames(res_types), c("CalinskiHarabasz", "DaviesBouldin")) | ||
| expect_true(all(is.finite(res_types))) | ||
|
|
||
| # all encodings represent the same partition, so scores should match | ||
| expect_equal( | ||
| res_types["cluster_numeric_test", ], | ||
| res_types["cluster_character_test", ], | ||
| tolerance = 1e-8 | ||
| ) | ||
| expect_equal( | ||
| res_types["cluster_numeric_test", ], | ||
| res_types["cluster_factor_test", ], | ||
| tolerance = 1e-8 | ||
| ) | ||
| }) | ||
|
|
||
| test_that("cluster_metrics errors on missing cluster column", { | ||
| expect_error( | ||
| cluster_metrics( | ||
| so = brca.clustered, | ||
| cluster_list = c("SCT_snn_res.0.2", "not_a_real_cluster"), | ||
| dims = 1:20, | ||
| reduction = "pca", | ||
| silhouette = TRUE | ||
| ) | ||
| ) | ||
| }) | ||
|
|
||
| test_that("cluster_metrics errors on invalid reduction", { | ||
| expect_error( | ||
| cluster_metrics( | ||
| so = brca.clustered, | ||
| cluster_list = c("SCT_snn_res.0.2"), | ||
| dims = 1:20, | ||
| reduction = "not_a_reduction", | ||
| silhouette = TRUE | ||
| ) | ||
| ) | ||
| }) | ||
|
|
||
| test_that("cluster_metrics errors when dims exceed available PCs", { | ||
| expect_error( | ||
| cluster_metrics( | ||
| so = brca.clustered, | ||
| cluster_list = c("SCT_snn_res.0.2"), | ||
| dims = 1:200, | ||
| reduction = "pca", | ||
| silhouette = TRUE | ||
| ) | ||
| ) | ||
| }) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
since this function currently only supports mouse -> human perhaps the name should reflect that. in the future we can add a more generic function that handles any-to-any genome.