Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 0 additions & 70 deletions CITATION.cff

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 22 additions & 7 deletions R/convert_human_gene_list.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,44 @@
#' @export
#'
#' @return Character vector of mapped mouse genes
convert_human_gene_list <- function(genes) {
# data variables must be initialized to silence the R CMD check note:
# 'no visible binding for global variable'
gns <- NULL
convert_human_gene_list <- function(genes = NULL) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since this function currently only supports mouse -> human perhaps the name should reflect that. in the future we can add a more generic function that handles any-to-any genome.

Suggested change
convert_human_gene_list <- function(genes = NULL) {
convert_mouse_to_human_gene_list <- function(genes = NULL) {

# Validate input: must be character vector
if (!is.character(genes)) {
stop(
"Input 'genes' must be a character vector, not ",
paste(class(genes), collapse = " "),

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you'll need to wrap this message in paste0() or glue::glue()

call. = FALSE

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why are you setting call. = FALSE?

)
}

# TODO: make this function generic enough to convert any genome to any other
# Handle empty character vector
if (length(genes) == 0L) {
return(character(0))
}

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not a fan of having multiple return statements in a function. Ideally you should have only one return statement at the very end. You can use an if-else block instead that sets a result variable depending on the length of the genes list, then returns that variable at the end.

also perhaps in the case of empty genes, I think a warning should be emitted


# Map human SYMBOL to ENTREZID
egs <- AnnotationDbi::mapIds(
org.Hs.eg.db::org.Hs.eg.db,
gns,
genes,

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch!

"ENTREZID",
"SYMBOL"
)

# Map human ENTREZID to mouse ENTREZID
mapped <- AnnotationDbi::select(
Orthology.eg.db::Orthology.eg.db,
egs,
as.character(egs),
"Mus.musculus",
"Homo.sapiens"
)

# Map mouse ENTREZID to SYMBOL
mapped$MUS <- AnnotationDbi::mapIds(
org.Mm.eg.db::org.Mm.eg.db,
as.character(mapped$Mus.musculus),
"SYMBOL",
"ENTREZID"
)

return(as.character(unlist(mapped$MUS)))
}
25 changes: 14 additions & 11 deletions R/make_volcano_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#' differential expression table generated by Seurat
#'
#' @param de_table Differential expression table generated by Seurat containing
#' p_val, pct.1, pct.2 avg_log2FC and p_val_adj
#' p_val, pct.1, pct.2 avg_log_2_fc and p_val_adj
#' @param logfc Boolean to color genes meeting logfc threshold of 1.5
#' @param pval Boolean to color genes meeting p-value threshold of 0.05
#' @param significant Boolean to color genes that meet both logfc and p-value
Expand All @@ -26,33 +26,36 @@ make_volcano_plot <- function(
) {
log10_p <- -log10(de_table$p_val_adj)
log10_p[which(de_table$p_val_adj == 0)] <- 500
avg_log2FC <- de_table$avg_log2FC
avg_log_2_fc <- de_table$avg_log_2_fc
gene <- rownames(de_table)
significance <- vector(length = length(log10_p))
significance[] <- "NotSignificant"
if (logfc == TRUE) {
significance[which(abs(de_table$avg_log2FC) > 1.5)] <- "avg_log2FC > 1.5"
significance[which(
abs(de_table$avg_log_2_fc) > 1.5
)] <- "avg_log_2_fc > 1.5"
}
if (isTRUE(pval)) {
significance[which(de_table$p_val_adj < 0.05)] <- "p_val_adj < 0.05"
}
if (isTRUE(significant)) {
significance[which(
abs(de_table$avg_log2FC) > 1.5 &
abs(de_table$avg_log_2_fc) > 1.5 &
de_table$p_val_adj < 0.05
)] <- "p_val_adj < 0.05 & avg_log2FC > 1.5"
)] <- "p_val_adj < 0.05 & avg_log_2_fc > 1.5"
}
df <- as.data.frame(cbind(avg_log2FC, log10_p, significance, gene))
df <- as.data.frame(cbind(avg_log_2_fc, log10_p, significance, gene))
colnames(df) <- c("avg_log_2_fc", "log10_p", "significance", "gene")
rownames(df) <- rownames(de_table)
df[, 1] <- as.numeric(df[, 1])
df[, 2] <- as.numeric(df[, 2])
df[, 3] <- factor(
df[, 3],
levels = c(
"NotSignificant",
"avg_log2FC > 1.5",
"avg_log_2_fc > 1.5",
"p_val_adj < 0.05",
"p_val_adj < 0.05 & avg_log2FC > 1.5"
"p_val_adj < 0.05 & avg_log_2_fc > 1.5"
)
)
data_subset <- NULL
Expand All @@ -68,13 +71,13 @@ make_volcano_plot <- function(

volcano <- ggplot2::ggplot(
df,
ggplot2::aes(x = avg_log2FC, y = log10_p, col = significance)
ggplot2::aes(x = avg_log_2_fc, y = log10_p, col = significance)
) +
ggplot2::geom_point() +
# geom_vline(xintercept = 0, color = "black", linewidth = 1.5)+
ggplot2::xlim(
-ceiling(max(abs(df$avg_log2FC))),
ceiling(max(abs(df$avg_log2FC)))
-ceiling(max(abs(df$avg_log_2_fc))),
ceiling(max(abs(df$avg_log_2_fc)))
)

if (!is.null(data_subset)) {
Expand Down
23 changes: 21 additions & 2 deletions R/seurat_clustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,25 @@
#' @return The updated Seurat single cell object with recalculated PCA,
#' neighbors, UMAP and clusters
seurat_clustering <- function(so_in, npcs_in, resolution = 0.8, algorithm = 3) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should consider renaming this function to a verb. see https://style.tidyverse.org/functions.html @wong-nw

# Validate npcs_in: must be a positive integer >= 3 (for UMAP n.components)
if (!is.numeric(npcs_in) || npcs_in != as.integer(npcs_in) || npcs_in <= 0) {
rlang::abort("npcs_in must be a positive integer")
}

if (npcs_in < 3) {
rlang::abort("npcs_in must be at least 3 for UMAP computation")
}

# Validate resolution: must be positive
if (!is.numeric(resolution) || resolution <= 0) {
rlang::abort("resolution must be a positive numeric value")
}

# Validate algorithm: must be 1, 2, or 3
if (!is.numeric(algorithm) || !(algorithm %in% c(1, 2, 3))) {
rlang::abort("algorithm must be 1 (Louvain), 2 (Leiden), or 3 (SLM)")
}

Comment on lines +18 to +36

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice input validation!

so <- Seurat::RunPCA(
object = so_in,
features = Seurat::VariableFeatures(object = so_in),
Expand All @@ -24,8 +43,8 @@ seurat_clustering <- function(so_in, npcs_in, resolution = 0.8, algorithm = 3) {
so <- Seurat::FindNeighbors(so, dims = 1:npcs_in)
so <- Seurat::FindClusters(
so,
resolution = 0.8,
algorithm = 3,
resolution = resolution,
algorithm = algorithm,

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch! using parameters is important

verbose = TRUE
)
so <- Seurat::RunUMAP(so, dims = 1:npcs_in, n.components = 3)
Expand Down
2 changes: 1 addition & 1 deletion man/convert_human_gene_list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/make_volcano_plot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Binary file not shown.
8 changes: 8 additions & 0 deletions tests/testthat/helper-selectData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
selectData <- function(dataset) {
Comment thread
bianjh-cloud marked this conversation as resolved.
Outdated
if (dataset == "wu_et_al_BRCA") {
return(readRDS(testthat::test_path(
"fixtures/",
Comment thread
bianjh-cloud marked this conversation as resolved.
Outdated
"BRCA_Combine_and_Renormalize_SO_downsample.rds"
Comment thread
bianjh-cloud marked this conversation as resolved.
Outdated
)))
}
}
121 changes: 121 additions & 0 deletions tests/testthat/test-cluster_metrics.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
brca.data <- selectData("wu_et_al_BRCA")
brca.clustered <- seurat_clustering(so_in = brca.data, npcs_in = 30)
Comment thread
bianjh-cloud marked this conversation as resolved.
Outdated

test_that("clustering metric is consistent for (BRCA)", {
clusters <- c("SCT_snn_res.0.2", "SCT_snn_res.0.8")

res_sil <- cluster_metrics(
so = brca.clustered,
cluster_list = clusters,
dims = 1:20,
reduction = "pca",
silhouette = TRUE
)

expect_true(is.matrix(res_sil))
expect_equal(rownames(res_sil), clusters)
expect_equal(
colnames(res_sil),
c("CalinskiHarabasz", "DaviesBouldin", "Silhouette")
)

# explicit silhouette inclusion check
expect_true("Silhouette" %in% colnames(res_sil))

expect_true(all(is.finite(res_sil)))
expect_true(is.numeric(res_sil))
})

test_that("cluster_metrics returns 2 columns when silhouette is FALSE", {
res <- cluster_metrics(
so = brca.clustered,
cluster_list = c("SCT_snn_res.0.2", "SCT_snn_res.0.8"),
dims = 1:20,
reduction = "pca",
silhouette = FALSE
)

expect_true(is.matrix(res))
expect_equal(colnames(res), c("CalinskiHarabasz", "DaviesBouldin"))
expect_equal(ncol(res), 2)

# explicit silhouette omission check
expect_false("Silhouette" %in% colnames(res))
})

test_that("cluster label coercion works for numeric, character, and factor metadata columns", {
so_types <- brca.clustered
base_label <- "SCT_snn_res.0.2"
base_clusters <- as.numeric(unlist(so_types[[base_label]]))

so_types[["cluster_numeric_test"]] <- base_clusters
so_types[["cluster_character_test"]] <- as.character(base_clusters)
so_types[["cluster_factor_test"]] <- factor(base_clusters)

cluster_labels <- c(
"cluster_numeric_test",
"cluster_character_test",
"cluster_factor_test"
)

res_types <- cluster_metrics(
so = so_types,
cluster_list = cluster_labels,
dims = 1:20,
reduction = "pca",
silhouette = FALSE
)

expect_true(is.matrix(res_types))
expect_equal(rownames(res_types), cluster_labels)
expect_equal(colnames(res_types), c("CalinskiHarabasz", "DaviesBouldin"))
expect_true(all(is.finite(res_types)))

# all encodings represent the same partition, so scores should match
expect_equal(
res_types["cluster_numeric_test", ],
res_types["cluster_character_test", ],
tolerance = 1e-8
)
expect_equal(
res_types["cluster_numeric_test", ],
res_types["cluster_factor_test", ],
tolerance = 1e-8
)
})

test_that("cluster_metrics errors on missing cluster column", {
expect_error(
cluster_metrics(
so = brca.clustered,
cluster_list = c("SCT_snn_res.0.2", "not_a_real_cluster"),
dims = 1:20,
reduction = "pca",
silhouette = TRUE
)
)
})

test_that("cluster_metrics errors on invalid reduction", {
expect_error(
cluster_metrics(
so = brca.clustered,
cluster_list = c("SCT_snn_res.0.2"),
dims = 1:20,
reduction = "not_a_reduction",
silhouette = TRUE
)
)
})

test_that("cluster_metrics errors when dims exceed available PCs", {
expect_error(
cluster_metrics(
so = brca.clustered,
cluster_list = c("SCT_snn_res.0.2"),
dims = 1:200,
reduction = "pca",
silhouette = TRUE
)
)
})
Loading
Loading