Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/git-host/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ backon = "1.5.1"
chrono = { version = "0.4", features = ["serde"] }
db = { path = "../db" }
enum_dispatch = "0.3.13"
reqwest = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tempfile = "3.21"
Expand Down
188 changes: 187 additions & 1 deletion crates/git-host/src/detection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use crate::types::ProviderKind;
/// - GitHub.com: `https://github.com/owner/repo` or `git@github.com:owner/repo.git`
/// - GitHub Enterprise: URLs containing `github.` (e.g., `https://github.company.com/owner/repo`)
/// - Azure DevOps: `https://dev.azure.com/org/project/_git/repo` or legacy `https://org.visualstudio.com/...`
/// - Gitea/Forgejo: instances registered via `GITEA_URL` env var, or URLs containing
/// `/pulls/` (Gitea PR URL pattern), or `gitea.` / `forgejo.` in the hostname
pub(crate) fn detect_provider_from_url(url: &str) -> ProviderKind {
let url_lower = url.to_lowercase();

Expand All @@ -33,15 +35,81 @@ pub(crate) fn detect_provider_from_url(url: &str) -> ProviderKind {
return ProviderKind::GitHub;
}

// Gitea/Forgejo: explicit GITEA_URL match
if let Ok(gitea_url) = std::env::var("GITEA_URL") {
let gitea_host = gitea_url
.to_lowercase()
.trim_start_matches("https://")
.trim_start_matches("http://")
.trim_end_matches('/')
.to_string();
if !gitea_host.is_empty() && url_lower.contains(&gitea_host) {
return ProviderKind::Gitea;
}
}
Comment thread
cursor[bot] marked this conversation as resolved.

// Gitea PR URL pattern: /pulls/ in path (GitHub uses /pull/, Azure uses /pullrequest/)
if url_lower.contains("/pulls/") {
return ProviderKind::Gitea;
}
Comment thread
cursor[bot] marked this conversation as resolved.
Outdated

// Well-known Gitea/Forgejo hostnames
if url_lower.contains("gitea.") || url_lower.contains("forgejo.") {
return ProviderKind::Gitea;
}

// Codeberg is a large Forgejo instance
if url_lower.contains("codeberg.org") {
return ProviderKind::Gitea;
}

ProviderKind::Unknown
}

/// Extract the base URL for a Gitea instance from a remote or PR URL.
///
/// Uses `GITEA_URL` env var only when the URL matches the configured instance.
/// Otherwise derives the base URL from the URL itself.
pub(crate) fn gitea_base_url(url: &str) -> String {
// Use env var only if the URL actually matches the configured instance
if let Ok(gitea_url) = std::env::var("GITEA_URL") {
let gitea_host = gitea_url
.to_lowercase()
.trim_start_matches("https://")
.trim_start_matches("http://")
.trim_end_matches('/')
.to_string();
if !gitea_host.is_empty() && url.to_lowercase().contains(&gitea_host) {
return gitea_url.trim_end_matches('/').to_string();
}
}

// Derive from URL
if let Ok(parsed) = url::Url::parse(url) {
let mut base = format!("{}://{}", parsed.scheme(), parsed.host_str().unwrap_or(""));
if let Some(port) = parsed.port() {
base.push_str(&format!(":{port}"));
}
return base;
}
Comment thread
cursor[bot] marked this conversation as resolved.

// SSH-style: git@host:owner/repo.git → https://host
if let Some(host_part) = url.strip_prefix("git@")
&& let Some(host) = host_part.split(':').next()
{
return format!("https://{host}");
}

url.to_string()
}

/// Detect the git hosting provider from a PR URL.
///
/// Supports:
/// - GitHub: `https://github.com/owner/repo/pull/123`
/// - GitHub Enterprise: `https://github.company.com/owner/repo/pull/123`
/// - Azure DevOps: `https://dev.azure.com/org/project/_git/repo/pullrequest/123`
/// - Gitea/Forgejo: `https://gitea.example.com/owner/repo/pulls/123`
#[cfg(test)]
fn detect_provider_from_pr_url(pr_url: &str) -> ProviderKind {
let url_lower = pr_url.to_lowercase();
Expand All @@ -59,7 +127,7 @@ fn detect_provider_from_pr_url(pr_url: &str) -> ProviderKind {
return ProviderKind::AzureDevOps;
}

// Fall back to general URL detection
// Fall back to general URL detection (handles Gitea /pulls/ pattern too)
detect_provider_from_url(pr_url)
}

Expand Down Expand Up @@ -136,6 +204,38 @@ mod tests {
);
}

#[test]
fn test_gitea_well_known_hostname() {
    // Hosts whose name contains `gitea.` or `forgejo.` must be classified as Gitea.
    let remotes = [
        "https://gitea.company.com/owner/repo",
        "https://forgejo.example.org/owner/repo",
    ];
    for remote in remotes {
        assert_eq!(detect_provider_from_url(remote), ProviderKind::Gitea);
    }
}

#[test]
fn test_gitea_codeberg() {
    // Codeberg remotes, in both HTTPS and SSH-shorthand form, map to the Gitea provider.
    let remotes = [
        "https://codeberg.org/owner/repo",
        "git@codeberg.org:owner/repo.git",
    ];
    for remote in remotes {
        assert_eq!(detect_provider_from_url(remote), ProviderKind::Gitea);
    }
}

#[test]
fn test_gitea_pr_url_pattern() {
    // A `/pulls/` path segment alone is enough to classify an otherwise unknown host.
    let detected = detect_provider_from_url("https://git.example.com/owner/repo/pulls/42");
    assert_eq!(detected, ProviderKind::Gitea);
}

#[test]
fn test_unknown_provider() {
assert_eq!(
Expand Down Expand Up @@ -175,4 +275,90 @@ mod tests {
ProviderKind::AzureDevOps
);
}

#[test]
fn test_pr_url_gitea() {
    // Gitea-style PR URLs (`/pulls/<n>`) on a gitea.* host and on Codeberg.
    let pr_urls = [
        "https://gitea.example.com/owner/repo/pulls/42",
        "https://codeberg.org/owner/repo/pulls/7",
    ];
    for pr_url in pr_urls {
        assert_eq!(detect_provider_from_pr_url(pr_url), ProviderKind::Gitea);
    }
}

#[test]
fn test_gitea_base_url_from_https() {
    // The path is dropped; only scheme and host remain.
    assert_eq!(
        super::gitea_base_url("https://gitea.example.com/owner/repo.git"),
        "https://gitea.example.com"
    );
}

#[test]
fn test_gitea_base_url_with_port() {
    // An explicit port in the remote URL is kept in the base URL.
    assert_eq!(
        super::gitea_base_url("http://localhost:3000/owner/repo"),
        "http://localhost:3000"
    );
}

#[test]
fn test_gitea_base_url_from_ssh() {
    // SSH shorthand remotes are translated to an HTTPS base URL on the same host.
    assert_eq!(
        super::gitea_base_url("git@gitea.example.com:owner/repo.git"),
        "https://gitea.example.com"
    );
}

// Edge-case tests for GITEA_URL handling (Bugbot findings)
//
// SAFETY: These tests manipulate env vars which is unsafe in Rust 2024.
// They must run single-threaded (--test-threads=1) to avoid races.

/// Set the `GITEA_URL` environment variable to `val` for the current process.
///
/// # Safety
///
/// Mutating the process environment is unsafe in Rust 2024: the caller must
/// ensure no other thread reads or writes the environment concurrently
/// (run these tests with `--test-threads=1`).
unsafe fn set_gitea_url(val: &str) {
    // SAFETY: the caller upholds the single-threaded-access contract documented above.
    unsafe { std::env::set_var("GITEA_URL", val) };
}

/// Remove the `GITEA_URL` environment variable from the current process.
///
/// # Safety
///
/// Mutating the process environment is unsafe in Rust 2024: the caller must
/// ensure no other thread reads or writes the environment concurrently
/// (run these tests with `--test-threads=1`).
unsafe fn remove_gitea_url() {
    // SAFETY: the caller upholds the single-threaded-access contract documented above.
    unsafe { std::env::remove_var("GITEA_URL") };
}

#[test]
fn test_empty_gitea_url_does_not_match_all() {
    // str::contains("") is always true in Rust — ensure we guard against that.
    // SAFETY: env-mutating tests are run single-threaded (--test-threads=1).
    unsafe { set_gitea_url("") };
    let gitlab = detect_provider_from_url("https://gitlab.com/owner/repo");
    let bitbucket = detect_provider_from_url("https://bitbucket.org/owner/repo");
    // Clean up BEFORE asserting so a failed assertion cannot leave GITEA_URL set
    // for tests that run afterwards.
    // SAFETY: as above.
    unsafe { remove_gitea_url() };

    assert_eq!(gitlab, ProviderKind::Unknown);
    assert_eq!(bitbucket, ProviderKind::Unknown);
}

#[test]
fn test_scheme_only_gitea_url_does_not_match_all() {
    // A GITEA_URL consisting only of a scheme normalizes to an empty host,
    // which must not match arbitrary URLs.
    // SAFETY: env-mutating tests are run single-threaded (--test-threads=1).
    unsafe { set_gitea_url("https://") };
    let detected = detect_provider_from_url("https://gitlab.com/owner/repo");
    // Clean up BEFORE asserting so a failed assertion cannot leave GITEA_URL set
    // for tests that run afterwards.
    // SAFETY: as above.
    unsafe { remove_gitea_url() };

    assert_eq!(detected, ProviderKind::Unknown);
}

#[test]
fn test_gitea_base_url_derives_from_url_when_env_differs() {
    // GITEA_URL points to one instance, but the URL is for Codeberg —
    // the base URL should be derived from the URL, not the env var.
    // SAFETY: env-mutating tests are run single-threaded (--test-threads=1).
    unsafe { set_gitea_url("https://gitea.company.com") };
    let base = super::gitea_base_url("https://codeberg.org/owner/repo.git");
    // Clean up BEFORE asserting so a failed assertion cannot leave GITEA_URL set
    // for tests that run afterwards.
    // SAFETY: as above.
    unsafe { remove_gitea_url() };

    assert_eq!(base, "https://codeberg.org");
}

#[test]
fn test_gitea_base_url_uses_env_when_matching() {
    // When the URL belongs to the configured instance, GITEA_URL is the base URL.
    // SAFETY: env-mutating tests are run single-threaded (--test-threads=1).
    unsafe { set_gitea_url("https://gitea.company.com") };
    let base = super::gitea_base_url("https://gitea.company.com/owner/repo.git");
    // Clean up BEFORE asserting so a failed assertion cannot leave GITEA_URL set
    // for tests that run afterwards.
    // SAFETY: as above.
    unsafe { remove_gitea_url() };

    assert_eq!(base, "https://gitea.company.com");
}
}
Loading