Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/git-host/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ backon = "1.5.1"
chrono = { version = "0.4", features = ["serde"] }
db = { path = "../db" }
enum_dispatch = "0.3.13"
reqwest = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tempfile = "3.21"
Expand Down
203 changes: 202 additions & 1 deletion crates/git-host/src/detection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use crate::types::ProviderKind;
/// - GitHub.com: `https://github.com/owner/repo` or `git@github.com:owner/repo.git`
/// - GitHub Enterprise: URLs containing `github.` (e.g., `https://github.company.com/owner/repo`)
/// - Azure DevOps: `https://dev.azure.com/org/project/_git/repo` or legacy `https://org.visualstudio.com/...`
/// - Gitea/Forgejo: instances registered via `GITEA_URL` env var, or well-known
/// hostnames (`gitea.*`, `forgejo.*`, `codeberg.org`)
pub(crate) fn detect_provider_from_url(url: &str) -> ProviderKind {
let url_lower = url.to_lowercase();

Expand All @@ -33,15 +35,82 @@ pub(crate) fn detect_provider_from_url(url: &str) -> ProviderKind {
return ProviderKind::GitHub;
}

// Gitea/Forgejo: explicit GITEA_URL match
if let Ok(gitea_url) = std::env::var("GITEA_URL") {
let gitea_host = gitea_url
.to_lowercase()
.trim_start_matches("https://")
.trim_start_matches("http://")
.trim_end_matches('/')
.to_string();
if !gitea_host.is_empty() && url_lower.contains(&gitea_host) {
return ProviderKind::Gitea;
}
}

// Well-known Gitea/Forgejo hostnames
if url_lower.contains("gitea.") || url_lower.contains("forgejo.") {
return ProviderKind::Gitea;
}

// Codeberg is a large Forgejo instance
if url_lower.contains("codeberg.org") {
return ProviderKind::Gitea;
}

ProviderKind::Unknown
}

/// Extract the web base URL (`scheme://host[:port]`) of a Gitea instance from a
/// remote or PR URL.
///
/// Resolution order:
/// 1. If the `GITEA_URL` env var is set and names the same host as `url`, the
///    configured value is returned with trailing `/` removed. Hosts are compared
///    as whole names (case-insensitively), not as substrings, so a configured
///    `gitea.company.com` no longer matches `notgitea.company.com` or a URL that
///    merely mentions the name in its path.
/// 2. Otherwise the base is derived from `url` itself. HTTP(S) remotes keep their
///    scheme and explicit port; every other form (`ssh://`, `git://`, scp-like
///    `user@host:path`) maps to `https://host` with the port dropped, because
///    that port belongs to the git transport rather than the web endpoint.
/// 3. If no host can be extracted, `url` is returned unchanged.
pub(crate) fn gitea_base_url(url: &str) -> String {
    let remote = GiteaRemoteHost::parse(url);

    // Use the env var only if the URL actually points at the configured instance.
    if let Some(remote) = &remote
        && let Ok(configured) = std::env::var("GITEA_URL")
        && remote.is_instance(&configured)
    {
        return configured.trim_end_matches('/').to_string();
    }

    match remote {
        Some(remote) => remote.base_url(),
        None => url.to_string(),
    }
}

/// Host information pulled out of a git remote or PR URL.
struct GiteaRemoteHost {
    /// Scheme to use for the web base URL (`http` or `https`).
    scheme: &'static str,
    /// Host name as written in the remote.
    host: String,
    /// Explicit port; only ever set for HTTP(S) remotes.
    port: Option<u16>,
    /// Whether the remote itself was an HTTP(S) URL.
    is_web: bool,
}

impl GiteaRemoteHost {
    /// Parses `scheme://[user@]host[:port]/...` URLs and scp-like `user@host:path` remotes.
    fn parse(url: &str) -> Option<Self> {
        if let Ok(parsed) = url::Url::parse(url)
            && let Some(host) = parsed.host_str()
            && !host.is_empty()
        {
            // Force HTTPS for non-HTTP schemes (ssh://, git://).
            let (scheme, is_web) = match parsed.scheme() {
                "http" => ("http", true),
                "https" => ("https", true),
                _ => ("https", false),
            };
            return Some(Self {
                scheme,
                host: host.to_string(),
                // An ssh:// or git:// port is not the port of the web endpoint.
                port: if is_web { parsed.port() } else { None },
                is_web,
            });
        }

        // scp-like: user@host:owner/repo.git → https://host (any user, not just `git`).
        let before_colon = url.split(':').next().unwrap_or(url);
        let (user, host) = before_colon.rsplit_once('@')?;
        if user.is_empty() || host.is_empty() || before_colon.contains('/') {
            return None;
        }
        Some(Self {
            scheme: "https",
            host: host.to_string(),
            port: None,
            is_web: false,
        })
    }

    /// Returns true when `configured` (a `GITEA_URL` value) names this remote's host.
    fn is_instance(&self, configured: &str) -> bool {
        let lowered = configured.to_ascii_lowercase();
        let rest = lowered
            .strip_prefix("https://")
            .or_else(|| lowered.strip_prefix("http://"))
            .unwrap_or(lowered.as_str());

        // Authority is everything before the first `/`, minus any `user:pass@` part.
        let authority = rest.split('/').next().unwrap_or_default();
        let authority = authority.rsplit_once('@').map_or(authority, |(_, a)| a);

        let (cfg_host, cfg_port) = match authority.rsplit_once(':') {
            Some((host, port)) => match port.parse::<u16>() {
                Ok(port) => (host, Some(port)),
                // Not a port (e.g. part of a bracketed IPv6 literal): keep it whole.
                Err(_) => (authority, None),
            },
            None => (authority, None),
        };

        // Guards against empty / scheme-only GITEA_URL values matching everything.
        if cfg_host.is_empty() || !cfg_host.eq_ignore_ascii_case(&self.host) {
            return false;
        }

        // A configured port only has to agree with HTTP(S) remotes; an SSH remote's
        // port says nothing about which web port the instance listens on.
        match cfg_port {
            Some(port) if self.is_web => self.port == Some(port),
            _ => true,
        }
    }

    /// Formats the `scheme://host[:port]` base URL.
    fn base_url(&self) -> String {
        match self.port {
            Some(port) => format!("{}://{}:{port}", self.scheme, self.host),
            None => format!("{}://{}", self.scheme, self.host),
        }
    }
}

/// Detect the git hosting provider from a PR URL.
///
/// Supports:
/// - GitHub: `https://github.com/owner/repo/pull/123`
/// - GitHub Enterprise: `https://github.company.com/owner/repo/pull/123`
/// - Azure DevOps: `https://dev.azure.com/org/project/_git/repo/pullrequest/123`
/// - Gitea/Forgejo: `https://gitea.example.com/owner/repo/pulls/123`
#[cfg(test)]
fn detect_provider_from_pr_url(pr_url: &str) -> ProviderKind {
let url_lower = pr_url.to_lowercase();
Expand All @@ -59,7 +128,7 @@ fn detect_provider_from_pr_url(pr_url: &str) -> ProviderKind {
return ProviderKind::AzureDevOps;
}

// Fall back to general URL detection (Gitea/Forgejo are recognised there by
// hostname; a `/pulls/` path alone is deliberately not treated as Gitea)
detect_provider_from_url(pr_url)
}

Expand Down Expand Up @@ -136,6 +205,39 @@ mod tests {
);
}

#[test]
fn test_gitea_well_known_hostname() {
    // URLs containing `gitea.` or `forgejo.` are both classified as Gitea.
    let gitea = detect_provider_from_url("https://gitea.company.com/owner/repo");
    let forgejo = detect_provider_from_url("https://forgejo.example.org/owner/repo");

    assert_eq!(gitea, ProviderKind::Gitea);
    assert_eq!(forgejo, ProviderKind::Gitea);
}

#[test]
fn test_gitea_codeberg() {
    // Codeberg must be recognised for both HTTPS and scp-like SSH remotes.
    let remotes = [
        "https://codeberg.org/owner/repo",
        "git@codeberg.org:owner/repo.git",
    ];
    for remote in remotes {
        assert_eq!(detect_provider_from_url(remote), ProviderKind::Gitea);
    }
}

#[test]
fn test_unknown_url_with_pulls_not_detected_as_gitea() {
    // A `/pulls/` path segment on an unrelated host must not be enough to
    // classify the URL as Gitea — prevents token leakage.
    let detected = detect_provider_from_url("https://evil.com/x/y/pulls/1");
    assert_eq!(detected, ProviderKind::Unknown);
}

#[test]
fn test_unknown_provider() {
assert_eq!(
Expand Down Expand Up @@ -175,4 +277,103 @@ mod tests {
ProviderKind::AzureDevOps
);
}

#[test]
fn test_pr_url_gitea() {
    // Gitea-style PR URLs (`/pulls/<n>`) on known hosts resolve to Gitea.
    let pr_urls = [
        "https://gitea.example.com/owner/repo/pulls/42",
        "https://codeberg.org/owner/repo/pulls/7",
    ];
    for pr_url in pr_urls {
        assert_eq!(detect_provider_from_pr_url(pr_url), ProviderKind::Gitea);
    }
}

#[test]
fn test_gitea_base_url_from_https() {
    // Path and `.git` suffix are dropped; scheme and host are kept.
    assert_eq!(
        super::gitea_base_url("https://gitea.example.com/owner/repo.git"),
        "https://gitea.example.com"
    );
}

#[test]
fn test_gitea_base_url_with_port() {
    // An explicit port on an HTTP remote is part of the base URL.
    assert_eq!(
        super::gitea_base_url("http://localhost:3000/owner/repo"),
        "http://localhost:3000"
    );
}

#[test]
fn test_gitea_base_url_from_ssh() {
    // scp-like `git@host:path` remotes map to an HTTPS base on the same host.
    assert_eq!(
        super::gitea_base_url("git@gitea.example.com:owner/repo.git"),
        "https://gitea.example.com"
    );
}

#[test]
fn test_gitea_base_url_from_ssh_scheme() {
    // ssh:// URLs should produce an https:// base, not ssh://
    assert_eq!(
        super::gitea_base_url("ssh://git@gitea.example.com/owner/repo.git"),
        "https://gitea.example.com"
    );
}

#[test]
fn test_gitea_base_url_from_git_scheme() {
    // git:// URLs are likewise rewritten to an https:// base.
    assert_eq!(
        super::gitea_base_url("git://gitea.example.com/owner/repo.git"),
        "https://gitea.example.com"
    );
}

// Edge-case tests for GITEA_URL handling (Bugbot findings)
//
// These tests mutate the process environment, which is `unsafe` in Rust 2024.
// `GiteaUrlGuard` serialises the tests that *write* `GITEA_URL` and restores the
// previous value even when an assertion panics, so a failing test cannot leak
// its setting into later tests. Tests that only *read* the variable do not take
// the lock, so run with `--test-threads=1` to rule out races with them as well.

/// Serialises every test that mutates `GITEA_URL`.
static GITEA_ENV_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());

/// Sets `GITEA_URL` to `val`.
///
/// # Safety
/// No other thread may read or write the process environment concurrently.
unsafe fn set_gitea_url(val: &str) {
    // SAFETY: upheld by the caller, see `# Safety` above.
    unsafe { std::env::set_var("GITEA_URL", val) };
}

/// Removes `GITEA_URL` from the environment.
///
/// # Safety
/// No other thread may read or write the process environment concurrently.
unsafe fn remove_gitea_url() {
    // SAFETY: upheld by the caller, see `# Safety` above.
    unsafe { std::env::remove_var("GITEA_URL") };
}

/// RAII override of `GITEA_URL`: holds the env lock and restores the prior value on drop.
struct GiteaUrlGuard {
    previous: Option<std::ffi::OsString>,
    _lock: std::sync::MutexGuard<'static, ()>,
}

impl GiteaUrlGuard {
    /// Takes the env lock, remembers the current `GITEA_URL`, then sets it to `val`.
    fn set(val: &str) -> Self {
        // A poisoned lock only means an earlier test panicked; the guarded data is `()`.
        let lock = GITEA_ENV_LOCK
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);
        let previous = std::env::var_os("GITEA_URL");
        // SAFETY: other GITEA_URL writers are excluded by `GITEA_ENV_LOCK`;
        // readers are excluded by running the suite single-threaded.
        unsafe { set_gitea_url(val) };
        Self {
            previous,
            _lock: lock,
        }
    }
}

impl Drop for GiteaUrlGuard {
    fn drop(&mut self) {
        // Runs before `_lock` is released, so the restore is still serialised.
        // SAFETY: same argument as in `GiteaUrlGuard::set`.
        unsafe {
            match self.previous.take() {
                Some(previous) => std::env::set_var("GITEA_URL", previous),
                None => remove_gitea_url(),
            }
        }
    }
}

#[test]
fn test_empty_gitea_url_does_not_match_all() {
    // str::contains("") is always true in Rust — ensure we guard against that
    let _env = GiteaUrlGuard::set("");
    assert_eq!(
        detect_provider_from_url("https://gitlab.com/owner/repo"),
        ProviderKind::Unknown,
    );
    assert_eq!(
        detect_provider_from_url("https://bitbucket.org/owner/repo"),
        ProviderKind::Unknown,
    );
}

#[test]
fn test_scheme_only_gitea_url_does_not_match_all() {
    // A value that is nothing but a scheme normalises to an empty host.
    let _env = GiteaUrlGuard::set("https://");
    assert_eq!(
        detect_provider_from_url("https://gitlab.com/owner/repo"),
        ProviderKind::Unknown,
    );
}

#[test]
fn test_gitea_base_url_derives_from_url_when_env_differs() {
    // GITEA_URL points to one instance, but URL is for Codeberg —
    // should derive base URL from the URL, not the env var
    let _env = GiteaUrlGuard::set("https://gitea.company.com");
    let base = super::gitea_base_url("https://codeberg.org/owner/repo.git");
    assert_eq!(base, "https://codeberg.org");
}

#[test]
fn test_gitea_base_url_uses_env_when_matching() {
    // The remote is on the configured host, so the configured URL wins.
    let _env = GiteaUrlGuard::set("https://gitea.company.com");
    let base = super::gitea_base_url("https://gitea.company.com/owner/repo.git");
    assert_eq!(base, "https://gitea.company.com");
}
}
Loading