Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions commands/register.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"degoog-mcp/tools"
)

// Register registers search and scraping tools with the MCP server.
func Register(server *mcp.Server, sc *scraper.Scraper, dg *degoog.Client, cfg *config.Config) {
sh := newSearchH(dg, cfg)
mcp.AddTool(server, tools.SearchTool(), sh.handle)
Expand Down
4 changes: 4 additions & 0 deletions commands/scrape.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func (h *scrapeHandler) handle(ctx context.Context, req *mcp.CallToolRequest, in
return &mcp.CallToolResult{Content: []mcp.Content{&mcp.TextContent{Text: out.Summary}}}, out, nil
}

// scrapeCounts counts the number of successful and failed scrape results.
func scrapeCounts(results []scraper.Result) (successes, failures int) {
for _, r := range results {
if r.Error != "" {
Expand All @@ -57,6 +58,9 @@ func scrapeCounts(results []scraper.Result) (successes, failures int) {
return successes, failures
}

// scrapeSummary generates a human-readable summary of scrape results,
// including success and failure counts, failure details, and information
// about the structured content format.
func scrapeSummary(out tools.ScrapeOutput) string {
parts := []string{fmt.Sprintf("Degoog scrape returned %d successful and %d failed URL(s)", out.SuccessCount, out.FailureCount)}
if out.FailureCount > 0 {
Expand Down
4 changes: 4 additions & 0 deletions commands/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type searchHandler struct {
defaultMax int
}

// newSearchH constructs a searchHandler with the provided degoog client and configuration.
func newSearchH(c *degoog.Client, cfg *config.Config) *searchHandler {
return &searchHandler{
client: c,
Expand Down Expand Up @@ -83,6 +84,7 @@ func (h *searchHandler) pickMax(in int) int {
return h.defaultMax
}

// searchSummary generates a human-readable summary of the search results, combining the query and result count with optional metadata such as engines used, response time, and source distribution.
func searchSummary(out tools.SearchOutput) string {
parts := []string{fmt.Sprintf("Degoog %s search for %q returned %d result(s)", out.Type, out.Query, out.Meta.ReturnedResults)}
if out.Meta.ResultsBeforeCap > 0 && out.Meta.DroppedByCap > 0 {
Expand All @@ -100,6 +102,7 @@ func searchSummary(out tools.SearchOutput) string {
return strings.Join(parts, "; ") + ". Structured content contains full results, timings, related searches, and cap/source metadata; call scrape with selected URLs for article text."
}

// sourceOverlap returns sources ranked by frequency in the search results, sorted by hit count in descending order, then alphabetically by source name.
func sourceOverlap(results []degoog.Hit) []tools.SourceOverlap {
counts := make(map[string]int)
for _, r := range results {
Expand Down Expand Up @@ -127,6 +130,7 @@ func sourceOverlap(results []degoog.Hit) []tools.SourceOverlap {
return out
}

// formatSourceOverlap formats up to max entries from overlap as comma-separated source=count pairs.
func formatSourceOverlap(overlap []tools.SourceOverlap, max int) string {
if max > len(overlap) {
max = len(overlap)
Expand Down
7 changes: 7 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
Engines []string
}

// Load returns a Config populated with values from environment variables, using defaults for unset or invalid variables.
func Load() *Config {
return &Config{
BindHost: readStr(ENV_BIND_HOST, DEFAULT_BIND_HOST),
Expand All @@ -77,6 +78,7 @@
}
}

// readNonNeg reads an integer from the environment variable key and returns it if non-negative, or def otherwise.
func readNonNeg(key string, def int) int {
n := readInt(key, def)
if n < 0 {
Expand All @@ -86,6 +88,7 @@
return n
}

// readList reads an environment variable and parses it as a comma-separated list of strings, excluding empty values. It returns nil if the variable is unset or empty.
func readList(key string) []string {
v := strings.TrimSpace(os.Getenv(key))
if v == "" {
Expand All @@ -101,6 +104,7 @@
return out
}

// readStr reads the environment variable specified by key and returns its value, or def if the variable is unset or empty.
func readStr(key, def string) string {
v := os.Getenv(key)
if v == "" {
Expand All @@ -109,6 +113,7 @@
return v
}

// readInt reads the environment variable with the given key and returns its integer value, or the default value if the variable is unset, empty, or cannot be parsed as an integer.

Check failure on line 116 in internal/config/config.go

View workflow job for this annotation

GitHub Actions / Tests now - BOOORING

syntax error: non-declaration statement outside function body
func readInt(key string, def int) int {
v := os.Getenv(key)
if v == "" {
Expand All @@ -122,6 +127,7 @@
return n
}

// readPosInt reads an integer from an environment variable and returns it if positive, otherwise returns the default.

Check failure on line 130 in internal/config/config.go

View workflow job for this annotation

GitHub Actions / Tests now - BOOORING

syntax error: non-declaration statement outside function body
func readPosInt(key string, def int) int {
n := readInt(key, def)
if n <= 0 {
Expand All @@ -131,6 +137,7 @@
return n
}

// readDur reads an environment variable, parses it as a time.Duration, and returns the parsed value. If the environment variable is unset, empty, or cannot be parsed as a duration, the default value is returned.
func readDur(key string, def time.Duration) time.Duration {
v := os.Getenv(key)
if v == "" {
Expand Down
4 changes: 4 additions & 0 deletions internal/degoog/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ type Client struct {
http *http.Client
}

// New returns a new Client configured with the provided base URL, API key, request timeout, and maximum bytes to read from error responses. The base URL is normalized by trimming trailing slashes.
func New(base, apiKey string, timeout time.Duration, maxBytes int64) *Client {
return &Client{
base: strings.TrimRight(base, "/"),
Expand Down Expand Up @@ -257,6 +258,9 @@ func (c *Client) setHeaders(req *http.Request) {
}
}

// capResults limits the results in a Response to a maximum count.
// The Response is modified in-place. Returns the number of results that
// were dropped, or 0 if no capping was applied.
func capResults(out *Response, max int) int {
if max <= 0 || len(out.Results) <= max {
return 0
Expand Down
14 changes: 14 additions & 0 deletions internal/scraper/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@ func (g *guardRT) RoundTrip(r *http.Request) (*http.Response, error) {
return g.base.RoundTrip(r)
}

// Polyjuice creates an HTTP client with URL and IP validation and browser-like request headers.
//
// ua is the user agent string to set on outgoing requests.
func Polyjuice(ua string) *http.Client {
transport := &http.Transport{
Proxy: http.ProxyFromEnvironment,
Expand All @@ -101,6 +104,10 @@ func Polyjuice(ua string) *http.Client {
}
}

// CheckURL validates that a target URL is safe for scraping. It returns ErrBadScheme
// if the scheme is not http or https, ErrBadHost if the URL is nil or has an empty
// hostname, ErrBadIP if the hostname does not resolve to any allowed IP addresses,
// or nil if validation succeeds.
func CheckURL(ctx context.Context, target *url.URL) error {
if target == nil {
return ErrBadHost
Expand All @@ -115,6 +122,7 @@ func CheckURL(ctx context.Context, target *url.URL) error {
return err
}

// guardedDial dials a network connection by resolving the host and attempting to connect to each resolved IP address until one succeeds.
func guardedDial(ctx context.Context, network, address string) (net.Conn, error) {
host, port, err := net.SplitHostPort(address)
if err != nil {
Expand Down Expand Up @@ -142,6 +150,10 @@ func guardedDial(ctx context.Context, network, address string) (net.Conn, error)
return nil, ErrBadIP
}

// resolveHost resolves a hostname to a list of allowed IP addresses.
// The host parameter may be a hostname or a direct IP address.
// Returns the list of allowed addresses or an error if resolution fails
// or no allowed IPs remain.
func resolveHost(ctx context.Context, host string) ([]netip.Addr, error) {
if ip, err := netip.ParseAddr(host); err == nil {
return vetIPs([]netip.Addr{ip})
Expand All @@ -163,6 +175,7 @@ func resolveHost(ctx context.Context, host string) ([]netip.Addr, error) {
return vetIPs(ips)
}

// vetIPs filters the input IPs to keep only those that are allowed, returning the filtered slice or ErrBadIP if no IPs remain.
func vetIPs(ips []netip.Addr) ([]netip.Addr, error) {
allowed := make([]netip.Addr, 0, len(ips))
for _, ip := range ips {
Expand All @@ -178,6 +191,7 @@ func vetIPs(ips []netip.Addr) ([]netip.Addr, error) {
return allowed, nil
}

// isAllowedIP reports whether an IP address can be used. It returns true if the IP is valid, globally unicast, and not private, loopback, link-local, multicast, unspecified, or contained in a blocked network range; false otherwise.
func isAllowedIP(ip netip.Addr) bool {
if !ip.IsValid() || !ip.IsGlobalUnicast() || ip.IsPrivate() || ip.IsLoopback() || ip.IsLinkLocalUnicast() || ip.IsLinkLocalMulticast() || ip.IsMulticast() || ip.IsUnspecified() {
return false
Expand Down
5 changes: 5 additions & 0 deletions internal/scraper/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,12 @@
MaxBytes int64
}

// New creates a Scraper with the provided cache, user agent, timeout, and maximum markdown length, using default values for other configuration options.

Check failure on line 54 in internal/scraper/scraper.go

View workflow job for this annotation

GitHub Actions / Tests now - BOOORING

syntax error: non-declaration statement outside function body
func New(c *cache.Cache, ua string, timeout time.Duration, maxLen int) *Scraper {
	// Only MaxLength is set here; every other option is left at its zero
	// value and handed to NewWithOptions as-is.
	opts := Options{MaxLength: maxLen}
	return NewWithOptions(c, ua, timeout, opts)
}

// NewWithOptions creates a Scraper initialized with the provided cache, user agent, timeout, and options, applying defaults to unspecified option values.

Check failure on line 59 in internal/scraper/scraper.go

View workflow job for this annotation

GitHub Actions / Tests now - BOOORING

syntax error: non-declaration statement outside function body
func NewWithOptions(c *cache.Cache, ua string, timeout time.Duration, opts Options) *Scraper {
opts = fillOpts(opts)
return &Scraper{
Expand All @@ -69,6 +71,7 @@
}
}

// fillOpts returns opts with zero or negative fields populated with default values.
func fillOpts(opts Options) Options {
if opts.MaxURLs <= 0 {
opts.MaxURLs = DEFAULT_MAX_URLS
Expand Down Expand Up @@ -194,6 +197,7 @@
return res
}

// readCap reads from r and returns up to maxBytes of data, with a boolean indicating whether the stream contained additional unread data. If maxBytes is zero or negative, the entire stream is read with no truncation indication.
func readCap(r io.Reader, maxBytes int64) ([]byte, bool, error) {
if maxBytes <= 0 {
body, err := io.ReadAll(r)
Expand All @@ -210,6 +214,7 @@
return body[:maxBytes], true, nil
}

// Thanos truncates a string using middle truncation when it exceeds maxLen, keeping the start and end while inserting a truncation marker between them.
func Thanos(s string, maxLen int) string {
if maxLen <= 0 || len(s) <= maxLen {
return s
Expand Down
5 changes: 5 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ const (
IDLE_TIMEOUT = 120 * time.Second
)

// main initializes the server infrastructure, starts the HTTP listener with MCP endpoints, and handles graceful shutdown on system signals or startup errors.
func main() {
log := logger.Get()
cfg := config.Load()
Expand Down Expand Up @@ -93,10 +94,12 @@ func main() {
lightsOut(httpSrv, log)
}

// listenAddr returns the listen address formed by joining the configuration's
// bind host and port with a colon ("host:port").
//
// NOTE(review): a bare IPv6 literal in BindHost would not be bracketed by this
// concatenation; net.JoinHostPort handles that case — confirm whether IPv6
// bind hosts need to be supported.
func listenAddr(cfg *config.Config) string {
	host, port := cfg.BindHost, cfg.Port
	return host + ":" + port
}

// buildMux builds an HTTP request multiplexer for serving MCP streaming, SSE, and health check endpoints.
func buildMux(srv *mcp.Server, log *logger.Logger) *http.ServeMux {
mcpHandler := mcp.NewStreamableHTTPHandler(func(*http.Request) *mcp.Server { return srv }, nil)
sseHandler := mcp.NewSSEHandler(func(*http.Request) *mcp.Server { return srv }, nil)
Expand All @@ -113,13 +116,15 @@ func buildMux(srv *mcp.Server, log *logger.Logger) *http.ServeMux {
return mux
}

// legacySSE wraps next in a handler that logs a warning naming the request
// path each time the legacy SSE endpoint is hit, then forwards the request to
// next unchanged.
func legacySSE(next http.Handler, log *logger.Logger) http.Handler {
	warnAndForward := func(w http.ResponseWriter, r *http.Request) {
		// Warn first so the usage is recorded before next handles the request.
		log.Warn("http: legacy sse endpoint used path=%s", r.URL.Path)
		next.ServeHTTP(w, r)
	}
	return http.HandlerFunc(warnAndForward)
}

// lightsOut gracefully shuts down the HTTP server with a configured timeout and logs any error or completion message.
func lightsOut(srv *http.Server, log *logger.Logger) {
ctx, cancel := context.WithTimeout(context.Background(), SHUTDOWN_WAIT)
defer cancel()
Expand Down
1 change: 1 addition & 0 deletions tools/scrape.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ type ScrapeOutput struct {
FailureCount int `json:"failureCount"`
}

// ScrapeTool returns a configured MCP tool for fetching and converting URLs to Markdown content.
func ScrapeTool() *mcp.Tool {
return &mcp.Tool{
Name: SCRAPE_NAME,
Expand Down
1 change: 1 addition & 0 deletions tools/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ type SourceOverlap struct {
Count int `json:"count"`
}

// SearchTool returns an MCP tool descriptor for the search capability.
func SearchTool() *mcp.Tool {
return &mcp.Tool{
Name: SEARCH_NAME,
Expand Down
Loading