Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

Lightweight Go sidecar that exposes [Degoog](../README.md) to LLMs via the [Model Context Protocol](https://modelcontextprotocol.io). Speaks modern MCP Streamable HTTP at `/mcp`, runs in a tiny `scratch` container, and gives any MCP-capable client two tools:

- **`search`** - fast meta-search, returns a concise text summary plus structured URLs, snippets, engine timings, cap metadata, and source overlap.
- **`search`** - fast meta-search, returns model-readable plain-text results plus structured URLs, snippets, engine timings, cap metadata, and source overlap.
Comment thread
coderabbitai[bot] marked this conversation as resolved.
- **`scrape`** - fetches URLs concurrently, returns clean Markdown plus one structured row per requested URL, including explicit error rows for failures.

**Still in beta.** Not intended for production use yet.
Expand Down Expand Up @@ -38,6 +38,7 @@ Listens on `4443` by default. Modern MCP endpoint at `/mcp`, healthcheck at `/he
| `DEGOOG_MCP_TIMEOUT` | `15s` | Per-request timeout for both Degoog calls and scraped URLs. |
| `DEGOOG_MCP_MAX_RESULTS` | `0` | Cap on merged `search` results (top-scored kept). `0` = no cap. Trims context for small-window models. Overridable per call. |
| `DEGOOG_MCP_ENGINES` | _(empty)_ | Comma-separated engine ids to restrict every `search` to (e.g. `brave,duckduckgo`). Empty = instance defaults. Overridable per call. |
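| `DEGOOG_MCP_SEARCH_TEXT` | `none` | Controls the visible text that `search` returns alongside `structuredContent`. The value is matched case-insensitively; an invalid value logs a warning and falls back to the default. `full` returns the breakdown, the result rows, and scrape guidance. `results` returns only the result rows (titles, URLs, sources, snippets), any related searches, and scrape guidance. `breakdown` returns query/cap/source metadata, the visible-text/structuredContent explanation, and scrape guidance. `none` emits no visible search text and relies on `structuredContent`; `structuredContent.summary` still includes follow-up guidance. |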
| `DEGOOG_MCP_MAX_LENGTH` | `12000` | Max scraped-markdown length before head+tail truncation. |
| `DEGOOG_MCP_MAX_URLS` | `8` | Max URLs accepted by one `scrape` tool call. |
| `DEGOOG_MCP_SCRAPE_CONCURRENCY` | `4` | Max concurrent URL fetches inside one `scrape` call. |
Expand Down
137 changes: 121 additions & 16 deletions commands/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,43 +17,79 @@ import (
"degoog-mcp/tools"
)

func TestSearchHandlerReturnsConciseTextAndStructuredMetadata(t *testing.T) {
resp := degoog.Response{
// sampleSearchResponse builds the canned Degoog response served by the test
// backend. It contains exactly three hits so that a call with MaxResults: 2
// reports "3 before maxResults cap; 1 dropped"; the snippets are long enough
// to show up recognizably in the visible result text.
func sampleSearchResponse() degoog.Response {
	return degoog.Response{
		Results: []degoog.Hit{
			{Title: "First cast result", URL: "https://a.example/cast", Snippet: "Kurt Russell, Wyatt Russell, Anna Sawai, Kiersey Clemons, Ren Watabe, Mari Yamamoto, Anders Holm, Joe Tippett, and Elisa Lasowski star in the series.", Source: "brave", Score: 10, Sources: []string{"brave", "duckduckgo"}},
			{Title: "Second cast result", URL: "https://b.example/cast", Snippet: "The Apple TV+ series includes Kurt Russell and Wyatt Russell as Lee Shaw, with Anna Sawai and Kiersey Clemons in lead roles.", Source: "brave", Score: 9, Sources: []string{"brave"}},
			{Title: "Third cast result", URL: "https://c.example/cast", Snippet: "Cast and character guide for the MonsterVerse show.", Source: "bing", Score: 8, Sources: []string{"bing"}},
		},
		Query:         "agent ergonomics",
		Type:          degoog.TYPE_WEB,
		TotalTime:     123,
		EngineTimings: []degoog.EngineTiming{{Name: "Brave", Time: 40, ResultCount: 2}},
	}
}

// newSearchTestHandler starts an httptest server that answers every request
// with sampleSearchResponse() encoded as JSON, and returns a searchHandler
// wired to that server using cfg.
//
// The server is shut down through t.Cleanup rather than defer: the handler is
// used by the caller after this helper returns, so the server must stay up
// until the test (or subtest) that owns t has finished.
func newSearchTestHandler(t *testing.T, cfg *config.Config) *searchHandler {
	t.Helper()
	resp := sampleSearchResponse()
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
	}))
	t.Cleanup(srv.Close)
	// NOTE(review): the "", time.Second, 0 arguments are passed through to
	// degoog.New as in the original; their meaning is defined by that package.
	return newSearchH(degoog.New(srv.URL, "", time.Second, 0), cfg)
}

h := newSearchH(degoog.New(srv.URL, "", time.Second, 0), &config.Config{})
func callSearchText(t *testing.T, h *searchHandler) (string, tools.SearchOutput, int) {
t.Helper()
call, out, err := h.handle(context.Background(), &mcp.CallToolRequest{}, tools.SearchInput{Query: "agent ergonomics", MaxResults: 2})
if err != nil {
t.Fatalf("handle: %v", err)
}
if call == nil || len(call.Content) != 1 {
t.Fatalf("expected concise text content, got %#v", call)
if call == nil {
t.Fatalf("expected call result")
}
if len(call.Content) == 0 {
return "", out, 0
}
text, ok := call.Content[0].(*mcp.TextContent)
if !ok {
t.Fatalf("content should be text, got %T", call.Content[0])
}
if strings.HasPrefix(strings.TrimSpace(text.Text), "{") {
t.Fatalf("text content should be a concise summary, not raw JSON: %s", text.Text)
}
if !strings.Contains(text.Text, "3 before maxResults cap; 1 dropped") {
t.Fatalf("summary missing cap metadata: %s", text.Text)
return text.Text, out, len(call.Content)
}

func TestSearchTextFullIncludesStatusResultsAndGuidance(t *testing.T) {
h := newSearchTestHandler(t, &config.Config{SearchText: config.SEARCH_TEXT_FULL})
text, out, contentCount := callSearchText(t, h)

if contentCount != 1 {
t.Fatalf("expected one visible text block, got %d", contentCount)
}
if strings.HasPrefix(strings.TrimSpace(text), "{") {
t.Fatalf("visible text should be readable text, not raw JSON: %s", text)
}
for _, want := range []string{
"Degoog web search for",
"3 before maxResults cap; 1 dropped",
"Visible text:",
"structuredContent:",
"Results:",
"1. First cast result",
"https://a.example/cast",
"Kurt Russell",
"2. Second cast result",
"https://b.example/cast",
"For research or when snippets are insufficient",
"call scrape automatically",
} {
if !strings.Contains(text, want) {
t.Fatalf("full visible text missing %q: %s", want, text)
}
}
if out.Meta.ReturnedResults != 2 || out.Meta.ResultsBeforeCap != 3 || out.Meta.DroppedByCap != 1 || !out.Meta.CapApplied {
t.Fatalf("bad meta: %+v", out.Meta)
Expand All @@ -63,6 +99,72 @@ func TestSearchHandlerReturnsConciseTextAndStructuredMetadata(t *testing.T) {
}
}

func TestSearchTextModesAreComposable(t *testing.T) {
t.Run("results only", func(t *testing.T) {
h := newSearchTestHandler(t, &config.Config{SearchText: config.SEARCH_TEXT_RESULTS})
text, _, contentCount := callSearchText(t, h)
if contentCount != 1 {
t.Fatalf("expected one visible text block, got %d", contentCount)
}
for _, want := range []string{"Results:", "1. First cast result", "https://a.example/cast", "call scrape automatically"} {
if !strings.Contains(text, want) {
t.Fatalf("results text missing %q: %s", want, text)
}
}
for _, notWant := range []string{"Degoog web search for", "Visible text:", "structuredContent:"} {
if strings.Contains(text, notWant) {
t.Fatalf("results text should not contain %q: %s", notWant, text)
}
}
})

t.Run("breakdown only", func(t *testing.T) {
h := newSearchTestHandler(t, &config.Config{SearchText: config.SEARCH_TEXT_BREAKDOWN})
text, out, contentCount := callSearchText(t, h)
if contentCount != 1 {
t.Fatalf("expected one visible text block, got %d", contentCount)
}
for _, want := range []string{"Degoog web search for", "Visible text:", "structuredContent:", "call scrape automatically"} {
if !strings.Contains(text, want) {
t.Fatalf("breakdown text missing %q: %s", want, text)
}
}
if !strings.Contains(out.Summary, "call scrape automatically") {
t.Fatalf("structured summary should include follow-up guidance: %s", out.Summary)
}
if strings.Contains(text, "1. First cast result") || strings.Contains(text, "Results:") {
t.Fatalf("breakdown text should not contain result rows: %s", text)
}
})

t.Run("none", func(t *testing.T) {
h := newSearchTestHandler(t, &config.Config{SearchText: config.SEARCH_TEXT_NONE})
text, out, contentCount := callSearchText(t, h)
if contentCount != 0 || text != "" {
t.Fatalf("none mode should emit no visible text, count=%d text=%q", contentCount, text)
}
if !strings.Contains(out.Summary, "call scrape automatically") {
t.Fatalf("none mode structured summary should still guide MCP clients that read structuredContent: %s", out.Summary)
}
})

t.Run("scrape disabled", func(t *testing.T) {
h := newSearchTestHandler(t, &config.Config{SearchText: config.SEARCH_TEXT_BREAKDOWN, DisableScrape: true})
text, out, contentCount := callSearchText(t, h)
if contentCount != 1 {
t.Fatalf("expected one visible text block, got %d", contentCount)
}
for _, got := range []string{text, out.Summary} {
if strings.Contains(got, "call scrape") {
t.Fatalf("scrape-disabled guidance should not tell agents to call scrape: %s", got)
}
if !strings.Contains(got, "No scrape tool is available") {
t.Fatalf("scrape-disabled guidance should explain scrape is unavailable: %s", got)
}
}
})
}

func TestScrapeHelpersCountAndSummarizeFailures(t *testing.T) {
results := []scraper.Result{
{URL: "https://ok.example", Title: "ok", Content: "body"},
Expand Down Expand Up @@ -97,11 +199,14 @@ func TestRegisterCanDisableScrapeTool(t *testing.T) {

func TestToolDescriptionsGuideModelsAwayFromInventedScrapeURLs(t *testing.T) {
searchDesc := tools.SearchTool().Description
for _, want := range []string{"answer from snippets", "Do not invent URLs", "If scrape fails"} {
for _, want := range []string{"Visible text", "structuredContent", "answer from snippets", "Do not invent URLs", "If snippets are insufficient", "call scrape"} {
if !strings.Contains(searchDesc, want) {
t.Fatalf("search description missing %q: %s", want, searchDesc)
}
}
if strings.Contains(strings.ToLower(searchDesc), "concise summary") {
t.Fatalf("search description should not call visible text a summary: %s", searchDesc)
}

searchOnlyDesc := tools.SearchTool(false).Description
if !strings.Contains(searchOnlyDesc, "No scrape tool is available") {
Expand All @@ -114,7 +219,7 @@ func TestToolDescriptionsGuideModelsAwayFromInventedScrapeURLs(t *testing.T) {
}

scrapeDesc := tools.ScrapeTool().Description
for _, want := range []string{"Do not invent", "only URLs returned by search", "do not stop", "Tell the user which URLs failed"} {
for _, want := range []string{"Do not invent", "only URLs returned by search", "Use this automatically", "do not stop", "Tell the user which URLs failed"} {
if !strings.Contains(scrapeDesc, want) {
t.Fatalf("scrape description missing %q: %s", want, scrapeDesc)
}
Expand Down
98 changes: 94 additions & 4 deletions commands/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,17 @@ type searchHandler struct {
client *degoog.Client
defaultEngines []string
defaultMax int
searchText string
scrapeEnabled bool
}

// newSearchH builds the search tool handler from a Degoog client and the
// loaded configuration. Default engines, default result cap and the visible
// text mode are copied from cfg; scrapeEnabled is the inverse of
// cfg.DisableScrape. cfg is dereferenced and must not be nil.
func newSearchH(c *degoog.Client, cfg *config.Config) *searchHandler {
	h := new(searchHandler)
	h.client = c
	h.defaultEngines = cfg.Engines
	h.defaultMax = cfg.MaxResults
	h.searchText = cfg.SearchText
	h.scrapeEnabled = !cfg.DisableScrape
	return h
}

Expand Down Expand Up @@ -65,8 +69,8 @@ func (h *searchHandler) handle(ctx context.Context, req *mcp.CallToolRequest, in
SourceOverlap: sourceOverlap(resp.Results),
},
}
out.Summary = searchSummary(out)
return &mcp.CallToolResult{Content: []mcp.Content{&mcp.TextContent{Text: out.Summary}}}, out, nil
out.Summary = searchStructuredSummary(out, h.scrapeEnabled)
return &mcp.CallToolResult{Content: searchVisibleContent(out, h.searchText, h.scrapeEnabled)}, out, nil
}

func (h *searchHandler) pickEngines(in []string) []string {
Expand All @@ -83,7 +87,7 @@ func (h *searchHandler) pickMax(in int) int {
return h.defaultMax
}

func searchSummary(out tools.SearchOutput) string {
func searchBreakdownLine(out tools.SearchOutput) string {
parts := []string{fmt.Sprintf("Degoog %s search for %q returned %d result(s)", out.Type, out.Query, out.Meta.ReturnedResults)}
if out.Meta.ResultsBeforeCap > 0 && out.Meta.DroppedByCap > 0 {
parts = append(parts, fmt.Sprintf("%d before maxResults cap; %d dropped", out.Meta.ResultsBeforeCap, out.Meta.DroppedByCap))
Expand All @@ -97,7 +101,93 @@ func searchSummary(out tools.SearchOutput) string {
if len(out.Meta.SourceOverlap) > 0 {
parts = append(parts, "top sources: "+formatSourceOverlap(out.Meta.SourceOverlap, 5))
}
return strings.Join(parts, "; ") + ". Structured content contains full results, timings, related searches, and cap/source metadata; call scrape with selected URLs for article text."
return strings.Join(parts, "; ") + "."
}

// searchVisibleContent wraps the visible search text for the given mode in a
// single text content item. When the mode produces no text it returns an
// empty, non-nil content slice.
func searchVisibleContent(out tools.SearchOutput, mode string, scrapeEnabled bool) []mcp.Content {
	if visible := searchVisibleText(out, mode, scrapeEnabled); visible != "" {
		return []mcp.Content{&mcp.TextContent{Text: visible}}
	}
	return []mcp.Content{}
}

// searchVisibleText renders the human/model-readable text for a search result
// according to mode. Sections are separated by a blank line.
//
//   - config.SEARCH_TEXT_NONE: returns "".
//   - config.SEARCH_TEXT_BREAKDOWN: breakdown line, output explanation, guidance.
//   - config.SEARCH_TEXT_RESULTS: result rows, guidance.
//   - anything else (config.SEARCH_TEXT_FULL, the empty string of a zero-value
//     config, or an unrecognized mode): breakdown line, output explanation,
//     result rows, guidance.
//
// scrapeEnabled selects which follow-up guidance is appended.
func searchVisibleText(out tools.SearchOutput, mode string, scrapeEnabled bool) string {
	// Nothing to build in "none" mode, so skip computing the guidance.
	if mode == config.SEARCH_TEXT_NONE {
		return ""
	}

	guidance := searchFollowupGuidance(scrapeEnabled)
	var sections []string
	switch mode {
	case config.SEARCH_TEXT_BREAKDOWN:
		sections = []string{searchBreakdownLine(out), searchOutputExplanation(), guidance}
	case config.SEARCH_TEXT_RESULTS:
		sections = []string{searchResultsText(out), guidance}
	default:
		// Full output is the fallback for "full", "" and unknown modes alike.
		sections = []string{searchBreakdownLine(out), searchOutputExplanation(), searchResultsText(out), guidance}
	}
	return strings.Join(sections, "\n\n")
}

// searchOutputExplanation returns the fixed paragraph that tells a model how
// the visible text relates to structuredContent and when to prefer each.
func searchOutputExplanation() string {
	const explanation = "Visible text: model-readable search context. structuredContent: exact JSON with full result objects, engine timings, related searches, cap/source metadata, and follow-up guidance. Use visible text when your client/model does not reliably expose structuredContent; use structuredContent when exact fields are available."
	return explanation
}

// searchStructuredSummary builds the summary stored in the structured output:
// the breakdown line followed by a single space and the follow-up guidance
// that matches scrapeEnabled.
func searchStructuredSummary(out tools.SearchOutput, scrapeEnabled bool) string {
	breakdown := searchBreakdownLine(out)
	guidance := searchFollowupGuidance(scrapeEnabled)
	return breakdown + " " + guidance
}

// searchFollowupGuidance returns the instruction text appended to search
// output. With scrape enabled it tells the agent when to call scrape; with
// scrape disabled it states that no scrape tool exists and which returned
// fields to rely on instead.
func searchFollowupGuidance(scrapeEnabled bool) string {
	const (
		withScrape    = "Use snippets for simple answers. For research or when snippets are insufficient, call scrape automatically on the most relevant returned URLs from this search; do not ask permission unless your client requires it."
		withoutScrape = "No scrape tool is available on this MCP server. Use returned snippets, titles, URLs, related searches, and source metadata as the available evidence."
	)
	if scrapeEnabled {
		return withScrape
	}
	return withoutScrape
}

// searchResultsText renders the result list as plain text for the visible
// search output.
//
// With no results it returns "Results: none returned.". Otherwise it emits a
// "Results:" header followed by one numbered entry per result (title, then
// optional URL, Sources/Source and Snippet lines), entries separated by one
// blank line. Titles and snippets are whitespace-normalized with cleanLine; a
// blank title is shown as "Untitled result". Non-blank related searches are
// appended as a bulleted "Related searches:" section after one blank line; the
// section is omitted when every related search is blank.
//
// The returned text never ends in a newline, so callers that join sections
// with "\n\n" get exactly one blank line between sections.
func searchResultsText(out tools.SearchOutput) string {
	if len(out.Results) == 0 {
		return "Results: none returned."
	}

	var b strings.Builder
	b.WriteString("Results:\n")
	for i, r := range out.Results {
		if i > 0 {
			// Blank line between consecutive entries.
			b.WriteString("\n")
		}
		title := cleanLine(r.Title)
		if title == "" {
			title = "Untitled result"
		}
		fmt.Fprintf(&b, "%d. %s\n", i+1, title)
		if u := strings.TrimSpace(r.URL); u != "" {
			fmt.Fprintf(&b, " URL: %s\n", u)
		}
		// Prefer the merged source list; fall back to the single source field.
		if len(r.Sources) > 0 {
			fmt.Fprintf(&b, " Sources: %s\n", strings.Join(r.Sources, ", "))
		} else if src := strings.TrimSpace(r.Source); src != "" {
			fmt.Fprintf(&b, " Source: %s\n", src)
		}
		if snippet := cleanLine(r.Snippet); snippet != "" {
			fmt.Fprintf(&b, " Snippet: %s\n", snippet)
		}
	}

	// Collect usable related searches first so the header is only written
	// when there is at least one entry to list under it.
	related := make([]string, 0, len(out.RelatedSearches))
	for _, rs := range out.RelatedSearches {
		if clean := cleanLine(rs); clean != "" {
			related = append(related, clean)
		}
	}
	if len(related) > 0 {
		b.WriteString("\nRelated searches:\n")
		for _, rs := range related {
			b.WriteString("- ")
			b.WriteString(rs)
			b.WriteString("\n")
		}
	}

	return strings.TrimRight(b.String(), "\n")
}

// cleanLine collapses every run of whitespace in s (including newlines and
// tabs) into a single space and drops leading and trailing whitespace, so the
// result always fits on one line. Whitespace-only input yields "".
func cleanLine(s string) string {
	words := strings.Fields(s)
	if len(words) == 0 {
		return ""
	}
	var b strings.Builder
	for i, w := range words {
		if i > 0 {
			b.WriteByte(' ')
		}
		b.WriteString(w)
	}
	return b.String()
}

func sourceOverlap(results []degoog.Hit) []tools.SourceOverlap {
Expand Down
23 changes: 23 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ const (
ENV_ENGINES = "DEGOOG_MCP_ENGINES"
ENV_AUTH_TOKEN = "DEGOOG_MCP_AUTH_TOKEN"
ENV_DISABLE_SCRAPE = "DEGOOG_MCP_DISABLE_SCRAPE"
ENV_SEARCH_TEXT = "DEGOOG_MCP_SEARCH_TEXT"

SEARCH_TEXT_FULL = "full"
SEARCH_TEXT_RESULTS = "results"
SEARCH_TEXT_BREAKDOWN = "breakdown"
SEARCH_TEXT_NONE = "none"

DEFAULT_BIND_HOST = ""
DEFAULT_PORT = "4443"
Expand All @@ -39,6 +45,7 @@ const (
DEFAULT_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36"
DEFAULT_DEGOOG_URL = "http://degoog:4444"
DEFAULT_MAX_RESULTS = 0
DEFAULT_SEARCH_TEXT = SEARCH_TEXT_NONE

LIST_SEP = ","
)
Expand All @@ -60,6 +67,7 @@ type Config struct {
Engines []string
AuthToken string
DisableScrape bool
SearchText string
}

func Load() *Config {
Expand All @@ -80,6 +88,21 @@ func Load() *Config {
Engines: readList(ENV_ENGINES),
AuthToken: strings.TrimSpace(os.Getenv(ENV_AUTH_TOKEN)),
DisableScrape: readBool(ENV_DISABLE_SCRAPE, false),
SearchText: readSearchText(ENV_SEARCH_TEXT, DEFAULT_SEARCH_TEXT),
}
}

// readSearchText reads the search text mode from environment variable key.
// The value is trimmed and lower-cased before matching. An unset or blank
// variable yields def silently; one of the four SEARCH_TEXT_* modes is
// returned as-is (normalized); anything else logs a warning that quotes the
// raw value and yields def.
func readSearchText(key, def string) string {
	raw := os.Getenv(key)
	switch mode := strings.ToLower(strings.TrimSpace(raw)); mode {
	case "":
		return def
	case SEARCH_TEXT_FULL, SEARCH_TEXT_RESULTS, SEARCH_TEXT_BREAKDOWN, SEARCH_TEXT_NONE:
		return mode
	}
	logger.Get().Warn("config: invalid search text for %s=%q, falling back to %s", key, raw, def)
	return def
}

Expand Down
Loading
Loading