Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

Lightweight Go sidecar that exposes [Degoog](../README.md) to LLMs via the [Model Context Protocol](https://modelcontextprotocol.io). Speaks modern MCP Streamable HTTP at `/mcp`, runs in a tiny `scratch` container, and gives any MCP-capable client two tools:

- **`search`** - fast meta-search, returns a concise text summary plus structured URLs, snippets, engine timings, cap metadata, and source overlap.
- **`search`** - fast meta-search, returns model-readable plain-text results plus structured URLs, snippets, engine timings, cap metadata, and source overlap.
Comment thread
coderabbitai[bot] marked this conversation as resolved.
- **`scrape`** - fetches URLs concurrently, returns clean Markdown plus one structured row per requested URL, including explicit error rows for failures.

**Still in beta.** Not intended for production use yet.
Expand Down Expand Up @@ -38,6 +38,7 @@ Listens on `4443` by default. Modern MCP endpoint at `/mcp`, healthcheck at `/he
| `DEGOOG_MCP_TIMEOUT` | `15s` | Per-request timeout for both Degoog calls and scraped URLs. |
| `DEGOOG_MCP_MAX_RESULTS` | `0` | Cap on merged `search` results (top-scored kept). `0` = no cap. Trims context for small-window models. Overridable per call. |
| `DEGOOG_MCP_ENGINES` | _(empty)_ | Comma-separated engine ids to restrict every `search` to (e.g. `brave,duckduckgo`). Empty = instance defaults. Overridable per call. |
| `DEGOOG_MCP_SEARCH_TEXT` | `none` | Controls the visible text content returned by `search`. `full` returns breakdown + result rows + scrape guidance. `results` returns only titles, URLs, snippets, and scrape guidance. `breakdown` returns only query/cap/source metadata plus the visible-text/structuredContent explanation. `none` emits no visible search text and relies on `structuredContent`. |
| `DEGOOG_MCP_MAX_LENGTH` | `12000` | Max scraped-markdown length before head+tail truncation. |
| `DEGOOG_MCP_MAX_URLS` | `8` | Max URLs accepted by one `scrape` tool call. |
| `DEGOOG_MCP_SCRAPE_CONCURRENCY` | `4` | Max concurrent URL fetches inside one `scrape` call. |
Expand Down
115 changes: 99 additions & 16 deletions commands/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,43 +17,79 @@ import (
"degoog-mcp/tools"
)

func TestSearchHandlerReturnsConciseTextAndStructuredMetadata(t *testing.T) {
resp := degoog.Response{
func sampleSearchResponse() degoog.Response {
return degoog.Response{
Results: []degoog.Hit{
{Title: "a", URL: "https://a.example", Source: "brave", Score: 10, Sources: []string{"brave", "duckduckgo"}},
{Title: "b", URL: "https://b.example", Source: "brave", Score: 9, Sources: []string{"brave"}},
{Title: "c", URL: "https://c.example", Source: "bing", Score: 8, Sources: []string{"bing"}},
{Title: "First cast result", URL: "https://a.example/cast", Snippet: "Kurt Russell, Wyatt Russell, Anna Sawai, Kiersey Clemons, Ren Watabe, Mari Yamamoto, Anders Holm, Joe Tippett, and Elisa Lasowski star in the series.", Source: "brave", Score: 10, Sources: []string{"brave", "duckduckgo"}},
{Title: "Second cast result", URL: "https://b.example/cast", Snippet: "The Apple TV+ series includes Kurt Russell and Wyatt Russell as Lee Shaw, with Anna Sawai and Kiersey Clemons in lead roles.", Source: "brave", Score: 9, Sources: []string{"brave"}},
{Title: "Third cast result", URL: "https://c.example/cast", Snippet: "Cast and character guide for the MonsterVerse show.", Source: "bing", Score: 8, Sources: []string{"bing"}},
},
Query: "agent ergonomics",
Type: degoog.TYPE_WEB,
TotalTime: 123,
EngineTimings: []degoog.EngineTiming{{Name: "Brave", Time: 40, ResultCount: 2}},
}
}

func newSearchTestHandler(t *testing.T, cfg *config.Config) *searchHandler {
t.Helper()
resp := sampleSearchResponse()
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if err := json.NewEncoder(w).Encode(resp); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
}))
defer srv.Close()
t.Cleanup(srv.Close)
return newSearchH(degoog.New(srv.URL, "", time.Second, 0), cfg)
}

h := newSearchH(degoog.New(srv.URL, "", time.Second, 0), &config.Config{})
func callSearchText(t *testing.T, h *searchHandler) (string, tools.SearchOutput, int) {
t.Helper()
call, out, err := h.handle(context.Background(), &mcp.CallToolRequest{}, tools.SearchInput{Query: "agent ergonomics", MaxResults: 2})
if err != nil {
t.Fatalf("handle: %v", err)
}
if call == nil || len(call.Content) != 1 {
t.Fatalf("expected concise text content, got %#v", call)
if call == nil {
t.Fatalf("expected call result")
}
if len(call.Content) == 0 {
return "", out, 0
}
text, ok := call.Content[0].(*mcp.TextContent)
if !ok {
t.Fatalf("content should be text, got %T", call.Content[0])
}
if strings.HasPrefix(strings.TrimSpace(text.Text), "{") {
t.Fatalf("text content should be a concise summary, not raw JSON: %s", text.Text)
}
if !strings.Contains(text.Text, "3 before maxResults cap; 1 dropped") {
t.Fatalf("summary missing cap metadata: %s", text.Text)
return text.Text, out, len(call.Content)
}

func TestSearchTextFullIncludesStatusResultsAndGuidance(t *testing.T) {
h := newSearchTestHandler(t, &config.Config{SearchText: config.SEARCH_TEXT_FULL})
text, out, contentCount := callSearchText(t, h)

if contentCount != 1 {
t.Fatalf("expected one visible text block, got %d", contentCount)
}
if strings.HasPrefix(strings.TrimSpace(text), "{") {
t.Fatalf("visible text should be readable text, not raw JSON: %s", text)
}
for _, want := range []string{
"Degoog web search for",
"3 before maxResults cap; 1 dropped",
"Visible text:",
"structuredContent:",
"Results:",
"1. First cast result",
"https://a.example/cast",
"Kurt Russell",
"2. Second cast result",
"https://b.example/cast",
"If snippets are not enough",
"call scrape automatically",
} {
if !strings.Contains(text, want) {
t.Fatalf("full visible text missing %q: %s", want, text)
}
}
if out.Meta.ReturnedResults != 2 || out.Meta.ResultsBeforeCap != 3 || out.Meta.DroppedByCap != 1 || !out.Meta.CapApplied {
t.Fatalf("bad meta: %+v", out.Meta)
Expand All @@ -63,6 +99,50 @@ func TestSearchHandlerReturnsConciseTextAndStructuredMetadata(t *testing.T) {
}
}

// TestSearchTextModesAreComposable verifies that the "results", "breakdown"
// and "none" visible-text modes each emit only the sections they own.
func TestSearchTextModesAreComposable(t *testing.T) {
	// render runs one search with the given visible-text mode and reports the
	// visible text together with the number of content blocks returned.
	render := func(t *testing.T, mode string) (string, int) {
		t.Helper()
		handler := newSearchTestHandler(t, &config.Config{SearchText: mode})
		visible, _, blocks := callSearchText(t, handler)
		return visible, blocks
	}

	t.Run("results only", func(t *testing.T) {
		visible, blocks := render(t, config.SEARCH_TEXT_RESULTS)
		if blocks != 1 {
			t.Fatalf("expected one visible text block, got %d", blocks)
		}
		required := []string{"Results:", "1. First cast result", "https://a.example/cast", "call scrape automatically"}
		for _, want := range required {
			if !strings.Contains(visible, want) {
				t.Fatalf("results text missing %q: %s", want, visible)
			}
		}
		// Breakdown/explanation sections must not leak into results-only output.
		forbidden := []string{"Degoog web search for", "Visible text:", "structuredContent:"}
		for _, notWant := range forbidden {
			if strings.Contains(visible, notWant) {
				t.Fatalf("results text should not contain %q: %s", notWant, visible)
			}
		}
	})

	t.Run("breakdown only", func(t *testing.T) {
		visible, blocks := render(t, config.SEARCH_TEXT_BREAKDOWN)
		if blocks != 1 {
			t.Fatalf("expected one visible text block, got %d", blocks)
		}
		required := []string{"Degoog web search for", "Visible text:", "structuredContent:"}
		for _, want := range required {
			if !strings.Contains(visible, want) {
				t.Fatalf("breakdown text missing %q: %s", want, visible)
			}
		}
		// Result rows belong to the "results" section only.
		for _, row := range []string{"1. First cast result", "Results:"} {
			if strings.Contains(visible, row) {
				t.Fatalf("breakdown text should not contain result rows: %s", visible)
			}
		}
	})

	t.Run("none", func(t *testing.T) {
		visible, blocks := render(t, config.SEARCH_TEXT_NONE)
		if !(blocks == 0 && visible == "") {
			t.Fatalf("none mode should emit no visible text, count=%d text=%q", blocks, visible)
		}
	})
}

func TestScrapeHelpersCountAndSummarizeFailures(t *testing.T) {
results := []scraper.Result{
{URL: "https://ok.example", Title: "ok", Content: "body"},
Expand Down Expand Up @@ -97,11 +177,14 @@ func TestRegisterCanDisableScrapeTool(t *testing.T) {

func TestToolDescriptionsGuideModelsAwayFromInventedScrapeURLs(t *testing.T) {
searchDesc := tools.SearchTool().Description
for _, want := range []string{"answer from snippets", "Do not invent URLs", "If scrape fails"} {
for _, want := range []string{"Visible text", "structuredContent", "answer from snippets", "Do not invent URLs", "If snippets are insufficient", "call scrape"} {
if !strings.Contains(searchDesc, want) {
t.Fatalf("search description missing %q: %s", want, searchDesc)
}
}
if strings.Contains(strings.ToLower(searchDesc), "concise summary") {
t.Fatalf("search description should not call visible text a summary: %s", searchDesc)
}

searchOnlyDesc := tools.SearchTool(false).Description
if !strings.Contains(searchOnlyDesc, "No scrape tool is available") {
Expand All @@ -114,7 +197,7 @@ func TestToolDescriptionsGuideModelsAwayFromInventedScrapeURLs(t *testing.T) {
}

scrapeDesc := tools.ScrapeTool().Description
for _, want := range []string{"Do not invent", "only URLs returned by search", "do not stop", "Tell the user which URLs failed"} {
for _, want := range []string{"Do not invent", "only URLs returned by search", "Use this automatically", "do not stop", "Tell the user which URLs failed"} {
if !strings.Contains(scrapeDesc, want) {
t.Fatalf("scrape description missing %q: %s", want, scrapeDesc)
}
Expand Down
88 changes: 84 additions & 4 deletions commands/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,15 @@ type searchHandler struct {
client *degoog.Client
defaultEngines []string
defaultMax int
searchText string
}

// newSearchH builds a searchHandler around the given Degoog client, taking
// the default engines, default result cap and visible-text mode from cfg.
func newSearchH(c *degoog.Client, cfg *config.Config) *searchHandler {
	h := new(searchHandler)
	h.client = c
	h.defaultEngines = cfg.Engines
	h.defaultMax = cfg.MaxResults
	h.searchText = cfg.SearchText
	return h
}

Expand Down Expand Up @@ -65,8 +67,8 @@ func (h *searchHandler) handle(ctx context.Context, req *mcp.CallToolRequest, in
SourceOverlap: sourceOverlap(resp.Results),
},
}
out.Summary = searchSummary(out)
return &mcp.CallToolResult{Content: []mcp.Content{&mcp.TextContent{Text: out.Summary}}}, out, nil
out.Summary = searchBreakdownLine(out)
return &mcp.CallToolResult{Content: searchVisibleContent(out, h.searchText)}, out, nil
}

func (h *searchHandler) pickEngines(in []string) []string {
Expand All @@ -83,7 +85,7 @@ func (h *searchHandler) pickMax(in int) int {
return h.defaultMax
}

func searchSummary(out tools.SearchOutput) string {
func searchBreakdownLine(out tools.SearchOutput) string {
parts := []string{fmt.Sprintf("Degoog %s search for %q returned %d result(s)", out.Type, out.Query, out.Meta.ReturnedResults)}
if out.Meta.ResultsBeforeCap > 0 && out.Meta.DroppedByCap > 0 {
parts = append(parts, fmt.Sprintf("%d before maxResults cap; %d dropped", out.Meta.ResultsBeforeCap, out.Meta.DroppedByCap))
Expand All @@ -97,7 +99,85 @@ func searchSummary(out tools.SearchOutput) string {
if len(out.Meta.SourceOverlap) > 0 {
parts = append(parts, "top sources: "+formatSourceOverlap(out.Meta.SourceOverlap, 5))
}
return strings.Join(parts, "; ") + ". Structured content contains full results, timings, related searches, and cap/source metadata; call scrape with selected URLs for article text."
return strings.Join(parts, "; ") + "."
}

// searchVisibleContent renders the visible text for mode and wraps it in a
// single text content block. When the rendered text is empty it returns an
// empty, non-nil content slice instead.
func searchVisibleContent(out tools.SearchOutput, mode string) []mcp.Content {
	if rendered := searchVisibleText(out, mode); rendered != "" {
		return []mcp.Content{&mcp.TextContent{Text: rendered}}
	}
	return []mcp.Content{}
}

// searchVisibleText assembles the visible text for a search result according
// to mode, joining the selected sections with a blank line:
//   - none:      empty string (no visible text)
//   - breakdown: breakdown line + visible-text/structuredContent explanation
//   - results:   result rows + scrape guidance
//   - full:      all four sections
//
// An empty or unrecognized mode renders the same output as full.
// NOTE(review): config.Load defaults SearchText to none, so a zero-value
// config.Config (mode "") behaves differently from a loaded one — confirm
// that this is intended.
func searchVisibleText(out tools.SearchOutput, mode string) string {
switch mode {
case config.SEARCH_TEXT_NONE:
return ""
case config.SEARCH_TEXT_BREAKDOWN:
return strings.Join([]string{searchBreakdownLine(out), searchOutputExplanation()}, "\n\n")
case config.SEARCH_TEXT_RESULTS:
return strings.Join([]string{searchResultsText(out), searchScrapeGuidance()}, "\n\n")
case config.SEARCH_TEXT_FULL, "":
return strings.Join([]string{searchBreakdownLine(out), searchOutputExplanation(), searchResultsText(out), searchScrapeGuidance()}, "\n\n")
default:
return strings.Join([]string{searchBreakdownLine(out), searchOutputExplanation(), searchResultsText(out), searchScrapeGuidance()}, "\n\n")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟠 Major | ⚡ Quick win

Honor the documented none default for unset SearchText.

searchVisibleText currently treats mode == "" as full, but the config contract for this PR says the default is none. Any caller that passes a zero-value config.Config{} into newSearchH will emit visible text unexpectedly instead of suppressing it.

Proposed fix
 func searchVisibleText(out tools.SearchOutput, mode string) string {
 	switch mode {
-	case config.SEARCH_TEXT_NONE:
+	case "", config.SEARCH_TEXT_NONE:
 		return ""
 	case config.SEARCH_TEXT_BREAKDOWN:
 		return strings.Join([]string{searchBreakdownLine(out), searchOutputExplanation()}, "\n\n")
 	case config.SEARCH_TEXT_RESULTS:
 		return strings.Join([]string{searchResultsText(out), searchScrapeGuidance()}, "\n\n")
-	case config.SEARCH_TEXT_FULL, "":
+	case config.SEARCH_TEXT_FULL:
 		return strings.Join([]string{searchBreakdownLine(out), searchOutputExplanation(), searchResultsText(out), searchScrapeGuidance()}, "\n\n")
 	default:
-		return strings.Join([]string{searchBreakdownLine(out), searchOutputExplanation(), searchResultsText(out), searchScrapeGuidance()}, "\n\n")
+		return ""
 	}
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
func searchVisibleText(out tools.SearchOutput, mode string) string {
switch mode {
case config.SEARCH_TEXT_NONE:
return ""
case config.SEARCH_TEXT_BREAKDOWN:
return strings.Join([]string{searchBreakdownLine(out), searchOutputExplanation()}, "\n\n")
case config.SEARCH_TEXT_RESULTS:
return strings.Join([]string{searchResultsText(out), searchScrapeGuidance()}, "\n\n")
case config.SEARCH_TEXT_FULL, "":
return strings.Join([]string{searchBreakdownLine(out), searchOutputExplanation(), searchResultsText(out), searchScrapeGuidance()}, "\n\n")
default:
return strings.Join([]string{searchBreakdownLine(out), searchOutputExplanation(), searchResultsText(out), searchScrapeGuidance()}, "\n\n")
func searchVisibleText(out tools.SearchOutput, mode string) string {
switch mode {
case "", config.SEARCH_TEXT_NONE:
return ""
case config.SEARCH_TEXT_BREAKDOWN:
return strings.Join([]string{searchBreakdownLine(out), searchOutputExplanation()}, "\n\n")
case config.SEARCH_TEXT_RESULTS:
return strings.Join([]string{searchResultsText(out), searchScrapeGuidance()}, "\n\n")
case config.SEARCH_TEXT_FULL:
return strings.Join([]string{searchBreakdownLine(out), searchOutputExplanation(), searchResultsText(out), searchScrapeGuidance()}, "\n\n")
default:
return ""
}
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@commands/search.go` around lines 113 - 124, The default handling in
searchVisibleText is wrong: it currently maps an empty SearchText mode to full
output instead of the documented none behavior. Update the mode switch in
searchVisibleText so the zero-value/unset case follows config.SEARCH_TEXT_NONE
(and keep only the explicit full mode rendering all sections), ensuring callers
like newSearchH with config.Config{} do not emit visible text unexpectedly.

}
}

// searchOutputExplanation returns the fixed note that tells the model how the
// visible text relates to structuredContent and when to prefer each.
func searchOutputExplanation() string {
	const (
		visibleNote    = "Visible text: model-readable search context."
		structuredNote = "structuredContent: exact JSON with full result objects, engine timings, related searches, and cap/source metadata."
		usageNote      = "Use visible text when your client/model does not reliably expose structuredContent; use structuredContent when exact fields are available."
	)
	return visibleNote + " " + structuredNote + " " + usageNote
}

// searchScrapeGuidance returns the fixed hint that tells the model to answer
// from snippets when possible and otherwise to call scrape on returned URLs.
func searchScrapeGuidance() string {
	const (
		snippetHint = "Use snippets for simple answers."
		scrapeHint  = "If snippets are not enough, call scrape automatically on the most relevant returned URLs; do not ask permission unless your client requires it."
	)
	return snippetHint + " " + scrapeHint
}

// searchResultsText renders out.Results as a numbered plain-text list headed
// by "Results:". Each entry shows the whitespace-normalized title (or
// "Untitled result" when the title is blank), followed by the URL, the
// source(s) and the snippet when those are present. Entries are separated by
// a blank line. Non-blank related searches are appended as a bullet list.
//
// When out.Results is empty it returns "Results: none returned." and ignores
// related searches.
func searchResultsText(out tools.SearchOutput) string {
	if len(out.Results) == 0 {
		return "Results: none returned."
	}

	var b strings.Builder
	b.WriteString("Results:\n")
	last := len(out.Results) - 1
	for i, r := range out.Results {
		title := cleanLine(r.Title)
		if title == "" {
			title = "Untitled result"
		}
		// Write straight into the builder instead of formatting an
		// intermediate string first.
		fmt.Fprintf(&b, "%d. %s\n", i+1, title)
		if u := strings.TrimSpace(r.URL); u != "" {
			fmt.Fprintf(&b, " URL: %s\n", u)
		}
		// Prefer the merged source list; fall back to the single source.
		if len(r.Sources) > 0 {
			fmt.Fprintf(&b, " Sources: %s\n", strings.Join(r.Sources, ", "))
		} else if src := strings.TrimSpace(r.Source); src != "" {
			fmt.Fprintf(&b, " Source: %s\n", src)
		}
		if snippet := cleanLine(r.Snippet); snippet != "" {
			fmt.Fprintf(&b, " Snippet: %s\n", snippet)
		}
		if i < last {
			b.WriteString("\n")
		}
	}

	// Normalize related searches first so the header is only written when at
	// least one entry will actually be listed beneath it.
	related := make([]string, 0, len(out.RelatedSearches))
	for _, q := range out.RelatedSearches {
		if clean := cleanLine(q); clean != "" {
			related = append(related, clean)
		}
	}
	if len(related) > 0 {
		b.WriteString("\n\nRelated searches:\n")
		for _, q := range related {
			b.WriteString("- ")
			b.WriteString(q)
			b.WriteString("\n")
		}
	}

	return b.String()
}

// cleanLine collapses every run of whitespace in s (including newlines and
// tabs) into a single space and drops leading and trailing whitespace, so the
// result always fits on one line.
func cleanLine(s string) string {
	words := strings.Fields(s)
	return strings.Join(words, " ")
}

func sourceOverlap(results []degoog.Hit) []tools.SourceOverlap {
Expand Down
23 changes: 23 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ const (
ENV_ENGINES = "DEGOOG_MCP_ENGINES"
ENV_AUTH_TOKEN = "DEGOOG_MCP_AUTH_TOKEN"
ENV_DISABLE_SCRAPE = "DEGOOG_MCP_DISABLE_SCRAPE"
ENV_SEARCH_TEXT = "DEGOOG_MCP_SEARCH_TEXT"

SEARCH_TEXT_FULL = "full"
SEARCH_TEXT_RESULTS = "results"
SEARCH_TEXT_BREAKDOWN = "breakdown"
SEARCH_TEXT_NONE = "none"

DEFAULT_BIND_HOST = ""
DEFAULT_PORT = "4443"
Expand All @@ -39,6 +45,7 @@ const (
DEFAULT_USER_AGENT = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0 Safari/537.36"
DEFAULT_DEGOOG_URL = "http://degoog:4444"
DEFAULT_MAX_RESULTS = 0
DEFAULT_SEARCH_TEXT = SEARCH_TEXT_NONE

LIST_SEP = ","
)
Expand All @@ -60,6 +67,7 @@ type Config struct {
Engines []string
AuthToken string
DisableScrape bool
SearchText string
}

func Load() *Config {
Expand All @@ -80,6 +88,21 @@ func Load() *Config {
Engines: readList(ENV_ENGINES),
AuthToken: strings.TrimSpace(os.Getenv(ENV_AUTH_TOKEN)),
DisableScrape: readBool(ENV_DISABLE_SCRAPE, false),
SearchText: readSearchText(ENV_SEARCH_TEXT, DEFAULT_SEARCH_TEXT),
}
}

// readSearchText reads the visible-search-text mode from the environment
// variable named key. The value is trimmed and lower-cased before matching.
// It returns def when the variable is unset or blank, the normalized value
// when it is one of the SEARCH_TEXT_* modes, and otherwise logs a warning and
// returns def.
func readSearchText(key, def string) string {
	raw := os.Getenv(key)
	mode := strings.ToLower(strings.TrimSpace(raw))
	switch mode {
	case "":
		return def
	case SEARCH_TEXT_FULL, SEARCH_TEXT_RESULTS, SEARCH_TEXT_BREAKDOWN, SEARCH_TEXT_NONE:
		return mode
	}
	// Report the raw (un-normalized) value so the operator sees what was set.
	logger.Get().Warn("config: invalid search text for %s=%q, falling back to %s", key, raw, def)
	return def
}

Expand Down
Loading
Loading