askalf · askalf · Jun 9, 2026 · Jun 9, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 
 ## [Unreleased]
 
+### Added — three more keyless research adapters
+
+- **`--search=hackernews`** (alias `hn`, `src/search/hackernews.ts`) — Algolia-hosted HN search, no key. Community discussion / release threads; Ask/Show HN posts fall back to the HN thread URL. Snippet shows points/comments.
+- **`--search=stackexchange`** (aliases `stackoverflow`/`so`, `src/search/stackexchange.ts`) — Q&A search, no key (throttled). Default site `stackoverflow`; `DEEPDIVE_STACKEXCHANGE_SITE` selects another (serverfault, superuser, …). Entity-decoded titles; snippet shows score/answers/accepted.
+- **`--search=pubmed`** (`src/search/pubmed.ts`) — biomedical literature via NCBI E-utilities (esearch → esummary), no key. Kept sources are abstract pages; snippet shows authors/journal/date.
+
+All three reuse `searchTimeoutSignal`, keep their transform in an exported pure mapper, and were verified against the live APIs. deepdive now ships 12 search adapters.
+
 ### Added — `--since` recency filter
 
 - **`--since=<date|duration>`** (env `DEEPDIVE_SINCE`) — drop fetched sources published before a cutoff, building on v0.14's published-date extraction. Accepts an absolute date (`2024`, `2024-06`, `2024-06-15`) or a relative duration meaning "that long ago" (`30d`, `12h`, `2w`). A web source whose detected publication date precedes the cutoff is skipped (new `stale` `fetch.skipped` reason); sources with no detectable date are kept (no penalty for missing metadata). Doesn't apply to `--include` / `continue` sources. New pure `resolveSince` (exported); persistable as `since` in the config file. A supplied-but-unparseable `--since` is a hard error (exit 2), not a silent no-op.

diff --git a/README.md b/README.md
@@ -424,6 +424,9 @@ One adapter per backend. Default (DuckDuckGo) needs no key.
 | Wikipedia | `--search=wikipedia` | nothing | Encyclopedia-first. Best for definitional / factual sub-queries. Language via `DEEPDIVE_WIKIPEDIA_LANG` (default `en`). |
 | arXiv | `--search=arxiv` | nothing | Research-paper / preprint search via the arXiv API. Kept sources are abstract pages; the PDF path handles linked PDFs. |
 | GitHub | `--search=github` | `DEEPDIVE_GITHUB_TOKEN` (optional) | Repository search — "what project does X". Works keyless at 60 req/hr; the token raises the limit. |
+| Hacker News | `--search=hackernews` | nothing | Algolia-hosted HN search. Community discussion, release threads, primary sources. Snippet shows points/comments. |
+| Stack Exchange | `--search=stackexchange` | nothing | Q&A search (default `stackoverflow`; `DEEPDIVE_STACKEXCHANGE_SITE` for serverfault/superuser/etc). Keyless (throttled). |
+| PubMed | `--search=pubmed` | nothing | Biomedical literature via NCBI E-utilities. Kept sources are abstract pages; snippet shows authors/journal/date. |
 
 Adding a new adapter is ~30 lines: implement `SearchAdapter` in `src/search/*.ts`, register in `src/search.ts`. The full contract + a copy-paste scaffold live in [docs/search-adapter.md](docs/search-adapter.md).
 

diff --git a/src/cli.ts b/src/cli.ts
@@ -94,12 +94,12 @@ Flags:
                                 Env: DEEPDIVE_MAX_COST. Exit code 2 on cap-hit.
   --max-tokens=<n>              Output max tokens per LLM call. Default: 4096
   --search=<adapter>            Search adapter: duckduckgo | searxng | brave | tavily | exa |
-                                auto | wikipedia | arxiv | github
-                                Default: duckduckgo (no key required). wikipedia and arxiv
-                                need no key; github works keyless (set DEEPDIVE_GITHUB_TOKEN
-                                for a higher rate limit). 'auto' runs DDG first and falls
-                                back to Brave (if DEEPDIVE_BRAVE_KEY is set) on failure or
-                                empty results.
+                                auto | wikipedia | arxiv | github | hackernews |
+                                stackexchange | pubmed
+                                Default: duckduckgo (no key required). wikipedia, arxiv,
+                                hackernews, stackexchange, and pubmed need no key; github
+                                works keyless (DEEPDIVE_GITHUB_TOKEN raises the limit).
+                                'auto' runs DDG first, Brave fallback (if DEEPDIVE_BRAVE_KEY).
   --results-per-query=<n>       Results per sub-query. Default: 5
   --max-sources=<n>             Total sources to fetch. Default: 12
   --max-words-per-source=<n>    Per-source content cap before synthesis. Default: 2000
@@ -159,7 +159,7 @@ Flags:
 Environment:
   DEEPDIVE_BASE_URL, DEEPDIVE_API_KEY, DEEPDIVE_MODEL, DEEPDIVE_SEARCH,
   DEEPDIVE_SEARXNG_URL, DEEPDIVE_BRAVE_KEY, DEEPDIVE_TAVILY_KEY, DEEPDIVE_EXA_KEY,
-  DEEPDIVE_WIKIPEDIA_LANG, DEEPDIVE_GITHUB_TOKEN,
+  DEEPDIVE_WIKIPEDIA_LANG, DEEPDIVE_GITHUB_TOKEN, DEEPDIVE_STACKEXCHANGE_SITE,
   DEEPDIVE_MAX_SOURCES, DEEPDIVE_FETCH_TIMEOUT_MS, DEEPDIVE_HEADED,
   DEEPDIVE_DEEP_ROUNDS, DEEPDIVE_CONCURRENCY, DEEPDIVE_NO_CACHE,
   DEEPDIVE_CACHE_DIR, DEEPDIVE_CACHE_TTL_MS, DEEPDIVE_JSON, DEEPDIVE_VERBOSE, DEEPDIVE_TLDR,

diff --git a/src/search.ts b/src/search.ts
@@ -76,6 +76,23 @@ export async function resolveSearchAdapter(
       const { GitHubSearch } = await import("./search/github.js");
       return new GitHubSearch(env.DEEPDIVE_GITHUB_TOKEN);
     }
+    case "hackernews":
+    case "hn": {
+      const { HackerNewsSearch } = await import("./search/hackernews.js");
+      return new HackerNewsSearch();
+    }
+    case "stackexchange":
+    case "stackoverflow":
+    case "so": {
+      const { StackExchangeSearch } = await import("./search/stackexchange.js");
+      // Default site stackoverflow; override with DEEPDIVE_STACKEXCHANGE_SITE.
+      const site = (env.DEEPDIVE_STACKEXCHANGE_SITE ?? "stackoverflow").trim() || "stackoverflow";
+      return new StackExchangeSearch(site);
+    }
+    case "pubmed": {
+      const { PubMedSearch } = await import("./search/pubmed.js");
+      return new PubMedSearch();
+    }
     case "auto": {
       // DDG primary, Brave fallback. Brave is optional — if no key is set,
       // `auto` degrades to DDG-only (the pre-auto default behavior) rather

diff --git a/src/search/hackernews.ts b/src/search/hackernews.ts
@@ -0,0 +1,56 @@
+// Hacker News search adapter. No API key. Uses the Algolia-hosted HN Search
+// API (hn.algolia.com). A fit for "what does the community think of X",
+// release discussions, and primary-source threads. Defaults to story results;
+// the kept source is the story's target URL (or the HN thread itself for
+// Ask/Show HN posts with no external link).
+
+import { searchTimeoutSignal, type SearchAdapter, type SearchResult } from "../search.js";
+
+// Partial shape of one hit in the Algolia HN Search response, as read by this
+// adapter. Every field is optional or nullable, so callers guard before use.
+interface HNHit {
+  // Used to build the news.ycombinator.com thread URL when `url` is missing.
+  objectID?: string;
+  title?: string | null;
+  // External link target; null for Ask/Show HN posts.
+  url?: string | null;
+  // points, num_comments and author only feed the snippet text.
+  points?: number | null;
+  num_comments?: number | null;
+  author?: string | null;
+}
+
+// SearchAdapter backed by the Algolia-hosted HN Search API. Only story hits
+// are requested; shaping the response is delegated to `mapHNHits`.
+export class HackerNewsSearch implements SearchAdapter {
+  readonly name = "hackernews";
+
+  // Runs one story search for `query`. The requested page size is `limit`
+  // clamped to 1..50; `limit` itself is handed to `mapHNHits` for the final
+  // cut. Throws an Error carrying the status line on a non-2xx response.
+  async search(query: string, limit: number, signal?: AbortSignal): Promise<SearchResult[]> {
+    const pageSize = Math.min(Math.max(limit, 1), 50);
+    const endpoint = new URL("https://hn.algolia.com/api/v1/search");
+    const params: [string, string][] = [
+      ["query", query],
+      ["tags", "story"],
+      ["hitsPerPage", String(pageSize)],
+    ];
+    for (const [key, value] of params) {
+      endpoint.searchParams.set(key, value);
+    }
+
+    const response = await fetch(endpoint, {
+      headers: { accept: "application/json", "user-agent": "deepdive (+https://github.com/askalf/deepdive)" },
+      signal: searchTimeoutSignal(signal),
+    });
+    if (!response.ok) {
+      throw new Error(`hackernews ${response.status} ${response.statusText}`);
+    }
+
+    const payload = (await response.json()) as { hits?: HNHit[] };
+    const hits = payload.hits ?? [];
+    return mapHNHits(hits, limit);
+  }
+}
+
+// Exported for unit tests. Pure mapper from raw hits to SearchResults.
+// A hit is kept only if it has a non-empty title and either a `url` or an
+// `objectID`. Ask/Show HN posts have a null `url`, so those fall back to the
+// HN thread page and there is always something to fetch. Ranks are 1-based
+// and assigned after filtering and truncation to `limit`.
+export function mapHNHits(hits: HNHit[], limit: number): SearchResult[] {
+  const usable = hits.filter((hit) => {
+    const hasTitle = (hit.title ?? "").length > 0;
+    const hasTarget = Boolean(hit.url || hit.objectID);
+    return hasTitle && hasTarget;
+  });
+  const page = usable.slice(0, limit);
+  return page.map((hit, index) => {
+    const target = hit.url || `https://news.ycombinator.com/item?id=${hit.objectID}`;
+    return {
+      url: target,
+      title: hit.title as string,
+      snippet: hnSnippet(hit),
+      rank: index + 1,
+    };
+  });
+}
+
+// Builds the " · "-separated snippet from whichever of points, comment count
+// and author are present on the hit. Returns "" when none are.
+function hnSnippet(h: HNHit): string {
+  const { points, num_comments: comments, author } = h;
+  const segments = [
+    typeof points === "number" ? `${points} points` : "",
+    typeof comments === "number" ? `${comments} comments` : "",
+    author ? `by ${author}` : "",
+  ];
+  return segments.filter((segment) => segment !== "").join(" · ");
+}
diff --git a/src/search/pubmed.ts b/src/search/pubmed.ts
@@ -0,0 +1,87 @@
+// PubMed search adapter. No API key (NCBI E-utilities allow ~3 req/s
+// unauthenticated). Two-step: esearch returns matching PMIDs, esummary
+// returns their metadata. A fit for biomedical / life-sciences questions
+// where peer-reviewed literature is the authoritative source. The kept source
+// is the PubMed abstract page.
+
+import { searchTimeoutSignal, type SearchAdapter, type SearchResult } from "../search.js";
+
+const BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";
+
+// Partial shape of one document in the esummary `result` map. All fields are
+// optional; the mapper requires a non-empty `title`, the rest are best-effort.
+interface ESummaryDoc {
+  uid?: string;
+  title?: string;
+  // pubdate, source and authors are used only to build the snippet text.
+  pubdate?: string;
+  source?: string;
+  authors?: { name?: string }[];
+}
+
+// SearchAdapter over NCBI E-utilities. A search is two HTTP calls: esearch
+// to get matching PMIDs, then esummary to get metadata for those PMIDs.
+export class PubMedSearch implements SearchAdapter {
+  readonly name = "pubmed";
+
+  // Looks up `query` and returns results shaped by `mapPubMedSummary`.
+  // `retmax` is `limit` clamped to 1..50. Returns [] without a second request
+  // when esearch yields no ids. Throws on a non-2xx from either endpoint.
+  async search(query: string, limit: number, signal?: AbortSignal): Promise<SearchResult[]> {
+    const retmax = Math.min(Math.max(limit, 1), 50);
+    // One timeout signal is shared by both requests.
+    const requestInit = {
+      headers: { accept: "application/json", "user-agent": "deepdive (+https://github.com/askalf/deepdive)" },
+      signal: searchTimeoutSignal(signal),
+    };
+
+    const searchUrl = new URL(`${BASE}/esearch.fcgi`);
+    const searchParams: [string, string][] = [
+      ["db", "pubmed"],
+      ["term", query],
+      ["retmax", String(retmax)],
+      ["retmode", "json"],
+      ["sort", "relevance"],
+    ];
+    for (const [key, value] of searchParams) {
+      searchUrl.searchParams.set(key, value);
+    }
+    const searchRes = await fetch(searchUrl, requestInit);
+    if (!searchRes.ok) {
+      throw new Error(`pubmed esearch ${searchRes.status} ${searchRes.statusText}`);
+    }
+    const searchBody = (await searchRes.json()) as { esearchresult?: { idlist?: string[] } };
+    const pmids = searchBody.esearchresult?.idlist ?? [];
+    if (pmids.length === 0) {
+      return [];
+    }
+
+    const summaryUrl = new URL(`${BASE}/esummary.fcgi`);
+    const summaryParams: [string, string][] = [
+      ["db", "pubmed"],
+      ["id", pmids.join(",")],
+      ["retmode", "json"],
+    ];
+    for (const [key, value] of summaryParams) {
+      summaryUrl.searchParams.set(key, value);
+    }
+    const summaryRes = await fetch(summaryUrl, requestInit);
+    if (!summaryRes.ok) {
+      throw new Error(`pubmed esummary ${summaryRes.status} ${summaryRes.statusText}`);
+    }
+    const summaryBody = (await summaryRes.json()) as { result?: Record<string, unknown> };
+    return mapPubMedSummary(summaryBody.result ?? {}, pmids, limit);
+  }
+}
+
+// Exported for unit tests. Pure mapper over the esummary `result` map.
+// Walks `ids` in the order esearch returned them, skips ids whose entry is
+// missing or has no non-empty string title, and stops once `limit` results
+// have been collected. Each result points at the PubMed abstract page, has a
+// single trailing "." stripped from the title, and gets a 1-based rank.
+export function mapPubMedSummary(
+  result: Record<string, unknown>,
+  ids: string[],
+  limit: number,
+): SearchResult[] {
+  const kept: SearchResult[] = [];
+  // The stop condition is written as !(a >= b) on purpose: it keeps the
+  // original comparison so a non-numeric limit behaves exactly as before.
+  for (let i = 0; i < ids.length && !(kept.length >= limit); i += 1) {
+    const pmid = ids[i];
+    const entry = result[pmid] as ESummaryDoc | undefined;
+    if (entry && typeof entry.title === "string" && entry.title.length > 0) {
+      kept.push({
+        url: `https://pubmed.ncbi.nlm.nih.gov/${pmid}/`,
+        title: entry.title.replace(/\.$/, ""),
+        snippet: pubmedSnippet(entry),
+        rank: kept.length + 1,
+      });
+    }
+  }
+  return kept;
+}
+
+// Builds the " · "-separated snippet: author list, then `source`, then
+// `pubdate`, each only when present. More than three named authors are
+// shortened to the first three followed by "et al.".
+function pubmedSnippet(doc: ESummaryDoc): string {
+  const names = (doc.authors ?? []).reduce<string[]>((acc, author) => {
+    if (author.name) acc.push(author.name);
+    return acc;
+  }, []);
+
+  const segments: string[] = [];
+  if (names.length > 3) {
+    segments.push(`${names.slice(0, 3).join(", ")}, et al.`);
+  } else if (names.length > 0) {
+    segments.push(names.join(", "));
+  }
+  for (const extra of [doc.source, doc.pubdate]) {
+    if (extra) segments.push(extra);
+  }
+  return segments.join(" · ");
+}
diff --git a/src/search/stackexchange.ts b/src/search/stackexchange.ts
@@ -0,0 +1,75 @@
+// Stack Exchange search adapter. No API key (keyless requests are rate-limited
+// but fine for interactive research). Searches one site (default
+// stackoverflow; override with DEEPDIVE_STACKEXCHANGE_SITE). A fit for
+// concrete "how do I X" / error-message questions. The kept source is the
+// question page; deepdive's fetch step pulls the question + answers.
+
+import { searchTimeoutSignal, type SearchAdapter, type SearchResult } from "../search.js";
+
+// Partial shape of one item in the Stack Exchange search response; only the
+// fields this adapter reads. All are optional and guarded before use.
+interface SEItem {
+  // Arrives HTML-entity-encoded; the mapper decodes it via `decodeEntities`.
+  title?: string;
+  // Question page URL; items without a non-empty link are dropped by the mapper.
+  link?: string;
+  score?: number;
+  answer_count?: number;
+  // NOTE(review): the snippet labels a true value as "accepted" — confirm the
+  // API's `is_answered` really implies an accepted answer.
+  is_answered?: boolean;
+}
+
+// SearchAdapter for a single Stack Exchange site (default `stackoverflow`).
+// Queries /2.3/search/advanced by relevance and delegates response shaping
+// to `mapStackExchangeItems`.
+export class StackExchangeSearch implements SearchAdapter {
+  readonly name = "stackexchange";
+  constructor(private readonly site: string = "stackoverflow") {}
+
+  // Runs one search for `query` on the configured site. The page size is
+  // `limit` clamped to 1..50. Throws an Error that starts with
+  // "stackexchange <status> <statusText>" on a non-2xx response, with the
+  // API's `error_message` appended when the body provides one, and
+  // "stackexchange: <message>" when a 2xx body still carries an error.
+  async search(query: string, limit: number, signal?: AbortSignal): Promise<SearchResult[]> {
+    const url = new URL("https://api.stackexchange.com/2.3/search/advanced");
+    url.searchParams.set("order", "desc");
+    url.searchParams.set("sort", "relevance");
+    url.searchParams.set("q", query);
+    url.searchParams.set("site", this.site);
+    url.searchParams.set("pagesize", String(Math.min(Math.max(limit, 1), 50)));
+    const res = await fetch(url, {
+      headers: { accept: "application/json", "user-agent": "deepdive (+https://github.com/askalf/deepdive)" },
+      signal: searchTimeoutSignal(signal),
+    });
+    if (!res.ok) {
+      // The API reports failures (unknown site, throttling, ...) as a non-2xx
+      // status whose JSON body carries `error_message`. Read it here, because
+      // throwing on the status alone would discard the only useful detail.
+      let detail = "";
+      try {
+        const body = (await res.json()) as { error_message?: unknown } | null;
+        if (typeof body?.error_message === "string" && body.error_message.length > 0) {
+          detail = `: ${body.error_message}`;
+        }
+      } catch {
+        // Best-effort: the body was not JSON (e.g. a proxy error page), so
+        // only the status line is reported.
+      }
+      throw new Error(`stackexchange ${res.status} ${res.statusText}${detail}`);
+    }
+    const json = (await res.json()) as { items?: SEItem[]; error_message?: string };
+    if (json.error_message) throw new Error(`stackexchange: ${json.error_message}`);
+    return mapStackExchangeItems(json.items ?? [], limit);
+  }
+}
+
+// Exported for unit tests. Pure mapper from raw API items to SearchResults.
+// Items without a non-empty string `link` are dropped, the remainder is cut
+// to `limit`, titles are entity-decoded, and ranks are 1-based. The snippet
+// carries the score + answered state so the planner/critic can weigh a
+// well-vetted answer.
+export function mapStackExchangeItems(items: SEItem[], limit: number): SearchResult[] {
+  const linked = items.filter((item) => typeof item.link === "string" && item.link.length > 0);
+  const results: SearchResult[] = [];
+  linked.slice(0, limit).forEach((item, index) => {
+    results.push({
+      url: item.link as string,
+      title: decodeEntities(item.title ?? ""),
+      snippet: seSnippet(item),
+      rank: index + 1,
+    });
+  });
+  return results;
+}
+
+// Builds the " · "-separated snippet: score, answer count, and answer state.
+// The state is "accepted" only when the item carries an `accepted_answer_id`;
+// a question that is merely `is_answered` (which the API also sets for an
+// upvoted, non-accepted answer) is labelled "answered", so the snippet never
+// overstates how well-vetted the answer is. Returns "" when nothing is known.
+function seSnippet(it: SEItem): string {
+  const parts: string[] = [];
+  if (typeof it.score === "number") parts.push(`score ${it.score}`);
+  if (typeof it.answer_count === "number") {
+    parts.push(`${it.answer_count} answer${it.answer_count === 1 ? "" : "s"}`);
+  }
+  // Read through a widened type so this function does not depend on SEItem
+  // declaring the field; the API omits it when no answer was accepted.
+  const acceptedId = (it as SEItem & { accepted_answer_id?: unknown }).accepted_answer_id;
+  if (typeof acceptedId === "number") {
+    parts.push("accepted");
+  } else if (it.is_answered) {
+    parts.push("answered");
+  }
+  return parts.join(" · ");
+}
+
+// SE titles arrive HTML-entity-encoded (e.g. &quot;, &#39;, &#x27;). Decodes
+// the common named entities plus decimal and hexadecimal numeric references
+// so the title reads cleanly. Pure, single-pass: text produced by one
+// replacement is never rescanned, so "&amp;lt;" becomes "&lt;", not "<".
+// Unknown names and out-of-range code points are left untouched.
+function decodeEntities(s: string): string {
+  // A Map rather than an object literal: `name in obj` also matches inherited
+  // keys, so "&constructor;" or "&toString;" would have been replaced with a
+  // stringified function instead of being left alone.
+  const named = new Map<string, string>([
+    ["amp", "&"],
+    ["lt", "<"],
+    ["gt", ">"],
+    ["quot", '"'],
+    ["apos", "'"],
+  ]);
+  return s.replace(/&(#\d+|#[xX][0-9a-fA-F]+|[a-zA-Z]+);/g, (match: string, name: string) => {
+    if (name[0] !== "#") return named.get(name) ?? match;
+    const isHex = name[1] === "x" || name[1] === "X";
+    const code = isHex ? Number.parseInt(name.slice(2), 16) : Number(name.slice(1));
+    // Reject anything outside the Unicode range; fromCodePoint would throw.
+    return Number.isFinite(code) && code >= 0 && code <= 0x10ffff ? String.fromCodePoint(code) : match;
+  });
+}
diff --git a/test/hackernews-adapter.test.mjs b/test/hackernews-adapter.test.mjs
@@ -0,0 +1,70 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+import { mapHNHits, HackerNewsSearch } from "../dist/search/hackernews.js";
+import { resolveSearchAdapter } from "../dist/search.js";
+
+// Pure-mapper checks: field mapping and snippet content for a full hit.
+test("mapHNHits: maps url/title/snippet with points + comments", () => {
+  const hit = { objectID: "1", title: "Cool thing", url: "https://x.com/y", points: 320, num_comments: 88, author: "pg" };
+  const [first] = mapHNHits([hit], 10);
+  assert.equal(first.url, "https://x.com/y");
+  assert.equal(first.title, "Cool thing");
+  for (const expected of [/320 points/, /88 comments/, /by pg/]) {
+    assert.match(first.snippet, expected);
+  }
+});
+
+// A hit with no external link must resolve to its HN thread page.
+test("mapHNHits: Ask/Show HN (null url) falls back to the HN thread", () => {
+  const askHit = { objectID: "42", title: "Ask HN: X?", url: null, points: 10 };
+  const [first] = mapHNHits([askHit], 10);
+  assert.equal(first.url, "https://news.ycombinator.com/item?id=42");
+});
+
+// Hits lacking a title are filtered out; the valid one survives.
+test("mapHNHits: drops hits with no title or no link/objectID", () => {
+  const untitled = { objectID: "1" };
+  const valid = { title: "ok", url: "https://a.com" };
+  const results = mapHNHits([untitled, valid], 10);
+  assert.equal(results.length, 1);
+  assert.equal(results[0].title, "ok");
+});
+
+// Five candidate hits, limit 2: expect two results ranked 1 and 2.
+test("mapHNHits: respects limit + 1-based rank", () => {
+  const hits = [0, 1, 2, 3, 4].map((n) => ({ objectID: String(n), title: "t" + n, url: `https://a/${n}` }));
+  const results = mapHNHits(hits, 2);
+  assert.equal(results.length, 2);
+  assert.deepEqual(results.map(({ rank }) => rank), [1, 2]);
+});
+
+// Both the full name and the short alias resolve with an empty env.
+test("resolveSearchAdapter: hackernews + hn alias resolve keyless", async () => {
+  for (const alias of ["hackernews", "hn"]) {
+    const adapter = await resolveSearchAdapter(alias, {});
+    assert.equal(adapter.name, "hackernews");
+  }
+});
+
+// Replaces the global fetch with a stub that records each requested URL and
+// answers with a single canned hit, then checks the URL the adapter built.
+test("HackerNewsSearch.search: hits the Algolia API with tags=story", async () => {
+  const calls = [];
+  // Keep the real fetch so the finally block can put it back.
+  const orig = globalThis.fetch;
+  globalThis.fetch = async (url) => {
+    calls.push(String(url));
+    return new Response(JSON.stringify({ hits: [{ objectID: "1", title: "T", url: "https://a.com", points: 5 }] }), {
+      status: 200,
+      headers: { "content-type": "application/json" },
+    });
+  };
+  try {
+    const out = await new HackerNewsSearch().search("rust async", 5);
+    assert.match(calls[0], /hn\.algolia\.com\/api\/v1\/search/);
+    assert.match(calls[0], /tags=story/);
+    // URLSearchParams encodes the space in the query as "+".
+    assert.match(calls[0], /query=rust\+async/);
+    assert.equal(out[0].url, "https://a.com");
+  } finally {
+    // Restore even when an assertion fails so later tests see the real fetch.
+    globalThis.fetch = orig;
+  }
+});
+
+// A 503 from the stubbed fetch must surface as a rejection whose message
+// contains the adapter name and the status code.
+test("HackerNewsSearch.search: throws on non-2xx", async () => {
+  const orig = globalThis.fetch;
+  globalThis.fetch = async () => new Response("x", { status: 503, statusText: "Unavailable" });
+  try {
+    await assert.rejects(() => new HackerNewsSearch().search("q", 5), /hackernews 503/);
+  } finally {
+    globalThis.fetch = orig;
+  }
+});