Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
387 changes: 236 additions & 151 deletions api/chat.js

Large diffs are not rendered by default.

686 changes: 381 additions & 305 deletions api/chat/stream.js

Large diffs are not rendered by default.

138 changes: 52 additions & 86 deletions exa.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,100 +2,66 @@ import Exa from "exa-js";

const exa = new Exa(process.env.EXA_API_KEY);

// BlueJ's domains split across 3 batches for parallel discovery.
// In production they cover ~2000+ domains. includeDomains supports
// up to 1200 per call, so 3 calls covers everything.
const DOMAIN_BATCHES = [
[
"irs.gov", "treasury.gov", "congress.gov", "supremecourt.gov",
"uscourts.gov", "govinfo.gov", "federalregister.gov",
"taxnotes.com", "tax.gov", "ttb.gov",
],
[
"ftb.ca.gov", "cdtfa.ca.gov", "tax.ny.gov", "revenue.pa.gov",
"tax.illinois.gov", "comptroller.texas.gov", "dor.wa.gov",
"law.cornell.edu", "justia.com", "findlaw.com",
],
[
"taxfoundation.org", "aicpa-cima.com", "americanbar.org",
"bdo.com", "pwc.com", "deloitte.com", "ey.com", "kpmg.com",
"bloomberglaw.com", "thomsonreuters.com",
],
];

const MAX_AGE_HOURS = 336; // 2 weeks
const DISCOVERY_LIVECRAWL_TIMEOUT = 1500; // 1.5s opportunistic
const REFRESH_LIVECRAWL_TIMEOUT = 10000; // 10s targeted

/**
* Step 1: Discovery — 3x parallel /search calls across domain batches.
* maxAgeHours prefers cached content within 2 weeks. Short livecrawlTimeout
* catches easy livecrawls without blocking. Returns crawlDate per result.
* Search the web via Exa
*/
export async function discoverySearch(query, numResults = 10) {
const startTime = Date.now();
export async function searchExa(query, category, numResults = 5, searchType = "auto") {
const searchParams = {
numResults: Math.min(50, Math.max(3, numResults)),
highlights: {
maxCharacters: 4000,
},
type: searchType,
};

const batchPromises = DOMAIN_BATCHES.map(async (domains) => {
try {
const response = await exa.searchAndContents(query, {
numResults,
includeDomains: domains,
maxAgeHours: MAX_AGE_HOURS,
livecrawlTimeout: DISCOVERY_LIVECRAWL_TIMEOUT,
text: true,
highlights: { maxCharacters: 4000 },
});
return response.results || [];
} catch (err) {
console.error(`Discovery batch failed: ${err.message}`);
return [];
}
});
if (category) {
searchParams.category = category;
}

const batchResults = await Promise.all(batchPromises);
const allResults = batchResults.flat();
const timeMs = Date.now() - startTime;
const response = await exa.searchAndContents(query, searchParams);

return {
results: allResults.map((r) => ({
title: r.title,
url: r.url,
text: r.text?.slice(0, 4000) || (r.highlights || []).join("\n").slice(0, 4000),
publishedDate: r.publishedDate,
author: r.author,
crawlDate: r.crawlDate,
})),
timeMs,
};
}
if (!response.results || response.results.length === 0) {
return [];
}

/**
* Step 3: Targeted re-fetch — /contents for URLs the agent identified
* as both relevant AND stale. Full livecrawl timeout.
*/
export async function fetchFreshContents(urls) {
const startTime = Date.now();
return response.results.map((r) => ({
title: r.title,
url: r.url,
text: (r.highlights || []).join("\n").slice(0, 4000),
publishedDate: r.publishedDate,
author: r.author,
}));
}

try {
const response = await exa.getContents(urls, {
livecrawl: "always",
livecrawlTimeout: REFRESH_LIVECRAWL_TIMEOUT,
text: true,
});
// Rate limiter - max 4 requests per second
let lastRequestTime = 0;
const MIN_REQUEST_INTERVAL = 250;

return {
results: (response.results || []).map((r) => ({
title: r.title,
url: r.url,
text: r.text?.slice(0, 4000),
crawlDate: r.crawlDate,
})),
timeMs: Date.now() - startTime,
};
} catch (err) {
console.error(`Fresh contents fetch failed: ${err.message}`);
return { results: [], timeMs: Date.now() - startTime };
/**
 * Call searchExa while keeping successive calls at least
 * MIN_REQUEST_INTERVAL milliseconds apart.
 *
 * Each caller claims its start slot synchronously, before any await, so calls
 * issued back-to-back queue up one interval apart. Updating lastRequestTime
 * only after the sleep would let concurrent callers all read the same
 * timestamp, sleep for the same duration, and then fire together.
 *
 * @param {string} query - forwarded to searchExa unchanged
 * @param {string} [category] - forwarded to searchExa unchanged
 * @param {number} [numResults] - forwarded to searchExa unchanged
 * @param {string} [searchType] - forwarded to searchExa unchanged
 * @returns {Promise<*>} whatever searchExa resolves to; rejections propagate
 */
async function rateLimitedSearch(query, category, numResults, searchType) {
  const now = Date.now();
  // Earliest permitted start: one interval after the most recently claimed slot.
  const slot = Math.max(now, lastRequestTime + MIN_REQUEST_INTERVAL);
  // Claim the slot before yielding so the next caller schedules after it.
  lastRequestTime = slot;
  const delay = slot - now;
  if (delay > 0) {
    await new Promise((resolve) => setTimeout(resolve, delay));
  }
  return searchExa(query, category, numResults, searchType);
}

export { DOMAIN_BATCHES, MAX_AGE_HOURS, DISCOVERY_LIVECRAWL_TIMEOUT, REFRESH_LIVECRAWL_TIMEOUT };
/**
 * Fire off a batch of Exa searches concurrently and collect every outcome.
 *
 * A failing search does not reject the batch: its entry carries an empty
 * `results` array plus an `error` message instead.
 *
 * @param {Array<{query: string, category?: string, numResults?: number}>} searches
 *   One descriptor per search; `numResults` defaults to 5.
 * @param {string} [searchType="auto"] - passed to searchExa for every search
 * @returns {Promise<Array<{query: string, category: (string|undefined),
 *   results: Array, timeMs: number, error?: string}>>} Outcomes in input
 *   order, each with the wall-clock duration of its own search.
 */
export async function searchMultiple(searches, searchType = "auto") {
  const timedSearch = async ({ query, category, numResults = 5 }) => {
    const began = Date.now();
    const elapsed = () => Date.now() - began;

    try {
      const results = await searchExa(query, category, numResults, searchType);
      return { query, category, results, timeMs: elapsed() };
    } catch (err) {
      return { query, category, results: [], timeMs: elapsed(), error: err.message };
    }
  };

  const inFlight = searches.map((descriptor) => timedSearch(descriptor));
  return Promise.all(inFlight);
}
Loading