Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
8b06a33
feat(chatbot-demo): switch inference from OpenRouter to Cerebras API
devin-ai-integration[bot] Mar 9, 2026
4be8cb3
feat(chatbot-demo): add Cerebras logo to UI header and assets
devin-ai-integration[bot] Mar 9, 2026
1912c15
feat(chatbot-demo): update UI with proper logos, model info, and Exa …
devin-ai-integration[bot] Mar 9, 2026
4257566
fix: add reasoning_format hidden to suppress reasoning token leakage
devin-ai-integration[bot] Mar 9, 2026
4f829f7
fix: add server-side post-processing to strip gpt-oss-120b reasoning …
devin-ai-integration[bot] Mar 10, 2026
0c34f77
fix: improve reasoning artifact cleaning to strip plain text reasonin…
devin-ai-integration[bot] Mar 10, 2026
fc532e0
fix: improve reasoning artifact cleaning with better markdown detection
devin-ai-integration[bot] Mar 10, 2026
2de2281
fix: add maxDuration config and SSE heartbeats to prevent Vercel timeout
devin-ai-integration[bot] Mar 10, 2026
dde8997
refactor: swap gpt-oss-120b to llama3.3-70b, remove all reasoning art…
devin-ai-integration[bot] Mar 10, 2026
d477d1b
fix(chatbot-demo): correct model name to llama3.1-8b (only available …
devin-ai-integration[bot] Mar 10, 2026
7bdef63
fix: use non-streaming initial call for llama3.1-8b tool detection
devin-ai-integration[bot] Mar 10, 2026
a9a02fc
fix: handle stringified nested JSON in llama3.1-8b tool call arguments
devin-ai-integration[bot] Mar 10, 2026
19ba2a3
fix: parse tool calls from content text when llama3.1-8b skips tool_c…
devin-ai-integration[bot] Mar 10, 2026
e719555
refactor: reset to original code, minimal API swap to Cerebras llama3…
devin-ai-integration[bot] Mar 10, 2026
510ac15
fix: use non-streaming initial call to prevent tool call JSON leaking…
devin-ai-integration[bot] Mar 10, 2026
1a5f325
fix: detect tool calls using 'parameters' key in addition to 'arguments'
devin-ai-integration[bot] Mar 10, 2026
2351ad0
fix: add SSE heartbeats to keep connection alive during non-streaming…
devin-ai-integration[bot] Mar 10, 2026
ce0d4a1
refactor: back to streaming with buffered content detection
devin-ai-integration[bot] Mar 10, 2026
dba0675
fix: add res.flushHeaders() to prevent browser SSE hang
devin-ai-integration[bot] Mar 10, 2026
076f0f9
fix: add SSE heartbeat comments to prevent follow-up query connection…
devin-ai-integration[bot] Mar 10, 2026
38e93c4
fix: robust tool call parsing with regex fallback for malformed JSON
devin-ai-integration[bot] Mar 10, 2026
498ce30
fix: filter tool call JSON from final response content stream
devin-ai-integration[bot] Mar 10, 2026
def8949
fix: retry once when model returns empty response (llama3.1-8b interm…
devin-ai-integration[bot] Mar 10, 2026
f24134c
Merge branch 'devin/1773098223-cerebras-inference' of https://git-man…
devin-ai-integration[bot] Mar 10, 2026
fc17480
fix: truncate assistant history to 500 chars to prevent empty follow-…
devin-ai-integration[bot] Mar 10, 2026
4f6be22
fix: normalize string array searches from llama3.1-8b tool calls
devin-ai-integration[bot] Mar 10, 2026
9b5c4a0
fix: harden tool call detection with prefix-tolerant parsing, single-…
devin-ai-integration[bot] Mar 10, 2026
a1dcebc
chore: change Market Intelligence card to AI & Robotics Fundraises
devin-ai-integration[bot] Mar 10, 2026
e8ea6fd
chore: remove OpenAI logo and value proposition bullets from header
devin-ai-integration[bot] Mar 10, 2026
107fa5a
feat: split-screen dual streaming with Exa mode toggle and latency tr…
devin-ai-integration[bot] Mar 10, 2026
2be76a7
feat: dropdown mode toggle, instant source display, server-side Exa l…
devin-ai-integration[bot] Mar 10, 2026
aa27f62
feat: flash sources banner briefly then vanish, restore bottom source…
devin-ai-integration[bot] Mar 10, 2026
da01b49
fix: match latency bar heights with fixed h-10 on both panes
devin-ai-integration[bot] Mar 10, 2026
b6a3028
chore: change suggestion card to Super Bowl question
devin-ai-integration[bot] Mar 10, 2026
0cca2e9
fix: strip leaked followups/tool-call JSON from both panes
devin-ai-integration[bot] Mar 10, 2026
eda2e14
fix: client-side cleanup of followups/tool-call JSON, validate chart …
devin-ai-integration[bot] Mar 10, 2026
e5ed3f5
fix: guard against undefined/empty code blocks in CodeBlock renderer
devin-ai-integration[bot] Mar 10, 2026
cd03cc0
fix: strip empty code fences and trailing unclosed fences from content
devin-ai-integration[bot] Mar 10, 2026
4f6120d
feat: default to fast mode, add refresh search button next to mode dr…
devin-ai-integration[bot] Mar 11, 2026
e2792bd
fix: refresh button resets to starter screen instead of re-running query
devin-ai-integration[bot] Mar 11, 2026
0e93d4b
chore: change Super Bowl question to 'Who won the Super Bowl?'
devin-ai-integration[bot] Mar 11, 2026
8eeb5fb
feat: switch to gpt-oss-120b, add 429 retry with exponential backoff
devin-ai-integration[bot] Mar 11, 2026
6225e45
chore: default Exa mode to auto
devin-ai-integration[bot] Mar 11, 2026
9a2e1d7
perf: eliminate first Cerebras call on Exa path, search directly with…
devin-ai-integration[bot] Mar 11, 2026
29c03fc
feat: use OpenRouter (Gemini Flash) for query generation, Cerebras fo…
devin-ai-integration[bot] Mar 11, 2026
1cdc2d0
chore: trigger redeploy with OPEN_ROUTER_KEY env var for preview
devin-ai-integration[bot] Mar 11, 2026
c83fcab
fix: improve Cerebras summarization - increase highlight text, use fo…
devin-ai-integration[bot] Mar 11, 2026
7344234
fix: cleaner summarize prompt - no raw URLs/source blocks, no preamble
devin-ai-integration[bot] Mar 11, 2026
e6f1e29
fix: remove startPublishedDate filter from Exa calls - query year is …
devin-ai-integration[bot] Mar 11, 2026
6ae255d
feat: default to Exa Instant, add mode toggle to home screen, reset m…
devin-ai-integration[bot] Mar 11, 2026
4c8809b
feat: revert to Cerebras for query generation, add detailed latency b…
devin-ai-integration[bot] Mar 11, 2026
0b0ab3a
style: add Cerebras logo next to Tool Call and Synthesis in latency bar
devin-ai-integration[bot] Mar 11, 2026
60397a9
fix: reduce people category usage in search prompts, require paired n…
devin-ai-integration[bot] Mar 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 128 additions & 18 deletions api/chat.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,70 @@ import OpenAI from "openai";
import Exa from "exa-js";

const client = new OpenAI({
baseURL: "https://openrouter.ai/api/v1",
apiKey: process.env.OPEN_ROUTER_KEY,
baseURL: "https://api.cerebras.ai/v1",
apiKey: process.env.CEREBRAS_API_KEY || "csk-ctnvpnrpxw5t244c83c84pdecwk9tpfdp3jkvece9kve248x",
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Hardcoded API key exposed in source code (api/chat.js)

Same hardcoded Cerebras API key csk-ctnvpnrpxw5t244c83c84pdecwk9tpfdp3jkvece9kve248x is exposed in api/chat.js:6. This is the Vercel serverless function for the non-streaming chat endpoint. The key should be loaded exclusively from environment variables.

Suggested change
apiKey: process.env.CEREBRAS_API_KEY || "csk-ctnvpnrpxw5t244c83c84pdecwk9tpfdp3jkvece9kve248x",
apiKey: process.env.CEREBRAS_API_KEY,
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

});

const exa = new Exa(process.env.EXA_API_KEY);

const DEFAULT_MODEL = "google/gemini-2.5-flash";
const DEFAULT_MODEL = "llama3.1-8b";

/**
* Attempt to parse a tool call from content text that the model output
* instead of using the structured tool_calls field.
* Handles multiple malformed JSON formats from llama3.1-8b.
* Returns { name, arguments } or null.
*/
/**
 * Attempt to parse a tool call from content text that the model emitted
 * instead of using the structured tool_calls field.
 * Handles several malformed JSON shapes produced by llama3.1-8b.
 *
 * @param {string} content - Raw assistant message content.
 * @returns {{name: string, arguments: string}|null} Normalized tool call
 *   (arguments is always a JSON string), or null when no call is found.
 */
function tryExtractToolCallFromContent(content) {
  // The model sometimes prefixes the JSON with role text ("assistant", etc.),
  // so scan forward to the first opening brace and parse from there.
  const text = content.trim();
  const braceAt = text.indexOf("{");
  if (braceAt === -1) return null;
  const candidate = text.slice(braceAt);

  // Attempt 1: the candidate is well-formed JSON.
  try {
    const obj = JSON.parse(candidate);
    let callName = obj.name;
    let callArgs = obj.arguments || obj.parameters;
    if (!callName && obj.function) {
      callName = obj.function.name;
      callArgs = obj.function.arguments || obj.function.parameters;
    }
    if (callName && callArgs) {
      const argString =
        typeof callArgs === "string" ? callArgs : JSON.stringify(callArgs);
      return { name: callName, arguments: argString };
    }
  } catch (_) {
    // Not valid JSON — fall through to the regex-based salvage below.
  }

  // Attempt 2: salvage from malformed JSON (unescaped inner quotes,
  // single-quoted keys/values, truncated payloads, ...).
  const nameHit = candidate.match(/["']?name["']?\s*:\s*["']([^"']+)["']/);
  if (!nameHit) return null;
  const callName = nameHit[1];
  if (callName !== "web_search") return null;

  // Collect every query value, tolerating both quote styles.
  const queryPattern = /["']?query["']?\s*:\s*["']((?:[^"'\\]|\\.)*)["']/g;
  const foundQueries = [];
  for (const hit of candidate.matchAll(queryPattern)) {
    const unescaped = hit[1]
      .replace(/\\["']/g, (m) => m[1])
      .replace(/\\\\/g, "\\");
    if (unescaped.trim()) foundQueries.push(unescaped.trim());
  }

  if (foundQueries.length === 0) return null;
  return {
    name: callName,
    arguments: JSON.stringify({
      searches: foundQueries.map((query) => ({ query, numResults: 5 })),
    }),
  };
}

const freshnessDefaults = {
tweet: 48,
Expand Down Expand Up @@ -206,9 +263,12 @@ export default async function handler(req, res) {
try {
const { message, history = [], exaEnabled = true, model = DEFAULT_MODEL } = req.body;

const recentHistory = history.slice(-20).map(msg => ({
// Truncate assistant messages to avoid overwhelming the 8B model with long context
const recentHistory = history.slice(-10).map(msg => ({
role: msg.role,
content: msg.content,
content: msg.role === 'assistant' && msg.content && msg.content.length > 500
? msg.content.slice(0, 500) + '...'
: msg.content,
}));

const messages = [
Expand All @@ -217,25 +277,62 @@ export default async function handler(req, res) {
{ role: "user", content: message },
];

const response = await client.chat.completions.create({
model,
messages,
tools: exaEnabled ? [getSearchTool()] : undefined,
});
// Helper: call model and detect tool calls
async function callAndParse() {
const resp = await client.chat.completions.create({
model,
messages,
tools: exaEnabled ? [getSearchTool()] : undefined,
});

const ch = resp.choices[0];
let tcList = ch.message.tool_calls;
let msg = ch.message;

if (!tcList && ch.message.content) {
const extracted = tryExtractToolCallFromContent(ch.message.content);
if (extracted) {
tcList = [{ id: "manual_tool_call_0", type: "function", function: extracted }];
msg = { role: "assistant", content: null, tool_calls: tcList };
}
}

const choice = response.choices[0];
return { toolCallsList: tcList, assistantMessage: msg, content: ch.message.content };
}

if (!choice.message.tool_calls) {
return res.json({ content: choice.message.content, searches: null, exaUsed: false });
// Retry once if model returns empty (llama3.1-8b intermittently returns nothing)
let { toolCallsList, assistantMessage, content } = await callAndParse();
if (!toolCallsList && (!content || !content.trim())) {
console.log("[Chat] Empty response from model, retrying once...");
({ toolCallsList, assistantMessage, content } = await callAndParse());
}

if (!toolCallsList) {
return res.json({ content: content, searches: null, exaUsed: false });
}

const allSearches = [];
const toolCallIds = [];
for (const toolCall of choice.message.tool_calls) {
for (const toolCall of toolCallsList) {
try {
const args = JSON.parse(toolCall.function.arguments);
let searches = args.searches;

if (typeof searches === 'string') {
try { searches = JSON.parse(searches); } catch (_) {
// Fallback: Python-style dict with single quotes
try { searches = JSON.parse(searches.replace(/'/g, '"')); } catch (_2) {
// Last resort: regex extract queries from the string
const qr = /["']query["']\s*:\s*["']([^"']+)["']/g;
let qm; const qMatches = [];
while ((qm = qr.exec(searches)) !== null) {
qMatches.push({ query: qm[1].trim(), numResults: 5 });
}
if (qMatches.length > 0) searches = qMatches;
}
}
}

if (searches && !Array.isArray(searches)) {
searches = [searches];
}
Expand All @@ -244,8 +341,12 @@ export default async function handler(req, res) {
}

if (Array.isArray(searches)) {
const validSearches = searches.filter(s => s && typeof s.query === 'string' && s.query.trim());
allSearches.push(...validSearches);
const normalized = searches.map(s => {
if (typeof s === 'string' && s.trim()) return { query: s.trim() };
if (s && typeof s.query === 'string' && s.query.trim()) return s;
return null;
}).filter(Boolean);
allSearches.push(...normalized);
}
toolCallIds.push(toolCall.id);
} catch (e) {
Expand Down Expand Up @@ -288,13 +389,22 @@ export default async function handler(req, res) {
model,
messages: [
...messages,
choice.message,
assistantMessage,
...toolMessages,
],
});

let finalContent = finalResponse.choices[0].message.content || "";
const trimmedFinal = finalContent.trimStart();
if (trimmedFinal.startsWith("{") && trimmedFinal.includes("}")) {
const afterJson = trimmedFinal.slice(trimmedFinal.lastIndexOf("}") + 1);
finalContent = afterJson.replace(/^\s*assistant\s*/i, "").trim();
} else {
finalContent = trimmedFinal.replace(/^\s*assistant\s*/i, "").trimStart();
}

res.json({
content: finalResponse.choices[0].message.content,
content: finalContent,
searches: searchResults.map(({ query, category, results, timeMs }) => ({
query,
category,
Expand Down
Loading