@@ -42,11 +42,13 @@ export interface ConnectionTestResult {
4242 /**
4343 * `models` when the standard GET /models probe succeeded.
4444 * `chat_completion_degraded` when /models 404'd but POST /chat/completions
45- * proved the endpoint is alive (e.g. Zhipu GLM's gateway — no public /models
46- * but /chat/completions works fine). The renderer surfaces this so users
47- * know /models is unavailable even though generation will work.
45+ * proved the openai-chat wire is alive (e.g. Zhipu GLM — no public /models).
46+ * `responses_degraded` when /models 404'd but POST /responses proved the
47+ * openai-responses wire is alive. We probe the wire's real inference
48+ * endpoint so a gateway that only implements /chat/completions can't
49+ * false-positive for a user whose provider is on the Responses API.
4850 */
49- probeMethod ?: 'models' | 'chat_completion_degraded' ;
51+ probeMethod ?: 'models' | 'chat_completion_degraded' | 'responses_degraded' ;
5052}
5153
5254export interface ConnectionTestError {
@@ -494,15 +496,19 @@ export async function runProviderTest(
494496 // not degrade anthropic — its /v1/models is standard, and skipping it
495497 // would mask real path-shape mistakes.
496498 if ( res . status === 404 && ( creds . wire === 'openai-chat' || creds . wire === 'openai-responses' ) ) {
497- const probe = await probeChatCompletion ( normalizedBaseUrl , headers ) ;
499+ const probe = await probeInferenceEndpoint ( creds . wire , normalizedBaseUrl , headers ) ;
498500 if ( probe . kind === 'pass' ) {
499- return { ok : true , probeMethod : 'chat_completion_degraded' } ;
501+ return {
502+ ok : true ,
503+ probeMethod :
504+ creds . wire === 'openai-responses' ? 'responses_degraded' : 'chat_completion_degraded' ,
505+ } ;
500506 }
501507 if ( probe . kind === 'http' && probe . status !== 404 ) {
502508 const { code, hint } = classifyHttpError ( probe . status ) ;
503509 return { ok : false , code, message : `HTTP ${ probe . status } ` , hint } ;
504510 }
505- // /chat/completions also 404'd (or the network dropped) — fall through
511+ // Inference endpoint also 404'd (or the network dropped) — fall through
506512 // and report the original /models 404.
507513 }
508514 const { code, hint } = classifyHttpError ( res . status ) ;
@@ -517,28 +523,45 @@ type ProbeResult =
517523 | { kind : 'network' ; message : string } ;
518524
519525/**
520- * POST a minimal chat-completion request to verify the endpoint is alive
521- * when GET /models returned 404. A 2xx response or any API-originated 4xx
522- * (400 model_unknown, 402 insufficient credits, 422, 429 — and 401/403 too,
523- * which we surface as an auth error instead of the misleading 404 hint)
524- * counts as "endpoint reachable". Only 404 and 5xx count as a real failure.
526+ * POST a minimal inference request to verify the endpoint is alive when GET
527+ * /models returned 404. We dispatch by wire so that providers on the
528+ * Responses API (which may not implement /chat/completions at all) can't
529+ * false-positive via a gateway that only speaks the other shape. A 2xx
530+ * response or any API-originated 4xx (400 model_unknown, 402 insufficient
531+ * credits, 422, 429 — and 401/403 too, which we surface as an auth error) counts as
532+ * "endpoint reachable". Only 404 and 5xx count as a real failure. The
533+ * request body is intentionally minimal; if the gateway rejects the payload
534+ * shape with a 4xx we still know the route exists.
525535 */
526- async function probeChatCompletion (
536+ async function probeInferenceEndpoint (
537+ wire : 'openai-chat' | 'openai-responses' ,
527538 normalizedBaseUrl : string ,
528539 headers : Record < string , string > ,
529540) : Promise < ProbeResult > {
530- const url = `${ normalizedBaseUrl } /chat/completions` ;
541+ const url =
542+ wire === 'openai-responses'
543+ ? `${ normalizedBaseUrl } /responses`
544+ : `${ normalizedBaseUrl } /chat/completions` ;
545+ const body =
546+ wire === 'openai-responses'
547+ ? JSON . stringify ( {
548+ model : 'probe' ,
549+ input : [ { role : 'user' , content : [ { type : 'input_text' , text : 'ping' } ] } ] ,
550+ max_output_tokens : 1 ,
551+ stream : false ,
552+ } )
553+ : JSON . stringify ( {
554+ model : 'probe' ,
555+ messages : [ { role : 'user' , content : 'ping' } ] ,
556+ max_tokens : 1 ,
557+ stream : false ,
558+ } ) ;
531559 let res : Response ;
532560 try {
533561 res = await fetchWithTimeout ( url , {
534562 method : 'POST' ,
535563 headers : { ...headers , 'content-type' : 'application/json' } ,
536- body : JSON . stringify ( {
537- model : 'probe' ,
538- messages : [ { role : 'user' , content : 'ping' } ] ,
539- max_tokens : 1 ,
540- stream : false ,
541- } ) ,
564+ body,
542565 } ) ;
543566 } catch ( err ) {
544567 return { kind : 'network' , message : err instanceof Error ? err . message : String ( err ) } ;
0 commit comments