Restore error handling for CUDA weight_sharing_across_methods set_option

mergennachin · claude · mergennachin · commit b5285d522692 · 2026-05-19T15:37:56.000-04:00
The error checks were dropped during refactoring. Without them, a failed
set_option silently disables weight sharing, causing prefill and decode
to allocate separate KV-cache buffers (OOM at runtime with no diagnostic).

Also resolve the <turn|> EOS token ID from the tokenizer at startup instead
of hardcoding token 106.

Co-authored-by: Claude <noreply@anthropic.com>
diff --git a/examples/models/gemma4_31b/main.cpp b/examples/models/gemma4_31b/main.cpp
@@ -190,8 +190,24 @@ int main(int argc, char** argv) {
   }
   {
     executorch::runtime::BackendOptions<1> backend_options;
-    backend_options.set_option("weight_sharing_across_methods", true);
-    executorch::runtime::set_option("CudaBackend", backend_options.view());
+    auto set_err =
+        backend_options.set_option("weight_sharing_across_methods", true);
+    if (set_err != Error::Ok) {
+      ET_LOG(
+          Error,
+          "Failed to set weight_sharing_across_methods: %d",
+          static_cast<int>(set_err));
+      return 1;
+    }
+    auto opt_err =
+        executorch::runtime::set_option("CudaBackend", backend_options.view());
+    if (opt_err != Error::Ok) {
+      ET_LOG(
+          Error,
+          "Failed to enable weight_sharing_across_methods: %d",
+          static_cast<int>(opt_err));
+      return 1;
+    }
   }
   printf("Loading methods...\n");
   if (module->load_method("prefill") != Error::Ok) {