huggingface · pjdurden · Jun 7, 2026 · Jun 7, 2026
diff --git a/candle-core/src/quantized/gguf_file.rs b/candle-core/src/quantized/gguf_file.rs
@@ -91,24 +91,44 @@ impl TensorInfo {
         tensor_data_offset: u64,
         device: &Device,
     ) -> Result<QTensor> {
-        let tensor_elems = self.shape.elem_count();
-        let block_size = self.ggml_dtype.block_size();
+        // The dimensions come straight from the (untrusted) file and are only
+        // bounded in count, not in value, so compute the element and byte counts
+        // with checked arithmetic. `Shape::elem_count` multiplies without overflow
+        // checks, so a crafted GGUF can push the product past `usize` and make it
+        // wrap (release) or panic (debug); reject the overflow with a clean error.
+        let block_size = self.ggml_dtype.block_size() as u64;
+        let type_size = self.ggml_dtype.type_size() as u64;
+        let mut tensor_elems: u64 = 1;
+        for &dim in self.shape.dims() {
+            tensor_elems = tensor_elems
+                .checked_mul(dim as u64)
+                .context("gguf: tensor element count overflows u64")?;
+        }
         if !tensor_elems.is_multiple_of(block_size) {
             crate::bail!(
             "the number of elements {tensor_elems} is not divisible by the block size {block_size}"
         )
         }
-        let size_in_bytes = tensor_elems / block_size * self.ggml_dtype.type_size();
-        let tensor_start = tensor_data_offset.saturating_add(self.offset);
+        let size_in_bytes = (tensor_elems / block_size)
+            .checked_mul(type_size)
+            .context("gguf: tensor byte size overflows u64")?;
+        // Gate the declared size against what is physically left in the file
+        // before allocating, mirroring the length checks in `read_string` and
+        // `Value::read` so a crafted size can't drive a huge allocation.
+        let data_start = tensor_data_offset
+            .checked_add(self.offset)
+            .context("gguf: tensor data offset overflows u64")?;
         let file_size = reader.seek(std::io::SeekFrom::End(0))?;
-        let remaining = file_size.saturating_sub(tensor_start);
-        if size_in_bytes as u64 > remaining {
+        let remaining = file_size.saturating_sub(data_start);
+        if size_in_bytes > remaining {
             crate::bail!(
-                "tensor needs {size_in_bytes} bytes at offset {tensor_start}, only {remaining} remaining in file"
+                "gguf: tensor data size {size_in_bytes} exceeds remaining file bytes {remaining}"
             )
         }
+        let size_in_bytes = usize::try_from(size_in_bytes)
+            .context("gguf: tensor byte size does not fit in usize")?;
         let mut raw_data = vec![0u8; size_in_bytes];
-        reader.seek(std::io::SeekFrom::Start(tensor_start))?;
+        reader.seek(std::io::SeekFrom::Start(data_start))?;
         reader.read_exact(&mut raw_data)?;
         super::ggml_file::qtensor_from_ggml(
             self.ggml_dtype,

diff --git a/candle-core/tests/gguf_tests.rs b/candle-core/tests/gguf_tests.rs
@@ -134,3 +134,44 @@ fn rejects_string_length_above_remaining_file_bytes() {
     pad(&mut buf, 64);
     assert_rejects(buf, "string length");
 }
+
+/// Serialises a GGUF V3 file holding exactly one tensor-info record named "t"
+/// with the requested dimensions and dtype, and stops there: no tensor payload
+/// follows. Parsing the header is expected to succeed, so any rejection has to
+/// come from the later attempt to read the tensor data.
+fn single_tensor_file(dims: &[u64], dtype: u32) -> Vec<u8> {
+    let n_dims = dims.len() as u32;
+    let mut bytes = header(1, 0);
+    bytes.extend(length_prefixed(b"t")); // tensor name
+    bytes.extend(n_dims.to_le_bytes()); // n_dimensions
+    bytes.extend(dims.iter().flat_map(|dim| dim.to_le_bytes())); // V3 dims are u64
+    bytes.extend(dtype.to_le_bytes()); // ggml_dtype
+    bytes.extend(0u64.to_le_bytes()); // offset
+    bytes
+}
+
+/// Parses the header of a dataless single-tensor file, then asserts that reading
+/// tensor "t" on the CPU fails with an error whose text contains `msg_contains`.
+fn assert_tensor_read_rejects(dims: &[u64], dtype: u32, msg_contains: &str) {
+    let mut reader = Cursor::new(single_tensor_file(dims, dtype));
+    let parsed = Content::read(&mut reader).expect("header should parse");
+    let outcome = parsed.tensor(&mut reader, "t", &Device::Cpu);
+    let msg = outcome.expect_err("expected Err").to_string();
+    assert!(msg.contains(msg_contains), "unexpected error: {msg}");
+}
+
+#[test]
+fn rejects_tensor_with_elem_count_overflow() {
+    // Two dims of 2^33 multiply to 2^66, past u64 (and usize): sizing the tensor
+    // must return an error, not wrap or panic. F32 = dtype 0, block 1, 4 bytes.
+    let huge = 1u64 << 33;
+    assert_tensor_read_rejects(&[huge, huge], 0, "overflow");
+}
+
+#[test]
+fn rejects_tensor_size_above_remaining_file_bytes() {
+    // 65,536 F32 elements (256 KB) do not overflow anything, yet the helper file
+    // carries no tensor data, so the declared size must be rejected up front.
+    let dims = [1u64 << 16];
+    assert_tensor_read_rejects(&dims, 0, "remaining file bytes");
+}