Skip to content

Commit c2d80a7

Browse files
virt: add irqfd trait and implement for mshv and KVM
Add irqfd support, enabling the kernel to inject MSIs directly into the guest when an eventfd is signaled, without a userspace transition. This is required for VFIO device passthrough, where physical device interrupts must be delivered to the guest via the hypervisor's irqfd mechanism.

New traits in virt crate:
- IrqFd: allocates GSIs and registers eventfds as irqfd routes
- IrqFdRoute: updates/clears MSI routing (address/data) per GSI
- Partition::irqfd(): returns the irqfd interface if supported

mshv implementation (virt_mshv/src/irqfd.rs):
- GSI allocation table (2048 slots)
- MSHV_IRQFD ioctl for register/unregister eventfds
- MSHV_SET_MSI_ROUTING ioctl to push full routing table
- Automatic cleanup on route drop (unregister + free GSI)
- VmFd changed to Arc<VmFd> for shared ownership

KVM implementation (virt_kvm/src/gsi.rs):
- KvmIrqFdState wraps existing GsiRoute infrastructure
- KvmIrqFdRoute delegates to GsiRoute::enable/disable
- Partition::irqfd() wired in x86_64 arch module

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 433934f commit c2d80a7

File tree

10 files changed

+529
-7
lines changed

10 files changed

+529
-7
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8916,6 +8916,7 @@ dependencies = [
89168916
"arrayvec",
89178917
"build_rs_guest_arch",
89188918
"guestmem",
8919+
"headervec",
89198920
"hv1_emulator",
89208921
"hv1_hypercall",
89218922
"hvdef",

vmm_core/virt/src/generic.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use crate::io::CpuIo;
1212
use crate::irqcon::ControlGic;
1313
use crate::irqcon::IoApicRouting;
1414
use crate::irqcon::MsiRequest;
15+
use crate::irqfd::IrqFd;
1516
use crate::x86::DebugState;
1617
use crate::x86::HardwareBreakpoint;
1718
use guestmem::DoorbellRegistration;
@@ -295,6 +296,17 @@ pub trait Partition: 'static + Hv1 + Inspect + Send + Sync {
295296
None
296297
}
297298

299+
/// Returns an irqfd routing interface for this partition.
300+
///
301+
/// irqfd allows the kernel to inject MSIs directly into the guest when an
302+
/// eventfd is signaled, without a userspace transition. This is used for
303+
/// device passthrough with VFIO.
304+
///
305+
/// Not all partitions support this.
306+
fn irqfd(&self) -> Option<Arc<dyn IrqFd>> {
307+
None
308+
}
309+
298310
/// Get the partition capabilities for this partition.
299311
fn caps(&self) -> &PartitionCapabilities;
300312

vmm_core/virt/src/irqfd.rs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// Licensed under the MIT License.
3+
4+
//! Traits for irqfd-based interrupt delivery.
5+
//!
6+
//! irqfd allows a hypervisor to directly inject an MSI into a guest when an
7+
//! event is signaled, without involving userspace in the interrupt delivery
8+
//! path. This is used for device passthrough (e.g., VFIO) where the physical
9+
//! device signals an event and the hypervisor injects the corresponding MSI
10+
//! into the guest VM.
11+
12+
use pal_event::Event;
13+
14+
/// Trait for partitions that support irqfd-based interrupt delivery.
15+
///
16+
/// An irqfd associates an event with a GSI (Global System Interrupt), and a
17+
/// GSI routing table maps GSIs to MSI addresses and data values. When the
18+
/// event is signaled, the kernel looks up the GSI routing and injects the
19+
/// configured MSI into the guest without a usermode transition.
20+
pub trait IrqFd: Send + Sync {
21+
/// Creates a new irqfd route.
22+
///
23+
/// Allocates a GSI, creates an event, and registers the event with the
24+
/// hypervisor so that signaling it injects the configured MSI into the
25+
/// guest.
26+
///
27+
/// The caller retrieves the event via [`IrqFdRoute::event`] to pass to
28+
/// VFIO or other interrupt sources.
29+
///
30+
/// When the route is dropped, the irqfd is unregistered and the GSI is
31+
/// freed.
32+
fn new_irqfd_route(&self) -> anyhow::Result<Box<dyn IrqFdRoute>>;
33+
}
34+
35+
/// A handle to a registered irqfd route.
36+
///
37+
/// Each route represents a single GSI with an associated event. When the
38+
/// event is signaled (e.g., by VFIO on a device interrupt), the kernel injects
39+
/// the MSI configured via [`set_msi`](IrqFdRoute::set_msi) into the guest.
40+
///
41+
/// Dropping this handle unregisters the irqfd and frees the GSI.
42+
pub trait IrqFdRoute: Send + Sync {
43+
/// Returns the event that triggers interrupt injection when signaled.
44+
///
45+
/// Pass this to VFIO `map_msix` or any other interrupt source. On Linux,
46+
/// this is an eventfd created by the implementation. On WHP (future), this
47+
/// is the event handle returned by `WHvCreateTrigger`.
48+
fn event(&self) -> &Event;
49+
50+
/// Sets the MSI routing for this irqfd's GSI.
51+
///
52+
/// `address` and `data` are the x86 MSI address and data values that the
53+
/// kernel will use when injecting the interrupt into the guest.
54+
fn set_msi(&self, address: u64, data: u32) -> anyhow::Result<()>;
55+
56+
/// Clears the MSI routing for this irqfd's GSI.
57+
///
58+
/// The irqfd remains registered but interrupt delivery is disabled until
59+
/// a new route is configured via [`set_msi`](IrqFdRoute::set_msi).
60+
fn clear_msi(&self) -> anyhow::Result<()>;
61+
62+
/// Masks the route.
63+
///
64+
/// While masked, interrupts arriving on the event are not injected into
65+
/// the guest. The caller should use [`consume_pending`](IrqFdRoute::consume_pending)
66+
/// to check whether an interrupt arrived while masked and store the
67+
/// result in the MSI-X PBA. On unmask, the caller should deliver any
68+
/// pending interrupt from the PBA before re-enabling the route.
69+
fn mask(&self) -> anyhow::Result<()>;
70+
71+
/// Unmasks the route and re-enables interrupt injection.
72+
fn unmask(&self) -> anyhow::Result<()>;
73+
74+
/// Drains the pending interrupt state and returns whether an interrupt
75+
/// was pending.
76+
///
77+
/// This atomically reads and clears the event's counter. The caller
78+
/// should store the result in the MSI-X PBA (Pending Bit Array).
79+
/// Repeated calls after the first drain will return `false` until a
80+
/// new interrupt arrives, so the caller must persist the pending state
81+
/// externally (e.g., in the MSI-X emulator's PBA bits).
82+
fn consume_pending(&self) -> bool {
83+
self.event().try_wait()
84+
}
85+
}

vmm_core/virt/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ mod cpuid;
88
mod generic;
99
pub mod io;
1010
pub mod irqcon;
11+
pub mod irqfd;
1112
pub mod state;
1213
pub mod synic;
1314
pub mod x86;

vmm_core/virt_kvm/src/arch/x86_64/mod.rs

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,7 @@ impl ProtoPartition for KvmProtoPartition<'_> {
438438

439439
let partition = KvmPartition {
440440
synic_ports: Arc::new(virt::synic::SynicPorts::new(partition.clone())),
441+
irqfd_state: Arc::new(crate::gsi::KvmIrqFdState::new(partition.clone())),
441442
inner: partition,
442443
};
443444

@@ -514,6 +515,10 @@ impl Partition for KvmPartition {
514515
Some(self.inner.clone())
515516
}
516517

518+
/// Returns the partition's irqfd routing interface; this implementation
/// always provides one.
fn irqfd(&self) -> Option<Arc<dyn virt::irqfd::IrqFd>> {
    let state = Arc::clone(&self.irqfd_state);
    Some(state)
}
521+
517522
fn caps(&self) -> &virt::PartitionCapabilities {
518523
&self.inner.caps
519524
}
@@ -739,14 +744,14 @@ impl KvmProcessor<'_> {
739744
}
740745
}
741746

742-
struct KvmMsi {
743-
address_lo: u32,
744-
address_hi: u32,
745-
data: u32,
747+
pub(crate) struct KvmMsi {
748+
pub(crate) address_lo: u32,
749+
pub(crate) address_hi: u32,
750+
pub(crate) data: u32,
746751
}
747752

748753
impl KvmMsi {
749-
fn new(request: MsiRequest) -> Self {
754+
pub(crate) fn new(request: MsiRequest) -> Self {
750755
let request_address = MsiAddress::from(request.address as u32);
751756
let request_data = MsiData::from(request.data);
752757

vmm_core/virt_kvm/src/gsi.rs

Lines changed: 87 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,16 @@
44
//! Implements GSI routing management for KVM VMs.
55
66
use crate::KvmPartitionInner;
7+
use anyhow::Context;
78
use pal_event::Event;
89
use parking_lot::Mutex;
910
use std::os::unix::prelude::*;
1011
use std::sync::Arc;
1112
use std::sync::Weak;
1213
use std::sync::atomic::AtomicBool;
1314
use std::sync::atomic::Ordering;
15+
use virt::irqfd::IrqFd;
16+
use virt::irqfd::IrqFdRoute;
1417

1518
const NUM_GSIS: usize = 2048;
1619

@@ -80,7 +83,6 @@ impl GsiRouting {
8083

8184
impl KvmPartitionInner {
8285
/// Reserves a new route, optionally with an associated irqfd event.
83-
#[expect(dead_code)]
8486
pub(crate) fn new_route(self: &Arc<Self>, irqfd_event: Option<Event>) -> Option<GsiRoute> {
8587
let gsi = self.gsi_routing.lock().alloc()?;
8688
Some(GsiRoute {
@@ -139,7 +141,6 @@ impl GsiRoute {
139141
}
140142

141143
/// Enables the route and associated irqfd.
142-
#[expect(dead_code)]
143144
pub fn enable(&self, entry: kvm::RoutingEntry) {
144145
let partition = self.set_entry(Some(entry));
145146
let _lock = self.enable_mutex.lock();
@@ -203,3 +204,87 @@ impl GsiRoute {
203204
}
204205
}
205206
}
207+
208+
/// irqfd routing interface for a KVM partition.
209+
///
210+
/// Wraps the existing [`GsiRoute`] infrastructure to implement the
211+
/// [`IrqFd`]/[`IrqFdRoute`] traits.
212+
pub struct KvmIrqFdState {
213+
partition: Arc<KvmPartitionInner>,
214+
}
215+
216+
impl KvmIrqFdState {
217+
pub fn new(partition: Arc<KvmPartitionInner>) -> Self {
218+
Self { partition }
219+
}
220+
}
221+
222+
impl IrqFd for KvmIrqFdState {
223+
fn new_irqfd_route(&self) -> anyhow::Result<Box<dyn IrqFdRoute>> {
224+
let event = Event::new();
225+
let route = self
226+
.partition
227+
.new_route(Some(event.clone()))
228+
.context("no free GSIs available for irqfd")?;
229+
Ok(Box::new(KvmIrqFdRoute {
230+
route,
231+
event,
232+
last_entry: Mutex::new(None),
233+
}))
234+
}
235+
}
236+
237+
/// A registered irqfd route backed by a KVM [`GsiRoute`].
238+
///
239+
/// Cleanup (disable irqfd, clear route, free GSI) is handled by
240+
/// [`GsiRoute::drop`].
241+
struct KvmIrqFdRoute {
242+
route: GsiRoute,
243+
event: Event,
244+
/// The last routing entry configured via `set_msi`, used to restore
245+
/// routing on `unmask`.
246+
last_entry: Mutex<Option<kvm::RoutingEntry>>,
247+
}
248+
249+
impl IrqFdRoute for KvmIrqFdRoute {
250+
fn event(&self) -> &Event {
251+
&self.event
252+
}
253+
254+
fn set_msi(&self, address: u64, data: u32) -> anyhow::Result<()> {
255+
let crate::arch::KvmMsi {
256+
address_lo,
257+
address_hi,
258+
data,
259+
} = crate::arch::KvmMsi::new(virt::irqcon::MsiRequest { address, data });
260+
let entry = kvm::RoutingEntry::Msi {
261+
address_lo,
262+
address_hi,
263+
data,
264+
};
265+
*self.last_entry.lock() = Some(entry);
266+
self.route.enable(entry);
267+
Ok(())
268+
}
269+
270+
fn clear_msi(&self) -> anyhow::Result<()> {
271+
*self.last_entry.lock() = None;
272+
self.route.disable();
273+
self.route.set_entry(None);
274+
Ok(())
275+
}
276+
277+
fn mask(&self) -> anyhow::Result<()> {
278+
// Disable the irqfd but preserve the last routing entry for unmask.
279+
self.route.disable();
280+
Ok(())
281+
}
282+
283+
fn unmask(&self) -> anyhow::Result<()> {
284+
// Re-enable the irqfd with the previously configured routing entry.
285+
if let Some(entry) = *self.last_entry.lock() {
286+
self.route.enable(entry);
287+
}
288+
Ok(())
289+
}
290+
}

vmm_core/virt_kvm/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ pub struct KvmPartition {
8484
inner: Arc<KvmPartitionInner>,
8585
#[inspect(skip)]
8686
synic_ports: Arc<virt::synic::SynicPorts<KvmPartitionInner>>,
87+
#[cfg(guest_arch = "x86_64")]
88+
#[inspect(skip)]
89+
irqfd_state: Arc<dyn virt::irqfd::IrqFd>,
8790
}
8891

8992
#[derive(Inspect)]

vmm_core/virt_mshv/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ edition.workspace = true
77
rust-version.workspace = true
88

99
[target.'cfg(target_os = "linux")'.dependencies]
10+
headervec.workspace = true
1011
hv1_emulator.workspace = true
1112
hv1_hypercall.workspace = true
1213
hvdef.workspace = true

0 commit comments

Comments
 (0)