From 10dccf3e23f341d67eb7815311821d89c449a432 Mon Sep 17 00:00:00 2001 From: Mads Marquart Date: Thu, 5 Mar 2026 09:38:03 +0100 Subject: [PATCH] Use IOSurface Write directly into a shared memory buffer that can be shared directly with the compositor and avoids copying bytes when presenting (QuartzCore was copying stuff before). This also implements double and triple buffering of the surface, to avoid creating a bunch of unnecessary buffers. The compositor seems to sometimes work on two buffers at the same time? A bit unsure why. The way we wait for the compositor to stop using the buffer(s) is kinda bad, but that can be resolved later, and shouldn't be a problem for applications that do proper frame-pacing (which Winit doesn't yet provide though). --- Cargo.toml | 11 ++ src/backends/cg.rs | 373 ++++++++++++++++++++++++++++++--------------- src/lib.rs | 14 +- 3 files changed, 269 insertions(+), 129 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d13d4307..8336f8e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -105,6 +105,9 @@ objc2-core-graphics = { version = "0.3.2", default-features = false, features = objc2-core-foundation = { version = "0.3.2", default-features = false, features = [ "std", "CFCGTypes", + "CFNumber", + "CFString", + "CFDictionary", ] } objc2-foundation = { version = "0.3.2", default-features = false, features = [ "std", @@ -122,6 +125,14 @@ objc2-quartz-core = { version = "0.3.2", default-features = false, features = [ "CALayer", "CATransaction", ] } +objc2-io-surface = { version = "0.3.2", default-features = false, features = [ + "std", + "libc", + "objc2", + "objc2-core-foundation", + "IOSurfaceRef", + "IOSurfaceTypes", +] } # Web dependencies. [target.'cfg(target_family = "wasm")'.dependencies] diff --git a/src/backends/cg.rs b/src/backends/cg.rs index 88169630..af8d3480 100644 --- a/src/backends/cg.rs +++ b/src/backends/cg.rs @@ -1,29 +1,31 @@ //! Softbuffer implementation using CoreGraphics. use crate::error::InitError; use crate::{backend_interface::*, AlphaMode}; -use crate::{util, Pixel, Rect, SoftBufferError}; +use crate::{Pixel, Rect, SoftBufferError}; use objc2::rc::Retained; use objc2::runtime::{AnyObject, Bool}; use objc2::{define_class, msg_send, AllocAnyThread, DefinedClass, MainThreadMarker, Message}; -use objc2_core_foundation::{CFRetained, CGPoint}; -use objc2_core_graphics::{ - CGBitmapInfo, CGColorRenderingIntent, CGColorSpace, CGDataProvider, CGImage, CGImageAlphaInfo, - CGImageByteOrderInfo, CGImageComponentInfo, CGImagePixelFormatInfo, -}; +use objc2_core_foundation::{CFMutableDictionary, CFNumber, CFRetained, CFString, CFType, CGPoint}; +use objc2_core_graphics::CGColorSpace; use objc2_foundation::{ ns_string, NSDictionary, NSKeyValueChangeKey, NSKeyValueChangeNewKey, NSKeyValueObservingOptions, NSNumber, NSObject, NSObjectNSKeyValueObserverRegistration, NSString, NSValue, }; +use objc2_io_surface::{ + kIOSurfaceBytesPerElement, kIOSurfaceCacheMode, kIOSurfaceColorSpace, kIOSurfaceHeight, + kIOSurfaceMapWriteCombineCache, kIOSurfacePixelFormat, kIOSurfaceWidth, IOSurfaceLockOptions, + IOSurfaceRef, +}; use objc2_quartz_core::{kCAGravityTopLeft, CALayer, CATransaction}; use raw_window_handle::{HasDisplayHandle, HasWindowHandle, RawWindowHandle}; use std::ffi::c_void; use std::marker::PhantomData; -use std::mem::size_of; +use std::mem::{size_of, ManuallyDrop}; use std::num::NonZeroU32; use std::ops::Deref; -use std::ptr::{self, slice_from_raw_parts_mut, NonNull}; +use std::ptr; use std::slice; define_class!( @@ -108,18 +110,19 @@ pub struct CGImpl { /// The buffers that we may render into. /// /// This contains an unbounded number of buffers, since we don't get any feedback from - /// QuartzCore about when it's done using the buffer, other than the retain count of the data - /// provider (which is a weak signal). It shouldn't be needed (QuartzCore seems to copy the data - /// from `CGImage` once), though theoretically there might be cases where it would have a - /// multi-stage pipeline where it processes the image once, retains it, and sends it onwards to - /// be processed again later (and such things might change depending on OS version), so we do - /// this to be safe. + /// QuartzCore about when it's done using the buffer, other than the IOSurface's `is_in_use` + /// (which is a weak signal). Three buffers should be enough, though theoretically there might + /// be cases where QuartzCore would have a multi-stage pipeline where it processes the surface + /// in more steps (and such things might change depending on OS version), so we do this to be + /// safe. /// /// Anecdotally, if the user renders 3 times within a single frame (which they probably - /// shouldn't do, but would be safe), we need 4 buffers according to the retain counts. + /// shouldn't do, but would be safe), we end up needing 5 buffers. /// /// Note that having more buffers here shouldn't add any presentation delay, since we still go /// directly from drawing to the back buffer and presenting the front buffer. + /// + /// buffers: Vec, /// The width of the current buffers. width: u32, @@ -256,14 +259,37 @@ impl SurfaceInterface for CGImpl< let width = (size.width * scale_factor) as u32; let height = (size.height * scale_factor) as u32; + // FIXME(madsmtm): Allow setting this: + // https://github.com/rust-windowing/softbuffer/pull/320 + let write_combine_cache = false; + let properties = Buffer::properties( + width, + height, + kCVPixelFormatType_32BGRA, + 4, + &color_space, + write_combine_cache, + ); + Ok(Self { layer: SendCALayer(layer), root_layer: SendCALayer(root_layer), observer, color_space, - // We'll usually do double-buffering, but might end up needing more buffers if the user - // renders multiple times per frame. - buffers: vec![Buffer::new(width, height), Buffer::new(width, height)], + // This is triple-buffered because that's what QuartzCore / the compositor seems to + // require: + // - The front buffer is what's currently assigned to `CALayer.contents`, and was + // submitted to the compositor in the previous iteration of the run loop. + // - The middle buffer is what the compositor is currently drawing from (assuming a 1 + // frame delay). + // - The back buffer is what we'll be drawing into. + // + // TODO: Would it ever make sense to control this so it's just double-buffered? + buffers: vec![ + Buffer::new(&properties), + Buffer::new(&properties), + Buffer::new(&properties), + ], width, height, _display: PhantomData, @@ -277,8 +303,9 @@ impl SurfaceInterface for CGImpl< } #[inline] - fn supports_alpha_mode(&self, _alpha_mode: AlphaMode) -> bool { - true + fn supports_alpha_mode(&self, alpha_mode: AlphaMode) -> bool { + // IOSurface doesn't support `Ignored` nor `Postmultiplied`. + matches!(alpha_mode, AlphaMode::Opaque | AlphaMode::Premultiplied) } fn configure( @@ -287,7 +314,13 @@ impl SurfaceInterface for CGImpl< height: NonZeroU32, alpha_mode: AlphaMode, ) -> Result<(), SoftBufferError> { - let opaque = matches!(alpha_mode, AlphaMode::Opaque | AlphaMode::Ignored); + let opaque = match alpha_mode { + AlphaMode::Opaque => true, + AlphaMode::Premultiplied => false, + AlphaMode::Ignored | AlphaMode::Postmultiplied => { + unreachable!("unsupported alpha mode") + } + }; self.layer.setOpaque(opaque); // TODO: Set opaque-ness on root layer too? Is that our responsibility, or Winit's? // self.root_layer.setOpaque(opaque); @@ -300,39 +333,64 @@ impl SurfaceInterface for CGImpl< return Ok(()); } - // Recreate buffers. It's fine to release the old ones, `CALayer.contents` is going to keep - // a reference if they're still in use. - self.buffers = vec![Buffer::new(width, height), Buffer::new(width, height)]; + // Recreate buffers. It's fine to release the old ones, `CALayer.contents` and/or the + // compositor is going to keep a reference if they're still in use. + let properties = Buffer::properties( + width, + height, + kCVPixelFormatType_32BGRA, + 4, + &self.color_space, + false, // write_combine_cache + ); + self.buffers = vec![ + Buffer::new(&properties), + Buffer::new(&properties), + Buffer::new(&properties), + ]; self.width = width; self.height = height; Ok(()) } - fn next_buffer(&mut self, alpha_mode: AlphaMode) -> Result, SoftBufferError> { - // If the backmost buffer might be in use, allocate a new buffer. + fn next_buffer(&mut self, _alpha_mode: AlphaMode) -> Result, SoftBufferError> { + // If the back buffer might be in use by the compositor, allocate a new buffer. + // + // TODO: Add an `unsafe` option to disable this, and always assume 2 (or 3?) buffers? + // TODO: Is this actually the check we want to do? It seems like the compositor doesn't + // properly set the usage state when the application loses focus, even if you continue + // rendering there? // - // TODO: Add an `unsafe` option to disable this, and always assume 2 buffers? - if self.buffers.last().unwrap().might_be_in_use() { - self.buffers.push(Buffer::new(self.width, self.height)); + // Should we instead set up a `CVDisplayLink`, and only allow using the back buffer once a + // certain number of frames have passed since it was presented? Would be better though not + // perfect, `CVDisplayLink` isn't guaranteed to actually match the display's refresh rate: + // https://developer.apple.com/library/archive/documentation/GraphicsImaging/Conceptual/CoreVideo/CVProg_Concepts/CVProg_Concepts.html#//apple_ref/doc/uid/TP40001536-CH202-DontLinkElementID_2 + // + // Another option would be to block here waiting for the buffer to no longer be in use. + if self.buffers.last().unwrap().surface.is_in_use() { + let write_combine_cache = false; + let properties = Buffer::properties( + self.width, + self.height, + kCVPixelFormatType_32BGRA, + 4, + &self.color_space, + write_combine_cache, + ); + self.buffers.push(Buffer::new(&properties)); // This should have no effect on latency, but it will affect the `buffer.age()` that // users see, and unbounded allocation is undesirable too, so we should try to avoid it. tracing::warn!("had to allocate extra buffer in `next_buffer`, you might be rendering faster than the display rate?"); } + // Lock the back buffer to allow writing to it. + // + // Either unlocked in `BufferImpl`s `Drop` or `present_with_damage`. + self.buffers.last().unwrap().lock(); + Ok(BufferImpl { buffers: &mut self.buffers, - width: self.width, - height: self.height, - color_space: &self.color_space, - alpha_info: match (alpha_mode, cfg!(target_endian = "little")) { - (AlphaMode::Opaque | AlphaMode::Ignored, true) => CGImageAlphaInfo::NoneSkipFirst, - (AlphaMode::Opaque | AlphaMode::Ignored, false) => CGImageAlphaInfo::NoneSkipLast, - (AlphaMode::Premultiplied, true) => CGImageAlphaInfo::PremultipliedFirst, - (AlphaMode::Premultiplied, false) => CGImageAlphaInfo::PremultipliedLast, - (AlphaMode::Postmultiplied, true) => CGImageAlphaInfo::First, - (AlphaMode::Postmultiplied, false) => CGImageAlphaInfo::Last, - }, layer: &mut self.layer, }) } @@ -342,37 +400,38 @@ impl SurfaceInterface for CGImpl< #[derive(Debug)] pub struct BufferImpl<'surface> { buffers: &'surface mut Vec, - width: u32, - height: u32, - color_space: &'surface CGColorSpace, - alpha_info: CGImageAlphaInfo, layer: &'surface mut SendCALayer, } +impl Drop for BufferImpl<'_> { + fn drop(&mut self) { + // Unlock the buffer we locked above. + self.buffers.last().unwrap().unlock(); + } +} + impl BufferInterface for BufferImpl<'_> { fn byte_stride(&self) -> NonZeroU32 { - NonZeroU32::new(util::byte_stride(self.width)).unwrap() + let back = self.buffers.last().unwrap(); + // A multiple of the cache line size, which is `64` on x86_64 and `128` on Aarch64. + // Check with `sysctl hw.cachelinesize`. + NonZeroU32::new(back.surface.bytes_per_row() as u32).unwrap() } fn width(&self) -> NonZeroU32 { - NonZeroU32::new(self.width).unwrap() + let back = self.buffers.last().unwrap(); + NonZeroU32::new(back.surface.width() as u32).unwrap() } fn height(&self) -> NonZeroU32 { - NonZeroU32::new(self.height).unwrap() + let back = self.buffers.last().unwrap(); + NonZeroU32::new(back.surface.height() as u32).unwrap() } fn pixels_mut(&mut self) -> &mut [Pixel] { - let back = self.buffers.last_mut().unwrap(); - - // Should've been verified by `next_buffer` above. - debug_assert!(!back.might_be_in_use()); - - let num_bytes = util::byte_stride(self.width) as usize * (self.height as usize); - // SAFETY: The pointer is valid, and we know that we're the only owners of the back buffer's - // data provider. This, combined with taking `&mut self` in this function, means that we can - // safely write to the buffer. - unsafe { slice::from_raw_parts_mut(back.data_ptr, num_bytes / size_of::()) } + let back_buffer = self.buffers.last_mut().unwrap(); + // SAFETY: Called on the back buffer. + unsafe { back_buffer.data() } } fn age(&self) -> u8 { @@ -381,10 +440,21 @@ impl BufferInterface for BufferImpl<'_> { } fn present_with_damage(self, _damage: &[Rect]) -> Result<(), SoftBufferError> { + // Unlock the buffer now, and avoid the `unlock` in `Drop`. + // + // Note that unlocking effectively flushes the changes, without this, the contents might not + // be visible to the compositor. + // + // The code here would be prettier with https://github.com/rust-lang/rfcs/pull/3466. + let this = &mut *ManuallyDrop::new(self); + let buffers = &mut *this.buffers; + let layer = &mut *this.layer; + buffers.last().unwrap().unlock(); + // Rotate buffers such that the back buffer is now the front buffer. - self.buffers.rotate_right(1); + buffers.rotate_right(1); - let (front, rest) = self.buffers.split_first_mut().unwrap(); + let (front, rest) = buffers.split_first_mut().unwrap(); front.age = 1; // This buffer (previously the back buffer) was just rendered into. // Bump the age of the other buffers. @@ -394,44 +464,17 @@ impl BufferInterface for BufferImpl<'_> { } } - // `CGBitmapInfo` consists of a combination of `CGImageAlphaInfo`, `CGImageComponentInfo` - // `CGImageByteOrderInfo` and `CGImagePixelFormatInfo` (see e.g. `CGBitmapInfoMake`). - // - // TODO: Use `CGBitmapInfo::new` once the next version of objc2-core-graphics is released. - let bitmap_info = CGBitmapInfo( - self.alpha_info.0 - | CGImageComponentInfo::Integer.0 - | CGImageByteOrderInfo::Order32Host.0 - | CGImagePixelFormatInfo::Packed.0, - ); - - // CGImage is (intended to be) immutable, so we re-create it on each present. - // SAFETY: The `decode` pointer is NULL. - let image = unsafe { - CGImage::new( - self.width as usize, - self.height as usize, - 8, - 32, - util::byte_stride(self.width) as usize, - Some(self.color_space), - bitmap_info, - Some(&front.data_provider), - ptr::null(), - false, - CGColorRenderingIntent::RenderingIntentDefault, - ) - } - .unwrap(); - // The CALayer has a default action associated with a change in the layer contents, causing // a quarter second fade transition to happen every time a new buffer is applied. This can // be avoided by wrapping the operation in a transaction and disabling all actions. CATransaction::begin(); CATransaction::setDisableActions(true); - // SAFETY: The contents is `CGImage`, which is a valid class for `contents`. - unsafe { self.layer.setContents(Some(image.as_ref())) }; + // SAFETY: We set `CALayer.contents` to an `IOSurface`, which is an undocumented option, but + // it's done in browsers and GDK: + // https://gitlab.gnome.org/GNOME/gtk/-/blob/4266c3c7b15299736df16c9dec57cd8ec7c7ebde/gdk/macos/GdkMacosTile.c#L44 + // And tested to work at least as far back as macOS 10.12. + unsafe { layer.setContents(Some(front.surface.as_ref())) }; CATransaction::commit(); Ok(()) @@ -439,57 +482,141 @@ impl BufferInterface for BufferImpl<'_> { } /// A single buffer in Softbuffer. +/// +/// Buffers are backed by an `IOSurface`, which is a shared memory buffer that can be passed to the +/// compositor without copying. The best official documentation I've found for how this works is +/// probably this keynote: +/// +/// +/// The first ~10mins of this keynote is also pretty good, it describes CA and the render server: +/// +/// +/// +/// See also these links: +/// - +/// - +/// - +/// - +/// - #[derive(Debug)] struct Buffer { - data_provider: CFRetained, - data_ptr: *mut Pixel, + surface: CFRetained, age: u8, } -// SAFETY: We only mutate the `CGDataProvider` when we know it's not referenced by anything else, +// SAFETY: `IOSurface` is marked `NS_SWIFT_SENDABLE`, and we only mutate it when we know it's not referenced by anything else, // and only then behind `&mut`. unsafe impl Send for Buffer {} // SAFETY: Same as above. unsafe impl Sync for Buffer {} impl Buffer { - fn new(width: u32, height: u32) -> Self { - unsafe extern "C-unwind" fn release( - _info: *mut c_void, - data: NonNull, - size: usize, - ) { - let data = data.cast::(); - let slice = slice_from_raw_parts_mut(data.as_ptr(), size / size_of::()); - // SAFETY: This is the same slice that we passed to `Box::into_raw` below. - drop(unsafe { Box::from_raw(slice) }) + fn new(properties: &CFMutableDictionary) -> Self { + let surface = unsafe { IOSurfaceRef::new(properties.as_opaque()) }.unwrap(); + Self { surface, age: 0 } + } + + fn properties( + width: u32, + height: u32, + pixel_format: u32, + bytes_per_pixel: u32, + color_space: &CGColorSpace, + write_combine_cache: bool, + ) -> CFRetained> { + let properties = CFMutableDictionary::::empty(); + + // Set properties of the surface. + properties.add( + unsafe { kIOSurfaceWidth }, + &CFNumber::new_isize(width as isize), + ); + properties.add( + unsafe { kIOSurfaceHeight }, + &CFNumber::new_isize(height as isize), + ); + // NOTE: If an unsupported pixel format is provided, the compositor usually won't render + // anything (which means it'll render whatever was there before, very glitchy). + // + // The list of formats is hardware- and OS-dependent, see e.g. the following link: + // https://developer.apple.com/forums/thread/673868 + // + // Basically only `kCVPixelFormatType_32BGRA` is guaranteed to work, though from testing, + // there's a few more that we might be able to use; see the following repository: + // https://github.com/madsmtm/iosurface-calayer-formats + properties.add( + unsafe { kIOSurfacePixelFormat }, + &CFNumber::new_i32(pixel_format as i32), + ); + properties.add( + unsafe { kIOSurfaceBytesPerElement }, + &CFNumber::new_i32(bytes_per_pixel as i32), + ); + + // TODO: Doesn't seem to do anything? + properties.add( + unsafe { kIOSurfaceColorSpace }, + &*color_space.property_list().unwrap(), + ); + + // We probably don't need to set `kIOSurfaceICCProfile`, the color space information set + // above should contain this. + + // Be a bit more strict about usage of the surface in debug mode. + #[cfg(debug_assertions)] + properties.add( + unsafe { objc2_io_surface::kIOSurfacePixelSizeCastingAllowed }, + &**objc2_core_foundation::CFBoolean::new(false), + ); + + if write_combine_cache { + properties.add( + unsafe { kIOSurfaceCacheMode }, + &**CFNumber::new_i32(kIOSurfaceMapWriteCombineCache as _), + ); } - let num_bytes = util::byte_stride(width) as usize * (height as usize); + properties + } - let buffer = vec![Pixel::default(); num_bytes / size_of::()].into_boxed_slice(); - let data_ptr = Box::into_raw(buffer).cast::(); + // The compositor shouldn't be writing to our surface, let's ensure that with this flag. + const LOCK_OPTIONS: IOSurfaceLockOptions = IOSurfaceLockOptions::AvoidSync; - // SAFETY: The data pointer and length are valid. - // The info pointer can safely be NULL, we don't use it in the `release` callback. - let data_provider = unsafe { - CGDataProvider::with_data(ptr::null_mut(), data_ptr, num_bytes, Some(release)) + #[track_caller] + fn lock(&self) { + let ret = unsafe { self.surface.lock(Self::LOCK_OPTIONS, ptr::null_mut()) }; + if ret != 0 { + panic!("failed locking buffer: {ret}"); } - .unwrap(); + } - Self { - data_provider, - data_ptr: data_ptr.cast(), - age: 0, + #[track_caller] + fn unlock(&self) { + let ret = unsafe { self.surface.unlock(Self::LOCK_OPTIONS, ptr::null_mut()) }; + if ret != 0 { + panic!("failed unlocking buffer: {ret}"); } } - /// Whether the buffer might currently be in use. + /// # Safety /// - /// Might return `false` even if the buffer is unused (such as if it ended up in an autorelease - /// pool), but if this returns `true`, the provider is definitely not in use. - fn might_be_in_use(&self) -> bool { - self.data_provider.retain_count() != 1 + /// Must only be called on the back buffer. + unsafe fn data(&mut self) -> &mut [Pixel] { + // Should've been verified by `next_buffer` above. + debug_assert!(!self.surface.is_in_use()); + + let num_bytes = self.surface.bytes_per_row() * self.surface.height(); + let ptr = self.surface.base_address().cast::(); + + // SAFETY: `IOSurface` is a kernel-managed buffer, which means it's page-aligned, which is + // plenty for the 4 byte alignment required here. + // + // Additionally, the buffer is owned by us, and we're the only ones that are going to write + // to it. Since we re-use buffers, the buffer _might_ be read by the compositor while we write + // to it - this is still sound on our side, though it might cause tearing, depending on when + // the memory is flushed by the kernel, so we try to prevent it by checking `is_in_use`, and + // allocating new buffers in that case. + unsafe { slice::from_raw_parts_mut(ptr.as_ptr(), num_bytes / size_of::()) } } } @@ -513,3 +640,7 @@ impl Deref for SendCALayer { &self.0 } } + +// Grabbed from `objc2-core-video` to avoid having to depend on that (for now at least). +#[allow(non_upper_case_globals)] +const kCVPixelFormatType_32BGRA: u32 = 0x42475241; diff --git a/src/lib.rs b/src/lib.rs index 5e24ede2..b1b69d24 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -306,11 +306,10 @@ impl HasWindowHandle for Surface /// - X, when XShm is available /// - Win32 /// - Orbital, when buffer size matches window size +/// - macOS/iOS /// /// Currently [`Buffer::present`] must block copying image data on: /// - Web -/// - AppKit -/// - UIKit /// /// Buffer copies an channel swizzling happen on: /// - Android @@ -657,16 +656,14 @@ pub enum AlphaMode { /// /// ## Platform Dependent Behavior /// - /// - Android, macOS/iOS, DRM/KMS, Orbital, Wayland, Windows, X11: Supported. - /// - Web: Cannot be supported in a zero-copy manner. + /// - Android, DRM/KMS, Orbital, Wayland, Windows, X11: Supported. + /// - macOS/iOS and Web: Cannot be supported in a zero-copy manner. Ignored, /// The non-alpha channels are expected to already have been multiplied by the alpha channel. /// /// ## Platform Dependent Behavior /// - /// - Wayland and DRM/KMS: Supported. - /// - macOS/iOS: Supported, but currently doesn't work with additive values (maybe only as the - /// root layer?). Will be fixed by . + /// - Wayland, macOS/iOS and DRM/KMS: Supported. /// - Web: Not yet supported (TODO `ImageBitmap`). /// - Android, Orbital, Windows and X11: Not supported (yet unknown if they can be, feel /// free to open an issue about it). @@ -680,9 +677,10 @@ pub enum AlphaMode { /// /// ## Platform Dependent Behavior /// - /// - Web and macOS/iOS: Supported. + /// - Web: Supported. /// - Android, DRM/KMS, Orbital, Wayland, Windows, X11: Not supported (yet unknown if they can /// be, feel free to open an issue about it). + /// - macOS/iOS: Cannot be supported in a zero-copy manner. #[doc(alias = "Straight")] #[doc(alias = "Unassociated")] #[doc(alias = "Unpremultiplied")]