use core::{
    alloc::Layout,
    pin::Pin,
    sync::atomic::{self, AtomicBool, AtomicU32, AtomicUsize},
};

use alloc::{
    alloc::{Allocator, Global},
    boxed::Box,
    vec,
};
use embassy_sync::{
    channel::{Channel, TrySendError},
    signal::Signal,
};
use embassy_time::{Instant, Timer};
use esp_alloc::MemoryCapability;
use esp_hal::{
    Blocking,
    dma::{
        self, AnyGdmaChannel, BufView, BurstConfig, DmaChannel, DmaChannelConvert, DmaDescriptor,
        DmaDescriptorFlags, DmaEligible, DmaRxStreamBuf, DmaTxBuf, DmaTxBuffer, DmaTxInterrupt,
        ExternalBurstConfig, Mem2Mem,
    },
    dma_descriptors, handler,
    interrupt::{self, Priority},
    lcd_cam::lcd::dpi::{Dpi, DpiTransfer},
    peripherals::{DMA, DMA_CH0, Peripherals, SPI2},
    ram,
    spi::master::AnySpi,
};
use esp_sync::RawMutex;
use i_slint_core::software_renderer::Rgb565Pixel;
use indoc::{formatdoc, indoc};
use log::{error, info, warn};
use rmk::{
    futures::{FutureExt, pin_mut},
    join_all,
};

use crate::{
    PSRAM_ALLOCATOR, SIGNAL_LCD_SUBMIT, SIGNAL_UI_RENDER, peripherals::st7701s::St7701s,
    util::DurationExt,
};

/// THIS IS TAKEN FROM https://github.com/esp-rs/esp-hal/blob/main/esp-hal/src/soc/esp32s3/mod.rs
/// Write back a specific range of data in the cache.
///
/// Flushes dirty data-cache lines covering `[addr, addr + size)` out to the
/// backing memory, so that DMA — which bypasses the CPU data cache — observes
/// writes the CPU has made through the cache.
#[doc(hidden)]
#[unsafe(link_section = ".rwtext")]
pub unsafe fn cache_writeback_addr(addr: u32, size: u32) {
    unsafe extern "C" {
        fn rom_Cache_WriteBack_Addr(addr: u32, size: u32);
        fn Cache_Suspend_DCache_Autoload() -> u32;
        fn Cache_Resume_DCache_Autoload(value: u32);
    }
    // Suspend autoload so that cache lines being loaded concurrently by the
    // autoloader are not written back mid-operation; restore its previous
    // state afterwards.
    unsafe {
        let autoload = Cache_Suspend_DCache_Autoload();
        rom_Cache_WriteBack_Addr(addr, size);
        Cache_Resume_DCache_Autoload(autoload);
    }
}

/// THIS IS TAKEN FROM https://github.com/esp-rs/esp-hal/blob/main/esp-hal/src/soc/esp32s3/mod.rs
/// Invalidate a specific range of addresses in the cache.
#[doc(hidden)]
#[unsafe(link_section = ".rwtext")]
pub unsafe fn cache_invalidate_addr(addr: u32, size: u32) {
    unsafe extern "C" {
        fn Cache_Invalidate_Addr(addr: u32, size: u32);
    }
    unsafe {
        Cache_Invalidate_Addr(addr, size);
    }
}

/// Streams a large source buffer to the LCD DPI peripheral through a pair of
/// small bounce buffers, one window at a time: one bounce buffer is filled via
/// mem2mem DMA while the other is being transmitted.
pub struct DmaBounce {
    // TODO: Make these generic.
    // They currently cannot be generic, because they lack a `reborrow` method.
    channel: DMA_CH0<'static>,
    // This can also be more generic, see `DmaEligible` in `Mem2Mem::new`.
    peripheral_src: AnySpi<'static>,
    // This can also be more generic, see `DmaEligible` in `Mem2Mem::new`.
    // NOTE(review): the generic arguments were garbled/missing here; restored
    // as `Dpi<'static, Blocking>` to match the value `new` stores and the
    // peripheral returned by `DpiTransfer::wait` in `send`.
    peripheral_dst: Option<Dpi<'static, Blocking>>,
    // TODO: Consider having a separate burst config for the two transfers.
    burst_config: BurstConfig,
    /// Whether the outbound DPI transfer runs perpetually over a descriptor
    /// ring (`true`) or is restarted once per frame (`false`).
    cyclic: bool,
    /// The size of each window.
    window_size: usize,
    /// The number of windows.
    windows_len: usize,
    buffer_src: &'static mut [u8],
    // Two buffers of size `window_size`,
    // one of which is being written to, while the other is being read from.
    bounce_buffer_dst: &'static mut [u8],
    bounce_buffer_src: &'static mut [u8],
    // A descriptor list that spans a buffer of size `window_size`.
    // The buffer pointers need to be updated before each transmission to point
    // to the correct window in the source buffer `buffer_src`.
    src_descs: &'static mut [DmaDescriptor],
    // A descriptor list that spans a buffer of size `window_size`.
    // The buffer pointers need to be updated before each transmission to point
    // to the correct bounce buffer.
    bounce_dst_descs: &'static mut [DmaDescriptor],
    // The outbound descriptor list spanning `bounce_buffer_dst` and
    // `bounce_buffer_src`: a two-buffer ring in cyclic mode, or a linear
    // frame-length list otherwise.
    bounce_src_descs: &'static mut [DmaDescriptor],
    // The index of the next window about to be received into the destination bounce buffer.
    window_index_next: usize,
    frame_index_next: usize,
}

impl DmaBounce {
    /// Creates the bounce-buffer machinery for `buffer_src`.
    ///
    /// # Panics
    /// Panics if `buffer_src.len()` is not a multiple of `window_size` (and,
    /// in non-cyclic mode, if the number of windows is odd — see
    /// `bounce_descriptors_for_buffer_single`).
    pub fn new(
        channel: DMA_CH0<'static>,
        peripheral_src: AnySpi<'static>,
        peripheral_dst: Dpi<'static, Blocking>,
        buffer_src: &'static mut [u8],
        window_size: usize,
        burst_config: BurstConfig,
        cyclic: bool,
    ) -> Self {
        assert_eq!(
            buffer_src.len() % window_size,
            0,
            "the size of a source buffer must be a multiple of the window size ({window_size} bytes), but it is {len} bytes large",
            len = buffer_src.len()
        );
        let windows_len = buffer_src.len() / window_size;
        let bounce_buffer_dst = Box::leak(allocate_dma_buffer_in(window_size, Global));
        let bounce_buffer_src = Box::leak(allocate_dma_buffer_in(window_size, Global));
        let src_descs = Self::linear_descriptors_for_buffer(window_size, burst_config, |desc| {
            desc.reset_for_tx(desc.next.is_null());
            // Length for TX buffers must be set in software.
            // In RX buffers, it is set by hardware.
            desc.set_length(desc.size());
        });
        let bounce_dst_descs =
            Self::linear_descriptors_for_buffer(window_size, burst_config, |_| {});
        // BUGFIX: the two branches were swapped. A perpetual (cyclic) DPI
        // transfer needs the endless ring alternating between the two bounce
        // buffers built by `bounce_descriptors_for_buffer_cyclic`, while a
        // per-frame transfer needs the linear, frame-length list built by
        // `bounce_descriptors_for_buffer_single` (which terminates and lets
        // `send` restart the transfer for each frame).
        let bounce_src_descs = if cyclic {
            Self::bounce_descriptors_for_buffer_cyclic(
                // Safety: the descriptor builder only records raw pointers
                // into the bounce buffers; the aliased `&'static mut`
                // references do not escape it.
                unsafe {
                    (
                        &mut *(bounce_buffer_dst as *mut _),
                        &mut *(bounce_buffer_src as *mut _),
                    )
                },
                burst_config,
            )
        } else {
            Self::bounce_descriptors_for_buffer_single(
                windows_len,
                unsafe {
                    (
                        &mut *(bounce_buffer_dst as *mut _),
                        &mut *(bounce_buffer_src as *mut _),
                    )
                },
                burst_config,
            )
        };
        Self {
            channel,
            peripheral_src,
            peripheral_dst: Some(peripheral_dst),
            burst_config,
            cyclic,
            window_size,
            windows_len,
            buffer_src,
            bounce_buffer_dst,
            bounce_buffer_src,
            src_descs,
            bounce_dst_descs,
            bounce_src_descs,
            window_index_next: 0,
            frame_index_next: 0,
        }
    }

    /// Builds a leaked, linear (null-terminated) descriptor list covering a
    /// buffer of `buffer_len` bytes in chunks compatible with `burst_config`,
    /// running `setup_desc` on every descriptor.
    fn linear_descriptors_for_buffer(
        buffer_len: usize,
        burst_config: BurstConfig,
        mut setup_desc: impl FnMut(&mut DmaDescriptor),
    ) -> &'static mut [DmaDescriptor] {
        let max_chunk_size = burst_config.max_compatible_chunk_size();
        let descriptors_len = dma::descriptor_count(buffer_len, max_chunk_size, false);
        // TODO: This leaks memory.
        // Ensure it's only called during setup.
        let descriptors = Box::leak(vec![DmaDescriptor::EMPTY; descriptors_len].into_boxed_slice());
        // Link up the descriptors into a forward-linked list terminated by a
        // null `next` pointer (linked back-to-front).
        let mut next = core::ptr::null_mut();
        for desc in descriptors.iter_mut().rev() {
            desc.next = next;
            next = desc;
        }
        // Prepare each descriptor's buffer size: split `buffer_len` into
        // chunks of at most `max_chunk_size`, letting the caller's closure
        // finish per-descriptor setup (flags, length, ...).
        let mut descriptors_it = descriptors.iter_mut();
        let mut remaining_len = buffer_len;
        while remaining_len > 0 {
            let chunk_size = core::cmp::min(max_chunk_size, remaining_len);
            let desc = descriptors_it.next().unwrap();
            desc.set_size(chunk_size);
            (setup_desc)(desc);
            remaining_len -= chunk_size;
        }
        descriptors
    }

    // Builds a leaked descriptor list where the descriptors covering bounce
    // buffer 0 link onward to those covering bounce buffer 1 and vice versa,
    // forming an endless ring over the two bounce buffers.
    fn bounce_descriptors_for_buffer_cyclic(
        bounce_buffers: (&'static mut [u8], &'static mut [u8]),
        burst_config: BurstConfig,
    ) -> &'static mut [DmaDescriptor] {
        assert_eq!(
            bounce_buffers.0.len(),
            bounce_buffers.1.len(),
            "bounce buffers must be equal in size"
        );
        let buffer_len = bounce_buffers.0.len();
        let max_chunk_size = burst_config.max_compatible_chunk_size();
        let descriptors_len = dma::descriptor_count(
            buffer_len,
            max_chunk_size,
            // TODO: This might need to be set to true?
            // I don't know why cyclic descriptor lists must be at least 3 descriptors long.
            false,
        );
        // One contiguous allocation holding both halves of the ring.
        let descriptors_combined =
            Box::leak(vec![DmaDescriptor::EMPTY; 2 * descriptors_len].into_boxed_slice());
        let descriptors_pair = descriptors_combined.split_at_mut(descriptors_len);
        // Link up the descriptors: the tail of each half points at the head of
        // the other half, which is what makes the combined list cyclic.
        fn link_up_descriptors(
            descriptors: &mut [DmaDescriptor],
            descriptors_other: &mut [DmaDescriptor],
        ) {
            let mut next = descriptors_other.first_mut().unwrap();
            for desc in descriptors.iter_mut().rev() {
                desc.next = next;
                next = desc;
            }
        }
        link_up_descriptors(descriptors_pair.0, descriptors_pair.1);
        link_up_descriptors(descriptors_pair.1, descriptors_pair.0);
        // Prepare each descriptor's buffer size.
        for (bounce_buffer, descriptors) in [
            (bounce_buffers.0, descriptors_pair.0),
            (bounce_buffers.1, descriptors_pair.1),
        ] {
            let mut descriptors_it = descriptors.iter_mut();
            let mut remaining_bounce_buffer = bounce_buffer;
            while !remaining_bounce_buffer.is_empty() {
                let chunk_size = core::cmp::min(max_chunk_size, remaining_bounce_buffer.len());
                let desc = descriptors_it.next().unwrap();
                desc.buffer = remaining_bounce_buffer.as_mut_ptr();
                remaining_bounce_buffer = &mut remaining_bounce_buffer[chunk_size..];
                let is_last = remaining_bounce_buffer.is_empty();
                desc.set_size(chunk_size);
                // Length for TX descriptors is set in software (hardware sets
                // it only for RX).
                desc.set_length(chunk_size);
                desc.reset_for_tx(is_last);
            }
        }
        descriptors_combined
    }

    // Builds a leaked, linear descriptor list covering one whole frame
    // (`windows_len` windows), alternating between the two bounce buffers
    // window by window; the list terminates after the last window.
    fn bounce_descriptors_for_buffer_single(
        windows_len: usize,
        bounce_buffers: (&'static mut [u8], &'static mut [u8]),
        burst_config: BurstConfig,
    ) -> &'static mut [DmaDescriptor] {
        assert_eq!(
            bounce_buffers.0.len(),
            bounce_buffers.1.len(),
            "bounce buffers must be equal in size"
        );
        // If an odd number of windows were needed, two descriptor lists would be needed,
        // because the frame would end on the opposite bounce buffer each time.
        assert_eq!(windows_len % 2, 0, "the number of windows must be even");
        let buffer_len = bounce_buffers.0.len();
        let max_chunk_size = burst_config.max_compatible_chunk_size();
        let descriptors_per_window = dma::descriptor_count(buffer_len, max_chunk_size, false);
        let descriptors_frame = Box::leak(
            vec![DmaDescriptor::EMPTY; descriptors_per_window * windows_len].into_boxed_slice(),
        );
        // Link up the descriptors into one forward-linked, null-terminated
        // list (linked back-to-front).
        let mut next = core::ptr::null_mut();
        for desc in descriptors_frame.iter_mut().rev() {
            desc.next = next;
            next = desc;
        }
        // Prepare each descriptor's buffer size.
        let bounce_buffers = [bounce_buffers.0, bounce_buffers.1];
        // Even-numbered windows read from bounce buffer 0, odd-numbered from
        // bounce buffer 1.
        for (window_index, descriptors) in descriptors_frame
            .chunks_mut(descriptors_per_window)
            .enumerate()
        {
            let mut descriptors_it = descriptors.iter_mut();
            let mut remaining_bounce_buffer = &mut *bounce_buffers[window_index % 2];
            while !remaining_bounce_buffer.is_empty() {
                let chunk_size = core::cmp::min(max_chunk_size, remaining_bounce_buffer.len());
                let desc = descriptors_it.next().unwrap();
                desc.buffer = remaining_bounce_buffer.as_mut_ptr();
                remaining_bounce_buffer = &mut remaining_bounce_buffer[chunk_size..];
                let is_last = remaining_bounce_buffer.is_empty();
                desc.set_size(chunk_size);
                desc.set_length(chunk_size);
                desc.reset_for_tx(is_last);
            }
        }
        descriptors_frame
    }

    // Points each descriptor's `buffer` pointer at consecutive chunks of
    // `buffer` (when `Some`), then runs `setup_desc` on every descriptor.
    // Panics if `buffer` is not fully consumed by the descriptor sizes.
    fn linear_descriptors_prepare(
        descriptors: &mut [DmaDescriptor],
        mut buffer: Option<&mut [u8]>,
        mut setup_desc: impl FnMut(&mut DmaDescriptor),
    ) {
        for descriptor in descriptors.iter_mut() {
            if let Some(inner_buffer) = buffer {
                descriptor.buffer = inner_buffer.as_mut_ptr();
                // Advance past the chunk this descriptor now covers.
                buffer = Some(&mut inner_buffer[descriptor.size()..]);
            }
            (setup_desc)(descriptor);
        }
        if let Some(buffer) = buffer {
            // NOTE(review): "asssigned" typo left untouched — it is a runtime
            // panic message.
            assert!(
                buffer.is_empty(),
                "a buffer of an incompatible length was asssigned to a descriptor set"
            );
        }
    }

    // Enables the DMA OUT EOF interrupt for the outbound channel and binds
    // `dma_interrupt_handler` to it.
    fn enable_interrupts() {
        // TODO: Get from self.channel
        // NOTE(review): hard-coded to channel 2 / `DMA_OUT_CH2`, which must
        // match the channel the DPI driver actually uses — confirm.
        let channel_number = 2;
        let interrupt = esp_hal::peripherals::Interrupt::DMA_OUT_CH2;
        // Enable interrupts for the peripheral
        interrupt::enable(interrupt, dma_interrupt_handler.priority()).unwrap();
        // Bind the handler
        unsafe {
            interrupt::bind_interrupt(interrupt, dma_interrupt_handler.handler());
        }
        // Enable interrupts in the peripheral.
        DMA::regs()
            .ch(channel_number)
            .out_int()
            .ena()
            .modify(|_, w| w.out_eof().bit(true));
    }

    /// Receive a window of bytes into the current dst bounce buffer.
    /// Finally, swaps the bounce buffers.
    fn receive_window_blocking(&mut self) {
        // Descriptors are initialized by `DmaTxBuf::new`.
        // Select the window of the source buffer to copy next.
        let buffer_src_window =
            &mut self.buffer_src[self.window_index_next * self.window_size..][..self.window_size];
        // Point the TX descriptors at the chosen source window.
        Self::linear_descriptors_prepare(self.src_descs, Some(buffer_src_window), |desc| {
            // desc.reset_for_tx(desc.next.is_null());
        });
        // TODO: Precompute a descriptor list for each buffer, then use `None` instead of `Some(&mut *self.bounce_buffer_dst)`.
        Self::linear_descriptors_prepare(
            self.bounce_dst_descs,
            Some(&mut *self.bounce_buffer_dst),
            |desc| {
                desc.reset_for_rx();
            },
        );
        {
            // Extend the lifetime to 'static because it is required by Mem2Mem.
            //
            // Safety:
            // Pointees are done being used by the driver before this scope ends,
            // this is because we `SimpleMem2MemTransfer::wait()` on the transfer to finish.
            let bounce_dst_descs = unsafe { &mut *(self.bounce_dst_descs as *mut _) };
            let src_descs = unsafe { &mut *(self.src_descs as *mut _) };
            let mut mem2mem = Mem2Mem::new(self.channel.reborrow(), self.peripheral_src.reborrow())
                .with_descriptors(bounce_dst_descs, src_descs, self.burst_config)
                .unwrap();
            let transfer = mem2mem
                .start_transfer(&mut self.bounce_buffer_dst, buffer_src_window)
                .unwrap();
            // Block until the window has been copied into the bounce buffer.
            transfer.wait().unwrap();
        }
        self.increase_window_counter(1);
    }

    // Advances the window (and, on wrap-around, frame) counters by `windows`
    // windows, swapping the dst/src bounce buffers when an odd number of
    // windows was consumed (even counts land on the same buffer again).
    fn increase_window_counter(&mut self, windows: usize) {
        if windows % 2 == 1 {
            core::mem::swap(&mut self.bounce_buffer_dst, &mut self.bounce_buffer_src);
        }
        self.window_index_next = self.window_index_next + windows;
        // Every `windows_len` windows completes one frame.
        self.frame_index_next += self.window_index_next / self.windows_len;
        self.window_index_next = self.window_index_next % self.windows_len;
    }

    /// Streams windows to the DPI peripheral indefinitely: fills the idle
    /// bounce buffer via mem2mem DMA while the other is being transmitted,
    /// pacing itself on the DMA OUT EOF interrupt. Never returns.
    pub async fn send(&mut self)
    // -> DpiTransfer<'static, DmaTxBounceBuf, Blocking>
    {
        Self::enable_interrupts();
        // Receive the first window, so that the outbound transfer can read valid data.
self.receive_window_blocking(); let mut dma_tx_buffer = self.get_dma_tx_buffer(); let mut transfer = self .peripheral_dst .take() .unwrap() .send(self.cyclic /* Send perpetually */, dma_tx_buffer) .unwrap_or_else(|(error, _, _)| { panic!("failed to begin the transmission of the first frame: {error:?}"); }); let mut windows_skipped_total = 0; loop { self.receive_window_blocking(); let windows_skipped = WINDOWS_SKIPPED.wait().await; if windows_skipped > 0 { self.increase_window_counter(windows_skipped); windows_skipped_total += windows_skipped; error!( "Skipped {windows_skipped} windows. Windows skipped per frame: {:.2}%", 100.0 * windows_skipped_total as f32 / (self.frame_index_next + 1) as f32 ); } if !self.cyclic && self.window_index_next == 1 { // TODO: Investigate why the DPI transfer isn't done at this point. // The `DpiTransfer::wait()` below takes 0.001039 s. // Perhaps it's the minimum screen refresh period? // // assert!(transfer.is_done()); // if !transfer.is_done() { // error!( // "transfer is not done yet. {} {}", // self.frame_index_next, self.window_index_next // ); // } let result; let peripheral_dst; // let start = Instant::now(); (result, peripheral_dst, dma_tx_buffer) = transfer.wait(); // let duration = Instant::now().duration_since(start); // warn!("Waited for {} seconds", duration.display_as_secs()); if let Err(error) = result { error!("DPI error during sending: {error:?}"); } transfer = peripheral_dst .send(false, dma_tx_buffer) .unwrap_or_else(|(error, _, _)| { panic!("failed to begin the transmission of a frame: {error:?}"); }); } } // loop { // // BOUNCE_BUFFER_SENT.receive().await; // warn!("Iteration. Done = {}", transfer.is_done()); // let receive_window = self.receive_window().fuse(); // pin_mut!(receive_window); // // let mut send_buffer = BOUNCE_BUFFER_SENT.wait().fuse(); // let mut send_buffer = BOUNCE_BUFFER_SENT.receive().fuse(); // let window_received_first = rmk::futures::select_biased! 
{ // () = receive_window => Ok(()), // windows_sent = send_buffer => Err(windows_sent), // }; // match window_received_first { // Ok(()) => { // send_buffer.await; // } // Err(windows_sent) => { // error!("Sent {windows_sent} windows before a window could be received."); // receive_window.await; // } // } // } // transfer } fn get_dma_tx_buffer(&mut self) -> DmaTxBounceBuf { DmaTxBounceBuf { preparation: dma::Preparation { start: self.bounce_src_descs.first_mut().unwrap(), direction: dma::TransferDirection::Out, accesses_psram: false, burst_transfer: self.burst_config, check_owner: Some(true), // Possibly want to set this to false auto_write_back: false, // Possibly true }, } } } pub struct DmaTxBounceBuf { preparation: dma::Preparation, } unsafe impl DmaTxBuffer for DmaTxBounceBuf { type View = Self; type Final = Self; fn prepare(&mut self) -> dma::Preparation { dma::Preparation { start: self.preparation.start, direction: self.preparation.direction, accesses_psram: self.preparation.accesses_psram, burst_transfer: self.preparation.burst_transfer, check_owner: self.preparation.check_owner, auto_write_back: self.preparation.auto_write_back, } } fn into_view(self) -> Self::View { self } fn from_view(view: Self::View) -> Self::Final { view } } static WINDOWS_SKIPPED: Signal = Signal::new(); #[handler(priority = Priority::Priority3)] #[ram] // Improves performance. fn dma_interrupt_handler() { let interrupt = DMA::regs().ch(2).out_int(); let bounce_buffer_processed = interrupt.st().read().out_eof().bit_is_set(); if bounce_buffer_processed { // Clear the bit by writing 1 to the clear bits. 
interrupt.clr().write(|w| w.out_eof().bit(true)); let windows_skipped = WINDOWS_SKIPPED .try_take() .map(|windows_skipped| windows_skipped + 1) .unwrap_or_default(); WINDOWS_SKIPPED.signal(windows_skipped); } } pub async fn run_lcd( mut st7701s: St7701s<'static, Blocking>, framebuffer: &'static mut Framebuffer, ) { loop { // Timer::after(Duration::from_millis(100)).await; // yield_now().await; SIGNAL_LCD_SUBMIT.wait().await; // TODO: Use bounce buffers: // https://docs.espressif.com/projects/esp-idf/en/v5.0/esp32s3/api-reference/peripherals/lcd.html#bounce-buffer-with-single-psram-frame-buffer // This can be implemented as a `DmaTxBuffer`. let transfer = match st7701s.dpi.send(false, framebuffer.dma_buf.take().unwrap()) { Err((error, result_dpi, result_dma_buf)) => { error!( "An error occurred while initiating transfer of the framebuffer to the LCD display: {error:?}" ); st7701s.dpi = result_dpi; framebuffer.dma_buf = Some(result_dma_buf); continue; } Ok(transfer) => transfer, }; // This could be used to allow other tasks to be executed on the first core, but that causes // the flash to be accessed, which interferes with the framebuffer transfer. // For that reason, it is disabled, and this task blocks the first core, until the transfer // is complete. #[cfg(not(feature = "limit-fps"))] while !transfer.is_done() { // Timer::after_millis(1).await; rmk::embassy_futures::yield_now().await; } let result; let dma_buf; (result, st7701s.dpi, dma_buf) = transfer.wait(); framebuffer.dma_buf = Some(dma_buf); SIGNAL_UI_RENDER.signal(()); if let Err(error) = result { error!( "An error occurred while transferring framebuffer to the LCD display: {error:?}" ); } } } pub struct Framebuffer { pub width: u32, pub height: u32, pub dma_buf: Option, } /// Allocates a buffer appropriately aligned for use with DMA. 
pub fn allocate_dma_buffer_in(len: usize, alloc: A) -> Box<[u8], A> { const DMA_ALIGNMENT: usize = 32; assert_eq!( len % DMA_ALIGNMENT, 0, "the size of a DMA buffer must be a multiple of {DMA_ALIGNMENT} bytes, but it is {len} bytes large" ); // ⚠️ Note: For chips that support DMA to/from PSRAM (ESP32-S3) DMA transfers to/from PSRAM // have extra alignment requirements. The address and size of the buffer pointed to by each // descriptor must be a multiple of the cache line (block) size. This is 32 bytes on ESP32-S3. // That is ensured by the `assert_eq` preceding this block. unsafe { let raw = alloc .allocate_zeroed(Layout::from_size_align(len, DMA_ALIGNMENT).unwrap()) .expect("failed to allocate a DMA buffer"); Box::from_raw_in(raw.as_ptr(), alloc) } } impl Framebuffer { pub fn new(width: u32, height: u32) -> Self { let buffer_len = width as usize * height as usize * core::mem::size_of::(); let buffer = allocate_dma_buffer_in(buffer_len, &PSRAM_ALLOCATOR); let burst_config: BurstConfig = ExternalBurstConfig::Size16.into(); info!( "PSRAM SPI burst config: max_compatible_chunk_size={}", burst_config.max_compatible_chunk_size() ); let dma_buf_descs_len = esp_hal::dma::descriptor_count( buffer_len, burst_config.max_compatible_chunk_size(), false, ); // Descriptors are initialized by `DmaTxBuf::new`. let dma_buf_descs = vec![DmaDescriptor::EMPTY; dma_buf_descs_len].into_boxed_slice(); // We just leak the buffers. let dma_buf = DmaTxBuf::new(Box::leak(dma_buf_descs), Box::leak(buffer)).unwrap(); Self { width, height, dma_buf: Some(dma_buf), } } pub fn as_target_pixels(&mut self) -> &mut [Rgb565Pixel] { bytemuck::cast_slice_mut::<_, Rgb565Pixel>(self.dma_buf.as_mut().unwrap().as_mut_slice()) } }