Optimize framebuffer and bounce buffers such that the majority of the front porch is not stored
This commit is contained in:
Jakub Hlusička 2026-02-22 16:09:20 +01:00
parent f20f4e7993
commit c012d5d11d
2 changed files with 237 additions and 96 deletions

View file

@ -152,46 +152,52 @@ async fn test_bounce_buffers(
channel: DMA_CH0<'static>, channel: DMA_CH0<'static>,
peripheral: SPI2<'static>, peripheral: SPI2<'static>,
st7701s: St7701s<'static, Blocking>, st7701s: St7701s<'static, Blocking>,
) -> DpiTransfer<'static, DmaTxBounceBuf, Blocking> { ) {
error!("TEST BOUNCE BUFFERS SECTION ENTERED"); error!("TEST BOUNCE BUFFERS SECTION ENTERED");
const WIDTH: usize = 368; const BYTES_PER_PIXEL: usize = core::mem::size_of::<u16>();
const HEIGHT: usize = 960; // Assume highest burst config setting.
const ROWS_PER_WINDOW: usize = 8; const EXTERNAL_BURST_CONFIG: ExternalBurstConfig = ExternalBurstConfig::Size32;
let windows_len = HEIGHT / ROWS_PER_WINDOW; const ALIGNMENT_PIXELS: usize = EXTERNAL_BURST_CONFIG as usize / BYTES_PER_PIXEL;
let window_size = ROWS_PER_WINDOW * WIDTH * core::mem::size_of::<u16>(); // The total number of pixels demanded by the DPI, per row.
const WIDTH_TOTAL_PIXELS: usize = 368;
// The total number of rows demanded by the DPI, per frame.
const HEIGHT_PIXELS: usize = 960;
// The number of unused pixels at the start of the row.
const FRONT_PORCH_ACTUAL_PIXELS: usize = 120;
// The number of actually visible pixels, per row.
const WIDTH_VISIBLE_PIXELS: usize = 240;
// The number of pixels not stored in a bounce buffer, per row.
// This many arbitrary pixels are sent to the DPI.
const FRONT_PORCH_SKIPPED_PIXELS: usize =
(FRONT_PORCH_ACTUAL_PIXELS / ALIGNMENT_PIXELS) * ALIGNMENT_PIXELS;
const WIDTH_STORED_PIXELS: usize = WIDTH_TOTAL_PIXELS - FRONT_PORCH_SKIPPED_PIXELS;
const VISIBLE_OFFSET_IN_BUFFER_PIXELS: usize =
FRONT_PORCH_ACTUAL_PIXELS - FRONT_PORCH_SKIPPED_PIXELS;
const ROWS_PER_WINDOW: usize = 16;
let burst_config = BurstConfig { let burst_config = BurstConfig {
internal_memory: InternalBurstConfig::Enabled, internal_memory: InternalBurstConfig::Enabled,
external_memory: ExternalBurstConfig::Size64, external_memory: EXTERNAL_BURST_CONFIG,
}; };
let buffer_src = Box::leak(allocate_dma_buffer_in( let buffer_src = Box::leak(allocate_dma_buffer_in(
windows_len * window_size, HEIGHT_PIXELS * WIDTH_STORED_PIXELS * BYTES_PER_PIXEL,
burst_config, burst_config,
&PSRAM_ALLOCATOR, &PSRAM_ALLOCATOR,
)); ));
{
let buffer_src = bytemuck::cast_slice_mut::<u8, Rgb565Pixel>(buffer_src); let buffer_src = bytemuck::cast_slice_mut::<u8, Rgb565Pixel>(buffer_src);
let colors = (0..120_u8) let colors = (0..WIDTH_VISIBLE_PIXELS as u8 / 2)
.rev() .rev()
.map(|val| { .map(|val| Rgb565Pixel::from_rgb(0xFF, val * 2, 0))
// Rgb565Pixel::from_rgb(
// (val % 2) * (0b11111 / (2 - 1)),
// (val % 8) * (0b111111 / (8 - 1)),
// (val % 32) * (0b11111 / (32 - 1)),
// )
// Rgb565Pixel::from_rgb(
// (0b11111 as f32 * (val as f32 / 119.0)) as u8,
// (0b111111 as f32 * (val as f32 / 119.0)) as u8,
// (0b11111 as f32 * (val as f32 / 119.0)) as u8,
// )
Rgb565Pixel::from_rgb(0xFF, val * 2, 0)
})
.collect::<Vec<_>>(); .collect::<Vec<_>>();
for (index, pixel) in buffer_src.iter_mut().enumerate() { for (index, pixel) in buffer_src.iter_mut().enumerate() {
let mut x = (index % WIDTH) as i16 - 120; let mut x =
let mut y = (index / WIDTH) as i16; (index % WIDTH_STORED_PIXELS) as i16 - VISIBLE_OFFSET_IN_BUFFER_PIXELS as i16;
let mut y = (index / WIDTH_STORED_PIXELS) as i16;
if x < 240 { if x < WIDTH_VISIBLE_PIXELS as i16 {
x = core::cmp::min(x, 240 - 1 - x); x = core::cmp::min(x, WIDTH_VISIBLE_PIXELS as i16 - 1 - x);
y = core::cmp::min(y, 960 - 1 - y); y = core::cmp::min(y, HEIGHT_PIXELS as i16 - 1 - y);
let min = core::cmp::min(x, y); let min = core::cmp::min(x, y);
*pixel = colors[min as usize % colors.len()].clone(); *pixel = colors[min as usize % colors.len()].clone();
@ -200,30 +206,26 @@ async fn test_bounce_buffers(
*pixel = Rgb565Pixel::default(); *pixel = Rgb565Pixel::default();
} }
let buffer_src = bytemuck::cast_slice_mut::<Rgb565Pixel, u8>(buffer_src); }
// let mut counter: u8 = 0;
// buffer_src.fill_with(|| { warn!("FRONT_PORCH_SKIPPED_PIXELS: {FRONT_PORCH_SKIPPED_PIXELS}");
// counter = counter.wrapping_add(1); warn!("WIDTH_STORED_PIXELS: {WIDTH_STORED_PIXELS}");
// counter warn!("ROWS_PER_WINDOW: {ROWS_PER_WINDOW}");
// });
let mut buf = DmaBounce::new( let mut buf = DmaBounce::new(
Global, Global,
channel, channel,
AnySpi::from(peripheral), AnySpi::from(peripheral),
st7701s.dpi, st7701s.dpi,
buffer_src, buffer_src,
window_size, FRONT_PORCH_SKIPPED_PIXELS * BYTES_PER_PIXEL,
BurstConfig { WIDTH_STORED_PIXELS * BYTES_PER_PIXEL,
internal_memory: InternalBurstConfig::Enabled, ROWS_PER_WINDOW,
external_memory: ExternalBurstConfig::Size64, burst_config,
},
false, false,
); );
let _ = buf.send().await; buf.send().await;
error!("TEST BOUNCE BUFFERS SECTION DONE"); error!("TEST BOUNCE BUFFERS SECTION DONE");
loop {
Timer::after_secs(10).await;
}
} }
#[esp_rtos::main] #[esp_rtos::main]
@ -598,16 +600,8 @@ async fn main_task(peripherals: MainPeripherals) {
info!("ST7701S-based LCD display initialized!"); info!("ST7701S-based LCD display initialized!");
// interrupt_core_0_spawner.must_spawn(test_bounce_buffers_task( test_bounce_buffers(peripherals.DMA_CH0, peripherals.SPI2, st7701s).await;
// peripherals.DMA_CH0, return;
// peripherals.SPI2,
// st7701s,
// ));
// let lcd_task = test_bounce_buffers(peripherals.DMA_CH0, peripherals.SPI2, st7701s);
// let _ = lcd_task.await;
// return;
// RMK config // RMK config
let vial_config = VialConfig::new(VIAL_KEYBOARD_ID, VIAL_KEYBOARD_DEF, &[(0, 0), (1, 1)]); let vial_config = VialConfig::new(VIAL_KEYBOARD_ID, VIAL_KEYBOARD_DEF, &[(0, 0), (1, 1)]);
@ -705,7 +699,8 @@ async fn main_task(peripherals: MainPeripherals) {
// TODO: Compute the appropriate ranges to pass to the renderer and DPI peripheral. // TODO: Compute the appropriate ranges to pass to the renderer and DPI peripheral.
// The renderer should pass the size of the `pad`ding to the GUI as parameters, // The renderer should pass the size of the `pad`ding to the GUI as parameters,
// to align the content to the displayed range. // to align the content to the displayed range.
368, 112,
368 - 112,
960, 960,
16, 16,
false, false,

View file

@ -30,7 +30,7 @@ use esp_hal::{
spi::master::AnySpi, spi::master::AnySpi,
}; };
use esp_sync::RawMutex; use esp_sync::RawMutex;
use i_slint_core::software_renderer::Rgb565Pixel; use i_slint_core::software_renderer::{Rgb565Pixel, TargetPixel};
use indoc::{formatdoc, indoc}; use indoc::{formatdoc, indoc};
use log::{error, info, warn}; use log::{error, info, warn};
use rmk::{ use rmk::{
@ -111,16 +111,30 @@ pub struct DmaBounce {
} }
impl DmaBounce { impl DmaBounce {
/// * `allocator` - The allocator used to allocate the bounce buffers.
/// * `channel` - The DMA channel used to transfer data from the source buffer to the bounce buffers.
/// * `peripheral_src` - The peripheral used to transfer data from the source buffer to the bounce buffers.
/// * `peripheral_dst` - The peripheral to transfer data to, from the bounce buffers.
/// * `buffer_src` - The source buffer, typically allocated in external memory.
/// * `row_front_porch_bytes` - The number of arbitrary-valued bytes to be sent in front of each row to the destination peripheral.
/// * `row_width_bytes` - The width of a row, in bytes.
/// * `window_size_rows` - The size of a single bounce buffer, in rows.
/// * `burst_config` - The burst config to use for memory transfers (both in and out). TODO: This could be split.
/// * `cyclic` - Experimental! Whether to use a cyclic descriptor list for transfer from the bounce buffers to the destination peripheral.
pub fn new( pub fn new(
allocator: impl Allocator + Copy + 'static, allocator: impl Allocator + Copy + 'static,
channel: DMA_CH0<'static>, channel: DMA_CH0<'static>,
peripheral_src: AnySpi<'static>, peripheral_src: AnySpi<'static>,
peripheral_dst: Dpi<'static, Blocking>, peripheral_dst: Dpi<'static, Blocking>,
buffer_src: &'static mut [u8], buffer_src: &'static mut [u8],
window_size: usize, row_front_porch_bytes: usize,
row_width_bytes: usize,
window_size_rows: usize,
burst_config: BurstConfig, burst_config: BurstConfig,
cyclic: bool, cyclic: bool,
) -> Self { ) -> Self {
let window_size = row_width_bytes * window_size_rows;
assert_eq!( assert_eq!(
buffer_src.len() % window_size, buffer_src.len() % window_size,
0, 0,
@ -137,9 +151,21 @@ impl DmaBounce {
"the source buffer must be sufficiently aligned to {alignment} bytes for the burst config", "the source buffer must be sufficiently aligned to {alignment} bytes for the burst config",
); );
assert_eq!( assert_eq!(
window_size % alignment, row_width_bytes % alignment,
0, 0,
"the size of the source buffer must be sufficiently aligned to {alignment} bytes for the burst config", "the size of a row in bytes must be sufficiently aligned to {alignment} bytes for the burst config",
);
assert_eq!(
row_front_porch_bytes % alignment,
0,
"the size of a row's front porch in bytes must be sufficiently aligned to {alignment} bytes for the burst config",
);
// We need to make the destination peripheral read the front porch data from somewhere,
// and that somewhere is currently the bounce buffer.
// Therefore the front porch must be in bounds.
assert!(
row_front_porch_bytes <= window_size,
"front porch too large"
); );
let windows_len = buffer_src.len() / window_size; let windows_len = buffer_src.len() / window_size;
@ -158,8 +184,7 @@ impl DmaBounce {
let bounce_dst_descs = let bounce_dst_descs =
Self::linear_descriptors_for_buffer(window_size, burst_config, |_| {}); Self::linear_descriptors_for_buffer(window_size, burst_config, |_| {});
let bounce_src_descs = if cyclic { let bounce_src_descs = if cyclic {
Self::bounce_descriptors_for_buffer_single( Self::bounce_descriptors_for_buffer_cyclic(
windows_len,
unsafe { unsafe {
( (
&mut *(bounce_buffer_dst as *mut _), &mut *(bounce_buffer_dst as *mut _),
@ -169,7 +194,17 @@ impl DmaBounce {
burst_config, burst_config,
) )
} else { } else {
Self::bounce_descriptors_for_buffer_cyclic( let REMOVE = Box::leak(allocate_dma_buffer_in(
row_front_porch_bytes,
burst_config,
allocator,
));
bytemuck::cast_slice_mut(REMOVE).fill(Rgb565Pixel::from_rgb(0, 0, 0xFF));
Self::bounce_descriptors_for_buffer_single(
windows_len,
row_front_porch_bytes,
row_width_bytes,
window_size_rows,
unsafe { unsafe {
( (
&mut *(bounce_buffer_dst as *mut _), &mut *(bounce_buffer_dst as *mut _),
@ -177,6 +212,7 @@ impl DmaBounce {
) )
}, },
burst_config, burst_config,
REMOVE,
) )
}; };
@ -294,8 +330,12 @@ impl DmaBounce {
fn bounce_descriptors_for_buffer_single( fn bounce_descriptors_for_buffer_single(
windows_len: usize, windows_len: usize,
row_front_porch_bytes: usize,
row_width_bytes: usize,
window_size_rows: usize,
bounce_buffers: (&'static mut [u8], &'static mut [u8]), bounce_buffers: (&'static mut [u8], &'static mut [u8]),
burst_config: BurstConfig, burst_config: BurstConfig,
REMOVE: &'static mut [u8],
) -> &'static mut [DmaDescriptor] { ) -> &'static mut [DmaDescriptor] {
assert_eq!( assert_eq!(
bounce_buffers.0.len(), bounce_buffers.0.len(),
@ -306,12 +346,28 @@ impl DmaBounce {
assert_eq!(windows_len % 2, 0, "the number of windows must be even"); assert_eq!(windows_len % 2, 0, "the number of windows must be even");
let buffer_len = bounce_buffers.0.len(); let buffer_len = bounce_buffers.0.len();
let max_chunk_size = burst_config.max_compatible_chunk_size();
let descriptors_per_window = dma::descriptor_count(buffer_len, max_chunk_size, false); assert_eq!(
let descriptors_frame = Box::leak( buffer_len,
vec![DmaDescriptor::EMPTY; descriptors_per_window * windows_len].into_boxed_slice(), row_width_bytes * window_size_rows,
"the provided bounce buffers have an invalid size"
); );
warn!(
"windows_len: {windows_len}\nrow_front_porch_bytes: {row_front_porch_bytes}\nrow_width_bytes: {row_width_bytes}\nwindow_size_rows: {window_size_rows}\nbuffer_len: {buffer_len}",
);
let max_chunk_size = burst_config.max_compatible_chunk_size();
let descriptors_per_row_front_porch =
dma::descriptor_count(row_front_porch_bytes, max_chunk_size, false);
let descriptors_per_row_stored =
dma::descriptor_count(row_width_bytes, max_chunk_size, false);
let descriptors_per_row = descriptors_per_row_stored + descriptors_per_row_front_porch;
let descriptors_per_window = window_size_rows * descriptors_per_row;
let descriptors_per_frame = descriptors_per_window * windows_len;
let descriptors_frame =
Box::leak(vec![DmaDescriptor::EMPTY; descriptors_per_frame].into_boxed_slice());
// Link up the descriptors. // Link up the descriptors.
let mut next = core::ptr::null_mut(); let mut next = core::ptr::null_mut();
for desc in descriptors_frame.iter_mut().rev() { for desc in descriptors_frame.iter_mut().rev() {
@ -322,25 +378,111 @@ impl DmaBounce {
// Prepare each descriptor's buffer size. // Prepare each descriptor's buffer size.
let bounce_buffers = [bounce_buffers.0, bounce_buffers.1]; let bounce_buffers = [bounce_buffers.0, bounce_buffers.1];
for (window_index, descriptors) in descriptors_frame for (window_index, descriptors_window) in descriptors_frame
.chunks_mut(descriptors_per_window) .chunks_mut(descriptors_per_window)
.enumerate() .enumerate()
{ {
let mut descriptors_it = descriptors.iter_mut(); let bounce_buffer_index = window_index % 2;
let mut remaining_bounce_buffer = &mut *bounce_buffers[window_index % 2]; let bounce_buffer = &mut *bounce_buffers[bounce_buffer_index];
// let bounce_buffer_ptr = bounce_buffers[bounce_buffer_index].as_mut_ptr();
// let mut remaining_bounce_buffer = &mut *bounce_buffers[bounce_buffer_index];
while !remaining_bounce_buffer.is_empty() { for (row_index_in_window, descriptors_row) in descriptors_window
let chunk_size = core::cmp::min(max_chunk_size, remaining_bounce_buffer.len()); .chunks_mut(descriptors_per_row)
.enumerate()
{
// let row_index = row_index_in_window + window_index * window_size_rows;
let (descriptors_row_front_porch, descriptors_row_stored) =
descriptors_row.split_at_mut(descriptors_per_row_front_porch);
// Prepare front porch descriptors.
{
let mut descriptors_it = descriptors_row_front_porch.iter_mut();
let mut remaining_front_porch = row_front_porch_bytes;
while remaining_front_porch > 0 {
let desc = descriptors_it.next().unwrap(); let desc = descriptors_it.next().unwrap();
desc.buffer = remaining_bounce_buffer.as_mut_ptr(); let chunk_size = core::cmp::min(max_chunk_size, remaining_front_porch);
remaining_bounce_buffer = &mut remaining_bounce_buffer[chunk_size..]; remaining_front_porch -= chunk_size;
let is_last = remaining_bounce_buffer.is_empty(); // Point at the dedicated front porch buffer (`REMOVE`), not at a bounce buffer.
// It is allocated with `row_front_porch_bytes` bytes by `DmaBounce::new`.
desc.buffer = REMOVE.as_mut_ptr();
// desc.buffer = unsafe { bounce_buffer_ptr.offset(0x10000) };
// desc.buffer = bounce_buffer_ptr;
desc.set_size(chunk_size); desc.set_size(chunk_size);
desc.set_length(chunk_size); desc.set_length(chunk_size);
desc.reset_for_tx(is_last); desc.reset_for_tx(false);
}
assert!(
descriptors_it.next().is_none(),
"front porch descriptors must be used up"
);
assert_eq!(
descriptors_row_front_porch
.iter()
.map(|desc| desc.size())
.sum::<usize>(),
row_front_porch_bytes
);
}
// Prepare window descriptors.
{
let mut remaining_bounce_buffer = &mut bounce_buffer
[row_index_in_window * row_width_bytes..][..row_width_bytes];
// if remaining_bounce_buffer.len() > row_width_bytes {
// remaining_bounce_buffer = &mut remaining_bounce_buffer[..row_width_bytes];
// }
for desc in &mut *descriptors_row_stored {
let chunk_size =
core::cmp::min(max_chunk_size, remaining_bounce_buffer.len());
desc.buffer = remaining_bounce_buffer.as_mut_ptr();
remaining_bounce_buffer = &mut remaining_bounce_buffer[chunk_size..];
desc.set_size(chunk_size);
desc.set_length(chunk_size);
desc.reset_for_tx(false);
}
assert!(
remaining_bounce_buffer.is_empty(),
"bounce buffer must be used up"
);
assert_eq!(
descriptors_row_stored
.iter()
.map(|desc| desc.size())
.sum::<usize>(),
row_width_bytes
);
} }
} }
// Set EOF bit on the last descriptor of the window, to signal
// that the bounce buffer is done being read from.
if let Some(last_desc) = descriptors_window.last_mut() {
last_desc.reset_for_tx(true);
}
assert_eq!(
descriptors_window
.iter()
.map(|desc| desc.size())
.sum::<usize>(),
window_size_rows * (row_front_porch_bytes + row_width_bytes)
);
}
assert_eq!(
descriptors_frame
.iter()
.map(|desc| desc.size())
.sum::<usize>(),
windows_len * window_size_rows * (row_front_porch_bytes + row_width_bytes)
);
descriptors_frame descriptors_frame
} }
@ -482,7 +624,7 @@ impl DmaBounce {
.with_timeout(Duration::from_millis(100)) .with_timeout(Duration::from_millis(100))
.await .await
.unwrap_or_else(|_| { .unwrap_or_else(|_| {
// error!("Timed out when waiting for skipped windows."); error!("Timed out when waiting for skipped windows.");
0 // TODO: This should be -1 to repeat the same window. 0 // TODO: This should be -1 to repeat the same window.
}); });
@ -506,7 +648,8 @@ impl DmaBounce {
windows_skipped_total += windows_skipped; windows_skipped_total += windows_skipped;
// error!( // error!(
// "Skipped {windows_skipped} windows. Windows skipped per frame: {:.2}%", // "Skipped {windows_skipped} windows. Windows skipped per frame: {:.2}%",
// 100.0 * windows_skipped_total as f32 / (self.frame_index_next + 1) as f32 // 100.0 * windows_skipped_total as f32
// / (self.windows_len * (self.frame_index_next + 1)) as f32
// ); // );
} }
@ -569,7 +712,7 @@ impl DmaBounce {
direction: dma::TransferDirection::Out, direction: dma::TransferDirection::Out,
accesses_psram: false, accesses_psram: false,
burst_transfer: self.burst_config, burst_transfer: self.burst_config,
check_owner: Some(true), // Possibly want to set this to false check_owner: Some(false), // Possibly want to set this to false
auto_write_back: false, // Possibly true auto_write_back: false, // Possibly true
}, },
} }
@ -733,13 +876,14 @@ impl Framebuffer {
channel: DMA_CH0<'static>, channel: DMA_CH0<'static>,
peripheral_src: AnySpi<'static>, peripheral_src: AnySpi<'static>,
peripheral_dst: Dpi<'static, Blocking>, peripheral_dst: Dpi<'static, Blocking>,
width: u32, front_porch_pixels: u32,
height: u32, width_pixels: u32,
height_pixels: u32,
rows_per_window: usize, rows_per_window: usize,
cyclic: bool, cyclic: bool,
) -> Self { ) -> Self {
let buffer_size = width as usize * height as usize * core::mem::size_of::<u16>(); const BYTES_PER_PIXEL: usize = core::mem::size_of::<u16>();
let window_size = rows_per_window * width as usize * core::mem::size_of::<u16>(); let buffer_size = width_pixels as usize * height_pixels as usize * BYTES_PER_PIXEL;
let burst_config = BurstConfig { let burst_config = BurstConfig {
internal_memory: InternalBurstConfig::Enabled, internal_memory: InternalBurstConfig::Enabled,
external_memory: ExternalBurstConfig::Size64, external_memory: ExternalBurstConfig::Size64,
@ -755,14 +899,16 @@ impl Framebuffer {
peripheral_src, peripheral_src,
peripheral_dst, peripheral_dst,
psram_buffer, psram_buffer,
window_size, front_porch_pixels as usize * BYTES_PER_PIXEL,
width_pixels as usize * BYTES_PER_PIXEL,
rows_per_window,
burst_config, burst_config,
cyclic, cyclic,
); );
Self { Self {
width, width: width_pixels,
height, height: height_pixels,
bounce_buffers, bounce_buffers,
} }
} }