1
0
Fork 0

optimize leds stage

sd2
radex 2024-05-26 16:51:08 +02:00
parent c60736602d
commit 5a34b7d99a
Signed by: radex
SSH Key Fingerprint: SHA256:hvqRXAGG1h89yqnS+cyFTLKQbzjWD4uXIqw7Y+0ws30
5 changed files with 70 additions and 46 deletions

View File

@ -40,6 +40,38 @@ int32_t gfx_decoder_loadNextFrame() {
return false;
}
// Convert framebuffer into raw shift register data for fast PIO pixel pushing
// Data will be held in buffers, one per pixel's depth bit (aka brightness stage),
// with each row split into 32-bit chunks, one per module
// (20 pixels, 24 shift register stages, 8 unused bits)
// TODO: Move this to leds.cpp
// TODO: Use a separate buffer, then copy to ledsBuffer to avoid tearing
for (int bi = 0; bi < 8; bi++) {
uint8_t bitPosition = 1 << bi;
for (int y = 0; y < ROW_COUNT; y++) {
auto yOffset = y * COL_COUNT;
for (int xModule = 0; xModule < COL_MODULES; xModule++) {
auto bufferXOffset = yOffset + xModule * 20;
uint32_t sample = 0;
for (int x = 0; x < 20; x++) {
// insert placeholders for unused stages
// (before pixels 0, 6, 13)
if (x == 0 || x == 6 || x == 13) {
sample >>= 1;
}
uint8_t px = buffer[bufferXOffset + x];
bool bit = px & bitPosition;
sample = (sample >> 1) | (bit ? 0x80000000 : 0);
}
// insert placeholder for unused last stage (after pixel 19)
sample >>=1;
ledBuffer[bi][y * COL_MODULES + xModule] = sample;
}
}
}
// copy to framebuffer
// TODO: mutex? double buffer? or something...
memcpy(framebuffer, buffer, ROW_COUNT * COL_COUNT);

View File

@ -38,6 +38,7 @@ uint8_t brightnessPhaseDelays[COLOR_BITS] = {0, 1, 6, 20, 60};
// NOTE: Alignment required to allow 4-byte reads
uint8_t framebuffer[ROW_COUNT * COL_COUNT] __attribute__((aligned(32))) = {0};
uint32_t ledBuffer[8][ROW_COUNT * COL_MODULES] = {0};
void leds_init() {
memset(framebuffer, 0, sizeof(framebuffer));
@ -104,7 +105,7 @@ void leds_render() {
for (int yCount = 0; yCount < ROW_COUNT; yCount++) {
int y = ROW_COUNT - 1 - yCount;
// brightness - pushing data takes 40us, so to maximize brightness (at high brightness phases)
// brightness - pushing data takes time, so to maximize brightness (at high brightness phases)
// we want to keep the matrix on during update (except during latch). At low brightness phases,
// we want it off to actually be dim
bool brightPhase = brightnessPhase >= 2;
@ -132,46 +133,23 @@ void leds_render() {
// silly shit
// TODO: Some ideas for future optimization:
// - see if we can disable px pusher delays on improved electric interface
// - use a profiler to see how the inner loop can be improved
// - do the shift register bullshit once per frame, so that data can be loaded into
// registers with aligned access, DMA, etc.
// - improve outer loop which adds 2us of processing on each loop
// - change busy wait into some kind of interrupt-based thing so that processing can continue
// - latch row and clock simultaneously, avoid disabling output
uint8_t *buffer = framebuffer + (y * COL_COUNT);
// - DMA?
for (int xModule = 0; xModule < COL_MODULES; xModule++) {
uint32_t pxValues;
// placeholder at 0; pixels 0, 1, 2
pxValues = *(reinterpret_cast<uint32_t *>(buffer));
pxValues = pxValues << 8;
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
// pixels 3, 4, 5, placeholder at 6
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 3));
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
// pixels 6, 7, 8, 9
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 6));
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
// pixels 10, 11, 12, placeholder at 13
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 10));
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
// pixels 13, 14, 15, 16
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 13));
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
// pixels 17, 18, 19, placeholder
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 17));
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
buffer += 20;
uint32_t pxValues = ledBuffer[brightnessPhase + 3][y * COL_MODULES + xModule];
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues);
}
// wait for all data to be shifted out
pio_sm_drain_tx_fifo(pusher_pio, pusher_sm);
while (!pio_sm_is_tx_fifo_empty(pusher_pio, pusher_sm)) {
tight_loop_contents();
}
// TODO: Is there an API to wait for PIO to actually become idle?
// pio_sm_drain_tx_fifo doesn't seem to do the trick
// if not, we might need to use irqs or something
busy_wait_us(4);
// disable columns before latch
outputEnable(ROW_OE, false);
@ -201,7 +179,7 @@ void leds_initPusher() {
uint latchPin = COL_SRCLK;
pio_sm_config config = leds_px_pusher_program_get_default_config(offset);
sm_config_set_clkdiv_int_frac(&config, 2, 0);
sm_config_set_clkdiv_int_frac(&config, 1, 0);
// Shift OSR to the right, autopull
sm_config_set_out_shift(&config, true, true, 32);

View File

@ -31,5 +31,6 @@ void leds_loop();
void leds_render();
extern uint8_t framebuffer[ROW_COUNT * COL_COUNT];
extern uint32_t ledBuffer[8][ROW_COUNT * COL_MODULES];
#endif

View File

@ -1,9 +1,21 @@
.program leds_px_pusher
.side_set 1 opt
.wrap_target
public entry_point:
out null, 3 side 0 [0] ; ignore least significant digits
out pins, 1 ; set bit (shifted for brightness phase by C code)
out null, 4 side 1 [1] ; ignore remaining bits, latch data, allow time for latching
nop side 0 ; return to 0 (weird glitches happen otherwise)
.wrap_target
; get 32 bits from fifo (not required with autopull, useful for debug)
; pull
; push 24 bits to the shift registers
; also, return latch bit to 0
set x, 23 side 0
; ignore the 8 least significant bits
out null, 8
loop:
; lower clock edge
nop side 0
; set bit
out pins, 1
; loop; latch bit (rising edge)
; TODO: check if this delay can be lowered with a PCB
jmp x-- loop side 1 [2]
end:
.wrap

View File

@ -13,23 +13,24 @@
// -------------- //
#define leds_px_pusher_wrap_target 0
#define leds_px_pusher_wrap 3
#define leds_px_pusher_wrap 4
#define leds_px_pusher_offset_entry_point 0u
static const uint16_t leds_px_pusher_program_instructions[] = {
// .wrap_target
0x7063, // 0: out null, 3 side 0
0x6001, // 1: out pins, 1
0x7964, // 2: out null, 4 side 1 [1]
0xb042, // 3: nop side 0
0xf037, // 0: set x, 23 side 0
0x6068, // 1: out null, 8
0xb042, // 2: nop side 0
0x6001, // 3: out pins, 1
0x1a42, // 4: jmp x--, 2 side 1 [2]
// .wrap
};
#if !PICO_NO_HARDWARE
static const struct pio_program leds_px_pusher_program = {
.instructions = leds_px_pusher_program_instructions,
.length = 4,
.length = 5,
.origin = -1,
};