1
0
Fork 0

optimize leds stage

sd2
radex 2024-05-26 16:51:08 +02:00
parent c60736602d
commit 5a34b7d99a
Signed by: radex
SSH Key Fingerprint: SHA256:hvqRXAGG1h89yqnS+cyFTLKQbzjWD4uXIqw7Y+0ws30
5 changed files with 70 additions and 46 deletions

View File

@ -40,6 +40,38 @@ int32_t gfx_decoder_loadNextFrame() {
return false; return false;
} }
// Convert framebuffer into raw shift register data for fast PIO pixel pushing
// Data will be held in buffers, one per pixel's depth bit (aka brightness stage),
// with each row split into 32-bit chunks, one per module
// (20 pixels, 24 shift register stages, 8 unused bits)
// TODO: Move this to leds.cpp
// TODO: Use a separate buffer, then copy to ledsBuffer to avoid tearing
for (int bi = 0; bi < 8; bi++) {
uint8_t bitPosition = 1 << bi;
for (int y = 0; y < ROW_COUNT; y++) {
auto yOffset = y * COL_COUNT;
for (int xModule = 0; xModule < COL_MODULES; xModule++) {
auto bufferXOffset = yOffset + xModule * 20;
uint32_t sample = 0;
for (int x = 0; x < 20; x++) {
// insert placeholders for unused stages
// (before pixels 0, 6, 13)
if (x == 0 || x == 6 || x == 13) {
sample >>= 1;
}
uint8_t px = buffer[bufferXOffset + x];
bool bit = px & bitPosition;
sample = (sample >> 1) | (bit ? 0x80000000 : 0);
}
// insert placeholder for unused last stage (after pixel 19)
sample >>=1;
ledBuffer[bi][y * COL_MODULES + xModule] = sample;
}
}
}
// copy to framebuffer // copy to framebuffer
// TODO: mutex? double buffer? or something... // TODO: mutex? double buffer? or something...
memcpy(framebuffer, buffer, ROW_COUNT * COL_COUNT); memcpy(framebuffer, buffer, ROW_COUNT * COL_COUNT);

View File

@ -38,6 +38,7 @@ uint8_t brightnessPhaseDelays[COLOR_BITS] = {0, 1, 6, 20, 60};
// NOTE: Alignment required to allow 4-byte reads // NOTE: Alignment required to allow 4-byte reads
uint8_t framebuffer[ROW_COUNT * COL_COUNT] __attribute__((aligned(32))) = {0}; uint8_t framebuffer[ROW_COUNT * COL_COUNT] __attribute__((aligned(32))) = {0};
uint32_t ledBuffer[8][ROW_COUNT * COL_MODULES] = {0};
void leds_init() { void leds_init() {
memset(framebuffer, 0, sizeof(framebuffer)); memset(framebuffer, 0, sizeof(framebuffer));
@ -104,7 +105,7 @@ void leds_render() {
for (int yCount = 0; yCount < ROW_COUNT; yCount++) { for (int yCount = 0; yCount < ROW_COUNT; yCount++) {
int y = ROW_COUNT - 1 - yCount; int y = ROW_COUNT - 1 - yCount;
// brightness - pushing data takes 40us, so to maximize brightness (at high brightness phases) // brightness - pushing data takes time, so to maximize brightness (at high brightness phases)
// we want to keep the matrix on during update (except during latch). At low brightness phases, // we want to keep the matrix on during update (except during latch). At low brightness phases,
// we want it off to actually be dim // we want it off to actually be dim
bool brightPhase = brightnessPhase >= 2; bool brightPhase = brightnessPhase >= 2;
@ -132,46 +133,23 @@ void leds_render() {
// silly shit // silly shit
// TODO: Some ideas for future optimization: // TODO: Some ideas for future optimization:
// - see if we can disable px pusher delays on improved electric interface // - see if we can disable px pusher delays on improved electric interface
// - use a profiler to see how the inner loop can be improved
// - do the shift register bullshit once per frame, so that data can be loaded into
// registers with aligned access, DMA, etc.
// - improve outer loop which adds 2us of processing on each loop // - improve outer loop which adds 2us of processing on each loop
// - change busy wait into some kind of interrupt-based thing so that processing can continue // - change busy wait into some kind of interrupt-based thing so that processing can continue
// - latch row and clock simultaneously, avoid disabling output // - latch row and clock simultaneously, avoid disabling output
uint8_t *buffer = framebuffer + (y * COL_COUNT); // - DMA?
for (int xModule = 0; xModule < COL_MODULES; xModule++) { for (int xModule = 0; xModule < COL_MODULES; xModule++) {
uint32_t pxValues; uint32_t pxValues = ledBuffer[brightnessPhase + 3][y * COL_MODULES + xModule];
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues);
// placeholder at 0; pixels 0, 1, 2
pxValues = *(reinterpret_cast<uint32_t *>(buffer));
pxValues = pxValues << 8;
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
// pixels 3, 4, 5, placeholder at 6
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 3));
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
// pixels 6, 7, 8, 9
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 6));
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
// pixels 10, 11, 12, placeholder at 13
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 10));
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
// pixels 13, 14, 15, 16
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 13));
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
// pixels 17, 18, 19, placeholder
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 17));
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
buffer += 20;
} }
// wait for all data to be shifted out // wait for all data to be shifted out
pio_sm_drain_tx_fifo(pusher_pio, pusher_sm); while (!pio_sm_is_tx_fifo_empty(pusher_pio, pusher_sm)) {
tight_loop_contents();
}
// TODO: Is there an API to wait for PIO to actually become idle?
// pio_sm_drain_tx_fifo doesn't seem to do the trick
// if not, we might need to use irqs or something
busy_wait_us(4);
// disable columns before latch // disable columns before latch
outputEnable(ROW_OE, false); outputEnable(ROW_OE, false);
@ -201,7 +179,7 @@ void leds_initPusher() {
uint latchPin = COL_SRCLK; uint latchPin = COL_SRCLK;
pio_sm_config config = leds_px_pusher_program_get_default_config(offset); pio_sm_config config = leds_px_pusher_program_get_default_config(offset);
sm_config_set_clkdiv_int_frac(&config, 2, 0); sm_config_set_clkdiv_int_frac(&config, 1, 0);
// Shift OSR to the right, autopull // Shift OSR to the right, autopull
sm_config_set_out_shift(&config, true, true, 32); sm_config_set_out_shift(&config, true, true, 32);

View File

@ -31,5 +31,6 @@ void leds_loop();
void leds_render(); void leds_render();
extern uint8_t framebuffer[ROW_COUNT * COL_COUNT]; extern uint8_t framebuffer[ROW_COUNT * COL_COUNT];
extern uint32_t ledBuffer[8][ROW_COUNT * COL_MODULES];
#endif #endif

View File

@ -1,9 +1,21 @@
.program leds_px_pusher .program leds_px_pusher
.side_set 1 opt .side_set 1 opt
.wrap_target
public entry_point: public entry_point:
out null, 3 side 0 [0] ; ignore least significant digits .wrap_target
out pins, 1 ; set bit (shifted for brightness phase by C code) ; get 32 bits from fifo (not required with autopull, useful for debug)
out null, 4 side 1 [1] ; ignore remaining bits, latch data, allow time for latching ; pull
nop side 0 ; return to 0 (weird glitches happen otherwise) ; push 24 bits to the shift registers
; also, return latch bit to 0
set x, 23 side 0
; ignore the 8 least significant bits
out null, 8
loop:
; lower clock edge
nop side 0
; set bit
out pins, 1
; loop; latch bit (rising edge)
; TODO: check if this delay can be lowered with a PCB
jmp x-- loop side 1 [2]
end:
.wrap .wrap

View File

@ -13,23 +13,24 @@
// -------------- // // -------------- //
#define leds_px_pusher_wrap_target 0 #define leds_px_pusher_wrap_target 0
#define leds_px_pusher_wrap 3 #define leds_px_pusher_wrap 4
#define leds_px_pusher_offset_entry_point 0u #define leds_px_pusher_offset_entry_point 0u
static const uint16_t leds_px_pusher_program_instructions[] = { static const uint16_t leds_px_pusher_program_instructions[] = {
// .wrap_target // .wrap_target
0x7063, // 0: out null, 3 side 0 0xf037, // 0: set x, 23 side 0
0x6001, // 1: out pins, 1 0x6068, // 1: out null, 8
0x7964, // 2: out null, 4 side 1 [1] 0xb042, // 2: nop side 0
0xb042, // 3: nop side 0 0x6001, // 3: out pins, 1
0x1a42, // 4: jmp x--, 2 side 1 [2]
// .wrap // .wrap
}; };
#if !PICO_NO_HARDWARE #if !PICO_NO_HARDWARE
static const struct pio_program leds_px_pusher_program = { static const struct pio_program leds_px_pusher_program = {
.instructions = leds_px_pusher_program_instructions, .instructions = leds_px_pusher_program_instructions,
.length = 4, .length = 5,
.origin = -1, .origin = -1,
}; };