mirror of https://github.com/radex/radmatrix.git
optimize leds stage
parent
c60736602d
commit
5a34b7d99a
|
@ -40,6 +40,38 @@ int32_t gfx_decoder_loadNextFrame() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Convert framebuffer into raw shift register data for fast PIO pixel pushing
|
||||
// Data will be held in buffers, one per pixel's depth bit (aka brightness stage),
|
||||
// with each row split into 32-bit chunks, one per module
|
||||
// (20 pixels, 24 shift register stages, 8 unused bits)
|
||||
// TODO: Move this to leds.cpp
|
||||
// TODO: Use a separate buffer, then copy to ledsBuffer to avoid tearing
|
||||
for (int bi = 0; bi < 8; bi++) {
|
||||
uint8_t bitPosition = 1 << bi;
|
||||
for (int y = 0; y < ROW_COUNT; y++) {
|
||||
auto yOffset = y * COL_COUNT;
|
||||
for (int xModule = 0; xModule < COL_MODULES; xModule++) {
|
||||
auto bufferXOffset = yOffset + xModule * 20;
|
||||
uint32_t sample = 0;
|
||||
|
||||
for (int x = 0; x < 20; x++) {
|
||||
// insert placeholders for unused stages
|
||||
// (before pixels 0, 6, 13)
|
||||
if (x == 0 || x == 6 || x == 13) {
|
||||
sample >>= 1;
|
||||
}
|
||||
uint8_t px = buffer[bufferXOffset + x];
|
||||
bool bit = px & bitPosition;
|
||||
sample = (sample >> 1) | (bit ? 0x80000000 : 0);
|
||||
}
|
||||
// insert placeholder for unused last stage (after pixel 19)
|
||||
sample >>=1;
|
||||
|
||||
ledBuffer[bi][y * COL_MODULES + xModule] = sample;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// copy to framebuffer
|
||||
// TODO: mutex? double buffer? or something...
|
||||
memcpy(framebuffer, buffer, ROW_COUNT * COL_COUNT);
|
||||
|
|
|
@ -38,6 +38,7 @@ uint8_t brightnessPhaseDelays[COLOR_BITS] = {0, 1, 6, 20, 60};
|
|||
|
||||
// NOTE: Alignment required to allow 4-byte reads
|
||||
uint8_t framebuffer[ROW_COUNT * COL_COUNT] __attribute__((aligned(32))) = {0};
|
||||
uint32_t ledBuffer[8][ROW_COUNT * COL_MODULES] = {0};
|
||||
|
||||
void leds_init() {
|
||||
memset(framebuffer, 0, sizeof(framebuffer));
|
||||
|
@ -104,7 +105,7 @@ void leds_render() {
|
|||
|
||||
for (int yCount = 0; yCount < ROW_COUNT; yCount++) {
|
||||
int y = ROW_COUNT - 1 - yCount;
|
||||
// brightness - pushing data takes 40us, so to maximize brightness (at high brightness phases)
|
||||
// brightness - pushing data takes time, so to maximize brightness (at high brightness phases)
|
||||
// we want to keep the matrix on during update (except during latch). At low brightness phases,
|
||||
// we want it off to actually be dim
|
||||
bool brightPhase = brightnessPhase >= 2;
|
||||
|
@ -132,46 +133,23 @@ void leds_render() {
|
|||
// silly shit
|
||||
// TODO: Some ideas for future optimization:
|
||||
// - see if we can disable px pusher delays on improved electric interface
|
||||
// - use a profiler to see how the inner loop can be improved
|
||||
// - do the shift register bullshit once per frame, so that data can be loaded into
|
||||
// registers with aligned access, DMA, etc.
|
||||
// - improve outer loop which adds 2us of processing on each loop
|
||||
// - change busy wait into some kind of interrupt-based thing so that processing can continue
|
||||
// - latch row and clock simultaneously, avoid disabling output
|
||||
uint8_t *buffer = framebuffer + (y * COL_COUNT);
|
||||
// - DMA?
|
||||
for (int xModule = 0; xModule < COL_MODULES; xModule++) {
|
||||
uint32_t pxValues;
|
||||
|
||||
// placeholder at 0; pixels 0, 1, 2
|
||||
pxValues = *(reinterpret_cast<uint32_t *>(buffer));
|
||||
pxValues = pxValues << 8;
|
||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
|
||||
|
||||
// pixels 3, 4, 5, placeholder at 6
|
||||
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 3));
|
||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
|
||||
|
||||
// pixels 6, 7, 8, 9
|
||||
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 6));
|
||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
|
||||
|
||||
// pixels 10, 11, 12, placeholder at 13
|
||||
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 10));
|
||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
|
||||
|
||||
// pixels 13, 14, 15, 16
|
||||
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 13));
|
||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
|
||||
|
||||
// pixels 17, 18, 19, placeholder
|
||||
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 17));
|
||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
|
||||
|
||||
buffer += 20;
|
||||
uint32_t pxValues = ledBuffer[brightnessPhase + 3][y * COL_MODULES + xModule];
|
||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues);
|
||||
}
|
||||
|
||||
// wait for all data to be shifted out
|
||||
pio_sm_drain_tx_fifo(pusher_pio, pusher_sm);
|
||||
while (!pio_sm_is_tx_fifo_empty(pusher_pio, pusher_sm)) {
|
||||
tight_loop_contents();
|
||||
}
|
||||
// TODO: Is there an API to wait for PIO to actually become idle?
|
||||
// pio_sm_drain_tx_fifo doesn't seem to do the trick
|
||||
// if not, we might need to use irqs or something
|
||||
busy_wait_us(4);
|
||||
|
||||
// disable columns before latch
|
||||
outputEnable(ROW_OE, false);
|
||||
|
@ -201,7 +179,7 @@ void leds_initPusher() {
|
|||
uint latchPin = COL_SRCLK;
|
||||
|
||||
pio_sm_config config = leds_px_pusher_program_get_default_config(offset);
|
||||
sm_config_set_clkdiv_int_frac(&config, 2, 0);
|
||||
sm_config_set_clkdiv_int_frac(&config, 1, 0);
|
||||
|
||||
// Shift OSR to the right, autopull
|
||||
sm_config_set_out_shift(&config, true, true, 32);
|
||||
|
|
|
@ -31,5 +31,6 @@ void leds_loop();
|
|||
void leds_render();
|
||||
|
||||
extern uint8_t framebuffer[ROW_COUNT * COL_COUNT];
|
||||
extern uint32_t ledBuffer[8][ROW_COUNT * COL_MODULES];
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,9 +1,21 @@
|
|||
.program leds_px_pusher
|
||||
.side_set 1 opt
|
||||
.wrap_target
|
||||
public entry_point:
|
||||
out null, 3 side 0 [0] ; ignore least significant digits
|
||||
out pins, 1 ; set bit (shifted for brightness phase by C code)
|
||||
out null, 4 side 1 [1] ; ignore remaining bits, latch data, allow time for latching
|
||||
nop side 0 ; return to 0 (weird glitches happen otherwise)
|
||||
.wrap_target
|
||||
; get 32 bits from fifo (not required with autopull, useful for debug)
|
||||
; pull
|
||||
; push 24 bits to the shift registers
|
||||
; also, return latch bit to 0
|
||||
set x, 23 side 0
|
||||
; ignore the 8 least significant bits
|
||||
out null, 8
|
||||
loop:
|
||||
; lower clock edge
|
||||
nop side 0
|
||||
; set bit
|
||||
out pins, 1
|
||||
; loop; latch bit (rising edge)
|
||||
; TODO: check if this delay can be lowered with a PCB
|
||||
jmp x-- loop side 1 [2]
|
||||
end:
|
||||
.wrap
|
||||
|
|
|
@ -13,23 +13,24 @@
|
|||
// -------------- //
|
||||
|
||||
#define leds_px_pusher_wrap_target 0
|
||||
#define leds_px_pusher_wrap 3
|
||||
#define leds_px_pusher_wrap 4
|
||||
|
||||
#define leds_px_pusher_offset_entry_point 0u
|
||||
|
||||
static const uint16_t leds_px_pusher_program_instructions[] = {
|
||||
// .wrap_target
|
||||
0x7063, // 0: out null, 3 side 0
|
||||
0x6001, // 1: out pins, 1
|
||||
0x7964, // 2: out null, 4 side 1 [1]
|
||||
0xb042, // 3: nop side 0
|
||||
0xf037, // 0: set x, 23 side 0
|
||||
0x6068, // 1: out null, 8
|
||||
0xb042, // 2: nop side 0
|
||||
0x6001, // 3: out pins, 1
|
||||
0x1a42, // 4: jmp x--, 2 side 1 [2]
|
||||
// .wrap
|
||||
};
|
||||
|
||||
#if !PICO_NO_HARDWARE
|
||||
static const struct pio_program leds_px_pusher_program = {
|
||||
.instructions = leds_px_pusher_program_instructions,
|
||||
.length = 4,
|
||||
.length = 5,
|
||||
.origin = -1,
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue