mirror of https://github.com/radex/radmatrix.git
optimize leds stage
parent
c60736602d
commit
5a34b7d99a
|
@ -40,6 +40,38 @@ int32_t gfx_decoder_loadNextFrame() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert framebuffer into raw shift register data for fast PIO pixel pushing
|
||||||
|
// Data will be held in buffers, one per pixel's depth bit (aka brightness stage),
|
||||||
|
// with each row split into 32-bit chunks, one per module
|
||||||
|
// (20 pixels, 24 shift register stages, 8 unused bits)
|
||||||
|
// TODO: Move this to leds.cpp
|
||||||
|
// TODO: Use a separate buffer, then copy to ledsBuffer to avoid tearing
|
||||||
|
for (int bi = 0; bi < 8; bi++) {
|
||||||
|
uint8_t bitPosition = 1 << bi;
|
||||||
|
for (int y = 0; y < ROW_COUNT; y++) {
|
||||||
|
auto yOffset = y * COL_COUNT;
|
||||||
|
for (int xModule = 0; xModule < COL_MODULES; xModule++) {
|
||||||
|
auto bufferXOffset = yOffset + xModule * 20;
|
||||||
|
uint32_t sample = 0;
|
||||||
|
|
||||||
|
for (int x = 0; x < 20; x++) {
|
||||||
|
// insert placeholders for unused stages
|
||||||
|
// (before pixels 0, 6, 13)
|
||||||
|
if (x == 0 || x == 6 || x == 13) {
|
||||||
|
sample >>= 1;
|
||||||
|
}
|
||||||
|
uint8_t px = buffer[bufferXOffset + x];
|
||||||
|
bool bit = px & bitPosition;
|
||||||
|
sample = (sample >> 1) | (bit ? 0x80000000 : 0);
|
||||||
|
}
|
||||||
|
// insert placeholder for unused last stage (after pixel 19)
|
||||||
|
sample >>=1;
|
||||||
|
|
||||||
|
ledBuffer[bi][y * COL_MODULES + xModule] = sample;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// copy to framebuffer
|
// copy to framebuffer
|
||||||
// TODO: mutex? double buffer? or something...
|
// TODO: mutex? double buffer? or something...
|
||||||
memcpy(framebuffer, buffer, ROW_COUNT * COL_COUNT);
|
memcpy(framebuffer, buffer, ROW_COUNT * COL_COUNT);
|
||||||
|
|
|
@ -38,6 +38,7 @@ uint8_t brightnessPhaseDelays[COLOR_BITS] = {0, 1, 6, 20, 60};
|
||||||
|
|
||||||
// NOTE: Alignment required to allow 4-byte reads
|
// NOTE: Alignment required to allow 4-byte reads
|
||||||
uint8_t framebuffer[ROW_COUNT * COL_COUNT] __attribute__((aligned(32))) = {0};
|
uint8_t framebuffer[ROW_COUNT * COL_COUNT] __attribute__((aligned(32))) = {0};
|
||||||
|
uint32_t ledBuffer[8][ROW_COUNT * COL_MODULES] = {0};
|
||||||
|
|
||||||
void leds_init() {
|
void leds_init() {
|
||||||
memset(framebuffer, 0, sizeof(framebuffer));
|
memset(framebuffer, 0, sizeof(framebuffer));
|
||||||
|
@ -104,7 +105,7 @@ void leds_render() {
|
||||||
|
|
||||||
for (int yCount = 0; yCount < ROW_COUNT; yCount++) {
|
for (int yCount = 0; yCount < ROW_COUNT; yCount++) {
|
||||||
int y = ROW_COUNT - 1 - yCount;
|
int y = ROW_COUNT - 1 - yCount;
|
||||||
// brightness - pushing data takes 40us, so to maximize brightness (at high brightness phases)
|
// brightness - pushing data takes time, so to maximize brightness (at high brightness phases)
|
||||||
// we want to keep the matrix on during update (except during latch). At low brightness phases,
|
// we want to keep the matrix on during update (except during latch). At low brightness phases,
|
||||||
// we want it off to actually be dim
|
// we want it off to actually be dim
|
||||||
bool brightPhase = brightnessPhase >= 2;
|
bool brightPhase = brightnessPhase >= 2;
|
||||||
|
@ -132,46 +133,23 @@ void leds_render() {
|
||||||
// silly shit
|
// silly shit
|
||||||
// TODO: Some ideas for future optimization:
|
// TODO: Some ideas for future optimization:
|
||||||
// - see if we can disable px pusher delays on improved electric interface
|
// - see if we can disable px pusher delays on improved electric interface
|
||||||
// - use a profiler to see how the inner loop can be improved
|
|
||||||
// - do the shift register bullshit once per frame, so that data can be loaded into
|
|
||||||
// registers with aligned access, DMA, etc.
|
|
||||||
// - improve outer loop which adds 2us of processing on each loop
|
// - improve outer loop which adds 2us of processing on each loop
|
||||||
// - change busy wait into some kind of interrupt-based thing so that processing can continue
|
// - change busy wait into some kind of interrupt-based thing so that processing can continue
|
||||||
// - latch row and clock simultaneously, avoid disabling output
|
// - latch row and clock simultaneously, avoid disabling output
|
||||||
uint8_t *buffer = framebuffer + (y * COL_COUNT);
|
// - DMA?
|
||||||
for (int xModule = 0; xModule < COL_MODULES; xModule++) {
|
for (int xModule = 0; xModule < COL_MODULES; xModule++) {
|
||||||
uint32_t pxValues;
|
uint32_t pxValues = ledBuffer[brightnessPhase + 3][y * COL_MODULES + xModule];
|
||||||
|
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues);
|
||||||
// placeholder at 0; pixels 0, 1, 2
|
|
||||||
pxValues = *(reinterpret_cast<uint32_t *>(buffer));
|
|
||||||
pxValues = pxValues << 8;
|
|
||||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
|
|
||||||
|
|
||||||
// pixels 3, 4, 5, placeholder at 6
|
|
||||||
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 3));
|
|
||||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
|
|
||||||
|
|
||||||
// pixels 6, 7, 8, 9
|
|
||||||
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 6));
|
|
||||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
|
|
||||||
|
|
||||||
// pixels 10, 11, 12, placeholder at 13
|
|
||||||
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 10));
|
|
||||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
|
|
||||||
|
|
||||||
// pixels 13, 14, 15, 16
|
|
||||||
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 13));
|
|
||||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
|
|
||||||
|
|
||||||
// pixels 17, 18, 19, placeholder
|
|
||||||
pxValues = *(reinterpret_cast<uint32_t *>(buffer + 17));
|
|
||||||
pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues >> brightnessPhase);
|
|
||||||
|
|
||||||
buffer += 20;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// wait for all data to be shifted out
|
// wait for all data to be shifted out
|
||||||
pio_sm_drain_tx_fifo(pusher_pio, pusher_sm);
|
while (!pio_sm_is_tx_fifo_empty(pusher_pio, pusher_sm)) {
|
||||||
|
tight_loop_contents();
|
||||||
|
}
|
||||||
|
// TODO: Is there an API to wait for PIO to actually become idle?
|
||||||
|
// pio_sm_drain_tx_fifo doesn't seem to do the trick
|
||||||
|
// if not, we might need to use irqs or something
|
||||||
|
busy_wait_us(4);
|
||||||
|
|
||||||
// disable columns before latch
|
// disable columns before latch
|
||||||
outputEnable(ROW_OE, false);
|
outputEnable(ROW_OE, false);
|
||||||
|
@ -201,7 +179,7 @@ void leds_initPusher() {
|
||||||
uint latchPin = COL_SRCLK;
|
uint latchPin = COL_SRCLK;
|
||||||
|
|
||||||
pio_sm_config config = leds_px_pusher_program_get_default_config(offset);
|
pio_sm_config config = leds_px_pusher_program_get_default_config(offset);
|
||||||
sm_config_set_clkdiv_int_frac(&config, 2, 0);
|
sm_config_set_clkdiv_int_frac(&config, 1, 0);
|
||||||
|
|
||||||
// Shift OSR to the right, autopull
|
// Shift OSR to the right, autopull
|
||||||
sm_config_set_out_shift(&config, true, true, 32);
|
sm_config_set_out_shift(&config, true, true, 32);
|
||||||
|
|
|
@ -31,5 +31,6 @@ void leds_loop();
|
||||||
void leds_render();
|
void leds_render();
|
||||||
|
|
||||||
extern uint8_t framebuffer[ROW_COUNT * COL_COUNT];
|
extern uint8_t framebuffer[ROW_COUNT * COL_COUNT];
|
||||||
|
extern uint32_t ledBuffer[8][ROW_COUNT * COL_MODULES];
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,9 +1,21 @@
|
||||||
.program leds_px_pusher
|
.program leds_px_pusher
|
||||||
.side_set 1 opt
|
.side_set 1 opt
|
||||||
.wrap_target
|
|
||||||
public entry_point:
|
public entry_point:
|
||||||
out null, 3 side 0 [0] ; ignore least significant digits
|
.wrap_target
|
||||||
out pins, 1 ; set bit (shifted for brightness phase by C code)
|
; get 32 bits from fifo (not required with autopull, useful for debug)
|
||||||
out null, 4 side 1 [1] ; ignore remaining bits, latch data, allow time for latching
|
; pull
|
||||||
nop side 0 ; return to 0 (weird glitches happen otherwise)
|
; push 24 bits to the shift registers
|
||||||
|
; also, return latch bit to 0
|
||||||
|
set x, 23 side 0
|
||||||
|
; ignore the 8 least significant bits
|
||||||
|
out null, 8
|
||||||
|
loop:
|
||||||
|
; lower clock edge
|
||||||
|
nop side 0
|
||||||
|
; set bit
|
||||||
|
out pins, 1
|
||||||
|
; loop; latch bit (rising edge)
|
||||||
|
; TODO: check if this delay can be lowered with a PCB
|
||||||
|
jmp x-- loop side 1 [2]
|
||||||
|
end:
|
||||||
.wrap
|
.wrap
|
||||||
|
|
|
@ -13,23 +13,24 @@
|
||||||
// -------------- //
|
// -------------- //
|
||||||
|
|
||||||
#define leds_px_pusher_wrap_target 0
|
#define leds_px_pusher_wrap_target 0
|
||||||
#define leds_px_pusher_wrap 3
|
#define leds_px_pusher_wrap 4
|
||||||
|
|
||||||
#define leds_px_pusher_offset_entry_point 0u
|
#define leds_px_pusher_offset_entry_point 0u
|
||||||
|
|
||||||
static const uint16_t leds_px_pusher_program_instructions[] = {
|
static const uint16_t leds_px_pusher_program_instructions[] = {
|
||||||
// .wrap_target
|
// .wrap_target
|
||||||
0x7063, // 0: out null, 3 side 0
|
0xf037, // 0: set x, 23 side 0
|
||||||
0x6001, // 1: out pins, 1
|
0x6068, // 1: out null, 8
|
||||||
0x7964, // 2: out null, 4 side 1 [1]
|
0xb042, // 2: nop side 0
|
||||||
0xb042, // 3: nop side 0
|
0x6001, // 3: out pins, 1
|
||||||
|
0x1a42, // 4: jmp x--, 2 side 1 [2]
|
||||||
// .wrap
|
// .wrap
|
||||||
};
|
};
|
||||||
|
|
||||||
#if !PICO_NO_HARDWARE
|
#if !PICO_NO_HARDWARE
|
||||||
static const struct pio_program leds_px_pusher_program = {
|
static const struct pio_program leds_px_pusher_program = {
|
||||||
.instructions = leds_px_pusher_program_instructions,
|
.instructions = leds_px_pusher_program_instructions,
|
||||||
.length = 4,
|
.length = 5,
|
||||||
.origin = -1,
|
.origin = -1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue