some microoptimizations

2024-05-26 17:30:40 +02:00 · 2024-05-26 17:30:40 +02:00 · 589f8a96ae
parent 5e368cf5d3
commit 589f8a96ae
3 changed files with 66 additions and 60 deletions
--- a/firmware/src/gfx_decoder.cpp
+++ b/firmware/src/gfx_decoder.cpp
@ -44,6 +44,7 @@ int32_t gfx_decoder_loadNextFrame() {
  // Data will be held in buffers, one per pixel's depth bit (aka brightness stage),
  // with each row split into 32-bit chunks, one per module
  // (20 pixels, 24 shift register stages, 8 unused bits)
+  // Rows are inverted, because that's how they're fed to the shift registers
  // TODO: Move this to leds.cpp
  // TODO: Use a separate buffer, then copy to ledsBuffer to avoid tearing
  for (int bi = 0; bi < 8; bi++) {
@ -67,7 +68,7 @@ int32_t gfx_decoder_loadNextFrame() {
        // insert placeholder for unused last stage (after pixel 19)
        sample >>=1;

-        ledBuffer[bi][y * COL_MODULES + xModule] = sample;
+        ledBuffer[bi][(ROW_COUNT - 1 - y) * COL_MODULES + xModule] = sample;
      }
    }
  }
--- a/firmware/src/leds.cpp
+++ b/firmware/src/leds.cpp
@ -16,8 +16,6 @@ inline void pulsePin(uint8_t pin) {
   // there are glitches without this (maybe just due to breadboard...)
  _NOP();
  _NOP();
-  _NOP();
-    // busy_wait_us_32(50);
  gpio_put(pin, LOW);
 }

@ -94,6 +92,10 @@ void leds_initRenderer() {
 }

 void leds_render() {
+  // brightness phase
+  bool brightPhase = brightnessPhase >= 3;
+  auto buffer = ledBuffer[brightnessPhase + 3];
+
  // hide output
  outputEnable(ROW_OE, false);

@ -103,65 +105,67 @@ void leds_render() {
  // start selecting rows
  gpio_put(ROW_SER, HIGH);

-  for (int yCount = 0; yCount < ROW_COUNT; yCount++) {
-    int y = ROW_COUNT - 1 - yCount;
-    // brigthness - pushing data takes time, so to maximize brightness (at high brightness phases)
-    // we want to keep the matrix on during update (except during latch). At low brightness phases,
-    // we want it off to actually be dim
-    bool brightPhase = brightnessPhase >= 2;
-    outputEnable(ROW_OE, brightPhase);
+  int bufferOffset = 0;
+  for (int yModule = 0; yModule < ROW_MODULES; yModule++) {
+    for (int moduleY = 0; moduleY < 20; moduleY++) {
+      // brightness - pushing data takes time, so to maximize brightness (at high brightness phases)
+      // we want to keep the matrix on during update (except during latch). At low brightness phases,
+      // we want it off to actually be dim
+      outputEnable(ROW_OE, brightPhase);

-    // next row
-    pulsePin(ROW_SRCLK);
-    // only one row
-    gpio_put(ROW_SER, LOW);
-
-    // we use 7/8 stages on shift registers + 1 is unused
-    int moduleY = yCount % 20;
-    if (moduleY == 0) {
+      // next row
      pulsePin(ROW_SRCLK);
+      // only one row
+      gpio_put(ROW_SER, LOW);
+
+      // we use 7/8 stages on shift registers + 1 is unused
+      if (moduleY == 0) {
+        pulsePin(ROW_SRCLK);
+      }
+
+      if (moduleY == 7 || moduleY == 14 || (moduleY == 0 && yModule != 0)) {
+        pulsePin(ROW_SRCLK);
+      }
+
+      // set row data
+      // NOTE: values are loaded right-left
+      // Optimized implementation: use PIO, avoid division, modulo, etc...
+      // we use 7/8 stages of each shift register + 1 is unused so we need to do
+      // silly shit
+      // TODO: Some ideas for future optimization:
+      // - see if we can disable px pusher delays on improved electric interface
+      // - improve outer loop which adds 2us of processing on each loop
+      // - change busy wait into some kind of interrupt-based thing so that processing can continue
+      // - latch row and clock simultaneously, avoid disabling output
+      // - DMA?
+      for (int xModule = 0; xModule < COL_MODULES; xModule++) {
+        uint32_t pxValues = buffer[bufferOffset + xModule];
+        pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues);
+      }
+
+      // wait for all data to be shifted out
+      while (!pio_sm_is_tx_fifo_empty(pusher_pio, pusher_sm)) {
+        tight_loop_contents();
+      }
+      // TODO: Is there an API to wait for PIO to actually become idle?
+      // pio_sm_drain_tx_fifo doesn't seem to do the trick
+      // if not, we might need to use irqs or something
+      busy_wait_us(4);
+
+      // latch rows and columns
+      gpio_set_mask(1 << ROW_RCLK | 1 << COL_RCLK);
+      _NOP();
+      _NOP();
+      gpio_clr_mask(1 << ROW_RCLK | 1 << COL_RCLK);
+
+      // show for a certain period
+      outputEnable(ROW_OE, true);
+      busy_wait_us_32(brightnessPhaseDelays[brightnessPhase]);
+      outputEnable(ROW_OE, false);
+
+      // next row
+      bufferOffset += COL_MODULES;
    }
-
-    if (moduleY == 7 || moduleY == 14 || (moduleY == 0 && yCount != 0)) {
-      pulsePin(ROW_SRCLK);
-    }
-
-    // set row data
-    // NOTE: values are loaded right-left
-    // Optimized implementation: use PIO, avoid division, modulo, etc...
-    // we use 7/8 stages of each shift register + 1 is unused so we need to do
-    // silly shit
-    // TODO: Some ideas for future optimization:
-    // - see if we can disable px pusher delays on improved electric interface
-    // - improve outer loop which adds 2us of processing on each loop
-    // - change busy wait into some kind of interrupt-based thing so that processing can continue
-    // - latch row and clock simultaneously, avoid disabling output
-    // - DMA?
-    for (int xModule = 0; xModule < COL_MODULES; xModule++) {
-      uint32_t pxValues = ledBuffer[brightnessPhase + 3][y * COL_MODULES + xModule];
-      pio_sm_put_blocking(pusher_pio, pusher_sm, pxValues);
-    }
-
-    // wait for all data to be shifted out
-    while (!pio_sm_is_tx_fifo_empty(pusher_pio, pusher_sm)) {
-      tight_loop_contents();
-    }
-    // TODO: Is there an API to wait for PIO to actually become idle?
-    // pio_sm_drain_tx_fifo doesn't seem to do the trick
-    // if not, we might need to use irqs or something
-    busy_wait_us(4);
-
-    // disable columns before latch
-    outputEnable(ROW_OE, false);
-
-    // latch rows and columns
-    pulsePin(ROW_RCLK);
-    pulsePin(COL_RCLK);
-
-    // show for a certain period
-    outputEnable(ROW_OE, true);
-    busy_wait_us_32(brightnessPhaseDelays[brightnessPhase]);
-    outputEnable(ROW_OE, false);
  }

  // next brightness phase
--- a/firmware/src/leds.h
+++ b/firmware/src/leds.h
@ -16,7 +16,8 @@
 #define ROW_SRCLK 11
 #define ROW_SRCLR 10

-#define ROW_COUNT 40
+#define ROW_MODULES 2
+#define ROW_COUNT (ROW_MODULES * 20) // total rows, 20 per module; parenthesized so it expands safely inside larger expressions
 #define COL_MODULES 2
 #define COL_COUNT COL_MODULES * 20