HUB75 MicroPython DMA and PIO

Switch MicroPython HUB75 driver over to DMA/PIO.

TODO:

* Move this into drivers for C++ use too
* Fix hitting "Stop" in Thonny causing rows to stick on the display (this is bad, and should not happen)
* Fix "Stop -> Start" in Thonny causing weird display issues (it's not memory offsets, maybe DMA/PIO issues?)
This commit is contained in:
Phil Howard 2021-11-22 14:12:18 +00:00
parent 01f546aaf1
commit 0e8c1228af
5 changed files with 314 additions and 40 deletions

View File

@ -28,9 +28,14 @@ typedef struct _Hub75_obj_t {
_Hub75_obj_t *hub75_obj;
// Interrupt trampoline: forwards the IRQ to the driver instance behind the
// global hub75_obj pointer, and does nothing while no instance is registered.
void __isr dma_complete() {
    if(hub75_obj == nullptr) {
        return;
    }
    hub75_obj->hub75->dma_complete();
}
/***** Print *****/
void Hub75_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind) {
(void)kind; //Unused input parameter
(void)kind; // Unused input parameter
_Hub75_obj_t *self = MP_OBJ_TO_PTR2(self_in, _Hub75_obj_t);
mp_print_str(print, "Hub75(");
@ -39,6 +44,11 @@ void Hub75_print(const mp_print_t *print, mp_obj_t self_in, mp_print_kind_t kind
mp_print_str(print, " x ");
mp_obj_print_helper(print, mp_obj_new_int(self->hub75->height), PRINT_REPR);
mp_print_str(print, "addr = front: ");
mp_obj_print_helper(print, mp_obj_new_int((uint32_t)&self->hub75->front_buffer[0]), PRINT_REPR);
mp_print_str(print, " back: ");
mp_obj_print_helper(print, mp_obj_new_int((uint32_t)&self->hub75->back_buffer[0]), PRINT_REPR);
mp_print_str(print, ")");
}
@ -51,8 +61,6 @@ mp_obj_t Hub75___del__(mp_obj_t self_in) {
/***** Constructor *****/
mp_obj_t Hub75_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *all_args) {
_Hub75_obj_t *self = nullptr;
enum {
ARG_width,
ARG_height,
@ -84,13 +92,12 @@ mp_obj_t Hub75_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, c
buffer = m_new(Pixel, width * height * 2);
}
self = m_new_obj_with_finaliser(_Hub75_obj_t);
self->base.type = &Hub75_type;
self->buf = buffer;
self->hub75 = new Hub75(width, height, buffer);
hub75_obj = self;
hub75_obj = m_new_obj_with_finaliser(_Hub75_obj_t);
hub75_obj->base.type = &Hub75_type;
hub75_obj->buf = buffer;
hub75_obj->hub75 = new Hub75(width, height, buffer);
return MP_OBJ_FROM_PTR(self);
return MP_OBJ_FROM_PTR(hub75_obj);
}
mp_obj_t Hub75_clear(mp_obj_t self_in) {
@ -107,15 +114,17 @@ mp_obj_t Hub75_flip(mp_obj_t self_in) {
// Argument-less entry point that starts the driver held in the global
// hub75_obj pointer; it is a no-op while that pointer is null.
// NOTE(review): the two start() calls below look like the before/after lines
// of the same change (start() gained a handler parameter) — confirm which one
// is live before relying on this listing.
void Hub75_display_update() {
if(hub75_obj) {
hub75_obj->hub75->start();
// A null handler selects start()'s blocking display_update() loop.
hub75_obj->hub75->start(nullptr);
}
}
// MicroPython binding: starts the display refresh for the Hub75 object passed
// in `self_in` and returns None.
// NOTE(review): both an active and a commented-out copy of the core-1 launch
// appear below; they look like the before/after lines of this change, with
// the DMA/IRQ start at the end being the replacement — confirm.
mp_obj_t Hub75_start(mp_obj_t self_in) {
_Hub75_obj_t *self = MP_OBJ_TO_PTR2(self_in, _Hub75_obj_t);
//size_t stack_size = 0;
//mp_thread_create(&Hub75_display_update, nullptr, &stack_size);
multicore_reset_core1();
multicore_launch_core1(Hub75_display_update);
//multicore_reset_core1();
//multicore_launch_core1(Hub75_display_update);
// Hand the module-level dma_complete trampoline to the driver as its IRQ handler.
self->hub75->start(dma_complete);
return mp_const_none;
}

View File

@ -2,21 +2,11 @@
#include <algorithm>
#include "hub75.hpp"
#include "pico/stdlib.h"
#include "pico/multicore.h"
Hub75::Hub75(uint8_t width, uint8_t height, Pixel *buffer)
: width(width), height(height), front_buffer(buffer), back_buffer(buffer + width * height)
{
// 1.3v allows overclock to ~280000-300000 but YMMV. Faster clock = faster screen update rate!
// vreg_set_voltage(VREG_VOLTAGE_1_30);
// sleep_ms(100);
// 200MHz is roughly about the lower limit for driving a 64x64 display smoothly.
// Just don't look at it out of the corner of your eye.
//set_sys_clock_khz(200000, true);
// Set up allllll the GPIO
gpio_init(pin_r0); gpio_set_function(pin_r0, GPIO_FUNC_SIO); gpio_set_dir(pin_r0, true);
gpio_init(pin_g0); gpio_set_function(pin_g0, GPIO_FUNC_SIO); gpio_set_dir(pin_g0, true);
@ -38,7 +28,15 @@ Hub75::Hub75(uint8_t width, uint8_t height, Pixel *buffer)
}
// Store one pixel in the front buffer.
//
// The buffer is interleaved so that a top-half pixel and the pixel
// `height / 2` rows below it sit next to each other: entry 2*i is the
// top-half pixel, entry 2*i + 1 the bottom-half pixel, with
// i = row_within_half * width + x.
//
// x, y    : pixel coordinates; coordinates outside width x height are ignored
//           instead of writing outside the buffer.
// r, g, b : 8-bit colour components, passed straight to the Pixel constructor.
void Hub75::set_rgb(uint8_t x, uint8_t y, uint8_t r, uint8_t g, uint8_t b) {
    if(x >= width || y >= height) {
        return;
    }

    const unsigned int half_height = height / 2;
    const bool bottom_half = y >= half_height;
    const unsigned int row_in_half = bottom_half ? y - half_height : y;

    // Bottom-half pixels take the odd slot of each interleaved pair.
    const unsigned int offset = (row_in_half * width + x) * 2 + (bottom_half ? 1u : 0u);
    front_buffer[offset] = Pixel(r, g, b);
}
void Hub75::FM6126A_write_register(uint16_t value, uint8_t position) {
@ -46,6 +44,7 @@ void Hub75::FM6126A_write_register(uint16_t value, uint8_t position) {
for(auto i = 0u; i < width; i++) {
auto j = i % 16;
bool b = value & (1 << j);
gpio_put(pin_r0, b);
gpio_put(pin_g0, b);
gpio_put(pin_b0, b);
@ -62,24 +61,81 @@ void Hub75::FM6126A_write_register(uint16_t value, uint8_t position) {
}
}
void Hub75::start() {
void Hub75::start(irq_handler_t handler) {
running = true;
// Ridiculous register write nonsense for the FM6126A-based 64x64 matrix
FM6126A_write_register(0b1111111111111110, 12);
FM6126A_write_register(0b0000001000000000, 13);
while (running) {
display_update();
if(handler) {
pio_sm_claim(pio, sm_data);
pio_sm_claim(pio, sm_row);
data_prog_offs = pio_add_program(pio, &hub75_data_rgb888_program);
row_prog_offs = pio_add_program(pio, &hub75_row_program);
hub75_data_rgb888_program_init(pio, sm_data, data_prog_offs, DATA_BASE_PIN, CLK_PIN);
hub75_row_program_init(pio, sm_row, row_prog_offs, ROWSEL_BASE_PIN, ROWSEL_N_PINS, STROBE_PIN);
dma_channel = 0;
dma_channel_claim(dma_channel);
dma_channel_config config = dma_channel_get_default_config(dma_channel);
channel_config_set_transfer_data_size(&config, DMA_SIZE_32);
channel_config_set_bswap(&config, false);
channel_config_set_dreq(&config, pio_get_dreq(pio, sm_data, true));
dma_channel_configure(dma_channel, &config, &pio->txf[sm_data], NULL, 0, false);
dma_channel_set_irq0_enabled(dma_channel, true);
irq_set_enabled(pio_get_dreq(pio, sm_data, true), true);
irq_set_exclusive_handler(DMA_IRQ_0, handler);
irq_set_enabled(DMA_IRQ_0, true);
row = 0;
bit = 0;
dma_channel_set_trans_count(dma_channel, width * 4, false);
dma_channel_set_read_addr(dma_channel, &back_buffer, true);
} else {
while (running) {
display_update();
}
}
}
void Hub75::stop() {
running = false;
// stop and release the dma channel
irq_set_enabled(DMA_IRQ_0, false);
dma_channel_set_irq0_enabled(dma_channel, false);
irq_set_enabled(pio_get_dreq(pio, sm_data, true), false);
//irq_remove_handler(DMA_IRQ_0, dma_complete);
dma_channel_wait_for_finish_blocking(dma_channel);
dma_channel_unclaim(dma_channel);
hub75_wait_tx_stall(pio, sm_row);
// release the pio and sm
pio_sm_unclaim(pio, sm_data);
pio_sm_unclaim(pio, sm_row);
pio_clear_instruction_memory(pio);
pio_sm_restart(pio, sm_data);
pio_sm_restart(pio, sm_row);
gpio_put(pin_stb, !STB_POLARITY);
gpio_put(pin_oe, !OE_POLARITY);
}
// Destroying the driver stops the display via stop().
Hub75::~Hub75() {
    stop();
}
// Set every pixel to (0, 0, 0) through set_rgb(), walking the panel column by
// column.
void Hub75::clear() {
    for(unsigned int column = 0; column < width; ++column) {
        for(unsigned int line = 0; line < height; ++line) {
            set_rgb(column, line, 0, 0, 0);
        }
    }
}
void Hub75::flip() {
@ -88,16 +144,17 @@ void Hub75::flip() {
void Hub75::display_update() {
if (do_flip) {
//std::swap(front_buffer, back_buffer);
memcpy((uint8_t *)back_buffer, (uint8_t *)front_buffer, width * height * sizeof(Pixel));
memcpy(back_buffer, front_buffer, width * height * sizeof(Pixel));
do_flip = false;
}
for(auto bit = 1u; bit < 1 << 11; bit <<= 1) {
for(auto y = 0u; y < height / 2; y++) {
auto row_top = y * width;
auto row_bottom = (y + height / 2) * width;
for(auto x = 0u; x < width; x++) {
Pixel pixel_top = back_buffer[y * width + x];
Pixel pixel_bottom = back_buffer[(y + height / 2) * width + x];
Pixel pixel_top = back_buffer[row_top + x];
Pixel pixel_bottom = back_buffer[row_bottom + x];
gpio_put(pin_clk, !clk_polarity);
@ -123,6 +180,40 @@ void Hub75::display_update() {
gpio_put(pin_stb, !stb_polarity);
gpio_put(pin_oe, !oe_polarity);
}
sleep_us(1);
}
}
// Per-row completion step, called from the DMA_IRQ_0 handler.
// Latches the row whose data has just been sent, advances the row / bit-plane
// counters, and starts the DMA transfer for the next row.
void Hub75::dma_complete() {
// A pending flip is applied only at row 0 of bit plane 0, by copying the
// front buffer over the back buffer that DMA reads from.
if (do_flip && bit == 0 && row == 0) {
memcpy(back_buffer, front_buffer, width * height * sizeof(Pixel));
do_flip = false;
}
if(dma_channel_get_irq0_status(dma_channel)) {
dma_channel_acknowledge_irq0(dma_channel);
// SM is finished when it stalls on empty TX FIFO (or, y'know, DMA callback)
//hub75_wait_tx_stall(pio, sm_data);
// Check that previous OEn pulse is finished, else things WILL get out of sequence
hub75_wait_tx_stall(pio, sm_row);
// Latch row data, pulse output enable for new row.
// Low 5 bits select the row; the bits above carry the OEn pulse length,
// which doubles with each bit plane (3 << bit).
pio_sm_put_blocking(pio, sm_row, row | (3u * (1u << bit) << 5));
row++;
// After the last row pair, move on to the next bit plane (wrapping after
// plane 11) and re-patch the data program's pre-shift to match.
if(row == height / 2) {
row = 0;
bit++;
if (bit == 12) {
bit = 0;
}
hub75_data_rgb888_set_shift(pio, sm_data, data_prog_offs, bit);
}
}
// Re-arm the channel for the next row: width * 2 interleaved pixels of two
// 32-bit words each.
// NOTE(review): this runs even when the IRQ status flag above was not set.
dma_channel_set_trans_count(dma_channel, width * 4, false);
//dma_channel_set_read_addr(dma_channel, dma_buffer, true);
dma_channel_set_read_addr(dma_channel, &back_buffer[row * width * 2], true);
}

View File

@ -1,4 +1,23 @@
#include <stdint.h>
#include "pico/stdlib.h"
#include "pico/multicore.h"
#include "hardware/pio.h"
#include "hardware/dma.h"
#include "hardware/irq.h"
#include "hub75.pio.h"
// Pin assignments and signal polarities for the PIO-driven display.
// First of the six consecutive colour data pins (R0, G0, B0, R1, G1, B1).
const uint DATA_BASE_PIN = 0;
const uint DATA_N_PINS = 6;
// First of the consecutive row-select pins.
const uint ROWSEL_BASE_PIN = 6;
const uint ROWSEL_N_PINS = 5;
const uint CLK_PIN = 11;
// Latch pin; the row program also drives the next pin up (OEn) as side-set.
const uint STROBE_PIN = 12;
const uint OEN_PIN = 13;
// Levels used when the driver parks strobe/OE at their inactive state.
const bool CLK_POLARITY = 1;
const bool STB_POLARITY = 1;
const bool OE_POLARITY = 0;
// This gamma table is used to correct our 8-bit (0-255) colours up to 11-bit,
// allowing us to gamma correct without losing dynamic range.
@ -24,12 +43,12 @@ constexpr uint16_t GAMMA_12BIT[256] = {
// We don't *need* to make Pixel a fancy struct with RGB values, but it helps.
#pragma pack(push, 1)
struct alignas(4) Pixel {
uint16_t _;
uint16_t r;
uint16_t g;
uint16_t b;
uint16_t _;
constexpr Pixel() : r(0), g(0), b(0), _(0) {};
constexpr Pixel(uint8_t r, uint8_t g, uint8_t b) : r(GAMMA_12BIT[r]), g(GAMMA_12BIT[g]), b(GAMMA_12BIT[b]), _(0) {};
constexpr Pixel() : _(0), r(0), g(0), b(0) {};
constexpr Pixel(uint8_t r, uint8_t g, uint8_t b) : _(0), r(GAMMA_12BIT[r]), g(GAMMA_12BIT[g]), b(GAMMA_12BIT[b]) {};
};
#pragma pack(pop)
@ -41,6 +60,20 @@ class Hub75 {
Pixel *back_buffer;
bool running = false;
// DMA & PIO
uint dma_channel = 0;
volatile bool do_flip = false;
uint bit = 0;
uint row = 0;
PIO pio = pio0;
uint sm_data = 0;
uint sm_row = 1;
uint data_prog_offs = 0;
uint row_prog_offs = 0;
// Top half of display - 16 rows on a 32x32 panel
unsigned int pin_r0 = 0;
unsigned int pin_g0 = 1;
@ -75,17 +108,15 @@ class Hub75 {
unsigned int pin_led_g = 17;
unsigned int pin_led_b = 18;
volatile bool do_flip = false;
Hub75(uint8_t width, uint8_t height, Pixel *buffer);
~Hub75() {
};
~Hub75();
void FM6126A_write_register(uint16_t value, uint8_t position);
void set_rgb(uint8_t x, uint8_t y, uint8_t r, uint8_t g, uint8_t b);
void display_update();
void clear();
void start();
void start(irq_handler_t handler);
void stop();
void flip();
void dma_complete();
};

View File

@ -0,0 +1,141 @@
;
; Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
;
; SPDX-License-Identifier: BSD-3-Clause
;
.program hub75_row
; side-set pin 0 is LATCH
; side-set pin 1 is OEn
; OUT pins are row select A-E
;
; Each FIFO record consists of:
; - 5-bit row select (LSBs)
; - Pulse width - 1 (27 MSBs)
;
; Repeatedly select a row, pulse LATCH, and generate a pulse of a certain
; width on OEn.
;
; Side-set values used below: 0x2 = OEn high, LATCH low; 0x3 = both high;
; 0x0 = both low (OEn asserted).
.side_set 2
.wrap_target
out pins, 5 [7] side 0x2 ; Deassert OEn, output row select
out x, 27 [7] side 0x3 ; Pulse LATCH, get OEn pulse width
pulse_loop:
jmp x-- pulse_loop side 0x0 ; Assert OEn for x+1 cycles
.wrap
% c-sdk {
// Configure state machine `sm` for the hub75_row program loaded at `offset`
// and start it.
//   row_base_pin / n_row_pins : consecutive row-select pins driven by OUT
//   latch_base_pin            : first of the two side-set pins (the second is
//                               latch_base_pin + 1)
static inline void hub75_row_program_init(PIO pio, uint sm, uint offset, uint row_base_pin, uint n_row_pins, uint latch_base_pin) {
    // Make every pin this program drives an output of the state machine.
    pio_sm_set_consecutive_pindirs(pio, sm, row_base_pin, n_row_pins, true);
    pio_sm_set_consecutive_pindirs(pio, sm, latch_base_pin, 2, true);

    // Route the pins to the PIO block: row selects first, then the two
    // side-set pins.
    const uint row_end_pin = row_base_pin + n_row_pins;
    uint pin = row_base_pin;
    while (pin < row_end_pin) {
        pio_gpio_init(pio, pin);
        ++pin;
    }
    for (uint k = 0; k < 2; ++k) {
        pio_gpio_init(pio, latch_base_pin + k);
    }

    pio_sm_config cfg = hub75_row_program_get_default_config(offset);
    sm_config_set_out_pins(&cfg, row_base_pin, n_row_pins);
    sm_config_set_sideset_pins(&cfg, latch_base_pin);
    // Shift right with autopull after 32 bits.
    sm_config_set_out_shift(&cfg, true, true, 32);

    pio_sm_init(pio, sm, offset, &cfg);
    pio_sm_set_enabled(pio, sm, true);
}
// Block until state machine `sm` next reports a TX stall in FDEBUG.
static inline void hub75_wait_tx_stall(PIO pio, uint sm) {
    const uint32_t stall_bit = 1u << (PIO_FDEBUG_TXSTALL_LSB + sm);

    // Write the flag's bit first so an old stall is not mistaken for a new
    // one, then spin until the hardware raises it again.
    pio->fdebug = stall_bit;
    while ((pio->fdebug & stall_bit) == 0) {
        tight_loop_contents();
    }
}
%}
.program hub75_data_rgb888
.side_set 1
; Despite the name, this variant does not read packed RGB888. Each pixel is
; consumed as two 32-bit FIFO words holding four 16-bit fields: an unused
; field `_`, then red, green and blue (presumably matching the driver's Pixel
; struct - confirm). One bit is sampled from each colour field per pass.
;
; Even pixels are sent on R0, G0, B0 and odd pixels on R1, G1, B1 (typically
; these are for different parts of the screen, NOT for adjacent pixels, so the
; frame buffer must be interleaved before passing to PIO.)
;
; Each pass through, we take bit n of every colour field, where n is the
; pre-shift patched in at the shiftN labels by hub75_data_rgb888_set_shift.
; The pixels are therefore retransmitted once per bit plane, and bit-planed
; PWM is performed by varying pulse widths on the other state machine, in
; ascending powers of 2.
; This avoids a lot of bit shuffling on the processors, at the cost of DMA
; bandwidth (which we have loads of).
; Might want to close your eyes before you read this
public entry_point:
.wrap_target
public shift0: ; first word of the even pixel: _ and Red0
pull side 0 ; gets patched to `out null, n` if n nonzero (otherwise the PULL is required for fencing)
out null, 16 side 0 ; discard _
in osr, 1 side 0 ; Red0
out null, 32 side 0 ; discard remaining bits
public shift1: ; second word of the even pixel: Green0 and Blue0
pull side 0 ; gets patched to out null, n if n is nonzero (otherwise PULL required)
in osr, 1 side 0 ; Green0
out null, 16 side 0 ; << next uint16
in osr, 1 side 0 ; Blue0
out null, 32 side 0 ; discard remaining bits
public shift2: ; first word of the odd pixel: _ and Red1
pull side 0 ; gets patched to out null, n if n is nonzero (otherwise PULL required)
out null, 16 side 0 ; discard _
in osr, 1 side 0 ; Red1
out null, 32 side 0 ; discard remaining bits
public shift3: ; second word of the odd pixel: Green1 and Blue1
pull side 0 ; gets patched to out null, n if n is nonzero (otherwise PULL required)
in osr, 1 side 0 ; Green1 (clock stays low here; it is only raised by the final mov)
out null, 16 side 0 ; << next uint16
in osr, 1 side 0 ; Blue1
out null, 32 side 0 ; discard remaining bits
in null, 26 side 0 ; Note we are just doing this little manoeuvre here to get GPIOs in the order
mov pins, ::isr [7] side 1 ; R0, G0, B0, R1, G1, B1. Can go 1 cycle faster if reversed
.wrap
; Note that because the clock edge for pixel n is in the middle of pixel n +
; 1, a dummy pixel at the end is required to clock the last piece of genuine
; data. (Also 1 pixel of garbage is clocked out at the start, but this is
; harmless)
% c-sdk {
// Configure state machine `sm` for the hub75_data_rgb888 program loaded at
// `offset` and start it.
//   rgb_base_pin : first of six consecutive colour pins driven by the program
//   clock_pin    : side-set pin used as the pixel clock
static inline void hub75_data_rgb888_program_init(PIO pio, uint sm, uint offset, uint rgb_base_pin, uint clock_pin) {
    enum { RGB_PIN_COUNT = 6 };

    // Make the colour pins and the clock pin outputs of the state machine.
    pio_sm_set_consecutive_pindirs(pio, sm, rgb_base_pin, RGB_PIN_COUNT, true);
    pio_sm_set_consecutive_pindirs(pio, sm, clock_pin, 1, true);

    // Route the pins to the PIO block: colour pins first, then the clock.
    const uint rgb_end_pin = rgb_base_pin + RGB_PIN_COUNT;
    uint pin = rgb_base_pin;
    while (pin < rgb_end_pin) {
        pio_gpio_init(pio, pin);
        ++pin;
    }
    pio_gpio_init(pio, clock_pin);

    pio_sm_config cfg = hub75_data_rgb888_program_get_default_config(offset);
    sm_config_set_out_pins(&cfg, rgb_base_pin, RGB_PIN_COUNT);
    sm_config_set_sideset_pins(&cfg, clock_pin);
    // OSR shifts right with autopull after 32 bits.
    sm_config_set_out_shift(&cfg, true, true, 32);
    // ISR shift to left. R0 ends up at bit 5. We push it up to MSB and then flip the register.
    sm_config_set_in_shift(&cfg, false, false, 32);
    // Join the FIFOs for TX.
    sm_config_set_fifo_join(&cfg, PIO_FIFO_JOIN_TX);

    pio_sm_init(pio, sm, offset, &cfg);
    // Execute the word found at the entry-point offset once, then start the SM.
    pio_sm_exec(pio, sm, offset + hub75_data_rgb888_offset_entry_point);
    pio_sm_set_enabled(pio, sm, true);
}
// Patch a data program at `offset` to preshift pixels by `shamt`.
// The first instruction at each of the four shiftN labels is overwritten:
// a blocking PULL when shamt is 0, otherwise `out null, shamt`.
// `sm` is accepted for symmetry with the other helpers but is not used.
static inline void hub75_data_rgb888_set_shift(PIO pio, uint sm, uint offset, uint shamt) {
    const uint16_t patch = (shamt == 0)
        ? pio_encode_pull(false, true) // blocking PULL
        : pio_encode_out(pio_null, shamt);

    const uint patch_points[] = {
        hub75_data_rgb888_offset_shift0,
        hub75_data_rgb888_offset_shift1,
        hub75_data_rgb888_offset_shift2,
        hub75_data_rgb888_offset_shift3,
    };
    const uint n_points = sizeof(patch_points) / sizeof(patch_points[0]);

    for (uint i = 0; i < n_points; ++i) {
        pio->instr_mem[offset + patch_points[i]] = patch;
    }
}
%}

View File

@ -21,3 +21,5 @@ set_source_files_properties(
PROPERTIES COMPILE_FLAGS
"-Wno-discarded-qualifiers -Wno-implicit-int"
)
pico_generate_pio_header(usermod_hub75 ${CMAKE_CURRENT_LIST_DIR}/lib/hub75.pio)