From dfbe2dda3b82427a8fefc49553af880f5195fe88 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Mon, 17 Apr 2017 20:06:20 -0400 Subject: [PATCH 1/5] Optimize drawPixel for speed (-20 bytes) - Inlined : 332 -> 358 ops/ms - Non-inlined : 222 -> 303 ops/ms Moves most of the pixel offset calculation math into assembly. Also uses a shift lookup table vs calculting bit shifts on the CPU (which is slow) --- src/Arduboy2.cpp | 45 +++++++++++++++++++++++++++++++++++++-------- src/Arduboy2.h | 4 ++-- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/src/Arduboy2.cpp b/src/Arduboy2.cpp index b96e2d1..feb77b7 100644 --- a/src/Arduboy2.cpp +++ b/src/Arduboy2.cpp @@ -267,6 +267,14 @@ void Arduboy2Base::clear() fillScreen(BLACK); } + +// Used by drawPixel to help with left bitshifting since AVR has no +// multiple bit shift instruction. We can bit shift from a lookup table +// in flash faster than we can calculate the bit shifts on the CPU. +PROGMEM const unsigned char bitshift_left[] = { + 1, 2, 4, 8, 16, 32, 64, 128 +}; + void Arduboy2Base::drawPixel(int16_t x, int16_t y, uint8_t color) { #ifdef PIXEL_SAFE_MODE @@ -276,14 +284,35 @@ void Arduboy2Base::drawPixel(int16_t x, int16_t y, uint8_t color) } #endif - uint8_t row = (uint8_t)y / 8; - if (color) - { - sBuffer[(row*WIDTH) + (uint8_t)x] |= _BV((uint8_t)y % 8); - } - else - { - sBuffer[(row*WIDTH) + (uint8_t)x] &= ~ _BV((uint8_t)y % 8); + uint16_t row_offset; // = (y * WIDTH/8) & ~0b01111111 + (uint8_t)x; + uint8_t bit; // = 1 << (y % 8); + + asm volatile( + "mul %[width_offset],%[y]\n" + "movw %[row_offset], r0\n" + "andi %A[row_offset], 0x80\n" // row_offset &= (~0b01111111); + "clr __zero_reg__\n" + "add %A[row_offset], %[x]\n" + "adc %B[row_offset], __zero_reg__\n" + // mask for only 0-7 + "andi %[y], 0x07\n" + // Z += y + "add r30, %[y]\n" + "adc r31, __zero_reg__\n" + // load correct bitshift from program RAM + "lpm %[bit], Z\n" + : [row_offset] "=r" (row_offset), + [bit] "=r" (bit) + : [width_offset] "r" ((uint8_t)(WIDTH/8)), + [y] "r" ((uint8_t)y), + [x] "r" ((uint8_t)x), + "z" (bitshift_left) + : "r1", "r0"); + + if (color) { + sBuffer[row_offset] |= bit; + } else { + sBuffer[row_offset] &= ~ bit; } } diff --git a/src/Arduboy2.h b/src/Arduboy2.h index e1f5f69..efa2172 100644 --- a/src/Arduboy2.h +++ b/src/Arduboy2.h @@ -386,7 +386,7 @@ class Arduboy2Base : public Arduboy2Core * specified color. The values WHITE or BLACK can be used for the color. * If the `color` parameter isn't included, the pixel will be set to WHITE. */ - void drawPixel(int16_t x, int16_t y, uint8_t color = WHITE); + void drawPixel(int16_t x, int16_t y, uint8_t color = WHITE); /** \brief * Returns the state of the given pixel in the screen buffer. @@ -1210,7 +1210,7 @@ class Arduboy2 : public Print, public Arduboy2Base * * \details * This function is called by `bootLogoShell()` and `bootlogoText()`. - * + * * If a unit name has been saved in system EEPROM, it will be displayed at * the bottom of the screen. This function pauses for a short time to allow * the name to be seen. From 07c321cd44dc59330f0acb57d42c845b75ec6348 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 1 Apr 2017 14:40:25 -0400 Subject: [PATCH 2/5] make sure we use upper registers --- src/Arduboy2.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Arduboy2.cpp b/src/Arduboy2.cpp index feb77b7..6eb4f0c 100644 --- a/src/Arduboy2.cpp +++ b/src/Arduboy2.cpp @@ -301,10 +301,10 @@ void Arduboy2Base::drawPixel(int16_t x, int16_t y, uint8_t color) "adc r31, __zero_reg__\n" // load correct bitshift from program RAM "lpm %[bit], Z\n" - : [row_offset] "=r" (row_offset), + : [row_offset] "=a" (row_offset), // upper register (ANDI) [bit] "=r" (bit) : [width_offset] "r" ((uint8_t)(WIDTH/8)), - [y] "r" ((uint8_t)y), + [y] "a" ((uint8_t)y), // upper register (ANDI) [x] "r" ((uint8_t)x), "z" (bitshift_left) : "r1", "r0"); From 023a7120b838e6c1dd79c4301b19cb49a59b44b6 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 1 Apr 2017 14:44:27 -0400 Subject: [PATCH 3/5] add back original C implimentation --- src/Arduboy2.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Arduboy2.cpp b/src/Arduboy2.cpp index 6eb4f0c..ae7444b 100644 --- a/src/Arduboy2.cpp +++ b/src/Arduboy2.cpp @@ -284,9 +284,16 @@ void Arduboy2Base::drawPixel(int16_t x, int16_t y, uint8_t color) } #endif - uint16_t row_offset; // = (y * WIDTH/8) & ~0b01111111 + (uint8_t)x; - uint8_t bit; // = 1 << (y % 8); + uint16_t row_offset; + uint8_t bit; + // uint8_t row = (uint8_t)y / 8; + // row_offset = (row*WIDTH) + (uint8_t)x; + // bit = _BV((uint8_t)y % 8); + + // the above math can also be rewritten more simply as; + // (y * WIDTH/8) & ~0b01111111 + (uint8_t)x; + // which is what the below assembler does asm volatile( "mul %[width_offset],%[y]\n" "movw %[row_offset], r0\n" From 825d6bc760efa5f2044a9957006ffdc3f218facc Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Mon, 17 Apr 2017 20:02:02 -0400 Subject: [PATCH 4/5] make sure row_offset and y do not overlap --- src/Arduboy2.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Arduboy2.cpp b/src/Arduboy2.cpp index ae7444b..b191fc1 100644 --- a/src/Arduboy2.cpp +++ b/src/Arduboy2.cpp @@ -308,11 +308,11 @@ void Arduboy2Base::drawPixel(int16_t x, int16_t y, uint8_t color) "adc r31, __zero_reg__\n" // load correct bitshift from program RAM "lpm %[bit], Z\n" - : [row_offset] "=a" (row_offset), // upper register (ANDI) + : [row_offset] "=x&" (row_offset), // upper register (ANDI) [bit] "=r" (bit) : [width_offset] "r" ((uint8_t)(WIDTH/8)), - [y] "a" ((uint8_t)y), // upper register (ANDI) [x] "r" ((uint8_t)x), + [y] "a" ((uint8_t)y), // upper register (ANDI) "z" (bitshift_left) : "r1", "r0"); From d99973332017138ad1bafcbb0d5caf2518537e86 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Mon, 17 Apr 2017 23:24:04 -0400 Subject: [PATCH 5/5] remove unnecessary carry --- src/Arduboy2.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Arduboy2.cpp b/src/Arduboy2.cpp index b191fc1..fea61ef 100644 --- a/src/Arduboy2.cpp +++ b/src/Arduboy2.cpp @@ -300,7 +300,6 @@ void Arduboy2Base::drawPixel(int16_t x, int16_t y, uint8_t color) "andi %A[row_offset], 0x80\n" // row_offset &= (~0b01111111); "clr __zero_reg__\n" "add %A[row_offset], %[x]\n" - "adc %B[row_offset], __zero_reg__\n" // mask for only 0-7 "andi %[y], 0x07\n" // Z += y