Optimize drawPixel for speed (-20 bytes)

- Inlined     : 332 -> 358 ops/ms
- Non-inlined : 222 -> 303 ops/ms

Moves most of the pixel offset calculation math into assembly.
Also uses a shift lookup table vs calculting bit shifts on the
CPU (which is slow)
This commit is contained in:
Josh Goebel 2017-04-17 20:06:20 -04:00 committed by Scott Allen
parent ab1a7b3bdd
commit e1cce03580
2 changed files with 39 additions and 10 deletions

View File

@ -267,6 +267,14 @@ void Arduboy2Base::clear()
fillScreen(BLACK);
}
// Used by drawPixel to help with left bitshifting since AVR has no
// multiple bit shift instruction. We can bit shift from a lookup table
// in flash faster than we can calculate the bit shifts on the CPU.
PROGMEM const unsigned char bitshift_left[] = {
1, 2, 4, 8, 16, 32, 64, 128
};
void Arduboy2Base::drawPixel(int16_t x, int16_t y, uint8_t color)
{
#ifdef PIXEL_SAFE_MODE
@ -276,14 +284,35 @@ void Arduboy2Base::drawPixel(int16_t x, int16_t y, uint8_t color)
}
#endif
uint8_t row = (uint8_t)y / 8;
if (color)
{
sBuffer[(row*WIDTH) + (uint8_t)x] |= _BV((uint8_t)y % 8);
}
else
{
sBuffer[(row*WIDTH) + (uint8_t)x] &= ~ _BV((uint8_t)y % 8);
uint16_t row_offset; // = (y * WIDTH/8) & ~0b01111111 + (uint8_t)x;
uint8_t bit; // = 1 << (y % 8);
asm volatile(
"mul %[width_offset],%[y]\n"
"movw %[row_offset], r0\n"
"andi %A[row_offset], 0x80\n" // row_offset &= (~0b01111111);
"clr __zero_reg__\n"
"add %A[row_offset], %[x]\n"
"adc %B[row_offset], __zero_reg__\n"
// mask for only 0-7
"andi %[y], 0x07\n"
// Z += y
"add r30, %[y]\n"
"adc r31, __zero_reg__\n"
// load correct bitshift from program RAM
"lpm %[bit], Z\n"
: [row_offset] "=r" (row_offset),
[bit] "=r" (bit)
: [width_offset] "r" ((uint8_t)(WIDTH/8)),
[y] "r" ((uint8_t)y),
[x] "r" ((uint8_t)x),
"z" (bitshift_left)
: "r1", "r0");
if (color) {
sBuffer[row_offset] |= bit;
} else {
sBuffer[row_offset] &= ~ bit;
}
}