From 02f0b350df4022067f5b9b41c45e1e03153e9c1d Mon Sep 17 00:00:00 2001 From: Lorenzooone Date: Wed, 25 Sep 2019 19:45:12 +0200 Subject: [PATCH] Improve buffer storing and loading For loading: Use a table to get a byte directly out of an int (instead of two nibbles). For both: Remove writing the unused 4 pixels of the vertical doubletiles. Storing is now 55k cycles and Loading is now 92k cycles. --- src/c/vwf.c | 142 ++++++++---------- src/c/vwf.h | 1 + .../optimized-byte-4bpp-to-1bpp-table.bin | Bin 0 -> 13108 bytes src/m2-hack.asm | 3 + 4 files changed, 65 insertions(+), 81 deletions(-) create mode 100644 src/data/optimized-byte-4bpp-to-1bpp-table.bin diff --git a/src/c/vwf.c b/src/c/vwf.c index 0fac0a9..f0d0d51 100644 --- a/src/c/vwf.c +++ b/src/c/vwf.c @@ -75,7 +75,7 @@ void reduce_bit_depth_sp(int* TileRows, int* bufferValues) { int* bottomTileRows = TileRows + (0x20 * 8); int* bottomBufferValues = bufferValues + 0x40; - const int foregroundRow = 0xFFFFFFFF; + const int andValue = 0x11111111; //First value unsigned int firstRow = *(TileRows++); @@ -83,26 +83,18 @@ void reduce_bit_depth_sp(int* TileRows, int* bufferValues) unsigned int thirdRow = *(TileRows++); unsigned int fourthRow = *(TileRows++); - firstRow ^= foregroundRow; - secondRow ^= foregroundRow; - thirdRow ^= foregroundRow; - fourthRow ^= foregroundRow; + firstRow &= andValue; + secondRow &= andValue; + thirdRow &= andValue; + fourthRow &= andValue; - unsigned int value = m2_nybbles_to_bits[(fourthRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[fourthRow & 0xFFFF]; - value <<= 4; - value |= m2_nybbles_to_bits[(thirdRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[thirdRow & 0xFFFF]; - value <<= 4; - value |= m2_nybbles_to_bits[(secondRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[secondRow & 0xFFFF]; - value <<= 4; - value |= m2_nybbles_to_bits[(firstRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[firstRow & 0xFFFF]; + unsigned int value = optimized_byte_4bpp_to_1bpp_table[(fourthRow >> 0xF) + (fourthRow & 0xFFFF)]; + value <<= 8; + value |= optimized_byte_4bpp_to_1bpp_table[(thirdRow >> 0xF) + (thirdRow & 0xFFFF)]; + value <<= 8; + value |= optimized_byte_4bpp_to_1bpp_table[(secondRow >> 0xF) + (secondRow & 0xFFFF)]; + value <<= 8; + value |= optimized_byte_4bpp_to_1bpp_table[(firstRow >> 0xF) + (firstRow & 0xFFFF)]; *(bufferValues++) = value; //Second value @@ -110,27 +102,19 @@ void reduce_bit_depth_sp(int* TileRows, int* bufferValues) secondRow = *(TileRows++); thirdRow = *(TileRows++); fourthRow = *(TileRows); + + firstRow &= andValue; + secondRow &= andValue; + thirdRow &= andValue; + fourthRow &= andValue; - firstRow ^= foregroundRow; - secondRow ^= foregroundRow; - thirdRow ^= foregroundRow; - fourthRow ^= foregroundRow; - - value = m2_nybbles_to_bits[(fourthRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[fourthRow & 0xFFFF]; - value <<= 4; - value |= m2_nybbles_to_bits[(thirdRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[thirdRow & 0xFFFF]; - value <<= 4; - value |= m2_nybbles_to_bits[(secondRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[secondRow & 0xFFFF]; - value <<= 4; - value |= m2_nybbles_to_bits[(firstRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[firstRow & 0xFFFF]; + value = optimized_byte_4bpp_to_1bpp_table[(fourthRow >> 0xF) + (fourthRow & 0xFFFF)]; + value <<= 8; + value |= optimized_byte_4bpp_to_1bpp_table[(thirdRow >> 0xF) + (thirdRow & 0xFFFF)]; + value <<= 8; + value |= optimized_byte_4bpp_to_1bpp_table[(secondRow >> 0xF) + (secondRow & 0xFFFF)]; + value <<= 8; + value |= optimized_byte_4bpp_to_1bpp_table[(firstRow >> 0xF) + (firstRow & 0xFFFF)]; *(bufferValues) = value; //First value of bottom tile @@ -139,55 +123,41 @@ void reduce_bit_depth_sp(int* TileRows, int* bufferValues) thirdRow = *(bottomTileRows++); fourthRow = *(bottomTileRows++); - firstRow ^= foregroundRow; - secondRow ^= foregroundRow; - thirdRow ^= foregroundRow; - fourthRow ^= foregroundRow; + firstRow &= andValue; + secondRow &= andValue; + thirdRow &= andValue; + fourthRow &= andValue; - value = m2_nybbles_to_bits[(fourthRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[fourthRow & 0xFFFF]; - value <<= 4; - value |= m2_nybbles_to_bits[(thirdRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[thirdRow & 0xFFFF]; - value <<= 4; - value |= m2_nybbles_to_bits[(secondRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[secondRow & 0xFFFF]; - value <<= 4; - value |= m2_nybbles_to_bits[(firstRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[firstRow & 0xFFFF]; + value = optimized_byte_4bpp_to_1bpp_table[(fourthRow >> 0xF) + (fourthRow & 0xFFFF)]; + value <<= 8; + value |= optimized_byte_4bpp_to_1bpp_table[(thirdRow >> 0xF) + (thirdRow & 0xFFFF)]; + value <<= 8; + value |= optimized_byte_4bpp_to_1bpp_table[(secondRow >> 0xF) + (secondRow & 0xFFFF)]; + value <<= 8; + value |= optimized_byte_4bpp_to_1bpp_table[(firstRow >> 0xF) + (firstRow & 0xFFFF)]; *(bottomBufferValues++) = value; - //Second value of bottom tile + //Second value of bottom tile - Is not used by the game + /* firstRow = *(bottomTileRows++); secondRow = *(bottomTileRows++); thirdRow = *(bottomTileRows++); fourthRow = *(bottomTileRows); - firstRow ^= foregroundRow; - secondRow ^= foregroundRow; - thirdRow ^= foregroundRow; - fourthRow ^= foregroundRow; + firstRow &= andValue; + secondRow &= andValue; + thirdRow &= andValue; + fourthRow &= andValue; - value = m2_nybbles_to_bits[(fourthRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[fourthRow & 0xFFFF]; - value <<= 4; - value |= m2_nybbles_to_bits[(thirdRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[thirdRow & 0xFFFF]; - value <<= 4; - value |= m2_nybbles_to_bits[(secondRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[secondRow & 0xFFFF]; - value <<= 4; - value |= m2_nybbles_to_bits[(firstRow >> 16)]; - value <<= 4; - value |= m2_nybbles_to_bits[firstRow & 0xFFFF]; + value = optimized_byte_4bpp_to_1bpp_table[(fourthRow >> 0xF) + (fourthRow & 0xFFFF)]; + value <<= 8; + value |= optimized_byte_4bpp_to_1bpp_table[(thirdRow >> 0xF) + (thirdRow & 0xFFFF)]; + value <<= 8; + value |= optimized_byte_4bpp_to_1bpp_table[(secondRow >> 0xF) + (secondRow & 0xFFFF)]; + value <<= 8; + value |= optimized_byte_4bpp_to_1bpp_table[(firstRow >> 0xF) + (firstRow & 0xFFFF)]; *(bottomBufferValues) = value; + */ } byte getSex(byte character) @@ -2096,15 +2066,20 @@ void store_pixels_overworld_buffer(int totalYs) *(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF]; *(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF]; first_half = *(bottomBufferValues++); - second_half = *(bottomBufferValues++); + //second_half = *(bottomBufferValues++); *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF]; + //Since those are unused + bottomBufferValues++; + bottomTilePointer += 4; + /* The game doesn't use these *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF]; + */ while(remainingTiles > 0) { @@ -2126,15 +2101,20 @@ void store_pixels_overworld_buffer(int totalYs) *(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF]; *(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF]; first_half = *(bottomBufferValues++); - second_half = *(bottomBufferValues++); + //second_half = *(bottomBufferValues++); *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF]; + //Since those are unused + bottomBufferValues++; + bottomTilePointer += 4; + /* The game doesn't use these *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF]; + */ remainingTiles--; } } diff --git a/src/c/vwf.h b/src/c/vwf.h index d415fc6..84cfd31 100644 --- a/src/c/vwf.h +++ b/src/c/vwf.h @@ -131,6 +131,7 @@ extern byte m2_ness_name[]; extern int m2_bits_to_nybbles[]; extern int m2_bits_to_nybbles_fast[]; extern byte m2_nybbles_to_bits[]; +extern byte optimized_byte_4bpp_to_1bpp_table[]; extern byte *m2_font_table[]; extern byte m2_font_widths[]; extern byte m2_font_heights[]; diff --git a/src/data/optimized-byte-4bpp-to-1bpp-table.bin b/src/data/optimized-byte-4bpp-to-1bpp-table.bin new file mode 100644 index 0000000000000000000000000000000000000000..9142c49eac109db25db5ea841e65a1ba5cb93061 GIT binary patch literal 13108 zcmeIu=UWe87{&1)MTt;GRum~Cgp9Ok86^!#MJOYYS!pOT+bOfCtQHj!va%yvlu;?8 z%nEH~ygmQIb6w|obw2N&`zvYFRY3x;t(0OnphQJgYczV`pfO|7qHpPT-Fx5-?PYo@ zq16U!k3*|#jGurOeNUcZWsNr&8Qa*R7cN?}7VYA?ZarG`ZDKmx4sS4#Qpc!{SM zdg(In4QOr_U;!3j0Ty5Z7GMDuU;!2orGRXQUcK?Javl5hMGqM|Q3pM2_#|Dl=)04A zKV`h3bC>=D(3&Ik^wA?n85p8P-_xegb-){Dm^(V5-In|KqTN^c`J+YO7BlC~#~UnX zIWIu3^w=1HUbQ-K6PlX^SbzmsfCX591z3OuSbzmYDX=+cS2X^0%hs6P=)*@&XQ7WC zJ98E-`rfvE&tAM?$IgBG(Z`dr&!Ll3&R;-_zOUbS`V4Qld8^<#y8PXbpXm1=e$}Ey z-?#6)D8w7`?iRg7f2^phM^}DoXhd_f01L1H3$OqSumB6N01L2yC