Improve buffer storing and loading

For loading: use a lookup table to convert each 32-bit tile row directly into one byte (instead of looking up two nibbles and combining them).
For both: stop writing the unused 4 pixel rows (the second half of the bottom tile) of the vertical double tiles.

Storing now takes 55k cycles and loading now takes 92k cycles.
This commit is contained in:
Lorenzooone 2019-09-25 19:45:12 +02:00
parent c00039cb18
commit 02f0b350df
4 changed files with 65 additions and 81 deletions

View File

@ -75,7 +75,7 @@ void reduce_bit_depth_sp(int* TileRows, int* bufferValues)
{ {
int* bottomTileRows = TileRows + (0x20 * 8); int* bottomTileRows = TileRows + (0x20 * 8);
int* bottomBufferValues = bufferValues + 0x40; int* bottomBufferValues = bufferValues + 0x40;
const int foregroundRow = 0xFFFFFFFF; const int andValue = 0x11111111;
//First value //First value
unsigned int firstRow = *(TileRows++); unsigned int firstRow = *(TileRows++);
@ -83,26 +83,18 @@ void reduce_bit_depth_sp(int* TileRows, int* bufferValues)
unsigned int thirdRow = *(TileRows++); unsigned int thirdRow = *(TileRows++);
unsigned int fourthRow = *(TileRows++); unsigned int fourthRow = *(TileRows++);
firstRow ^= foregroundRow; firstRow &= andValue;
secondRow ^= foregroundRow; secondRow &= andValue;
thirdRow ^= foregroundRow; thirdRow &= andValue;
fourthRow ^= foregroundRow; fourthRow &= andValue;
unsigned int value = m2_nybbles_to_bits[(fourthRow >> 16)]; unsigned int value = optimized_byte_4bpp_to_1bpp_table[(fourthRow >> 0xF) + (fourthRow & 0xFFFF)];
value <<= 4; value <<= 8;
value |= m2_nybbles_to_bits[fourthRow & 0xFFFF]; value |= optimized_byte_4bpp_to_1bpp_table[(thirdRow >> 0xF) + (thirdRow & 0xFFFF)];
value <<= 4; value <<= 8;
value |= m2_nybbles_to_bits[(thirdRow >> 16)]; value |= optimized_byte_4bpp_to_1bpp_table[(secondRow >> 0xF) + (secondRow & 0xFFFF)];
value <<= 4; value <<= 8;
value |= m2_nybbles_to_bits[thirdRow & 0xFFFF]; value |= optimized_byte_4bpp_to_1bpp_table[(firstRow >> 0xF) + (firstRow & 0xFFFF)];
value <<= 4;
value |= m2_nybbles_to_bits[(secondRow >> 16)];
value <<= 4;
value |= m2_nybbles_to_bits[secondRow & 0xFFFF];
value <<= 4;
value |= m2_nybbles_to_bits[(firstRow >> 16)];
value <<= 4;
value |= m2_nybbles_to_bits[firstRow & 0xFFFF];
*(bufferValues++) = value; *(bufferValues++) = value;
//Second value //Second value
@ -111,26 +103,18 @@ void reduce_bit_depth_sp(int* TileRows, int* bufferValues)
thirdRow = *(TileRows++); thirdRow = *(TileRows++);
fourthRow = *(TileRows); fourthRow = *(TileRows);
firstRow ^= foregroundRow; firstRow &= andValue;
secondRow ^= foregroundRow; secondRow &= andValue;
thirdRow ^= foregroundRow; thirdRow &= andValue;
fourthRow ^= foregroundRow; fourthRow &= andValue;
value = m2_nybbles_to_bits[(fourthRow >> 16)]; value = optimized_byte_4bpp_to_1bpp_table[(fourthRow >> 0xF) + (fourthRow & 0xFFFF)];
value <<= 4; value <<= 8;
value |= m2_nybbles_to_bits[fourthRow & 0xFFFF]; value |= optimized_byte_4bpp_to_1bpp_table[(thirdRow >> 0xF) + (thirdRow & 0xFFFF)];
value <<= 4; value <<= 8;
value |= m2_nybbles_to_bits[(thirdRow >> 16)]; value |= optimized_byte_4bpp_to_1bpp_table[(secondRow >> 0xF) + (secondRow & 0xFFFF)];
value <<= 4; value <<= 8;
value |= m2_nybbles_to_bits[thirdRow & 0xFFFF]; value |= optimized_byte_4bpp_to_1bpp_table[(firstRow >> 0xF) + (firstRow & 0xFFFF)];
value <<= 4;
value |= m2_nybbles_to_bits[(secondRow >> 16)];
value <<= 4;
value |= m2_nybbles_to_bits[secondRow & 0xFFFF];
value <<= 4;
value |= m2_nybbles_to_bits[(firstRow >> 16)];
value <<= 4;
value |= m2_nybbles_to_bits[firstRow & 0xFFFF];
*(bufferValues) = value; *(bufferValues) = value;
//First value of bottom tile //First value of bottom tile
@ -139,55 +123,41 @@ void reduce_bit_depth_sp(int* TileRows, int* bufferValues)
thirdRow = *(bottomTileRows++); thirdRow = *(bottomTileRows++);
fourthRow = *(bottomTileRows++); fourthRow = *(bottomTileRows++);
firstRow ^= foregroundRow; firstRow &= andValue;
secondRow ^= foregroundRow; secondRow &= andValue;
thirdRow ^= foregroundRow; thirdRow &= andValue;
fourthRow ^= foregroundRow; fourthRow &= andValue;
value = m2_nybbles_to_bits[(fourthRow >> 16)]; value = optimized_byte_4bpp_to_1bpp_table[(fourthRow >> 0xF) + (fourthRow & 0xFFFF)];
value <<= 4; value <<= 8;
value |= m2_nybbles_to_bits[fourthRow & 0xFFFF]; value |= optimized_byte_4bpp_to_1bpp_table[(thirdRow >> 0xF) + (thirdRow & 0xFFFF)];
value <<= 4; value <<= 8;
value |= m2_nybbles_to_bits[(thirdRow >> 16)]; value |= optimized_byte_4bpp_to_1bpp_table[(secondRow >> 0xF) + (secondRow & 0xFFFF)];
value <<= 4; value <<= 8;
value |= m2_nybbles_to_bits[thirdRow & 0xFFFF]; value |= optimized_byte_4bpp_to_1bpp_table[(firstRow >> 0xF) + (firstRow & 0xFFFF)];
value <<= 4;
value |= m2_nybbles_to_bits[(secondRow >> 16)];
value <<= 4;
value |= m2_nybbles_to_bits[secondRow & 0xFFFF];
value <<= 4;
value |= m2_nybbles_to_bits[(firstRow >> 16)];
value <<= 4;
value |= m2_nybbles_to_bits[firstRow & 0xFFFF];
*(bottomBufferValues++) = value; *(bottomBufferValues++) = value;
//Second value of bottom tile //Second value of bottom tile - Is not used by the game
/*
firstRow = *(bottomTileRows++); firstRow = *(bottomTileRows++);
secondRow = *(bottomTileRows++); secondRow = *(bottomTileRows++);
thirdRow = *(bottomTileRows++); thirdRow = *(bottomTileRows++);
fourthRow = *(bottomTileRows); fourthRow = *(bottomTileRows);
firstRow ^= foregroundRow; firstRow &= andValue;
secondRow ^= foregroundRow; secondRow &= andValue;
thirdRow ^= foregroundRow; thirdRow &= andValue;
fourthRow ^= foregroundRow; fourthRow &= andValue;
value = m2_nybbles_to_bits[(fourthRow >> 16)]; value = optimized_byte_4bpp_to_1bpp_table[(fourthRow >> 0xF) + (fourthRow & 0xFFFF)];
value <<= 4; value <<= 8;
value |= m2_nybbles_to_bits[fourthRow & 0xFFFF]; value |= optimized_byte_4bpp_to_1bpp_table[(thirdRow >> 0xF) + (thirdRow & 0xFFFF)];
value <<= 4; value <<= 8;
value |= m2_nybbles_to_bits[(thirdRow >> 16)]; value |= optimized_byte_4bpp_to_1bpp_table[(secondRow >> 0xF) + (secondRow & 0xFFFF)];
value <<= 4; value <<= 8;
value |= m2_nybbles_to_bits[thirdRow & 0xFFFF]; value |= optimized_byte_4bpp_to_1bpp_table[(firstRow >> 0xF) + (firstRow & 0xFFFF)];
value <<= 4;
value |= m2_nybbles_to_bits[(secondRow >> 16)];
value <<= 4;
value |= m2_nybbles_to_bits[secondRow & 0xFFFF];
value <<= 4;
value |= m2_nybbles_to_bits[(firstRow >> 16)];
value <<= 4;
value |= m2_nybbles_to_bits[firstRow & 0xFFFF];
*(bottomBufferValues) = value; *(bottomBufferValues) = value;
*/
} }
byte getSex(byte character) byte getSex(byte character)
@ -2096,15 +2066,20 @@ void store_pixels_overworld_buffer(int totalYs)
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF]; *(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF]; *(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
first_half = *(bottomBufferValues++); first_half = *(bottomBufferValues++);
second_half = *(bottomBufferValues++); //second_half = *(bottomBufferValues++);
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
//Since those are unused
bottomBufferValues++;
bottomTilePointer += 4;
/* The game doesn't use these
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
*/
while(remainingTiles > 0) while(remainingTiles > 0)
{ {
@ -2126,15 +2101,20 @@ void store_pixels_overworld_buffer(int totalYs)
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF]; *(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF]; *(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
first_half = *(bottomBufferValues++); first_half = *(bottomBufferValues++);
second_half = *(bottomBufferValues++); //second_half = *(bottomBufferValues++);
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
//Since those are unused
bottomBufferValues++;
bottomTilePointer += 4;
/* The game doesn't use these
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
*/
remainingTiles--; remainingTiles--;
} }
} }

View File

@ -131,6 +131,7 @@ extern byte m2_ness_name[];
extern int m2_bits_to_nybbles[]; extern int m2_bits_to_nybbles[];
extern int m2_bits_to_nybbles_fast[]; extern int m2_bits_to_nybbles_fast[];
extern byte m2_nybbles_to_bits[]; extern byte m2_nybbles_to_bits[];
extern byte optimized_byte_4bpp_to_1bpp_table[];
extern byte *m2_font_table[]; extern byte *m2_font_table[];
extern byte m2_font_widths[]; extern byte m2_font_widths[];
extern byte m2_font_heights[]; extern byte m2_font_heights[];

Binary file not shown.

View File

@ -1783,6 +1783,9 @@ flyovertextLater:
m2_coord_table_file: m2_coord_table_file:
.incbin "data/m2-coord-table-file-select.bin" .incbin "data/m2-coord-table-file-select.bin"
optimized_byte_4bpp_to_1bpp_table:
.incbin "data/optimized-byte-4bpp-to-1bpp-table.bin"
//============================================================================== //==============================================================================
// Existing subroutines/data // Existing subroutines/data