Improve buffer storing and loading
For loading: use a lookup table to get a byte directly out of each int (instead of looking up two nibbles separately). For both storing and loading: stop processing the unused last 4 pixel rows of the bottom tile in each vertical double tile. Storing now takes about 55k cycles and loading about 92k cycles.
This commit is contained in:
parent
c00039cb18
commit
02f0b350df
140
src/c/vwf.c
140
src/c/vwf.c
|
@ -75,7 +75,7 @@ void reduce_bit_depth_sp(int* TileRows, int* bufferValues)
|
||||||
{
|
{
|
||||||
int* bottomTileRows = TileRows + (0x20 * 8);
|
int* bottomTileRows = TileRows + (0x20 * 8);
|
||||||
int* bottomBufferValues = bufferValues + 0x40;
|
int* bottomBufferValues = bufferValues + 0x40;
|
||||||
const int foregroundRow = 0xFFFFFFFF;
|
const int andValue = 0x11111111;
|
||||||
|
|
||||||
//First value
|
//First value
|
||||||
unsigned int firstRow = *(TileRows++);
|
unsigned int firstRow = *(TileRows++);
|
||||||
|
@ -83,26 +83,18 @@ void reduce_bit_depth_sp(int* TileRows, int* bufferValues)
|
||||||
unsigned int thirdRow = *(TileRows++);
|
unsigned int thirdRow = *(TileRows++);
|
||||||
unsigned int fourthRow = *(TileRows++);
|
unsigned int fourthRow = *(TileRows++);
|
||||||
|
|
||||||
firstRow ^= foregroundRow;
|
firstRow &= andValue;
|
||||||
secondRow ^= foregroundRow;
|
secondRow &= andValue;
|
||||||
thirdRow ^= foregroundRow;
|
thirdRow &= andValue;
|
||||||
fourthRow ^= foregroundRow;
|
fourthRow &= andValue;
|
||||||
|
|
||||||
unsigned int value = m2_nybbles_to_bits[(fourthRow >> 16)];
|
unsigned int value = optimized_byte_4bpp_to_1bpp_table[(fourthRow >> 0xF) + (fourthRow & 0xFFFF)];
|
||||||
value <<= 4;
|
value <<= 8;
|
||||||
value |= m2_nybbles_to_bits[fourthRow & 0xFFFF];
|
value |= optimized_byte_4bpp_to_1bpp_table[(thirdRow >> 0xF) + (thirdRow & 0xFFFF)];
|
||||||
value <<= 4;
|
value <<= 8;
|
||||||
value |= m2_nybbles_to_bits[(thirdRow >> 16)];
|
value |= optimized_byte_4bpp_to_1bpp_table[(secondRow >> 0xF) + (secondRow & 0xFFFF)];
|
||||||
value <<= 4;
|
value <<= 8;
|
||||||
value |= m2_nybbles_to_bits[thirdRow & 0xFFFF];
|
value |= optimized_byte_4bpp_to_1bpp_table[(firstRow >> 0xF) + (firstRow & 0xFFFF)];
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[(secondRow >> 16)];
|
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[secondRow & 0xFFFF];
|
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[(firstRow >> 16)];
|
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[firstRow & 0xFFFF];
|
|
||||||
*(bufferValues++) = value;
|
*(bufferValues++) = value;
|
||||||
|
|
||||||
//Second value
|
//Second value
|
||||||
|
@ -111,26 +103,18 @@ void reduce_bit_depth_sp(int* TileRows, int* bufferValues)
|
||||||
thirdRow = *(TileRows++);
|
thirdRow = *(TileRows++);
|
||||||
fourthRow = *(TileRows);
|
fourthRow = *(TileRows);
|
||||||
|
|
||||||
firstRow ^= foregroundRow;
|
firstRow &= andValue;
|
||||||
secondRow ^= foregroundRow;
|
secondRow &= andValue;
|
||||||
thirdRow ^= foregroundRow;
|
thirdRow &= andValue;
|
||||||
fourthRow ^= foregroundRow;
|
fourthRow &= andValue;
|
||||||
|
|
||||||
value = m2_nybbles_to_bits[(fourthRow >> 16)];
|
value = optimized_byte_4bpp_to_1bpp_table[(fourthRow >> 0xF) + (fourthRow & 0xFFFF)];
|
||||||
value <<= 4;
|
value <<= 8;
|
||||||
value |= m2_nybbles_to_bits[fourthRow & 0xFFFF];
|
value |= optimized_byte_4bpp_to_1bpp_table[(thirdRow >> 0xF) + (thirdRow & 0xFFFF)];
|
||||||
value <<= 4;
|
value <<= 8;
|
||||||
value |= m2_nybbles_to_bits[(thirdRow >> 16)];
|
value |= optimized_byte_4bpp_to_1bpp_table[(secondRow >> 0xF) + (secondRow & 0xFFFF)];
|
||||||
value <<= 4;
|
value <<= 8;
|
||||||
value |= m2_nybbles_to_bits[thirdRow & 0xFFFF];
|
value |= optimized_byte_4bpp_to_1bpp_table[(firstRow >> 0xF) + (firstRow & 0xFFFF)];
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[(secondRow >> 16)];
|
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[secondRow & 0xFFFF];
|
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[(firstRow >> 16)];
|
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[firstRow & 0xFFFF];
|
|
||||||
*(bufferValues) = value;
|
*(bufferValues) = value;
|
||||||
|
|
||||||
//First value of bottom tile
|
//First value of bottom tile
|
||||||
|
@ -139,55 +123,41 @@ void reduce_bit_depth_sp(int* TileRows, int* bufferValues)
|
||||||
thirdRow = *(bottomTileRows++);
|
thirdRow = *(bottomTileRows++);
|
||||||
fourthRow = *(bottomTileRows++);
|
fourthRow = *(bottomTileRows++);
|
||||||
|
|
||||||
firstRow ^= foregroundRow;
|
firstRow &= andValue;
|
||||||
secondRow ^= foregroundRow;
|
secondRow &= andValue;
|
||||||
thirdRow ^= foregroundRow;
|
thirdRow &= andValue;
|
||||||
fourthRow ^= foregroundRow;
|
fourthRow &= andValue;
|
||||||
|
|
||||||
value = m2_nybbles_to_bits[(fourthRow >> 16)];
|
value = optimized_byte_4bpp_to_1bpp_table[(fourthRow >> 0xF) + (fourthRow & 0xFFFF)];
|
||||||
value <<= 4;
|
value <<= 8;
|
||||||
value |= m2_nybbles_to_bits[fourthRow & 0xFFFF];
|
value |= optimized_byte_4bpp_to_1bpp_table[(thirdRow >> 0xF) + (thirdRow & 0xFFFF)];
|
||||||
value <<= 4;
|
value <<= 8;
|
||||||
value |= m2_nybbles_to_bits[(thirdRow >> 16)];
|
value |= optimized_byte_4bpp_to_1bpp_table[(secondRow >> 0xF) + (secondRow & 0xFFFF)];
|
||||||
value <<= 4;
|
value <<= 8;
|
||||||
value |= m2_nybbles_to_bits[thirdRow & 0xFFFF];
|
value |= optimized_byte_4bpp_to_1bpp_table[(firstRow >> 0xF) + (firstRow & 0xFFFF)];
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[(secondRow >> 16)];
|
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[secondRow & 0xFFFF];
|
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[(firstRow >> 16)];
|
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[firstRow & 0xFFFF];
|
|
||||||
*(bottomBufferValues++) = value;
|
*(bottomBufferValues++) = value;
|
||||||
|
|
||||||
//Second value of bottom tile
|
//Second value of bottom tile - Is not used by the game
|
||||||
|
/*
|
||||||
firstRow = *(bottomTileRows++);
|
firstRow = *(bottomTileRows++);
|
||||||
secondRow = *(bottomTileRows++);
|
secondRow = *(bottomTileRows++);
|
||||||
thirdRow = *(bottomTileRows++);
|
thirdRow = *(bottomTileRows++);
|
||||||
fourthRow = *(bottomTileRows);
|
fourthRow = *(bottomTileRows);
|
||||||
|
|
||||||
firstRow ^= foregroundRow;
|
firstRow &= andValue;
|
||||||
secondRow ^= foregroundRow;
|
secondRow &= andValue;
|
||||||
thirdRow ^= foregroundRow;
|
thirdRow &= andValue;
|
||||||
fourthRow ^= foregroundRow;
|
fourthRow &= andValue;
|
||||||
|
|
||||||
value = m2_nybbles_to_bits[(fourthRow >> 16)];
|
value = optimized_byte_4bpp_to_1bpp_table[(fourthRow >> 0xF) + (fourthRow & 0xFFFF)];
|
||||||
value <<= 4;
|
value <<= 8;
|
||||||
value |= m2_nybbles_to_bits[fourthRow & 0xFFFF];
|
value |= optimized_byte_4bpp_to_1bpp_table[(thirdRow >> 0xF) + (thirdRow & 0xFFFF)];
|
||||||
value <<= 4;
|
value <<= 8;
|
||||||
value |= m2_nybbles_to_bits[(thirdRow >> 16)];
|
value |= optimized_byte_4bpp_to_1bpp_table[(secondRow >> 0xF) + (secondRow & 0xFFFF)];
|
||||||
value <<= 4;
|
value <<= 8;
|
||||||
value |= m2_nybbles_to_bits[thirdRow & 0xFFFF];
|
value |= optimized_byte_4bpp_to_1bpp_table[(firstRow >> 0xF) + (firstRow & 0xFFFF)];
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[(secondRow >> 16)];
|
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[secondRow & 0xFFFF];
|
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[(firstRow >> 16)];
|
|
||||||
value <<= 4;
|
|
||||||
value |= m2_nybbles_to_bits[firstRow & 0xFFFF];
|
|
||||||
*(bottomBufferValues) = value;
|
*(bottomBufferValues) = value;
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
byte getSex(byte character)
|
byte getSex(byte character)
|
||||||
|
@ -2096,15 +2066,20 @@ void store_pixels_overworld_buffer(int totalYs)
|
||||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||||
first_half = *(bottomBufferValues++);
|
first_half = *(bottomBufferValues++);
|
||||||
second_half = *(bottomBufferValues++);
|
//second_half = *(bottomBufferValues++);
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||||
|
//Since those are unused
|
||||||
|
bottomBufferValues++;
|
||||||
|
bottomTilePointer += 4;
|
||||||
|
/* The game doesn't use these
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||||
|
*/
|
||||||
|
|
||||||
while(remainingTiles > 0)
|
while(remainingTiles > 0)
|
||||||
{
|
{
|
||||||
|
@ -2126,15 +2101,20 @@ void store_pixels_overworld_buffer(int totalYs)
|
||||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||||
first_half = *(bottomBufferValues++);
|
first_half = *(bottomBufferValues++);
|
||||||
second_half = *(bottomBufferValues++);
|
//second_half = *(bottomBufferValues++);
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||||
|
//Since those are unused
|
||||||
|
bottomBufferValues++;
|
||||||
|
bottomTilePointer += 4;
|
||||||
|
/* The game doesn't use these
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||||
|
*/
|
||||||
remainingTiles--;
|
remainingTiles--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -131,6 +131,7 @@ extern byte m2_ness_name[];
|
||||||
extern int m2_bits_to_nybbles[];
|
extern int m2_bits_to_nybbles[];
|
||||||
extern int m2_bits_to_nybbles_fast[];
|
extern int m2_bits_to_nybbles_fast[];
|
||||||
extern byte m2_nybbles_to_bits[];
|
extern byte m2_nybbles_to_bits[];
|
||||||
|
extern byte optimized_byte_4bpp_to_1bpp_table[];
|
||||||
extern byte *m2_font_table[];
|
extern byte *m2_font_table[];
|
||||||
extern byte m2_font_widths[];
|
extern byte m2_font_widths[];
|
||||||
extern byte m2_font_heights[];
|
extern byte m2_font_heights[];
|
||||||
|
|
Binary file not shown.
|
@ -1783,6 +1783,9 @@ flyovertextLater:
|
||||||
m2_coord_table_file:
|
m2_coord_table_file:
|
||||||
.incbin "data/m2-coord-table-file-select.bin"
|
.incbin "data/m2-coord-table-file-select.bin"
|
||||||
|
|
||||||
|
optimized_byte_4bpp_to_1bpp_table:
|
||||||
|
.incbin "data/optimized-byte-4bpp-to-1bpp-table.bin"
|
||||||
|
|
||||||
|
|
||||||
//==============================================================================
|
//==============================================================================
|
||||||
// Existing subroutines/data
|
// Existing subroutines/data
|
||||||
|
|
Loading…
Reference in New Issue