Improve 1bpp buffer storing performance by using an edited coord_table and (if useful) loading the m2_bits_to_nybbles table in IWRAM
This commit is contained in:
parent
24fbc3098e
commit
b372862942
244
src/c/vwf.c
244
src/c/vwf.c
|
@ -799,7 +799,7 @@ int print_menu_string(WINDOW* window)
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
looping = false;
|
looping = false;
|
||||||
window->menu_text = NULL; //Otherwise it will keep printing indefinitely
|
window->menu_text = NULL; //Otherwise it will keep printing indefinitely
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1518,14 +1518,14 @@ int highlight_string(WINDOW* window, byte* str, unsigned short x, unsigned short
|
||||||
//Highlights "Talk to"
|
//Highlights "Talk to"
|
||||||
void highlight_talk_to()
|
void highlight_talk_to()
|
||||||
{
|
{
|
||||||
char Talk_to[] = "Talk to";
|
char Talk_to[] = "Talk to";
|
||||||
byte str[0xA];
|
byte str[0xA];
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < (sizeof(Talk_to) - 1); i++)
|
for(i = 0; i < (sizeof(Talk_to) - 1); i++)
|
||||||
str[i] = encode_ascii(Talk_to[i]);
|
str[i] = encode_ascii(Talk_to[i]);
|
||||||
str[i++] = 0;
|
str[i++] = 0;
|
||||||
str[i] = 0xFF;
|
str[i] = 0xFF;
|
||||||
highlight_string(getWindow(0), str, 1, 0, true);
|
highlight_string(getWindow(0), str, 1, 0, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsigned short printstr_hlight_buffer(WINDOW* window, byte* str, unsigned short x, unsigned short y, bool highlight)
|
unsigned short printstr_hlight_buffer(WINDOW* window, byte* str, unsigned short x, unsigned short y, bool highlight)
|
||||||
|
@ -1926,75 +1926,185 @@ void load_pixels_overworld_buffer()
|
||||||
|
|
||||||
void store_pixels_overworld_buffer(int totalYs)
|
void store_pixels_overworld_buffer(int totalYs)
|
||||||
{
|
{
|
||||||
byte* buffer = (byte*)(OVERWORLD_BUFFER - ((*tile_offset) * TILESET_OFFSET_BUFFER_MULTIPLIER));
|
int tile = *tile_offset;
|
||||||
|
byte* buffer = (byte*)(OVERWORLD_BUFFER - (tile * TILESET_OFFSET_BUFFER_MULTIPLIER));
|
||||||
totalYs >>= 1;
|
totalYs >>= 1;
|
||||||
int total = totalYs * 0x1C;
|
int total = totalYs * 0x1C;
|
||||||
for(int i = 0; i < total; i++)
|
int* topBufferValues = (int*)(&buffer[tile * 8]);
|
||||||
|
int* bottomBufferValues = topBufferValues + 0x40;
|
||||||
|
int* topTilePointer;
|
||||||
|
int* bottomTilePointer;
|
||||||
|
int* bits_to_nybbles_pointer = m2_bits_to_nybbles_fast;
|
||||||
|
int bits_to_nybbles_array[0x100];
|
||||||
|
//Copying the table into IWRAM (about 0x400 cycles) only pays off if we have more than 0x40 total tiles to copy ((total * 0x10 * 2) = total cycles used reading from EWRAM vs. (total * 0x10) + 0x400 = total cycles used writing to and reading from IWRAM)
|
||||||
|
//From a full copy it saves about 15k cycles
|
||||||
|
if(total > 0x40)
|
||||||
|
{
|
||||||
|
cpufastset(bits_to_nybbles_pointer, bits_to_nybbles_array, 0x100);
|
||||||
|
bits_to_nybbles_pointer = bits_to_nybbles_array;
|
||||||
|
}
|
||||||
|
int nextValue = 0x20;
|
||||||
|
int i = 0;
|
||||||
|
while(i < total)
|
||||||
{
|
{
|
||||||
//Not using functions for the tile values saves about 30k cycles on average
|
//Not using functions for the tile values saves about 30k cycles on average
|
||||||
int tile = m2_coord_table[i] + *tile_offset;
|
//Using pointers + a way to keep track of subsequent tiles saves 50k cycles on average from a full copy
|
||||||
int addedValue = (i >> 5) << 6;
|
//Each m2_coord_table_fast_progression entry packs, in a single short, the tile number (low 11 bits) and the number of tiles that follow it without interruption (upper 5 bits)
|
||||||
int tile_buffer = (i & 0x1F) + addedValue + *tile_offset;
|
tile = m2_coord_table_fast_progression[i];
|
||||||
int* bufferValues = (int*)(&buffer[(tile_buffer * 8)]);
|
int remainingTiles = tile >> 0xB;
|
||||||
unsigned int first_half = bufferValues[0];
|
tile = (tile & 0x7FF) + (*tile_offset);
|
||||||
unsigned int second_half = bufferValues[1];
|
topTilePointer = &vram[(tile * 8)];
|
||||||
vram[(tile * 8) + 0] = m2_bits_to_nybbles_fast[(first_half >> 0) & 0xFF];
|
bottomTilePointer = topTilePointer + (0x20 * 8);
|
||||||
vram[(tile * 8) + 1] = m2_bits_to_nybbles_fast[(first_half >> 8) & 0xFF];
|
if(i == nextValue)
|
||||||
vram[(tile * 8) + 2] = m2_bits_to_nybbles_fast[(first_half >> 0x10) & 0xFF];
|
{
|
||||||
vram[(tile * 8) + 3] = m2_bits_to_nybbles_fast[(first_half >> 0x18) & 0xFF];
|
nextValue += 0x20;
|
||||||
vram[(tile * 8) + 4] = m2_bits_to_nybbles_fast[(second_half >> 0) & 0xFF];
|
topBufferValues += 0x40;
|
||||||
vram[(tile * 8) + 5] = m2_bits_to_nybbles_fast[(second_half >> 8) & 0xFF];
|
bottomBufferValues += 0x40;
|
||||||
vram[(tile * 8) + 6] = m2_bits_to_nybbles_fast[(second_half >> 0x10) & 0xFF];
|
}
|
||||||
vram[(tile * 8) + 7] = m2_bits_to_nybbles_fast[(second_half >> 0x18) & 0xFF];
|
i++;
|
||||||
//Do the tile right below (Saves about 50k cycles on average)
|
unsigned int first_half = *(topBufferValues++);
|
||||||
tile += 0x20;
|
unsigned int second_half = *(topBufferValues++);
|
||||||
bufferValues += 0x40;
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||||
first_half = bufferValues[0];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||||
second_half = bufferValues[1];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||||
vram[(tile * 8) + 0] = m2_bits_to_nybbles_fast[(first_half >> 0) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||||
vram[(tile * 8) + 1] = m2_bits_to_nybbles_fast[(first_half >> 8) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||||
vram[(tile * 8) + 2] = m2_bits_to_nybbles_fast[(first_half >> 0x10) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||||
vram[(tile * 8) + 3] = m2_bits_to_nybbles_fast[(first_half >> 0x18) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||||
vram[(tile * 8) + 4] = m2_bits_to_nybbles_fast[(second_half >> 0) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||||
vram[(tile * 8) + 5] = m2_bits_to_nybbles_fast[(second_half >> 8) & 0xFF];
|
first_half = *(bottomBufferValues++);
|
||||||
vram[(tile * 8) + 6] = m2_bits_to_nybbles_fast[(second_half >> 0x10) & 0xFF];
|
second_half = *(bottomBufferValues++);
|
||||||
vram[(tile * 8) + 7] = m2_bits_to_nybbles_fast[(second_half >> 0x18) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||||
|
|
||||||
|
while(remainingTiles > 0)
|
||||||
|
{
|
||||||
|
if(i == nextValue)
|
||||||
|
{
|
||||||
|
nextValue += 0x20;
|
||||||
|
topBufferValues += 0x40;
|
||||||
|
bottomBufferValues += 0x40;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
first_half = *(topBufferValues++);
|
||||||
|
second_half = *(topBufferValues++);
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||||
|
first_half = *(bottomBufferValues++);
|
||||||
|
second_half = *(bottomBufferValues++);
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||||
|
remainingTiles--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void store_pixels_overworld_buffer_totalTiles(int totalTiles)
|
void store_pixels_overworld_buffer_totalTiles(int totalTiles)
|
||||||
{
|
{
|
||||||
byte* buffer = (byte*)(OVERWORLD_BUFFER - ((*tile_offset) * TILESET_OFFSET_BUFFER_MULTIPLIER));
|
int tile = *tile_offset;
|
||||||
for(int i = 0; i < totalTiles; i++)
|
byte* buffer = (byte*)(OVERWORLD_BUFFER - (tile * TILESET_OFFSET_BUFFER_MULTIPLIER));
|
||||||
|
int* topBufferValues = (int*)(&buffer[tile * 8]);
|
||||||
|
int* bottomBufferValues = topBufferValues + 0x40;
|
||||||
|
int* topTilePointer;
|
||||||
|
int* bottomTilePointer;
|
||||||
|
int* bits_to_nybbles_pointer = m2_bits_to_nybbles_fast;
|
||||||
|
int bits_to_nybbles_array[0x100];
|
||||||
|
//Copying the table into IWRAM (about 0x400 cycles) only pays off if we have more than 0x40 total tiles to copy ((total * 0x10 * 2) = total cycles used reading from EWRAM vs. (total * 0x10) + 0x400 = total cycles used writing to and reading from IWRAM)
|
||||||
|
//From a full copy it saves about 15k cycles
|
||||||
|
if(totalTiles > 0x40)
|
||||||
|
{
|
||||||
|
cpufastset(bits_to_nybbles_pointer, bits_to_nybbles_array, 0x100);
|
||||||
|
bits_to_nybbles_pointer = bits_to_nybbles_array;
|
||||||
|
}
|
||||||
|
int nextValue = 0x20;
|
||||||
|
int i = 0;
|
||||||
|
while(i < totalTiles)
|
||||||
{
|
{
|
||||||
//Not using functions for the tile values saves about 30k cycles on average
|
//Not using functions for the tile values saves about 30k cycles on average
|
||||||
int tile = m2_coord_table[i] + *tile_offset;
|
//Using pointers + a way to keep track of subsequent tiles saves 50k cycles on average
|
||||||
int addedValue = (i >> 5) << 6;
|
//Each m2_coord_table_fast_progression entry packs, in a single short, the tile number (low 11 bits) and the number of tiles that follow it without interruption (upper 5 bits)
|
||||||
int tile_buffer = (i & 0x1F) + addedValue + *tile_offset;
|
tile = m2_coord_table_fast_progression[i];
|
||||||
int* bufferValues = (int*)(&buffer[(tile_buffer * 8)]);
|
int remainingTiles = tile >> 0xB;
|
||||||
unsigned int first_half = bufferValues[0];
|
tile = (tile & 0x7FF) + (*tile_offset);
|
||||||
unsigned int second_half = bufferValues[1];
|
topTilePointer = &vram[(tile * 8)];
|
||||||
vram[(tile * 8) + 0] = m2_bits_to_nybbles_fast[(first_half >> 0) & 0xFF];
|
bottomTilePointer = topTilePointer + (0x20 * 8);
|
||||||
vram[(tile * 8) + 1] = m2_bits_to_nybbles_fast[(first_half >> 8) & 0xFF];
|
if(i == nextValue)
|
||||||
vram[(tile * 8) + 2] = m2_bits_to_nybbles_fast[(first_half >> 0x10) & 0xFF];
|
{
|
||||||
vram[(tile * 8) + 3] = m2_bits_to_nybbles_fast[(first_half >> 0x18) & 0xFF];
|
nextValue += 0x20;
|
||||||
vram[(tile * 8) + 4] = m2_bits_to_nybbles_fast[(second_half >> 0) & 0xFF];
|
topBufferValues += 0x40;
|
||||||
vram[(tile * 8) + 5] = m2_bits_to_nybbles_fast[(second_half >> 8) & 0xFF];
|
bottomBufferValues += 0x40;
|
||||||
vram[(tile * 8) + 6] = m2_bits_to_nybbles_fast[(second_half >> 0x10) & 0xFF];
|
}
|
||||||
vram[(tile * 8) + 7] = m2_bits_to_nybbles_fast[(second_half >> 0x18) & 0xFF];
|
i++;
|
||||||
//Do the tile right below (Saves about 50k cycles on average)
|
unsigned int first_half = *(topBufferValues++);
|
||||||
tile += 0x20;
|
unsigned int second_half = *(topBufferValues++);
|
||||||
bufferValues += 0x40;
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||||
first_half = bufferValues[0];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||||
second_half = bufferValues[1];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||||
vram[(tile * 8) + 0] = m2_bits_to_nybbles_fast[(first_half >> 0) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||||
vram[(tile * 8) + 1] = m2_bits_to_nybbles_fast[(first_half >> 8) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||||
vram[(tile * 8) + 2] = m2_bits_to_nybbles_fast[(first_half >> 0x10) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||||
vram[(tile * 8) + 3] = m2_bits_to_nybbles_fast[(first_half >> 0x18) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||||
vram[(tile * 8) + 4] = m2_bits_to_nybbles_fast[(second_half >> 0) & 0xFF];
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||||
vram[(tile * 8) + 5] = m2_bits_to_nybbles_fast[(second_half >> 8) & 0xFF];
|
first_half = *(bottomBufferValues++);
|
||||||
vram[(tile * 8) + 6] = m2_bits_to_nybbles_fast[(second_half >> 0x10) & 0xFF];
|
second_half = *(bottomBufferValues++);
|
||||||
vram[(tile * 8) + 7] = m2_bits_to_nybbles_fast[(second_half >> 0x18) & 0xFF];
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||||
|
|
||||||
|
while(remainingTiles > 0 && i < totalTiles)
|
||||||
|
{
|
||||||
|
if(i == nextValue)
|
||||||
|
{
|
||||||
|
nextValue += 0x20;
|
||||||
|
topBufferValues += 0x40;
|
||||||
|
bottomBufferValues += 0x40;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
first_half = *(topBufferValues++);
|
||||||
|
second_half = *(topBufferValues++);
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||||
|
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||||
|
first_half = *(bottomBufferValues++);
|
||||||
|
second_half = *(bottomBufferValues++);
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||||
|
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||||
|
remainingTiles--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -124,6 +124,7 @@ void load_pixels_overworld_buffer();
|
||||||
void store_pixels_overworld_buffer(int totalYs);
|
void store_pixels_overworld_buffer(int totalYs);
|
||||||
void store_pixels_overworld_buffer_totalTiles(int totalTiles);
|
void store_pixels_overworld_buffer_totalTiles(int totalTiles);
|
||||||
|
|
||||||
|
extern unsigned short m2_coord_table_fast_progression[];
|
||||||
extern unsigned short m2_coord_table[];
|
extern unsigned short m2_coord_table[];
|
||||||
extern byte m2_ness_name[];
|
extern byte m2_ness_name[];
|
||||||
extern int m2_bits_to_nybbles[];
|
extern int m2_bits_to_nybbles[];
|
||||||
|
|
Binary file not shown.
|
@ -1693,6 +1693,10 @@ m2_font_relocate:
|
||||||
m2_coord_table:
|
m2_coord_table:
|
||||||
.incbin "data/m2-coord-table.bin"
|
.incbin "data/m2-coord-table.bin"
|
||||||
|
|
||||||
|
// Co-ordinate table, version which has 5 bits used for how many consecutive tiles there are after each tile
|
||||||
|
m2_coord_table_fast_progression:
|
||||||
|
.incbin "data/m2-coord-table-fast-progression.bin"
|
||||||
|
|
||||||
// EB fonts
|
// EB fonts
|
||||||
m2_font_table:
|
m2_font_table:
|
||||||
dw m2_font_main
|
dw m2_font_main
|
||||||
|
|
Loading…
Reference in New Issue