Improve 1bpp buffer storing performance by using an edited coord_table and (if useful) loading the m2_bits_to_nybbles table in IWRAM
This commit is contained in:
parent
24fbc3098e
commit
b372862942
244
src/c/vwf.c
244
src/c/vwf.c
|
@ -799,7 +799,7 @@ int print_menu_string(WINDOW* window)
|
|||
break;
|
||||
default:
|
||||
looping = false;
|
||||
window->menu_text = NULL; //Otherwise it will keep printing indefinetly
|
||||
window->menu_text = NULL; //Otherwise it will keep printing indefinetly
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1518,14 +1518,14 @@ int highlight_string(WINDOW* window, byte* str, unsigned short x, unsigned short
|
|||
//Highlights "Talk to"
|
||||
void highlight_talk_to()
|
||||
{
|
||||
char Talk_to[] = "Talk to";
|
||||
byte str[0xA];
|
||||
int i;
|
||||
for(i = 0; i < (sizeof(Talk_to) - 1); i++)
|
||||
str[i] = encode_ascii(Talk_to[i]);
|
||||
str[i++] = 0;
|
||||
str[i] = 0xFF;
|
||||
highlight_string(getWindow(0), str, 1, 0, true);
|
||||
char Talk_to[] = "Talk to";
|
||||
byte str[0xA];
|
||||
int i;
|
||||
for(i = 0; i < (sizeof(Talk_to) - 1); i++)
|
||||
str[i] = encode_ascii(Talk_to[i]);
|
||||
str[i++] = 0;
|
||||
str[i] = 0xFF;
|
||||
highlight_string(getWindow(0), str, 1, 0, true);
|
||||
}
|
||||
|
||||
unsigned short printstr_hlight_buffer(WINDOW* window, byte* str, unsigned short x, unsigned short y, bool highlight)
|
||||
|
@ -1926,75 +1926,185 @@ void load_pixels_overworld_buffer()
|
|||
|
||||
void store_pixels_overworld_buffer(int totalYs)
|
||||
{
|
||||
byte* buffer = (byte*)(OVERWORLD_BUFFER - ((*tile_offset) * TILESET_OFFSET_BUFFER_MULTIPLIER));
|
||||
int tile = *tile_offset;
|
||||
byte* buffer = (byte*)(OVERWORLD_BUFFER - (tile * TILESET_OFFSET_BUFFER_MULTIPLIER));
|
||||
totalYs >>= 1;
|
||||
int total = totalYs * 0x1C;
|
||||
for(int i = 0; i < total; i++)
|
||||
int* topBufferValues = (int*)(&buffer[tile * 8]);
|
||||
int* bottomBufferValues = topBufferValues + 0x40;
|
||||
int* topTilePointer;
|
||||
int* bottomTilePointer;
|
||||
int* bits_to_nybbles_pointer = m2_bits_to_nybbles_fast;
|
||||
int bits_to_nybbles_array[0x100];
|
||||
//It's convenient to copy the table in IWRAM (about 0x400 cycles) only if we have more than 0x40 total tiles to copy ((total * 0x10 * 2) = total cycles used reading from EWRAM vs. (total * 0x10) + 0x400 = total cycles used writing to and reading from IWRAM)
|
||||
//From a full copy it saves about 15k cycles
|
||||
if(total > 0x40)
|
||||
{
|
||||
cpufastset(bits_to_nybbles_pointer, bits_to_nybbles_array, 0x100);
|
||||
bits_to_nybbles_pointer = bits_to_nybbles_array;
|
||||
}
|
||||
int nextValue = 0x20;
|
||||
int i = 0;
|
||||
while(i < total)
|
||||
{
|
||||
//Not using functions for the tile values saves about 30k cycles on average
|
||||
int tile = m2_coord_table[i] + *tile_offset;
|
||||
int addedValue = (i >> 5) << 6;
|
||||
int tile_buffer = (i & 0x1F) + addedValue + *tile_offset;
|
||||
int* bufferValues = (int*)(&buffer[(tile_buffer * 8)]);
|
||||
unsigned int first_half = bufferValues[0];
|
||||
unsigned int second_half = bufferValues[1];
|
||||
vram[(tile * 8) + 0] = m2_bits_to_nybbles_fast[(first_half >> 0) & 0xFF];
|
||||
vram[(tile * 8) + 1] = m2_bits_to_nybbles_fast[(first_half >> 8) & 0xFF];
|
||||
vram[(tile * 8) + 2] = m2_bits_to_nybbles_fast[(first_half >> 0x10) & 0xFF];
|
||||
vram[(tile * 8) + 3] = m2_bits_to_nybbles_fast[(first_half >> 0x18) & 0xFF];
|
||||
vram[(tile * 8) + 4] = m2_bits_to_nybbles_fast[(second_half >> 0) & 0xFF];
|
||||
vram[(tile * 8) + 5] = m2_bits_to_nybbles_fast[(second_half >> 8) & 0xFF];
|
||||
vram[(tile * 8) + 6] = m2_bits_to_nybbles_fast[(second_half >> 0x10) & 0xFF];
|
||||
vram[(tile * 8) + 7] = m2_bits_to_nybbles_fast[(second_half >> 0x18) & 0xFF];
|
||||
//Do the tile right below (Saves about 50k cycles on average)
|
||||
tile += 0x20;
|
||||
bufferValues += 0x40;
|
||||
first_half = bufferValues[0];
|
||||
second_half = bufferValues[1];
|
||||
vram[(tile * 8) + 0] = m2_bits_to_nybbles_fast[(first_half >> 0) & 0xFF];
|
||||
vram[(tile * 8) + 1] = m2_bits_to_nybbles_fast[(first_half >> 8) & 0xFF];
|
||||
vram[(tile * 8) + 2] = m2_bits_to_nybbles_fast[(first_half >> 0x10) & 0xFF];
|
||||
vram[(tile * 8) + 3] = m2_bits_to_nybbles_fast[(first_half >> 0x18) & 0xFF];
|
||||
vram[(tile * 8) + 4] = m2_bits_to_nybbles_fast[(second_half >> 0) & 0xFF];
|
||||
vram[(tile * 8) + 5] = m2_bits_to_nybbles_fast[(second_half >> 8) & 0xFF];
|
||||
vram[(tile * 8) + 6] = m2_bits_to_nybbles_fast[(second_half >> 0x10) & 0xFF];
|
||||
vram[(tile * 8) + 7] = m2_bits_to_nybbles_fast[(second_half >> 0x18) & 0xFF];
|
||||
//Using pointers + a way to keep track of subsequent tiles saves 50k cycles on average from a full copy
|
||||
//m2_coord_table_fast_progression has the tile number and the number of tiles used without interruction after it in a single short
|
||||
tile = m2_coord_table_fast_progression[i];
|
||||
int remainingTiles = tile >> 0xB;
|
||||
tile = (tile & 0x7FF) + (*tile_offset);
|
||||
topTilePointer = &vram[(tile * 8)];
|
||||
bottomTilePointer = topTilePointer + (0x20 * 8);
|
||||
if(i == nextValue)
|
||||
{
|
||||
nextValue += 0x20;
|
||||
topBufferValues += 0x40;
|
||||
bottomBufferValues += 0x40;
|
||||
}
|
||||
i++;
|
||||
unsigned int first_half = *(topBufferValues++);
|
||||
unsigned int second_half = *(topBufferValues++);
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||
first_half = *(bottomBufferValues++);
|
||||
second_half = *(bottomBufferValues++);
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||
|
||||
while(remainingTiles > 0)
|
||||
{
|
||||
if(i == nextValue)
|
||||
{
|
||||
nextValue += 0x20;
|
||||
topBufferValues += 0x40;
|
||||
bottomBufferValues += 0x40;
|
||||
}
|
||||
i++;
|
||||
first_half = *(topBufferValues++);
|
||||
second_half = *(topBufferValues++);
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||
first_half = *(bottomBufferValues++);
|
||||
second_half = *(bottomBufferValues++);
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||
remainingTiles--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void store_pixels_overworld_buffer_totalTiles(int totalTiles)
|
||||
{
|
||||
byte* buffer = (byte*)(OVERWORLD_BUFFER - ((*tile_offset) * TILESET_OFFSET_BUFFER_MULTIPLIER));
|
||||
for(int i = 0; i < totalTiles; i++)
|
||||
int tile = *tile_offset;
|
||||
byte* buffer = (byte*)(OVERWORLD_BUFFER - (tile * TILESET_OFFSET_BUFFER_MULTIPLIER));
|
||||
int* topBufferValues = (int*)(&buffer[tile * 8]);
|
||||
int* bottomBufferValues = topBufferValues + 0x40;
|
||||
int* topTilePointer;
|
||||
int* bottomTilePointer;
|
||||
int* bits_to_nybbles_pointer = m2_bits_to_nybbles_fast;
|
||||
int bits_to_nybbles_array[0x100];
|
||||
//It's convenient to copy the table in IWRAM (about 0x400 cycles) only if we have more than 0x40 total tiles to copy ((total * 0x10 * 2) = total cycles used reading from EWRAM vs. (total * 0x10) + 0x400 = total cycles used writing to and reading from IWRAM)
|
||||
//From a full copy it saves about 15k cycles
|
||||
if(totalTiles > 0x40)
|
||||
{
|
||||
cpufastset(bits_to_nybbles_pointer, bits_to_nybbles_array, 0x100);
|
||||
bits_to_nybbles_pointer = bits_to_nybbles_array;
|
||||
}
|
||||
int nextValue = 0x20;
|
||||
int i = 0;
|
||||
while(i < totalTiles)
|
||||
{
|
||||
//Not using functions for the tile values saves about 30k cycles on average
|
||||
int tile = m2_coord_table[i] + *tile_offset;
|
||||
int addedValue = (i >> 5) << 6;
|
||||
int tile_buffer = (i & 0x1F) + addedValue + *tile_offset;
|
||||
int* bufferValues = (int*)(&buffer[(tile_buffer * 8)]);
|
||||
unsigned int first_half = bufferValues[0];
|
||||
unsigned int second_half = bufferValues[1];
|
||||
vram[(tile * 8) + 0] = m2_bits_to_nybbles_fast[(first_half >> 0) & 0xFF];
|
||||
vram[(tile * 8) + 1] = m2_bits_to_nybbles_fast[(first_half >> 8) & 0xFF];
|
||||
vram[(tile * 8) + 2] = m2_bits_to_nybbles_fast[(first_half >> 0x10) & 0xFF];
|
||||
vram[(tile * 8) + 3] = m2_bits_to_nybbles_fast[(first_half >> 0x18) & 0xFF];
|
||||
vram[(tile * 8) + 4] = m2_bits_to_nybbles_fast[(second_half >> 0) & 0xFF];
|
||||
vram[(tile * 8) + 5] = m2_bits_to_nybbles_fast[(second_half >> 8) & 0xFF];
|
||||
vram[(tile * 8) + 6] = m2_bits_to_nybbles_fast[(second_half >> 0x10) & 0xFF];
|
||||
vram[(tile * 8) + 7] = m2_bits_to_nybbles_fast[(second_half >> 0x18) & 0xFF];
|
||||
//Do the tile right below (Saves about 50k cycles on average)
|
||||
tile += 0x20;
|
||||
bufferValues += 0x40;
|
||||
first_half = bufferValues[0];
|
||||
second_half = bufferValues[1];
|
||||
vram[(tile * 8) + 0] = m2_bits_to_nybbles_fast[(first_half >> 0) & 0xFF];
|
||||
vram[(tile * 8) + 1] = m2_bits_to_nybbles_fast[(first_half >> 8) & 0xFF];
|
||||
vram[(tile * 8) + 2] = m2_bits_to_nybbles_fast[(first_half >> 0x10) & 0xFF];
|
||||
vram[(tile * 8) + 3] = m2_bits_to_nybbles_fast[(first_half >> 0x18) & 0xFF];
|
||||
vram[(tile * 8) + 4] = m2_bits_to_nybbles_fast[(second_half >> 0) & 0xFF];
|
||||
vram[(tile * 8) + 5] = m2_bits_to_nybbles_fast[(second_half >> 8) & 0xFF];
|
||||
vram[(tile * 8) + 6] = m2_bits_to_nybbles_fast[(second_half >> 0x10) & 0xFF];
|
||||
vram[(tile * 8) + 7] = m2_bits_to_nybbles_fast[(second_half >> 0x18) & 0xFF];
|
||||
//Using pointers + a way to keep track of subsequent tiles saves 50k cycles on average
|
||||
//m2_coord_table_fast_progression has the tile number and the number of tiles used without interruction after it in a single short
|
||||
tile = m2_coord_table_fast_progression[i];
|
||||
int remainingTiles = tile >> 0xB;
|
||||
tile = (tile & 0x7FF) + (*tile_offset);
|
||||
topTilePointer = &vram[(tile * 8)];
|
||||
bottomTilePointer = topTilePointer + (0x20 * 8);
|
||||
if(i == nextValue)
|
||||
{
|
||||
nextValue += 0x20;
|
||||
topBufferValues += 0x40;
|
||||
bottomBufferValues += 0x40;
|
||||
}
|
||||
i++;
|
||||
unsigned int first_half = *(topBufferValues++);
|
||||
unsigned int second_half = *(topBufferValues++);
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||
first_half = *(bottomBufferValues++);
|
||||
second_half = *(bottomBufferValues++);
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||
|
||||
while(remainingTiles > 0 && i < totalTiles)
|
||||
{
|
||||
if(i == nextValue)
|
||||
{
|
||||
nextValue += 0x20;
|
||||
topBufferValues += 0x40;
|
||||
bottomBufferValues += 0x40;
|
||||
}
|
||||
i++;
|
||||
first_half = *(topBufferValues++);
|
||||
second_half = *(topBufferValues++);
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||
*(topTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||
first_half = *(bottomBufferValues++);
|
||||
second_half = *(bottomBufferValues++);
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 8) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 8) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x10) & 0xFF];
|
||||
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0x18) & 0xFF];
|
||||
remainingTiles--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -124,6 +124,7 @@ void load_pixels_overworld_buffer();
|
|||
void store_pixels_overworld_buffer(int totalYs);
|
||||
void store_pixels_overworld_buffer_totalTiles(int totalTiles);
|
||||
|
||||
extern unsigned short m2_coord_table_fast_progression[];
|
||||
extern unsigned short m2_coord_table[];
|
||||
extern byte m2_ness_name[];
|
||||
extern int m2_bits_to_nybbles[];
|
||||
|
|
Binary file not shown.
|
@ -1693,6 +1693,10 @@ m2_font_relocate:
|
|||
m2_coord_table:
|
||||
.incbin "data/m2-coord-table.bin"
|
||||
|
||||
// Co-ordinate table, version which has 5 bits used for how many consecutive tiles there are after each tile
|
||||
m2_coord_table_fast_progression:
|
||||
.incbin "data/m2-coord-table-fast-progression.bin"
|
||||
|
||||
// EB fonts
|
||||
m2_font_table:
|
||||
dw m2_font_main
|
||||
|
|
Loading…
Reference in New Issue