Improve load_pixels_overworld_buffer performance

Uses a specialized function to speed things up considerably.
This commit is contained in:
Lorenzooone 2019-09-24 22:13:40 +02:00
parent b372862942
commit 22c70883d8
2 changed files with 64 additions and 22 deletions

View File

@ -69,6 +69,66 @@ byte reduce_bit_depth(int row, int foregroundRow)
return lower | (upper << 4); return lower | (upper << 4);
} }
//Reduces a single tile row to one byte.
//Every bit of the row is flipped (XOR with 0xFFFFFFFF), then each 16-bit half
//is looked up in m2_nybbles_to_bits; the upper half's result lands in bits 4-7
//and the lower half's result in bits 0-3.
//NOTE(review): assumes int is 32 bits wide and that m2_nybbles_to_bits has
//0x10000 entries - confirm against the table's definition.
static inline unsigned int reduce_bit_depth_sp_row(int row)
{
    //Unsigned arithmetic keeps (flipped >> 16) within 0..0xFFFF; shifting a
    //negative signed value could yield a negative table index
    unsigned int flipped = (unsigned int)row ^ 0xFFFFFFFFu;
    unsigned int upper = m2_nybbles_to_bits[flipped >> 16];
    unsigned int lower = m2_nybbles_to_bits[flipped & 0xFFFF];
    return (upper << 4) | lower;
}

//Specialised, faster variant of reduce_bit_depth for a whole tile.
//Reads the 8 rows TileRows[0..7] and writes 2 packed words:
//  bufferValues[0] holds rows 0-3, bufferValues[1] holds rows 4-7,
//with the first row of each group in the lowest byte and the fourth row in
//the highest byte.
//Every row must be OR-ed into its word: plainly assigning a later row would
//throw away the rows accumulated before it.
void reduce_bit_depth_sp(int* TileRows, int* bufferValues)
{
    unsigned int first_half = reduce_bit_depth_sp_row(TileRows[0]);
    first_half |= reduce_bit_depth_sp_row(TileRows[1]) << 8;
    first_half |= reduce_bit_depth_sp_row(TileRows[2]) << 0x10;
    first_half |= reduce_bit_depth_sp_row(TileRows[3]) << 0x18;

    unsigned int second_half = reduce_bit_depth_sp_row(TileRows[4]);
    second_half |= reduce_bit_depth_sp_row(TileRows[5]) << 8;
    second_half |= reduce_bit_depth_sp_row(TileRows[6]) << 0x10;
    second_half |= reduce_bit_depth_sp_row(TileRows[7]) << 0x18;

    //Only two stores are performed, one per group of four rows
    bufferValues[0] = (int)first_half;
    bufferValues[1] = (int)second_half;
}
byte getSex(byte character) byte getSex(byte character)
{ {
return character == 1 ? 1 : 0; //character 1 is Paula return character == 1 ? 1 : 0; //character 1 is Paula
@ -1897,30 +1957,11 @@ void load_pixels_overworld_buffer()
int foregroundRow = 0xFFFFFFFF; int foregroundRow = 0xFFFFFFFF;
//Reduce total amount of stores from 16 to 4 //Reduce total amount of stores from 16 to 4
int* bufferValues = (int*)(&buffer[(tile_buffer * 8)]); int* bufferValues = (int*)(&buffer[(tile_buffer * 8)]);
unsigned int first_half; //Using "reduce_bit_depth_sp" reduces the total amount of cycles from 300k to 76k
unsigned int second_half; reduce_bit_depth_sp(&vram[(tile * 8) + 0], bufferValues);
first_half = reduce_bit_depth(vram[(tile * 8) + 0], foregroundRow);
first_half |= reduce_bit_depth(vram[(tile * 8) + 1], foregroundRow) << 8;
first_half |= reduce_bit_depth(vram[(tile * 8) + 2], foregroundRow) << 0x10;
first_half |= reduce_bit_depth(vram[(tile * 8) + 3], foregroundRow) << 0x18;
second_half = reduce_bit_depth(vram[(tile * 8) + 4], foregroundRow);
second_half |= reduce_bit_depth(vram[(tile * 8) + 5], foregroundRow) << 8;
second_half |= reduce_bit_depth(vram[(tile * 8) + 6], foregroundRow) << 0x10;
second_half |= reduce_bit_depth(vram[(tile * 8) + 7], foregroundRow) << 0x18;
bufferValues[0] = first_half;
bufferValues[1] = second_half;
bufferValues += 0x40; bufferValues += 0x40;
tile += 0x20; tile += 0x20;
first_half = reduce_bit_depth(vram[(tile * 8) + 0], foregroundRow); reduce_bit_depth_sp(&vram[(tile * 8) + 0], bufferValues);
first_half |= reduce_bit_depth(vram[(tile * 8) + 1], foregroundRow) << 8;
first_half |= reduce_bit_depth(vram[(tile * 8) + 2], foregroundRow) << 0x10;
first_half |= reduce_bit_depth(vram[(tile * 8) + 3], foregroundRow) << 0x18;
second_half = reduce_bit_depth(vram[(tile * 8) + 4], foregroundRow);
second_half |= reduce_bit_depth(vram[(tile * 8) + 5], foregroundRow) << 8;
second_half |= reduce_bit_depth(vram[(tile * 8) + 6], foregroundRow) << 0x10;
second_half |= reduce_bit_depth(vram[(tile * 8) + 7], foregroundRow) << 0x18;
bufferValues[0] = first_half;
bufferValues[1] = second_half;
} }
} }

View File

@ -39,6 +39,7 @@ int count_pixels_to_tiles(byte *str, int length, int startingPos);
int count_pixels_to_tiles_normal_string(byte *str, int startingPos); int count_pixels_to_tiles_normal_string(byte *str, int startingPos);
int expand_bit_depth(byte row, byte foreground); int expand_bit_depth(byte row, byte foreground);
byte reduce_bit_depth(int row, int foregroundRow); byte reduce_bit_depth(int row, int foregroundRow);
void reduce_bit_depth_sp(int* TileRows, int* bufferValues);
byte print_character(byte chr, int x, int y); byte print_character(byte chr, int x, int y);
byte print_character_formatted(byte chr, int x, int y, int font, int foreground); byte print_character_formatted(byte chr, int x, int y, int font, int foreground);
byte print_character_to_window(byte chr, WINDOW* window); byte print_character_to_window(byte chr, WINDOW* window);