Lower memory usage for printing buffer by 1/4th

Fixes https://github.com/jeffman/Mother2GbaTranslation/issues/131
The loading process now takes 1/10th more time
This commit is contained in:
Lorenzo Carletti 2021-08-16 18:54:00 +02:00
parent 188ac3ae75
commit 0e25bdf8da
2 changed files with 72 additions and 44 deletions

View File

@ -71,10 +71,9 @@ byte reduce_bit_depth(int row, int foregroundRow)
//The order is swapped in order to make this faster //The order is swapped in order to make this faster
//Doing the bottom tile directly saves some cycles //Doing the bottom tile directly saves some cycles
void reduce_bit_depth_sp(int* TileRows, int* bufferValues) void reduce_bit_depth_sp(int* TileRows, int* bufferValues, int* bottomBufferValues)
{ {
int* bottomTileRows = TileRows + (0x20 * 8); int* bottomTileRows = TileRows + (0x20 * 8);
int* bottomBufferValues = bufferValues + 0x40;
const int andValue = 0x11111111; const int andValue = 0x11111111;
//First value //First value
@ -478,6 +477,19 @@ byte print_character_with_callback(byte chr, int x, int y, int font, int foregro
return virtualWidth; return virtualWidth;
} }
int convert_tile_address_buffer(int tile)
{
//Allows using tiles with different size to make the buffer smaller
tile -= *tile_offset;
int inner_line = tile & 0x3F;
int lines = (tile >> 6) << 5;
int returned_address = ((*tile_offset) * TILESET_OFFSET_BUFFER_MULTIPLIER) + (lines * DEFAULT_DOUBLE_TILE_HEIGHT);
if(inner_line >= 0x20)
return returned_address + (0x20 * 8) + ((inner_line & 0x1F) * (DEFAULT_DOUBLE_TILE_HEIGHT - 8));
return returned_address + (inner_line * 8);
}
byte print_character_with_callback_1bpp_buffer(byte chr, int x, int y, byte *dest, int (*getTileCallback)(int, int), int font, byte print_character_with_callback_1bpp_buffer(byte chr, int x, int y, byte *dest, int (*getTileCallback)(int, int), int font,
unsigned short *tilemapPtr, int tilemapWidth, byte doubleTileHeight) unsigned short *tilemapPtr, int tilemapWidth, byte doubleTileHeight)
{ {
@ -525,7 +537,7 @@ byte print_character_with_callback_1bpp_buffer(byte chr, int x, int y, byte *des
for (int row = 0; row < 8; row++) for (int row = 0; row < 8; row++)
{ {
unsigned short canvasRow = dest[(realTileIndex * 8) + ((row + offsetY - sumRemoved) & 7)] + (dest[(realTileIndexRight * 8) + ((row + offsetY - sumRemoved) & 7)] << 8); unsigned short canvasRow = dest[convert_tile_address_buffer(realTileIndex) + ((row + offsetY - sumRemoved) & 7)] + (dest[convert_tile_address_buffer(realTileIndexRight) + ((row + offsetY - sumRemoved) & 7)] << 8);
unsigned short glyphRow = glyphRows[row + (dTileY * 8 * tileWidth) + (dTileX * 8)] << offsetX; unsigned short glyphRow = glyphRows[row + (dTileY * 8 * tileWidth) + (dTileX * 8)] << offsetX;
unsigned short tmpCanvasRow = canvasRow; unsigned short tmpCanvasRow = canvasRow;
@ -534,8 +546,8 @@ byte print_character_with_callback_1bpp_buffer(byte chr, int x, int y, byte *des
if(!availableSwap && ((row + offsetY) >> 3) == 1 && canvasRow != tmpCanvasRow) //This changed the canvas, then it's useful... IF it's the extra vertical tile if(!availableSwap && ((row + offsetY) >> 3) == 1 && canvasRow != tmpCanvasRow) //This changed the canvas, then it's useful... IF it's the extra vertical tile
useful = true; useful = true;
dest[(realTileIndex * 8) + ((row + offsetY - sumRemoved) & 7)] = canvasRow; dest[convert_tile_address_buffer(realTileIndex) + ((row + offsetY - sumRemoved) & 7)] = canvasRow;
dest[(realTileIndexRight * 8) + ((row + offsetY - sumRemoved) & 7)] = canvasRow >> 8; dest[convert_tile_address_buffer(realTileIndexRight) + ((row + offsetY - sumRemoved) & 7)] = canvasRow >> 8;
if((row + offsetY - sumRemoved) == limit) if((row + offsetY - sumRemoved) == limit)
{ {
@ -1738,9 +1750,10 @@ void clear_tile_buffer(int x, int y, byte* dest)
{ {
// Clear pixels // Clear pixels
int tileIndex = get_tile_number_buffer(x, y) + *tile_offset; int tileIndex = get_tile_number_buffer(x, y) + *tile_offset;
int *destTileInt = (int*)&dest[(tileIndex) * 8]; int *destTileInt = (int*)&dest[convert_tile_address_buffer(tileIndex)];
destTileInt[0] = 0; destTileInt[0] = 0;
destTileInt[1] = 0; if(((tileIndex - *tile_offset) & 0x20) == 0)
destTileInt[1] = 0;
// Reset the tilemap (e.g. get rid of equip or SMAAAASH!! tiles) // Reset the tilemap (e.g. get rid of equip or SMAAAASH!! tiles)
(*tilemap_pointer)[x + (y * 32)] = (get_tile_number(x, y) + (*tile_offset)) | *palette_mask; (*tilemap_pointer)[x + (y * 32)] = (get_tile_number(x, y) + (*tile_offset)) | *palette_mask;
@ -1939,6 +1952,27 @@ void printCashWindow()
m2_sub_d3c50(); m2_sub_d3c50();
} }
void eb_cartridge_palette_change(bool background)
{
unsigned short *paletteDest = (unsigned short*)0x5000040;
if(background)
{
if(BUILD_PALETTE)
{
//Makes the game do the palette work. Copy the result in a bin file and use that instead in order to make the swap fast
unsigned short palettes[0x50];
cpuset(paletteDest, palettes, 0x50);
for(int i = 0; i < 5; i++)
m12_dim_palette(&palettes[i * 0x10], 0x10, 0x800);
cpuset(palettes, paletteDest, 0x50);
}
else
cpuset(m12_cartridge_palettes_dimmed, paletteDest, 0x50);
}
else
cpuset(&m12_cartridge_palettes[0x20], paletteDest, 0x50);
}
// x, y, width: tile coordinates // x, y, width: tile coordinates
void print_blankstr_buffer(int x, int y, int width, byte *dest) void print_blankstr_buffer(int x, int y, int width, byte *dest)
{ {
@ -2008,6 +2042,7 @@ void load_pixels_overworld_buffer()
int tile = *tile_offset; int tile = *tile_offset;
byte* buffer = (byte*)(OVERWORLD_BUFFER - (tile * TILESET_OFFSET_BUFFER_MULTIPLIER)); byte* buffer = (byte*)(OVERWORLD_BUFFER - (tile * TILESET_OFFSET_BUFFER_MULTIPLIER));
int* topBufferValues = (int*)(&buffer[tile * 8]); int* topBufferValues = (int*)(&buffer[tile * 8]);
int* bottomBufferValues = topBufferValues + 0x40;
int* topTilePointer; int* topTilePointer;
int nextValue = 0x20; int nextValue = 0x20;
int i = 0; int i = 0;
@ -2021,24 +2056,28 @@ void load_pixels_overworld_buffer()
if(i == nextValue) if(i == nextValue)
{ {
nextValue += 0x20; nextValue += 0x20;
topBufferValues += 0x40; topBufferValues = bottomBufferValues;
bottomBufferValues += 0x40;
} }
i++; i++;
//Using "reduce_bit_depth_sp" reduced the total amount of cycles from 300k to 162k //Using "reduce_bit_depth_sp" reduced the total amount of cycles from 300k to 162k
reduce_bit_depth_sp(topTilePointer, topBufferValues); reduce_bit_depth_sp(topTilePointer, topBufferValues, bottomBufferValues);
topTilePointer += 8; topTilePointer += 8;
topBufferValues += 2; topBufferValues += 2;
bottomBufferValues++;
while(remainingTiles > 0) while(remainingTiles > 0)
{ {
if(i == nextValue) if(i == nextValue)
{ {
nextValue += 0x20; nextValue += 0x20;
topBufferValues += 0x40; topBufferValues = bottomBufferValues;
bottomBufferValues += 0x40;
} }
i++; i++;
reduce_bit_depth_sp(topTilePointer, topBufferValues); reduce_bit_depth_sp(topTilePointer, topBufferValues, bottomBufferValues);
topTilePointer += 8; topTilePointer += 8;
topBufferValues += 2; topBufferValues += 2;
bottomBufferValues++;
remainingTiles--; remainingTiles--;
} }
} }
@ -2078,7 +2117,7 @@ void store_pixels_overworld_buffer(int totalYs)
if(i == nextValue) if(i == nextValue)
{ {
nextValue += 0x20; nextValue += 0x20;
topBufferValues += 0x40; topBufferValues += 0x20;
bottomBufferValues += 0x40; bottomBufferValues += 0x40;
} }
i++; i++;
@ -2099,7 +2138,6 @@ void store_pixels_overworld_buffer(int totalYs)
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
//Since those are unused //Since those are unused
bottomBufferValues++;
bottomTilePointer += 4; bottomTilePointer += 4;
/* The game doesn't use these /* The game doesn't use these
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
@ -2113,7 +2151,7 @@ void store_pixels_overworld_buffer(int totalYs)
if(i == nextValue) if(i == nextValue)
{ {
nextValue += 0x20; nextValue += 0x20;
topBufferValues += 0x40; topBufferValues += 0x20;
bottomBufferValues += 0x40; bottomBufferValues += 0x40;
} }
i++; i++;
@ -2134,7 +2172,6 @@ void store_pixels_overworld_buffer(int totalYs)
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
//Since those are unused //Since those are unused
bottomBufferValues++;
bottomTilePointer += 4; bottomTilePointer += 4;
/* The game doesn't use these /* The game doesn't use these
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
@ -2147,27 +2184,6 @@ void store_pixels_overworld_buffer(int totalYs)
} }
} }
void eb_cartridge_palette_change(bool background)
{
unsigned short *paletteDest = (unsigned short*)0x5000040;
if(background)
{
if(BUILD_PALETTE)
{
//Makes the game do the palette work. Copy the result in a bin file and use that instead in order to make the swap fast
unsigned short palettes[0x50];
cpuset(paletteDest, palettes, 0x50);
for(int i = 0; i < 5; i++)
m12_dim_palette(&palettes[i * 0x10], 0x10, 0x800);
cpuset(palettes, paletteDest, 0x50);
}
else
cpuset(m12_cartridge_palettes_dimmed, paletteDest, 0x50);
}
else
cpuset(&m12_cartridge_palettes[0x20], paletteDest, 0x50);
}
void store_pixels_overworld_buffer_totalTiles(int totalTiles) void store_pixels_overworld_buffer_totalTiles(int totalTiles)
{ {
int tile = *tile_offset; int tile = *tile_offset;
@ -2200,7 +2216,7 @@ void store_pixels_overworld_buffer_totalTiles(int totalTiles)
if(i == nextValue) if(i == nextValue)
{ {
nextValue += 0x20; nextValue += 0x20;
topBufferValues += 0x40; topBufferValues += 0x20;
bottomBufferValues += 0x40; bottomBufferValues += 0x40;
} }
i++; i++;
@ -2221,7 +2237,6 @@ void store_pixels_overworld_buffer_totalTiles(int totalTiles)
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
//Since those are unused //Since those are unused
bottomBufferValues++;
bottomTilePointer += 4; bottomTilePointer += 4;
/* The game doesn't use these /* The game doesn't use these
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
@ -2235,7 +2250,7 @@ void store_pixels_overworld_buffer_totalTiles(int totalTiles)
if(i == nextValue) if(i == nextValue)
{ {
nextValue += 0x20; nextValue += 0x20;
topBufferValues += 0x40; topBufferValues += 0x20;
bottomBufferValues += 0x40; bottomBufferValues += 0x40;
} }
i++; i++;
@ -2256,7 +2271,6 @@ void store_pixels_overworld_buffer_totalTiles(int totalTiles)
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x10) & 0xFF];
*(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(first_half >> 0x18) & 0xFF];
//Since those are unused //Since those are unused
bottomBufferValues++;
bottomTilePointer += 4; bottomTilePointer += 4;
/* The game doesn't use these /* The game doesn't use these
*(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF]; *(bottomTilePointer++) = bits_to_nybbles_pointer[(second_half >> 0) & 0xFF];
@ -2280,10 +2294,23 @@ void copy_tile_buffer(int xSource, int ySource, int xDest, int yDest, byte *dest
{ {
int sourceTileIndex = get_tile_number_buffer(xSource, ySource) + *tile_offset; int sourceTileIndex = get_tile_number_buffer(xSource, ySource) + *tile_offset;
int destTileIndex = get_tile_number_buffer(xDest, yDest) + *tile_offset; int destTileIndex = get_tile_number_buffer(xDest, yDest) + *tile_offset;
int* sourceTile = (int*)&dest[sourceTileIndex * 8]; int* sourceTile = (int*)&dest[convert_tile_address_buffer(sourceTileIndex)];
int* destTile = (int*)&dest[destTileIndex * 8]; int* destTile = (int*)&dest[convert_tile_address_buffer(destTileIndex)];
//Copy the first part, no matter what
destTile[0] = sourceTile[0]; destTile[0] = sourceTile[0];
destTile[1] = sourceTile[1]; //Handle the 4 different cases
if(((sourceTileIndex - *tile_offset) & 0x20) == 0)
{
if(((destTileIndex - *tile_offset) & 0x20) == 0)
destTile[1] = sourceTile[1];
}
else
{
if(((destTileIndex - *tile_offset) & 0x20) == 0)
destTile[1] = 0;
}
} }
// x,y: tile coordinates // x,y: tile coordinates

View File

@ -8,6 +8,7 @@
#define FALSE 0 #define FALSE 0
#define TILESET_OFFSET_BUFFER_MULTIPLIER 0x8 #define TILESET_OFFSET_BUFFER_MULTIPLIER 0x8
#define DEFAULT_DOUBLE_TILE_HEIGHT 0xC
#define CHAR_OFFSET 0x50 #define CHAR_OFFSET 0x50
#define CHAR_END 0x60 #define CHAR_END 0x60
#define YOUWON_START 0x64 #define YOUWON_START 0x64
@ -41,7 +42,7 @@ int count_pixels_to_tiles(byte *str, int length, int startingPos);
int count_pixels_to_tiles_normal_string(byte *str, int startingPos); int count_pixels_to_tiles_normal_string(byte *str, int startingPos);
int expand_bit_depth(byte row, byte foreground); int expand_bit_depth(byte row, byte foreground);
byte reduce_bit_depth(int row, int foregroundRow); byte reduce_bit_depth(int row, int foregroundRow);
void reduce_bit_depth_sp(int* TileRows, int* bufferValues); void reduce_bit_depth_sp(int* TileRows, int* bufferValues, int* bottomBufferValues);
byte print_character(byte chr, int x, int y); byte print_character(byte chr, int x, int y);
byte print_character_formatted(byte chr, int x, int y, int font, int foreground); byte print_character_formatted(byte chr, int x, int y, int font, int foreground);
byte print_character_to_window(byte chr, WINDOW* window); byte print_character_to_window(byte chr, WINDOW* window);