551 lines
13 KiB
NASM
551 lines
13 KiB
NASM
m2_vwf:
|
|
|
|
//==============================================================================
// int get_tile_number(int x, int y)
//
// Looks up a tile number in m2_coord_table.
//
// In:
//    r0: x  (decremented by one before use)
//    r1: y  (decremented by one before use)
// Out:
//    r0: tile number
// Preserves:
//    r1-r5 (pushed on entry, popped on exit)
//
// With x' = x-1 and y' = y-1 the result is
//    halfword[m2_coord_table + 2*x' + 56*(y' >> 1)] + ((y' & 1) << 5)
// i.e. the table is indexed by pairs of rows (56 bytes = 28 halfwords per
// pair) and the odd row of a pair is 0x20 tiles after the even one.
//==============================================================================

.get_tile_number:

    push    {r1-r5,lr}
    ldr     r4,=#m2_coord_table     // Keep this pseudo-instruction directly after the push

    sub     r1,r1,#1                // r1 = y'
    sub     r0,r0,#1                // r0 = x'

    // Column contribution: 2 bytes per entry
    lsl     r0,r0,#1
    add     r4,r4,r0

    // r2 = (y' & 1) * 0x20
    lsl     r2,r1,#31
    lsr     r2,r2,#31
    lsl     r2,r2,#5

    // r5 = (y' >> 1) * 56, built as ((k << 4) - k - k) << 2
    lsr     r1,r1,#1
    lsl     r5,r1,#4
    sub     r5,r5,r1
    sub     r5,r5,r1
    lsl     r5,r5,#2
    add     r4,r4,r5

    ldrh    r0,[r4,#0]              // Table entry
    add     r0,r0,r2                // Add the odd-row offset
    pop     {r1-r5,pc}
|
|
|
|
|
|
//==============================================================================
// void weld_entry(WINDOW* window, byte* chr)
//
// Draws one character at the window's current text position via
// .print_character (font 0) and advances the window's position by the
// virtual width that call returns.
//
// In:
//    r0: address of window data
//    r1: address of char to print
// Preserves:
//    r0-r7
//
// Window fields touched (meanings taken from how they are used here):
//    [window+0x02] halfword: pixel X within the current tile (0-7), read/written
//    [window+0x22] halfword: window X in tiles, read
//    [window+0x24] halfword: window Y in tiles, read
//    [window+0x2A] halfword: text X in tiles relative to the window, read/written
//    [window+0x2C] halfword: text Y in tiles relative to the window, read
//
// Stack frame (28 bytes):
//    [sp+0]  character index
//    [sp+4]  absolute tile X      (stored, not read back in this routine)
//    [sp+8]  absolute tile Y      (stored, not read back in this routine)
//    [sp+12] virtual width returned by .print_character
//    [sp+20] absolute pixel X before printing
//    [sp+24] original pixel X     (stored, not read back in this routine)
//==============================================================================

//--------------------------------
.weld_entry:
    push    {r0-r7,lr}
    mov     r5,r0                   // r5 = window for the rest of the routine
    add     sp,#-28

    //--------------------------------
    // Fetch the character and rebase it by 0x50; anything outside
    // 0x00-0x5F after rebasing is replaced by 0x1F ('?')
    ldrb    r0,[r1,#0]
    sub     r0,#0x50
    bpl     +
    mov     r0,#0x1F                // Below 0x50: invalid
    b       .char_custom
+
    cmp     r0,#0x60
    bcc     .char_custom
    mov     r0,#0x1F                // 0x60 or above after rebasing: invalid

.char_custom:
    str     r0,[sp,#0]
    // [sp+0] = char

    //--------------------------------
    // Current X: (window X + text X) tiles, converted to pixels, plus pixel X
    ldrh    r2,[r5,#0x2A]
    ldrh    r1,[r5,#0x22]
    add     r1,r1,r2
    str     r1,[sp,#4]
    lsl     r0,r1,#3
    ldrh    r1,[r5,#2]
    str     r1,[sp,#24]
    add     r0,r0,r1                // r0 = current pixel X
    str     r0,[sp,#20]

    // Current Y: (window Y + text Y) tiles, converted to pixels
    ldrh    r3,[r5,#0x2C]
    ldrh    r1,[r5,#0x24]
    add     r1,r1,r3
    str     r1,[sp,#8]
    lsl     r1,r1,#3                // r1 = current pixel Y

    //--------------------------------
    // Print with font 0; r0 comes back as the virtual width
    mov     r3,#0
    ldr     r2,[sp,#0]
    bl      .print_character
    str     r0,[sp,#12]

    //--------------------------------
    // New pixel X = old pixel X + virtual width
    ldr     r1,[sp,#12]
    ldr     r0,[sp,#20]
    add     r0,r0,r1

    // Tile part goes back to text X (relative to the window again)
    lsr     r1,r0,#3
    ldrh    r2,[r5,#0x22]
    sub     r1,r1,r2
    strh    r1,[r5,#0x2A]

    // Low three bits go back to pixel X
    lsl     r0,r0,#29
    lsr     r0,r0,#29
    strh    r0,[r5,#2]

    //--------------------------------
    add     sp,#28
    pop     {r0-r7,pc}
|
|
|
|
|
|
//=============================================================================
// int print_character(int x, int y, int chr, int font)
//
// Renders one glyph into VRAM at a pixel position and writes the matching
// tilemap entries. A glyph may straddle up to three horizontally adjacent
// tiles; the left part is drawn with .print_left and each further part with
// .print_right.
//
// In:
//    r0: x (pixel)
//    r1: y (pixel)
//    r2: character
//    r3: font
//        0: main
//        1: saturn
//        2: tiny
// Out:
//    r0: virtual width
// Preserves:
//    r1-r12
//
// Register roles after setup:
//    r4:  tile number currently being written
//    r5:  font
//    r6:  palette mask (OR-ed into every tilemap entry)
//    r7:  tilemap address of the next column to fill
//    r8:  tile offset
//    r9:  virtual width (the return value)
//    r10: x (pixel)      r11: y (pixel)      r12: character
//
// Stack frame (24 bytes):
//    [sp+0..12] render struct handed to .print_left / .print_right
//    [sp+16]    glyph height in tiles
//    [sp+20]    original pixel X offset within the first tile
//
// The ldr rX,=#... pseudo-instructions are kept at their original instruction
// positions; only independent instructions between them were reordered.
//=============================================================================

.print_character:

    push    {r1-r7,lr}
    mov     r4,r8
    mov     r5,r9
    mov     r6,r10
    mov     r7,r11
    push    {r4-r7}                 // Save r8-r11 through low registers
    mov     r4,r12
    push    {r4}                    // Save r12
    add     sp,#-24

    // Park the arguments where the helper calls will not disturb them
    mov     r5,r3                   // font
    mov     r12,r2                  // character
    mov     r11,r1                  // y
    mov     r10,r0                  // x

    //----------------------------------------
    // Fetch the drawing parameters and locate the first tilemap entry
    ldr     r3,=#0x30051EC
    ldrh    r4,[r3,#0]              // Tile offset
    mov     r8,r4
    add     r3,#0x3C
    ldrh    r6,[r3,#0]              // Palette mask
    add     r3,#0x48
    ldr     r7,[r3,#0]              // Tilemap address
    lsr     r1,r1,#3
    lsl     r1,r1,#5                // (y / 8) * 32 entries per map row
    lsr     r0,r0,#3                // x / 8
    add     r0,r0,r1
    lsl     r0,r0,#1                // 2 bytes per map entry
    add     r7,r7,r0                // Local tilemap address

    //----------------------------------------
    // Widths table: one pointer per font, two bytes per character
    // (virtual width, then render width)
    ldr     r0,=#m2_widths_table
    lsl     r1,r5,#2                // Font number * 4
    ldr     r0,[r0,r1]
    mov     r3,r12                  // Character
    lsl     r2,r3,#1
    ldrb    r1,[r0,r2]              // Virtual width
    mov     r9,r1
    add     r2,r2,#1
    ldrb    r0,[r0,r2]              // Render width
    cmp     r0,#0
    beq     +                       // Don't bother rendering a zero-width character

    ldr     r2,=#m2_height_table
    ldrb    r2,[r2,r5]              // Height in tiles for this font

    // Fill in the render struct; r2 must still hold the height when the
    // first map-update loop runs
    mov     r3,sp
    mov     r1,r10
    lsl     r1,r1,#29
    lsr     r1,r1,#29               // x & 7
    strb    r1,[r3,#8]              // VRAM x offset
    strb    r0,[r3,#9]              // Render width
    mov     r1,#4
    strb    r1,[r3,#10]             // Background index
    mov     r1,#0xF
    strb    r1,[r3,#11]             // Foreground index
    strb    r2,[r3,#12]             // Height in tiles
    str     r2,[sp,#16]             // No more registers, gotta keep a copy on the stack

    //----------------------------------------
    // First tile: number -> VRAM address
    mov     r1,r11
    lsr     r1,r1,#3
    mov     r0,r10
    lsr     r0,r0,#3
    bl      .get_tile_number
    mov     r1,#6
    lsl     r1,r1,#24               // 0x06000000
    add     r4,r0,r4                // Tile number + tile offset
    lsl     r0,r4,#5                // 32 bytes per tile
    add     r0,r0,r1                // VRAM address
    str     r0,[sp,#0]

    //----------------------------------------
    // Font table: one pointer per font, 32 bytes per glyph
    ldr     r0,=#m2_font_table
    lsl     r1,r5,#2
    ldr     r0,[r0,r1]
    mov     r1,r12
    lsl     r1,r1,#5
    add     r0,r0,r1                // Glyph address
    str     r0,[sp,#4]

    //----------------------------------------
    // Render left portion
    mov     r0,sp
    bl      .print_left

    //----------------------------------------
    // Update the map: one entry per tile row, stepping one map row down
    // (0x40 bytes) and 0x20 tiles on each time
    mov     r1,r7
    orr     r4,r6
-
    strh    r4,[r1,#0]
    add     r1,#0x40
    add     r4,#0x20
    sub     r2,r2,#1
    bne     -
    add     r7,r7,#2                // Next map column

    //----------------------------------------
    // Now we've rendered the left portion;
    // we need to determine whether or not to render the right portion
    ldrb    r1,[r0,#8]              // VRAM x offset
    str     r1,[sp,#20]             // No more registers, gotta store this on the stack
    ldrb    r2,[r0,#9]              // Render width
    add     r2,r1,r2
    cmp     r2,#8
    bls     +                       // Fits in the first tile: done

    // We still have more to render; figure out how much we already rendered
    mov     r3,#8
    sub     r3,r3,r1
    strb    r3,[r0,#8]              // Now holds the glyph x offset for .print_right

    // Allocate a new tile (one column to the right)
    mov     r1,r11
    lsr     r1,r1,#3
    mov     r0,r10
    lsr     r0,r0,#3
    add     r0,r0,#1
    bl      .get_tile_number
    add     r0,r8
    mov     r4,r0
    mov     r1,#6
    lsl     r1,r1,#24
    lsl     r0,r0,#5
    add     r0,r0,r1
    str     r0,[sp,#0]
    mov     r0,sp
    bl      .print_right

    //----------------------------------------
    // Update the map
    ldr     r2,[sp,#16]
    mov     r1,r7
    orr     r4,r6
-
    strh    r4,[r1,#0]
    add     r1,#0x40
    add     r4,#0x20
    sub     r2,r2,#1
    bne     -
    add     r7,r7,#2

    //----------------------------------------
    // Now we've rendered the left and right portions;
    // we need to determine whether or not to do a final
    // right portion for super wide characters
    ldr     r1,[sp,#20]             // Original pixel X offset
    ldrb    r2,[r0,#9]              // Render width
    add     r2,r1,r2                // Right side of glyph
    cmp     r2,#16
    bls     +                       // Fits in two tiles: done

    // We have one more chunk to render; figure out how much we already rendered
    mov     r3,#16
    sub     r3,r3,r1
    strb    r3,[r0,#8]

    // Allocate a new tile (two columns to the right)
    mov     r1,r11
    lsr     r1,r1,#3
    mov     r0,r10
    lsr     r0,r0,#3
    add     r0,r0,#2
    bl      .get_tile_number
    add     r0,r8
    mov     r4,r0
    mov     r1,#6
    lsl     r1,r1,#24
    lsl     r0,r0,#5
    add     r0,r0,r1
    str     r0,[sp,#0]
    mov     r0,sp
    bl      .print_right

    //----------------------------------------
    // Update the map
    ldr     r2,[sp,#16]
    mov     r1,r7
    orr     r4,r6
-
    strh    r4,[r1,#0]
    add     r1,#0x40
    add     r4,#0x20
    sub     r2,r2,#1
    bne     -
    add     r7,r7,#2

    //----------------------------------------
    // Common exit, also reached directly for zero-width characters
+
    mov     r0,r9                   // Return the virtual width
    add     sp,#24
    pop     {r4}
    mov     r12,r4
    pop     {r4-r7}
    mov     r8,r4
    mov     r9,r5
    mov     r10,r6
    mov     r11,r7
    pop     {r1-r7,pc}
|
|
|
|
|
|
//=============================================================================
// void print_left(void* structPointer)
//
// Draws the part of a glyph that lands in the tile at [r0+0]. Each VRAM row
// is reduced to 1BPP, the glyph row is clipped to the render width, shifted
// LEFT by the VRAM x offset, truncated to 8 bits and OR-ed in, and the row
// is then expanded back to 4BPP and stored.
//=============================================================================

// In:
//    r0: struct pointer
//        [r0+0]:  VRAM address
//        [r0+4]:  glyph address
//        [r0+8]:  VRAM x offset (byte)
//        [r0+9]:  render width (byte)
//        [r0+10]: background index (byte)
//        [r0+11]: foreground index (byte)
//        [r0+12]: height in tiles (byte)
//        [r0+13]: <unused> (3 bytes)
// Preserves:
//    r0-r7
//
// Register roles:
//    r3: glyph row pointer      r4: tile rows remaining
//    r5: pixel rows remaining   r6: VRAM row pointer      r7: struct pointer

.print_left:

    push    {r0-r7,lr}
    mov     r7,r0

    //----------------------------------------
    ldrb    r4,[r7,#12]             // Height in tiles
    ldr     r3,[r7,#4]              // Glyph address
    ldr     r6,[r7,#0]              // VRAM address

.print_left_loop:
    mov     r5,#8                   // 8 pixel rows per tile
-
    ldrb    r1,[r7,#11]             // Foreground index
    ldr     r0,[r6,#0]              // 4BPP VRAM row
    bl      .reduce_bit_depth       // Returns r0 = 1BPP VRAM row

    // Clip the glyph row to the render width by shifting it up against
    // bit 31 and back down again
    ldrb    r1,[r7,#9]              // Glyph render width
    mov     r2,#32
    sub     r2,r2,r1
    ldrb    r1,[r3,#0]              // Glyph row
    lsl     r1,r2                   // Cut off the pixels we don't want to render
    lsr     r1,r2

    // Move it to its place in the tile and drop whatever spills past 8 bits
    ldrb    r2,[r7,#8]              // X offset
    lsl     r1,r2
    lsl     r1,r1,#24
    lsr     r1,r1,#24
    orr     r0,r1

    ldrb    r2,[r7,#11]             // Foreground index
    ldrb    r1,[r7,#10]             // Background index
    bl      .expand_bit_depth
    str     r0,[r6,#0]

    add     r3,r3,#1                // Next glyph row
    add     r6,r6,#4                // Next VRAM row
    sub     r5,r5,#1
    bne     -

    // Move on to the next tile row. The VRAM pointer has already moved 0x20;
    // another 0x3E0 makes 0x400 in total, i.e. 0x20 tiles of 0x20 bytes.
    // The glyph pointer skips 8 further bytes.
    add     r3,#8
    mov     r0,#0x1F
    lsl     r0,r0,#5                // 0x3E0
    add     r6,r0,r6
    sub     r4,r4,#1
    bne     .print_left_loop

    //----------------------------------------
    pop     {r0-r7,pc}
|
|
|
|
|
|
//=============================================================================
// void print_right(void* structPointer)
//
// Draws a continuation of a glyph into the tile at [r0+0]. Works like
// .print_left except that the clipped glyph row is shifted RIGHT by the
// glyph x offset, discarding the columns that were already drawn in an
// earlier tile.
//=============================================================================

// In:
//    r0: struct pointer
//        [r0+0]:  VRAM address
//        [r0+4]:  glyph address
//        [r0+8]:  glyph x offset (byte)
//        [r0+9]:  render width (byte)
//        [r0+10]: background index (byte)
//        [r0+11]: foreground index (byte)
//        [r0+12]: height in tiles (byte)
//        [r0+13]: <unused> (3 bytes)
// Preserves:
//    r0-r7
//
// Register roles:
//    r3: glyph row pointer      r4: tile rows remaining
//    r5: pixel rows remaining   r6: VRAM row pointer      r7: struct pointer

.print_right:

    push    {r0-r7,lr}
    mov     r7,r0

    //----------------------------------------
    ldrb    r4,[r7,#12]             // Height in tiles
    ldr     r3,[r7,#4]              // Glyph address
    ldr     r6,[r7,#0]              // VRAM address

.print_right_loop:
    mov     r5,#8                   // 8 pixel rows per tile
-
    ldrb    r1,[r7,#11]             // Foreground index
    ldr     r0,[r6,#0]              // 4BPP VRAM row
    bl      .reduce_bit_depth       // Returns r0 = 1BPP VRAM row

    // Clip the glyph row to the render width by shifting it up against
    // bit 31 and back down again
    ldrb    r1,[r7,#9]              // Glyph render width
    mov     r2,#32
    sub     r2,r2,r1
    ldrb    r1,[r3,#0]              // Glyph row
    lsl     r1,r2                   // Cut off the pixels we don't want to render
    lsr     r1,r2

    // Drop the columns drawn previously and keep the low 8 bits
    ldrb    r2,[r7,#8]              // X offset
    lsr     r1,r2
    lsl     r1,r1,#24
    lsr     r1,r1,#24
    orr     r0,r1

    ldrb    r2,[r7,#11]             // Foreground index
    ldrb    r1,[r7,#10]             // Background index
    bl      .expand_bit_depth
    str     r0,[r6,#0]

    add     r3,r3,#1                // Next glyph row
    add     r6,r6,#4                // Next VRAM row
    sub     r5,r5,#1
    bne     -

    // Move on to the next tile row. The VRAM pointer has already moved 0x20;
    // another 0x3E0 makes 0x400 in total, i.e. 0x20 tiles of 0x20 bytes.
    // The glyph pointer skips 8 further bytes.
    add     r3,#8
    mov     r0,#0x1F
    lsl     r0,r0,#5                // 0x3E0
    add     r6,r0,r6
    sub     r4,r4,#1
    bne     .print_right_loop

    //----------------------------------------
    pop     {r0-r7,pc}
|
|
|
|
|
|
//==============================================================================
// byte reduce_bit_depth(int pixels, int foreground)
//
// In:
//    r0: row of 4BPP pixels
//    r1: foreground index
// Out:
//    r0: row of 1BPP pixels
// Preserves:
//    r1-r3
//==============================================================================

// How it works:
//  - every pixel is one nybble of r0; the job is to turn each nybble into a
//    single bit that says "this pixel is the foreground colour"
//  - r1*0x11111111 is a full row of foreground pixels; XOR-ing r0 with it
//    leaves a 0-nybble wherever r0 held the foreground colour and a non-zero
//    nybble everywhere else
//  - m2_nybbles_to_bits maps four such nybbles (a 16-bit index) to four bits,
//    a 1 for each 0-nybble and a 0 for each non-zero nybble; going four
//    pixels at a time keeps the table at 16^4 = 65536 entries (64KB)
//  - the row is therefore looked up as two halves and the upper half's result
//    is shifted up by four bits
//
// Why the constants are loaded by hand:
//  - the ldr rX,=#Y pseudoinstruction implicitly branches, so PC-relative
//    loads from a local literal pool are faster
//  - those loads only work if the code is 32-bit aligned, hence the alignment
//    hacks around the routine
//  - do not add or remove instructions here without recomputing the [pc,#32]
//    offsets
//  - this uses 63 cycles while the previous method used 273 cycles

// Alignment hack
ldr     r0,=#0xDEADBEEF

.reduce_bit_depth:
    push    {r1-r3,lr}

    ldr     r3,[pc,#32]             // 0x11111111
    mul     r1,r3                   // Row of foreground pixels
    ldr     r2,[pc,#32]             // m2_nybbles_to_bits
    eor     r0,r1                   // Foreground nybbles become 0

    lsl     r1,r0,#16
    lsr     r0,r0,#16               // r0 = upper four pixels
    lsr     r1,r1,#16               // r1 = lower four pixels
    ldrb    r3,[r2,r0]
    lsl     r3,r3,#4                // Upper result goes in bits 4-7
    ldrb    r0,[r2,r1]
    orr     r0,r3

    pop     {r1-r3,pc}

// Literal pool
ldr     r0,=#0xDEADBEEF
dd      0x11111111
dd      m2_nybbles_to_bits
|
|
|
|
|
|
//==============================================================================
// int expand_bit_depth(byte pixels, int background, int foreground)
//
// In:
//    r0: row of 1BPP pixels
//    r1: background index
//    r2: foreground index
// Out:
//    r0: row of 4BPP pixels
// Preserves:
//    r1-r6
//==============================================================================

// How it works:
//  - like reduce_bit_depth, this goes fast by using a lookup table
//  - m2_bits_to_nybbles is really 16 tables, one per colour index, each
//    holding 256 words; the word at (table + (index*1024) + (row*4)) is the
//    4BPP expansion of the 1BPP row in that colour
//  - look the row up once with the foreground index, then invert the row
//    (XOR with 0xFF) and look it up again with the background index
//  - OR the two words together to get the final 4BPP row (the two lookups
//    presumably fill complementary nybbles -- verify against the table data)
//  - do not add or remove instructions here without recomputing the
//    [pc,#36] offset
//  - this uses 61 cycles while the previous method used 287 cycles

// Alignment hack
ldr     r0,=#0xDEADBEEF

.expand_bit_depth:
    push    {r1-r6,lr}
    ldr     r6,[pc,#36]             // m2_bits_to_nybbles

    // Foreground
    lsl     r3,r0,#2                // Row * 4
    lsl     r4,r2,#10               // Foreground index * 1024
    add     r5,r4,r6
    ldr     r2,[r5,r3]

    // Background
    mov     r4,#0xFF
    eor     r0,r4                   // Invert the row
    lsl     r3,r0,#2                // Inverted row * 4
    lsl     r4,r1,#10               // Background index * 1024
    add     r5,r4,r6
    ldr     r1,[r5,r3]

    orr     r2,r1
    mov     r0,r2

    pop     {r1-r6,pc}

// Literal pool
ldr     r0,=#0xDEADBEEF
dd      m2_bits_to_nybbles
|