// File: F3DEX3/rsp/dmem/f3dex3.s (ArmAsm, 499 lines, 16 KiB)


////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////// DMEM //////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// RSP DMEM
// Start the DMEM image in DATA_FILE, with addresses beginning at 0x0000.
.create DATA_FILE, 0x0000
/*
Matrices are stored and used in a transposed format compared to how they are
normally written in mathematics. For the integer part (numbers on the left are
the byte offsets of the elements within the matrix):
    00 02 04 06    typical   Xscl Rot  Rot  0
    08 0A 0C 0E    use:      Rot  Yscl Rot  0
    10 12 14 16              Rot  Rot  Zscl 0
    18 1A 1C 1E              Xpos Ypos Zpos 1
The fractional part comes next and is in the same format.
Applying this transformation is done by multiplying a row vector times the
matrix, like:
    X Y Z 1  *  Xscl Rot  Rot  0  =  NewX NewY NewZ 1
                Rot  Yscl Rot  0
                Rot  Rot  Zscl 0
                Xpos Ypos Zpos 1
In C, the matrix is accessed as matrix[row][col], and the vector is vector[row].
*/
// 0x0000-0x0040: model matrix
mMatrix:
.fill 0x40
// 0x0040-0x0080: view * projection matrix
vpMatrix:
.fill 0x40
// 0x0080-0x00C0: model * (view * projection) matrix
mvpMatrix:
.fill 0x40
// Assemble-time guard: the three matrices above must occupy exactly 0xC0 bytes
// so that the scissor/othermode words that follow sit at the address S2DEX
// expects.
.if . != 0x00C0
.error "Scissor and othermode must be at 0x00C0 for S2DEX"
.endif
// Shared DMEM region 0x00C0-0x0180. Its layout matters for G_LOAD_UCODE / S2DEX
// (see the checks at the start and end of this region).
// scissor (four 12-bit values)
// Default is upper-left (0, 0), lower-right (320, 240), each coordinate times 4.
scissorUpLeft: // the command byte is included since the command word is copied verbatim
.dw (G_SETSCISSOR << 24) | (( 0 * 4) << 12) | (( 0 * 4) << 0)
scissorBottomRight:
.dw ((320 * 4) << 12) | ((240 * 4) << 0)
// othermode
otherMode0: // command byte included, same as above
.dw (G_RDPSETOTHERMODE << 24) | (0x080CFF)
otherMode1:
.dw 0x00000000
// These two words are texrectState in S2DEX, so it can clobber them.
textureSettings1:
.dw 0x00000000 // first word, has command byte, level, tile, and on
textureSettings2:
.dw 0xFFFFFFFF // second word, has s and t scale
// This word is rdpHalf1Val in S2DEX, so it can clobber it.
fogFactor:
.dw 0x00000000
// Halfword of clip-plane flags; initial value enables the scaled X/Y planes
// and the camera plane.
activeClipPlanes:
.dh CLIP_SCAL_NPXY | CLIP_CAMPLANE // Normal tri write, set to zero when clipping
// displaylist stack length
displayListStackLength:
.db 0x00 // starts at 0, increments by 4 for each "return address" pushed onto the stack
// Padding byte; keeps the viewport that follows on an even address.
unused1:
.db 0
// viewport
viewport:
.fill 16
// Current RDP fifo output position
rdpFifoPos:
.fill 4
matrixStackPtr:
.dw 0x00000000
// segment table
segmentTable:
.fill (4 * 16) // 16 DRAM pointers
// displaylist stack
displayListStack:
// ucode text (shared with DL stack)
// The ID string plus a trailing newline (0x0A) is stored in the space the
// display list stack uses at run time.
.ascii ID_STR, 0x0A
endIdStr:
// Pad with .fill up to 0x180, or fail the build if the string ran past it.
.if endIdStr < 0x180
.fill (0x180 - endIdStr)
.elseif endIdStr > 0x180
.error "ID_STR is too long"
.align 16 // to suppress subsequent errors
.endif
endSharedDMEM:
// Assemble-time guard: the shared region must end exactly at 0x180.
.if . != 0x180
.error "endSharedDMEM at incorrect address, matters for G_LOAD_UCODE / S2DEX"
.endif
// constants for register $v31
// Eight 16-bit constants (16 bytes total); the guard below requires the block
// to start on a 16-byte boundary.
.if (. & 15) != 0
.error "Wrong alignment for v31value"
.endif
v31Value:
// v31 must go from lowest to highest (signed) values for vcc patterns.
// Also relies on the fact that $v31[0h] is -4,-4,-4,-4, 4, 4, 4, 4.
// (That holds because element 0 is -4 and element 4 is 4 below; do not reorder.)
.dh -4 // used in clipping, vtx write for Newton-Raphson reciprocal
.dh -1 // used often
.dh 0 // used often
.dh 2 // used as clip ratio (vtx write, clipping) and in clipping
.dh 4 // used for same Newton-Raphsons, occlusion plane scaling
.dh 0x4000 // used in tri write, texgen
.dh 0x7F00 // used in fog
.dh 0x7FFF // used often
// Camera position, light buffers and occlusion plane coefficients.
// cameraWorldPos (6 bytes) and tempTriRA (2 bytes) together form one 8-byte slot.
cameraWorldPos:
.skip 6
tempTriRA:
.skip 2 // Overwritten as part of camera world position, used as temp
lightBufferLookat:
.skip 8 // s8 X0, Y0, Z0, dummy, X1, Y1, Z1, dummy
// Room for G_MAX_LIGHTS lights of lightSize bytes each.
lightBufferMain:
.skip (G_MAX_LIGHTS * lightSize)
lightBufferAmbient:
.skip 8 // just colors for ambient light
// Offset of lightBufferMain relative to altBase. altBase is defined further
// down (at a higher address), so this offset is negative.
ltBufOfs equ (lightBufferMain - altBase)
occlusionPlaneEdgeCoeffs:
/*
See cpu/occlusionplane.c for more information.
Vertex is in occlusion region if all five equations below are true:
4 * screenX[s13.2] * c0[s0.15] - 0.5 * screenY[s13.2] < c4[s14.1]
4 * screenY[s13.2] * c1[s0.15] - 0.5 * screenX[s13.2] < c5[s14.1]
4 * screenX[s13.2] * c2[s0.15] + 0.5 * screenY[s13.2] < c6[s14.1]
4 * screenY[s13.2] * c3[s0.15] + 0.5 * screenX[s13.2] < c7[s14.1]
clamp_to_0.s15(clipX[s15.16] * kx[s0.15])
+ clamp_to_0.s15(clipY[s15.16] * ky[s0.15])
+ clamp_to_0.s15(clipZ[s15.16] * kz[s0.15])
>= kc[s0.15]
*/
// NOTE(review): with c4-c7 = 0x8000 (the most negative signed 16-bit value)
// the four "< cN" tests presumably can never pass, i.e. occlusion is disabled
// by default -- confirm against cpu/occlusionplane.c.
.dh 0x0000 // c0
.dh 0x0000 // c1
.dh 0x0000 // c2
.dh 0x0000 // c3
.dh 0x8000 // c4
.dh 0x8000 // c5
.dh 0x8000 // c6
.dh 0x8000 // c7
// Coefficients of the fifth (clip-space) equation above.
occlusionPlaneMidCoeffs:
.dh 0x0000 // kx
.dh 0x0000 // ky
.dh 0x0000 // kz
.dh 0x7FFF // kc
// Alternate base address because vector load offsets can't reach all of DMEM.
// altBaseReg permanently points here.
.if (. & 15) != 0
.error "Wrong alignment for altBase"
.endif
altBase:
// constants for register vTRC
// altBase and vTRCValue label the same address, so both alignment guards
// test the same location.
.if (. & 15) != 0
.error "Wrong alignment for vTRCValue"
.endif
vTRCValue:
decalFixMult equ 0x0400
decalFixOff equ (-(decalFixMult / 2))
// Eight 16-bit lanes. The order matters: see set_vcc_11110001 and the guard
// below, plus the vTRC_* lane aliases.
.dh vertexBuffer // around 0x300; for converting vertex index to address
.dh vtxSize << 7 // 0x1300; it's not 0x2600 because vertex indices are *2
.dh 0x7E00 // vertex index mask for snake
.dh decalFixMult // defined above
.dh decalFixOff // negative
.dh 0x0020 // used in tri write and vtx addr manip
.dh 0x0100 // used several times in tri write
.dh 0x1000 // some multiplier in tri write, vtx addr manip
// Compares every lane of vTRC against lane 0 (vertexBuffer), result to $v29.
// NOTE(review): presumably this leaves VCC set for lanes 0-3 and 7 and clear
// for lanes 4-6 (the "11110001" of the name) -- confirm against RSP vge
// semantics.
.macro set_vcc_11110001
vge $v29, vTRC, vTRC[0]
.endmacro
// Assemble-time guard for the ordering the macro depends on: vertexBuffer must
// be greater than 0x0100 (lane 6) and not greater than decalFixMult (lane 3).
.if (vertexBuffer <= 0x0100 || decalFixMult < vertexBuffer)
.error "VCC pattern for vTRC corrupted"
.endif
// Named aliases for the individual lanes of vTRC, in the order stored above.
vTRC_VB equ vTRC[0] // Vertex Buffer
vTRC_VS equ vTRC[1] // Vertex Size
vTRC_7E00 equ vTRC[2]
vTRC_DM equ vTRC[3] // Decal Multiplier
vTRC_DO equ vTRC[4] // Decal Offset
vTRC_0020 equ vTRC[5]
vTRC_0100 equ vTRC[6]
vTRC_1000 equ vTRC[7]
// DMEM address of the 0x0100 constant (lane 6, 2 bytes per lane).
vTRC_0100_addr equ (vTRCValue + 2 * 6)
// Effects parameters block. Must start on a 16-byte boundary.
.if (. & 15) != 0
.error "Wrong alignment for fxParams"
.endif
fxParams:
// First 8 values here loaded with lqv.
// (aoAmbientFactor through fresnelOffset: eight halfwords = 16 bytes.)
aoAmbientFactor:
.dh 0xFFFF
aoDirectionalFactor:
.dh 0xA000
aoPointFactor:
.dh 0x0000
perspNorm:
.dh 0xFFFF
texgenLinearCoeffs:
.dh 0x44D3
.dh 0x6CB3
fresnelScale:
.dh 0x0000
fresnelOffset:
.dh 0x0000
// Two halfwords; NOTE(review): presumably the S offset then the T offset,
// going by the name -- confirm.
attrOffsetST:
.dh 0x0100
.dh 0xFF00
alphaCompareCullMode:
.db 0x00 // 0 = disabled, 1 = cull if all < thresh, -1 = cull if all >= thresh
alphaCompareCullThresh:
.db 0x00 // Alpha threshold, 00 - FF
// Note: this word sits at fxParams + 0x16, which is not 4-byte aligned.
lastMatDLPhyAddr:
.dw 0
// Assemble-time guard: fxParams through lastMatDLPhyAddr must span exactly
// 0x1A bytes; if this changes, the fxParams moveword offsets (MWO) in the GBI
// need updating.
.if (. - fxParams) != 0x1A
.error "Update fxParams MWO in GBI"
.endif
// These two bytes are adjacent so they can be read together as one halfword.
packedNormalsMaskConstant:
.db 0xF8 // When read, materialCullMode has been zeroed, so read as 0xF800
materialCullMode:
.db 0
geometryModeLabel:
.dw 0x00000000
// Table of 16-bit DMEM addresses, one halfword per G_MW_* index (named in the
// per-line comments). One slot is not a moveword target and is reused to hold
// a packed-normals constant.
movewordTable:
.dh fxParams // G_MW_FX
.dh numLightsxSize - 3 // G_MW_NUMLIGHT; writes numLightsxSize and pointLightFlag, zeroes dirLightsXfrmValid
// packedNormalsConstants covers the next two halfwords (0x2008 and the
// segmentTable address) and must be 4-byte aligned.
packedNormalsConstants:
.if (. & 3) != 0
.error "Alignment broken for packed normals constants in movewordTable"
.endif
.dh 0x2008 // For packed normals; unused in movewordTable
// The second halfword doubles as the G_MW_SEGMENT entry, so its high byte is
// only zero while segmentTable lives below address 0x0100.
.if (segmentTable & 0xFF00) != 0
.error "Packed normals constants relies on first byte of segmentTable addr being 0"
.endif
.dh segmentTable // G_MW_SEGMENT
.dh fogFactor // G_MW_FOG
.dh lightBufferMain // G_MW_LIGHTCOL
// First half of RDP value for split commands. Also used as temp storage for
// tri vertices during tri commands.
rdpHalf1Val:
.fill 4
// Table of 16-bit DMEM destination addresses, one halfword per G_MV_* index
// (named in the per-line comments).
movememTable:
.dh mMatrix // G_MV_MMTX
.dh tempMatrix // G_MV_TEMPMTX0 multiply temp matrix (model)
.dh vpMatrix // G_MV_VPMTX
.dh tempMatrix // G_MV_TEMPMTX1 multiply temp matrix (view*projection)
.dh viewport // G_MV_VIEWPORT
.dh cameraWorldPos // G_MV_LIGHT
// Two 16-bit code addresses. NOTE(review): presumably the return addresses
// used after a movemem (plain return vs. continuing a matrix multiply) --
// confirm against the G_MOVEMEM / G_MTX handlers.
afterMovememRaTable:
.dh run_next_DL_command
.dh G_MTX_multiply_end
// Five shift amounts, one byte per clip condition, each computed as
// (31 - the condition's bit position). Order: -Y, +Y, -X, +X scaled planes,
// then the camera plane.
clipCondShifts:
.db (31 - CLIP_SCAL_NY_SHIFT) // Constants for clipping algorithm
.db (31 - CLIP_SCAL_PY_SHIFT)
.db (31 - CLIP_SCAL_NX_SHIFT)
.db (31 - CLIP_SCAL_PX_SHIFT)
.db (31 - CLIP_CAMPLANE_SHIFT)
mvpValid:
.db 0 // Nonzero if the MVP matrix is valid, 0 if it needs to be recomputed.
// The next four bytes must stay adjacent and in this order: the G_MW_NUMLIGHT
// moveword entry targets (numLightsxSize - 3), i.e. dirLightsXfrmValid, and
// writes through numLightsxSize.
dirLightsXfrmValid:
.db 0 // Nonzero if transformed directional lights are valid.
unused2:
.db 0
pointLightFlag:
.db 0 // Sign bit set if there are point lights.
numLightsxSize:
.db 0 // lightSize * number of lights
// Emit one byte of the command mini table for the handler at `addr`.
// The handler must lie in [0x1000, 0x1400); the stored byte is its offset from
// 0x1000 divided by 4 (shift it left by 2 and add 0x1000 to get `addr` back).
.macro miniTableEntry, addr
.if !((addr >= 0x1000) && (addr < 0x1400))
.error "Handler address out of range!"
.endif
.db ((addr - 0x1000) >> 2)
.endmacro
// RDP/Immediate Command Mini Table
// 1 byte per entry, after << 2 points to an addr in first 1/4 of IMEM
// The cmdMiniTable label sits in the middle of the table: the 44 entries
// before it are at negative offsets from the label, the entries after it are
// at offsets 0x00 and up.
// NOTE(review): presumably the table is indexed by the signed command byte, so
// the 44 entries before the label would be commands 0xD4-0xFF -- confirm
// against the GBI command numbering.
miniTableEntry G_FLUSH_handler
miniTableEntry G_MEMSET_handler
miniTableEntry G_DMA_IO_handler
miniTableEntry G_TEXTURE_handler
miniTableEntry G_POPMTX_handler
miniTableEntry G_GEOMETRYMODE_handler
miniTableEntry G_MTX_handler
miniTableEntry G_MOVEWORD_handler
miniTableEntry G_MOVEMEM_handler
miniTableEntry G_LOAD_UCODE_handler
miniTableEntry G_DL_handler
miniTableEntry G_ENDDL_handler
miniTableEntry G_SPNOOP_handler
miniTableEntry G_RDPHALF_1_handler
miniTableEntry G_SETOTHERMODE_L_handler
miniTableEntry G_SETOTHERMODE_H_handler
miniTableEntry G_TEXRECT_handler // G_TEXRECT
miniTableEntry G_TEXRECT_handler // G_TEXRECTFLIP
miniTableEntry G_RDP_handler // G_RDPLOADSYNC
miniTableEntry G_RDP_handler // G_RDPPIPESYNC
miniTableEntry G_RDP_handler // G_RDPTILESYNC
miniTableEntry G_RDP_handler // G_RDPFULLSYNC
miniTableEntry G_RDP_handler // G_SETKEYGB
miniTableEntry G_RDP_handler // G_SETKEYR
miniTableEntry G_RDP_handler // G_SETCONVERT
miniTableEntry G_SETSCISSOR_handler
miniTableEntry G_RDP_handler // G_SETPRIMDEPTH
miniTableEntry G_RDPSETOTHERMODE_handler
miniTableEntry load_cmds_handler // G_LOADTLUT
miniTableEntry G_RDPHALF_2_handler
miniTableEntry G_RDP_handler // G_SETTILESIZE
miniTableEntry load_cmds_handler // G_LOADBLOCK
miniTableEntry load_cmds_handler // G_LOADTILE
miniTableEntry G_RDP_handler // G_SETTILE
miniTableEntry G_RDP_handler // G_FILLRECT
miniTableEntry G_RDP_handler // G_SETFILLCOLOR
miniTableEntry G_RDP_handler // G_SETFOGCOLOR
miniTableEntry G_RDP_handler // G_SETBLENDCOLOR
miniTableEntry G_RDP_handler // G_SETPRIMCOLOR
miniTableEntry G_RDP_handler // G_SETENVCOLOR
miniTableEntry G_RDP_handler // G_SETCOMBINE
miniTableEntry G_SETxIMG_handler // G_SETTIMG
miniTableEntry G_SETxIMG_handler // G_SETZIMG
miniTableEntry G_SETxIMG_handler // G_SETCIMG
// Entries from here on are for commands 0x00, 0x01, ... in order (the entry
// for 0x09 is a filler, see below).
cmdMiniTable:
miniTableEntry G_RDP_handler // G_NOOP
miniTableEntry G_VTX_handler
miniTableEntry G_MODIFYVTX_handler
miniTableEntry G_CULLDL_handler
miniTableEntry G_BRANCH_WZ_handler
miniTableEntry G_TRI1_handler
miniTableEntry G_TRI2_handler
miniTableEntry G_QUAD_handler
miniTableEntry G_TRISNAKE_handler
miniTableEntry G_SPNOOP_handler // no command mapped to 0x09
miniTableEntry G_LIGHTTORDP_handler
miniTableEntry G_RELSEGMENT_handler
// The maximum number of generated vertices in a clip polygon. This equals the
// 7-vertex limit for a real clipped polygon derived below (CLIP_POLY_VERTS is
// one larger, see its comment), but it is a separate constant so the two can be
// changed independently for testing.
// In case you're wondering if it's possible to have a 7-vertex polygon where all
// 7 verts are generated, it looks like this (X = generated vertex):
//                               ___----=>
//   +---------------__X----X _-^
//   |         __--^^       X^
//   |   __--^^          _-^|
//  _X^^^             _-^   |
// C |             _-^      |
//  ^X          _-^         |
//   |\      _-^            |
//   +-X--_X^---------------+
//     V^
MAX_CLIP_GEN_VERTS equ 7
// Normally, each clip plane can cut off a "tip" of a polygon, turning one vert
// into two. (It can also cut off more of the polygon and remove additional verts,
// but the maximum is one more vert per clip plane.) So with 5 clip planes, we
// could have a maximum of 8 verts in the final polygon. However, the verts
// generated by the no-nearclipping plane will always be at infinity, so they
// will always get replaced by generated verts from one of the other clip planes.
// Put another way, if there are 8 verts in the final polygon, there are 8 edges,
// which are portions of the 3 original edges plus portions of 5 edges along the
// 5 clip planes. But the edge portion along the no-nearclipping plane is at
// infinity, so that edge can't be on screen. So an actual polygon can contain
// up to 7 verts. However, we are relying on 8 verts for circular addressing,
// and the current implementation temporarily inserts a vertex when moving from
// on to offscreen, so it can be 8 valid vertices momentarily.
CLIP_POLY_VERTS equ 8
// Two bytes per clip polygon entry. Parenthesized like every other compound
// equ in this file: equ is a textual substitution, so an unparenthesized
// product would bind incorrectly if this name were ever used next to a
// higher-precedence operator.
CLIP_POLY_SIZE_BYTES equ (CLIP_POLY_VERTS * 2)
// Sizes of the temporary clip vertex area and of the main vertex buffer.
CLIP_TEMP_VERTS_SIZE_BYTES equ (MAX_CLIP_GEN_VERTS * vtxSize)
VERTEX_BUFFER_SIZE_BYTES equ (G_MAX_VERTS * vtxSize)
// Each RDP command buffer is RDP_CMD_BUFSIZE bytes plus an excess area large
// enough for one more maximum-size triangle command.
RDP_CMD_BUFSIZE equ 0xB0
RDP_CMD_BUFSIZE_EXCESS equ 0xB0 // Maximum size of an RDP triangle command
RDP_CMD_BUFSIZE_TOTAL equ (RDP_CMD_BUFSIZE + RDP_CMD_BUFSIZE_EXCESS)
// The input buffer holds INPUT_BUFFER_CMDS commands of 8 bytes each.
INPUT_BUFFER_CMDS equ 21
INPUT_BUFFER_SIZE_BYTES equ (INPUT_BUFFER_CMDS * 8)
OSTASK_ORIG_SIZE equ 0x40 // First CLIP_POLY_SIZE_BYTES (0x10) of this is clipPoly.
// Address where the fixed-size regions at the top of DMEM begin: 0x1000 minus
// the OSTask, input buffer, both RDP command buffers, clip temp verts and the
// vertex buffer. Everything below this address is variable-length data.
END_VARIABLE_LEN_DMEM equ (0x1000 - OSTASK_ORIG_SIZE - INPUT_BUFFER_SIZE_BYTES - (2 * RDP_CMD_BUFSIZE_TOTAL) - CLIP_TEMP_VERTS_SIZE_BYTES - VERTEX_BUFFER_SIZE_BYTES)
// startFreeDmem marks the end of the variable-length data above, and
// endFreeDmem the start of the fixed-size regions placed at the top of DMEM;
// the space between the two labels is unused.
startFreeDmem:
.org END_VARIABLE_LEN_DMEM
endFreeDmem:
// Main vertex buffer in RSP internal format
vertexBuffer:
.skip VERTEX_BUFFER_SIZE_BYTES
// Space for temporary verts for clipping code, and reused for other things
clipTempVerts:
yieldOrigV1Addr:
.skip 2 // Needs to be saved over yield
// Round the current address up to 0x8. The rounding is anchored to the
// current location (.), not to clipTempVerts: anchoring to the label would
// move backwards onto yieldOrigV1Addr whenever clipTempVerts is 8-byte
// aligned (or one byte short of it).
.org ((. + 0x7) & 0xFF8)
texrectState:
.skip 8 // Only needs to be saved over texrect, half1, half2; but yield can happen
.if . > yieldDataFooter
// Need to fit everything through here in yield buffer
.error "Too much being stored in yieldable DMEM"
.endif
// Round the current address up to 0x10. Anchored to the current location for
// the same reason as above: rounding from the texrectState label would place
// tempMatrix on top of texrectState whenever that label is 16-byte aligned.
// NOTE(review): if overlapping tempMatrix with texrectState was intentional,
// revert this line -- confirm against the users of both labels.
.org ((. + 0xF) & 0xFF0)
tempMatrix:
.skip 0x40
// Everything above must fit inside the clip temp vertex area.
.if . > (clipTempVerts + CLIP_TEMP_VERTS_SIZE_BYTES)
.error "Too much in clipTempVerts"
.endif
// Continue at the end of the clip temp vertex area regardless of how much of
// it the fields above used.
.org (clipTempVerts + CLIP_TEMP_VERTS_SIZE_BYTES)
clipTempVertsEnd:
// First RDP Command Buffer
// Layout of each buffer: RDP_CMD_BUFSIZE bytes, then RDP_CMD_BUFSIZE_EXCESS
// bytes of excess (8 bytes + the remainder), RDP_CMD_BUFSIZE_TOTAL in all.
rdpCmdBuffer1:
.skip RDP_CMD_BUFSIZE
// Assemble-time guard: the end of the main part must be at an address that is
// 8 modulo 16, so that the ...EndPlus1Word label 8 bytes later is 16-byte
// aligned.
.if (. & 8) != 8
.error "RDP command buffer alignment to 8 assumption broken"
.endif
rdpCmdBuffer1End:
.skip 8
rdpCmdBuffer1EndPlus1Word:
// This is so that we can temporarily store vector regs here with lqv/sqv
.skip RDP_CMD_BUFSIZE_EXCESS - 8
// Second RDP Command Buffer
// Same layout and alignment requirement as the first buffer.
rdpCmdBuffer2:
.skip RDP_CMD_BUFSIZE
.if (. & 8) != 8
.error "RDP command buffer alignment to 8 assumption broken"
.endif
rdpCmdBuffer2End:
.skip 8
rdpCmdBuffer2EndPlus1Word:
.skip RDP_CMD_BUFSIZE_EXCESS - 8
// Input buffer. After RDP cmd buffers so it can be vector addressed from end.
inputBuffer:
.skip INPUT_BUFFER_SIZE_BYTES
inputBufferEnd:
// inputBufferEnd expressed as a negative offset from the end of DMEM (0x1000).
inputBufferEndSgn equ (-(0x1000 - inputBufferEnd)) // Underflow DMEM address
// 0x0FC0-0x1000: OSTask; 0x0FC0-0x0FD0: clipPoly
// OSTask and clipPoly label the same address: clipPoly occupies the first
// CLIP_POLY_SIZE_BYTES of the OSTASK_ORIG_SIZE-byte OSTask area.
OSTask:
clipPoly: // This is here for alignment and vector addressing, see rsp_defs.inc
// clipPoly expressed as a negative offset from the end of DMEM (0x1000).
clipPolySgn equ (-(0x1000 - clipPoly)) // Underflow DMEM address
.skip CLIP_POLY_SIZE_BYTES
// rest of OSTask
.skip (OSTASK_ORIG_SIZE - CLIP_POLY_SIZE_BYTES)
// Assemble-time guard: all regions together must end exactly at 0x1000.
.if . != 0x1000
.error "DMEM organization incorrect"
.endif
.close // DATA_FILE
// See rsp_defs.inc about why these are not used and we can reuse them.
// Each alias below reuses the storage of one OSTask field for another purpose.
startCounterTime equ (OSTask + OSTask_ucode_size)
// Expressed as a negative offset from the end of DMEM (0x1000), like
// inputBufferEndSgn and clipPolySgn.
xfrmLookatDirs equ -(0x1000 - (OSTask + OSTask_ucode_data)) // and OSTask_ucode_data_size
dumpDmemBuffer equ (OSTask + OSTask_yield_data_size) // CFG_PROFILING_B only
startFifoStallTime equ dumpDmemBuffer // CFG_PROFILING_A only
// Memset buffer: starts at vertexBuffer rounded up to 16 bytes, may extend at
// most to rdpCmdBuffer1 rounded down to 16 bytes, and is capped at 0x800 bytes.
memsetBufferStart equ ((vertexBuffer + 0xF) & 0xFF0)
memsetBufferMaxEnd equ (rdpCmdBuffer1 & 0xFF0)
memsetBufferMaxSize equ (memsetBufferMaxEnd - memsetBufferStart)
memsetBufferSize equ (memsetBufferMaxSize > 0x800 ? 0x800 : memsetBufferMaxSize)