Compare commits

..

2 Commits

Author SHA1 Message Date
Mr-Wiseguy
e113978834 Fixed typo in config_graphics.h 2022-01-22 22:19:40 -05:00
Mr-Wiseguy
37632f3954 Implemented shear shadows and slightly faster mtxf_to_mtx function 2022-01-22 22:13:12 -05:00
16 changed files with 301 additions and 5827 deletions

4
.gitignore vendored
View File

@@ -116,7 +116,3 @@ lib/libs2d_engine.a
# .orig files
*.orig
# Python
__pycache__
*.pyc

View File

@@ -29,7 +29,6 @@ This is a fork of the ultrasm64 repo by CrashOveride which includes the followin
- **CrashOveride**: creating the [ultrasm64](https://github.com/CrashOveride95/ultrasm64) repo
- **falcobuster**: Original coordinate overflow fix (world scale), ASM version of extended bounds
- **anonymous_moose**: porting falco's extended bounds to decomp
- **tuxlovesyou**: `LOAD_MIO0_TEXTURE` macro and moral support
Thanks to Frame#5375 and AloXado320 for also helping with silhouette stuff

View File

@@ -57,6 +57,11 @@
// Use 64x64 quarter shadow textures (Vanilla are 16x16).
#define HD_SHADOWS
// Stretches shadows to fit the terrain instead of rotating them to align with it.
// This makes them maintain a constant horizontal size.
// Performs better than regular shadows.
// #define SHEAR_SHADOWS
// Makes certain objects (mainly trees) transparent when the camera gets close.
// #define OBJ_OPACITY_BY_CAM_DIST

View File

@@ -310,9 +310,6 @@ enum GoddardScene {
CMD_PTR(romEnd)
#endif
#undef LOAD_MIO0_TEXTURE
#define LOAD_MIO0_TEXTURE(a,b,c) LOAD_YAY0_TEXTURE(a,b,c)
#define CHANGE_AREA_SKYBOX(area, segStart, segEnd) \
CMD_BBH(LEVEL_CMD_CHANGE_AREA_SKYBOX, 0x0C, area), \
CMD_PTR(segStart), \

View File

@@ -1,11 +0,0 @@
# Decomp sources that are compiled directly into the standalone Mario library.
DECOMP_INPUT_FILES := ../src/game/mario_actions_airborne.c ../src/game/mario_actions_automatic.c ../src/game/mario_actions_cutscene.c \
	../src/game/mario_actions_moving.c ../src/game/mario_actions_object.c ../src/game/mario_actions_stationary.c ../src/game/mario_actions_submerged.c \
	../src/game/mario_misc.c ../src/game/mario_step.c ../src/game/mario.c ../src/engine/math_util.c ../src/game/object_helpers.c

# Every translation unit that goes into the library (shim sources + decomp sources).
LIBMARIO_SOURCES := $(wildcard mariolib/*.c) $(DECOMP_INPUT_FILES)

# Flags shared by the Windows and Linux builds; kept in the original order.
LIBMARIO_FLAGS := -shared -DNON_MATCHING -DAVOID_UB -D_LANGUAGE_C -ggdb -I.. -I../include -I../include/n64 -I../src/engine -I../src/game -I../src -Ism64 -lm

# 'all' names no file, so mark it phony to keep a stray file called 'all' from masking it.
.PHONY: all
all: libmario.dll libmario.so

# Windows DLL, cross-compiled with MinGW-w64.
libmario.dll: $(LIBMARIO_SOURCES)
	x86_64-w64-mingw32-gcc $^ -o $@ $(LIBMARIO_FLAGS) -Wl,--subsystem,windows

# Linux shared object, built with the host gcc.
libmario.so: $(LIBMARIO_SOURCES)
	gcc $^ -o $@ $(LIBMARIO_FLAGS) -fPIC

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,205 +0,0 @@
#!/usr/bin/env python3
import inputs
import time
import threading
import sys
import fpstimer
import os
from ctypes import *
# Directory containing this script; the shared library is loaded from the same place.
pluginPath = os.path.dirname(os.path.realpath(__file__))
# Choose the shared-library file name for the host platform.
if sys.platform == 'win32':
    library = 'libmario.dll'
elif sys.platform == 'darwin':
    # macOS shared libraries use the '.dylib' extension ('.dynlib' is not a real one).
    library = 'libmario.dylib'
else:
    library = 'libmario.so'
libmario = CDLL(os.path.join(pluginPath, library))
# Fixed-size vector types matching the C typedefs used by the library.
Vec3f = c_float * 3
Vec3s = c_int16 * 3


class Surface(Structure):
    """ctypes mirror of the C collision-surface struct handed to the callbacks.

    The trailing comments give the byte offset and C type of each member as
    listed in the C declaration this layout was copied from.
    """
    _fields_ = [
        ('type', c_int16),           # 0x00  s16 type
        ('force', c_int16),          # 0x02  s16 force
        ('flags', c_int8),           # 0x04  s8 flags
        ('room', c_int8),            # 0x05  s8 room
        ('lowerY', c_int16),         # 0x06  s16 lowerY
        ('upperY', c_int16),         # 0x08  s16 upperY
        ('vertex1', Vec3s),          # 0x0A  Vec3s vertex1
        ('vertex2', Vec3s),          # 0x10  Vec3s vertex2
        ('vertex3', Vec3s),          # 0x16  Vec3s vertex3
        ('normal', Vec3f),           # 0x1C  struct { f32 x; f32 y; f32 z; } normal
        ('origin_offset', c_float),  # 0x28  f32 originOffset
        ('object', c_void_p),        # 0x2C  struct Object *object
    ]
# Callback signatures passed to libmario.init(); the first CFUNCTYPE argument is the return type.
FindFloorHandlerType = CFUNCTYPE(c_float, c_float, c_float, c_float, POINTER(Surface), POINTER(c_int32))
FindCeilHandlerType = CFUNCTYPE(c_float, c_float, c_float, c_float, POINTER(Surface), POINTER(c_int32))
FindWallsHandlerType = CFUNCTYPE(c_int32, c_float, c_float, c_float, c_float, c_float, POINTER(Surface), POINTER(c_float))
FindWaterLevelHandlerType = CFUNCTYPE(c_float, c_float, c_float)

# Prototypes for the exported functions. ctypes only honours the attribute named
# `argtypes`; the previous spelling `artypes` was silently stored as an unrelated
# attribute, so no argument checking or conversion was ever applied.
libmario.init.restype = None
# init() is intentionally left without argtypes: main() passes None for the
# callbacks it does not implement, and a CFUNCTYPE entry in argtypes rejects None.
# Expected parameters, in order: FindFloorHandlerType, FindCeilHandlerType,
# FindWallsHandlerType, FindWaterLevelHandlerType.
libmario.step.restype = None
libmario.step.argtypes = [c_int32, c_float, c_float]
libmario.getMarioPosition.restype = None
libmario.getMarioPosition.argtypes = [Vec3f]
libmario.getMarioVelocity.restype = None
libmario.getMarioVelocity.argtypes = [Vec3f]
libmario.getMarioAnimFrame.restype = c_int32
libmario.getMarioAnimFrame.argtypes = []
libmario.getMarioAnimIndex.restype = c_int32
libmario.getMarioAnimIndex.argtypes = []
# Raw controller button bits, one bit per button (bits 6 and 7 are unused).
CONT_A = 1 << 15
CONT_B = 1 << 14
CONT_G = 1 << 13
CONT_START = 1 << 12
CONT_UP = 1 << 11
CONT_DOWN = 1 << 10
CONT_LEFT = 1 << 9
CONT_RIGHT = 1 << 8
CONT_L = 1 << 5
CONT_R = 1 << 4
CONT_E = 1 << 3
CONT_D = 1 << 2
CONT_C = 1 << 1
CONT_F = 1 << 0

# Friendlier aliases for the raw bits above.
A_BUTTON = CONT_A
B_BUTTON = CONT_B
L_TRIG = CONT_L
R_TRIG = CONT_R
Z_TRIG = CONT_G
START_BUTTON = CONT_START
U_JPAD = CONT_UP
L_JPAD = CONT_LEFT
R_JPAD = CONT_RIGHT
D_JPAD = CONT_DOWN
U_CBUTTONS = CONT_E
L_CBUTTONS = CONT_C
R_CBUTTONS = CONT_F
D_CBUTTONS = CONT_D

# Module state: queue of gamepad event batches filled by worker(), the worker
# thread handle, and a handler slot that nothing in this file assigns.
events = []
_t = None
_handler = None
def find_floor(x, y, z, surface_out, found_out):
    """Stub floor callback: always reports the same flat, upward-facing triangle.

    Sets found_out[0] to 1, fills surface_out[0] with a fixed triangle at y = 0
    whose normal is (0, 1, 0) and whose origin offset is 0, and returns -1000.0.
    The query position (x, y, z) is ignored.
    """
    found_out[0] = 1
    floor = surface_out[0]

    corners = ((-100, 0, -100), (100, 0, -100), (100, 0, 100))
    vertices = (floor.vertex1, floor.vertex2, floor.vertex3)
    for vertex, corner in zip(vertices, corners):
        for axis, coordinate in enumerate(corner):
            vertex[axis] = coordinate

    for axis, component in enumerate((0.0, 1.0, 0.0)):
        floor.normal[axis] = component
    floor.origin_offset = 0.0
    return -1000.0
def find_water_level(x, z):
    """Stub water-level callback: returns the constant -100.0 for every (x, z)."""
    return -100.0


# The ctypes callback wrappers are kept in module-level names so they are not
# garbage collected while the native library still holds the function pointers.
find_floor_handler = FindFloorHandlerType(find_floor)
find_water_level_handler = FindWaterLevelHandlerType(find_water_level)
def worker():
    """Background loop: fetch gamepad event batches forever and queue them on `events`."""
    while True:
        batch = inputs.get_gamepad()
        events.append(batch)
def main():
    """Run the interactive loop: feed gamepad input to libmario at 30 steps per second.

    Starts the gamepad reader thread once, initialises the library with the stub
    floor and water callbacks, then repeatedly drains the queued input events,
    steps the simulation and prints Mario's state until Ctrl+C is pressed.
    """
    global _t

    if not _t:
        _t = threading.Thread(target=worker)
        _t.daemon = True
        _t.start()

    stick_x = 0.0
    stick_y = 0.0
    buttons = 0
    # Digital buttons: event code -> controller bit; pressed when state == 1.
    digital_buttons = {"BTN_SOUTH": A_BUTTON, "BTN_WEST": B_BUTTON}

    # No ceiling or wall callbacks are supplied.
    libmario.init(find_floor_handler, None, None, find_water_level_handler)
    timer = fpstimer.FPSTimer(30)

    try:
        while True:
            # Drain every event batch queued by the worker thread since the last step.
            while events:
                for event in events[0]:
                    code = event.code
                    state = event.state
                    if code == "ABS_X":
                        stick_x = float(state) / 32768.0
                    elif code == "ABS_Y":
                        stick_y = float(state) / 32768.0
                    elif code in ("ABS_RX", "ABS_RY"):
                        # Right stick is recognised but not used.
                        pass
                    elif code in digital_buttons:
                        mask = digital_buttons[code]
                        buttons = (buttons | mask) if state == 1 else (buttons & ~mask)
                    elif code == "ABS_Z":
                        # Analog trigger: counts as pressed only at full travel (255).
                        buttons = (buttons | Z_TRIG) if state == 255 else (buttons & ~Z_TRIG)
                    elif code != "SYN_REPORT":
                        print(code + ':' + str(state))
                events.pop(0)

            libmario.step(buttons, c_float(stick_x), c_float(stick_y))

            pos = Vec3f()
            vel = Vec3f()
            libmario.getMarioPosition(pos)
            libmario.getMarioVelocity(vel)
            print('Position: %8.2f %8.2f %8.2f Velocity: %8.2f %8.2f %8.2f Buttons: 0x%08X Anim: 0x%02X AnimFrame: %d' % (pos[0], pos[1], pos[2], vel[0], vel[1], vel[2], buttons, libmario.getMarioAnimIndex(), libmario.getMarioAnimFrame()))
            timer.sleep()
    except KeyboardInterrupt:
        print("Ctrl+C pressed...")
        sys.exit(0)


if __name__ == '__main__':
    main()

View File

@@ -128,9 +128,9 @@ void surface_normal_to_vec3f(Vec3f dest, struct Surface *surf) {
/// Convert float vector a to a short vector 'dest' by rounding the components to the nearest integer.
#define vec3_copy_bits_roundf(fmt, dest, src) { \
register fmt x = lroundf(src[0]); \
register fmt y = lroundf(src[1]); \
register fmt z = lroundf(src[2]); \
register fmt x = roundf(src[0]); \
register fmt y = roundf(src[1]); \
register fmt z = roundf(src[2]); \
((fmt *) dest)[0] = x; \
((fmt *) dest)[1] = y; \
((fmt *) dest)[2] = z; \
@@ -592,6 +592,31 @@ void mtxf_billboard(Mat4 dest, Mat4 mtx, Vec3f position, Vec3f scale, s32 angle)
* 'scale' is the scale of the shadow
* 'yaw' is the angle which it should face
*/
#ifdef SHEAR_SHADOWS
/**
 * SHEAR_SHADOWS variant: builds the shadow matrix by shearing a yaw-rotated,
 * scaled basis vertically instead of rotating it onto the surface.
 * The X and Z rows keep their horizontal extent and only gain a Y component
 * derived from 'upDir'; the Y row is simply (0, scale[1], 0).
 * Divides by upDir[1], so 'upDir' is assumed not to be horizontal.
 */
void mtxf_shadow(Mat4 dest, Mat4 src, Vec3f upDir, Vec3f pos, Vec3f scale, s32 yaw) {
    // Vertical shear factors along X and Z.
    float shearX = -upDir[0] / upDir[1];
    float shearZ = -upDir[2] / upDir[1];
    float yawCos = coss(yaw);
    float yawSin = sins(yaw);
    Vec3f row;

    // Row 0: local X axis, rotated by yaw and scaled by scale[0].
    float xCos = scale[0] * yawCos;
    float xSin = scale[0] * yawSin;
    row[0] = xCos;
    row[1] = xCos * shearX - xSin * shearZ;
    row[2] = -xSin;
    linear_mtxf_mul_vec3f(src, dest[0], row);

    // Row 1: local Y axis, left unsheared.
    row[0] = 0;
    row[1] = scale[1];
    row[2] = 0;
    linear_mtxf_mul_vec3f(src, dest[1], row);

    // Row 2: local Z axis, rotated by yaw and scaled by scale[2].
    float zSin = scale[2] * yawSin;
    float zCos = scale[2] * yawCos;
    row[0] = zSin;
    row[1] = zSin * shearX + zCos * shearZ;
    row[2] = zCos;
    linear_mtxf_mul_vec3f(src, dest[2], row);

    // Row 3: position pushed through the linear part of 'src', then offset by its translation.
    linear_mtxf_mul_vec3f(src, dest[3], pos);
    vec3f_add(dest[3], src[3]);
    MTXF_END(dest);
}
#else
void mtxf_shadow(Mat4 dest, Mat4 src, Vec3f upDir, Vec3f pos, Vec3f scale, s32 yaw) {
Vec3f lateralDir;
Vec3f leftDir;
@@ -613,6 +638,7 @@ void mtxf_shadow(Mat4 dest, Mat4 src, Vec3f upDir, Vec3f pos, Vec3f scale, s32 y
vec3f_add(dest[3], src[3]);
MTXF_END(dest);
}
#endif
/**
* Set 'dest' to a transformation matrix that aligns an object with the terrain
@@ -802,7 +828,7 @@ void get_pos_from_transform_mtx(Vec3f dest, Mat4 objMtx, register Mat4 camMtx) {
*x = (temp3[12] - temp2[12]);
temp2++;
temp3++;
x = (f32 *)(((uintptr_t)x) + 4);
x = (f32 *)(((u32)x) + 4);
}
temp2 -= 3;
for (i = 0; i < 3; i++) {
@@ -1294,6 +1320,203 @@ s32 anim_spline_poll(Vec3f result) {
return hasEnded;
}
/**************************************************
* RAYCASTING *
**************************************************/
#define RAY_OFFSET 30.0f /* How many units to extrapolate surfaces when testing for a raycast */
#define RAY_STEPS 4 /* How many steps to do when casting rays, default to quartersteps. */
/**
* @brief Checks if a ray intersects a surface using Möller–Trumbore intersection algorithm.
*
* @param orig is the starting point of the ray.
* @param dir is the normalized ray direction.
* @param dir_length is the length of the ray.
* @param surface is the surface to check.
* @param hit_pos returns the position on the surface where the ray intersects it.
* @param length returns the distance from the starting point to the hit position.
* @return s32 TRUE if the ray intersects a surface.
*/
s32 ray_surface_intersect(Vec3f orig, Vec3f dir, f32 dir_length, struct Surface *surface, Vec3f hit_pos, f32 *length) {
    // Intangible surfaces and surfaces flagged as not colliding with the camera never block a ray.
    if ((surface->type == SURFACE_INTANGIBLE) || (surface->flags & SURFACE_FLAG_NO_CAM_COLLISION)) {
        return FALSE;
    }

    // Triangle corners as float vectors.
    Vec3f vtxA, vtxB, vtxC;
    vec3s_to_vec3f(vtxA, surface->vertex1);
    vec3s_to_vec3f(vtxB, surface->vertex2);
    vec3s_to_vec3f(vtxC, surface->vertex3);

    // Push the whole triangle out along its normal by RAY_OFFSET units.
    Vec3f pushOut;
    surface_normal_to_vec3f(pushOut, surface);
    vec3_mul_val(pushOut, RAY_OFFSET);
    vec3f_add(vtxA, pushOut);
    vec3f_add(vtxB, pushOut);
    vec3f_add(vtxC, pushOut);

    // Edges sharing vertex A.
    Vec3f edgeAB;
    vec3f_diff(edgeAB, vtxB, vtxA);
    Vec3f edgeAC;
    vec3f_diff(edgeAC, vtxC, vtxA);

    // pVec = dir x edgeAC; its dot product with edgeAB is the determinant of the system.
    Vec3f pVec;
    vec3f_cross(pVec, dir, edgeAC);
    f32 det = vec3f_dot(edgeAB, pVec);

    // A determinant near zero means the ray runs parallel to the triangle's plane.
    if ((det > -NEAR_ZERO) && (det < NEAR_ZERO)) {
        return FALSE;
    }
    f32 invDet = 1.0f / det;

    // Vector from vertex A to the ray origin.
    Vec3f toOrig;
    vec3f_diff(toOrig, orig, vtxA);

    // First barycentric coordinate; must lie in [0, 1].
    f32 baryU = invDet * vec3f_dot(toOrig, pVec);
    if ((baryU < 0.0f) || (baryU > 1.0f)) {
        return FALSE;
    }

    // qVec = toOrig x edgeAB, used for the second coordinate and the distance.
    Vec3f qVec;
    vec3f_cross(qVec, toOrig, edgeAB);

    // Second barycentric coordinate; the pair must stay inside the triangle.
    f32 baryV = invDet * vec3f_dot(dir, qVec);
    if ((baryV < 0.0f) || ((baryU + baryV) > 1.0f)) {
        return FALSE;
    }

    // Distance along the ray from 'orig' to the plane of the triangle.
    *length = invDet * vec3f_dot(edgeAC, qVec);

    // Reject hits behind (or at) the origin and hits beyond the end of the ray.
    if ((*length <= NEAR_ZERO) || (*length > dir_length)) {
        return FALSE;
    }

    // Hit: hit_pos = orig + dir * length.
    Vec3f travel;
    vec3_prod_val(travel, dir, *length);
    vec3f_sum(hit_pos, orig, travel);
    return TRUE;
}
/**
 * Tests the ray against every surface in 'list' and keeps the closest hit.
 * A hit is accepted only if its distance is <= *max_length; when accepted,
 * *hit_surface, hit_pos and *max_length are updated to describe it.
 * 'dir' is the normalized direction and 'dir_length' the length of the ray.
 */
void find_surface_on_ray_list(struct SurfaceNode *list, Vec3f orig, Vec3f dir, f32 dir_length, struct Surface **hit_surface, Vec3f hit_pos, f32 *max_length) {
    struct SurfaceNode *node;
    Vec3f candidatePos;
    f32 candidateDist;
    f32 top, bottom;
#if PUPPYPRINT_DEBUG
    OSTime startTime = osGetTime();
#endif

    // Vertical extent of the ray: its origin height and its end height, ordered.
    f32 endY = orig[1] + (dir[1] * dir_length);
    if (dir[1] >= 0.0f) {
        // Ray points upwards.
        top    = endY;
        bottom = orig[1];
    } else {
        // Ray points downwards.
        top    = orig[1];
        bottom = endY;
    }

    for (node = list; node != NULL; node = node->next) {
        struct Surface *surf = node->surface;

        // Cheap rejection: the surface lies entirely above or below the ray.
        if ((surf->lowerY > top) || (surf->upperY < bottom)) {
            continue;
        }

        // Full intersection test against this surface.
        if (!ray_surface_intersect(orig, dir, dir_length, surf, candidatePos, &candidateDist)) {
            continue;
        }

        // Keep the hit only if it is at least as close as the best so far.
        if (candidateDist <= *max_length) {
            *hit_surface = surf;
            vec3f_copy(hit_pos, candidatePos);
            *max_length = candidateDist;
        }
    }
#if PUPPYPRINT_DEBUG
    collisionTime[perfIteration] += osGetTime() - startTime;
#endif
}
/**
 * Raycasts against the static and dynamic surface lists of one spatial-partition cell.
 * 'flags' (RAYCAST_FIND_*) selects which partitions are searched. Ceilings are
 * skipped when the ray points almost straight down and floors when it points
 * almost straight up. Cells outside the grid are ignored.
 */
void find_surface_on_ray_cell(s32 cellX, s32 cellZ, Vec3f orig, Vec3f normalized_dir, f32 dir_length, struct Surface **hit_surface, Vec3f hit_pos, f32 *max_length, s32 flags) {
    // Nothing to search outside the partition grid.
    if ((cellX < 0) || (cellX > (NUM_CELLS - 1)) || (cellZ < 0) || (cellZ > (NUM_CELLS - 1))) {
        return;
    }

    // Ceilings.
    if ((normalized_dir[1] > -NEAR_ONE) && (flags & RAYCAST_FIND_CEIL)) {
        find_surface_on_ray_list(gStaticSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_CEILS].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
        find_surface_on_ray_list(gDynamicSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_CEILS].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
    }

    // Floors.
    if ((normalized_dir[1] < NEAR_ONE) && (flags & RAYCAST_FIND_FLOOR)) {
        find_surface_on_ray_list(gStaticSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_FLOORS].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
        find_surface_on_ray_list(gDynamicSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_FLOORS].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
    }

    // Walls.
    if (flags & RAYCAST_FIND_WALL) {
        find_surface_on_ray_list(gStaticSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WALLS].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
        find_surface_on_ray_list(gDynamicSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WALLS].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
    }

    // Water.
    if (flags & RAYCAST_FIND_WATER) {
        find_surface_on_ray_list(gStaticSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WATER].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
        find_surface_on_ray_list(gDynamicSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WATER].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
    }
}
/**
 * Casts a ray from 'orig' along 'dir' (whose length is the ray length) and reports
 * the surface it hits. *hit_surface is NULL and hit_pos is orig + dir when nothing
 * is hit. 'flags' (RAYCAST_FIND_*) selects the surface categories to test.
 * The cells crossed by the ray are visited with a DDA walk of RAY_STEPS steps per
 * cell, stopping as soon as a cell produced a hit.
 */
void find_surface_on_ray(Vec3f orig, Vec3f dir, struct Surface **hit_surface, Vec3f hit_pos, s32 flags) {
    const f32 cellsPerUnit = 1.0f / CELL_SIZE;
    Vec3f unitDir;
    s32 i;

    // Default result: no surface, ray ends at its full length.
    *hit_surface = NULL;
    vec3f_sum(hit_pos, orig, dir);

    // Ray length and unit direction.
    f32 rayLength = vec3_mag(dir);
    f32 nearest = rayLength;
    vec3f_copy(unitDir, dir);
    vec3f_normalize(unitDir);

    // Starting position in cell coordinates (fractional and truncated).
    f32 cellPosX = (orig[0] + LEVEL_BOUNDARY_MAX) * cellsPerUnit;
    f32 cellPosZ = (orig[2] + LEVEL_BOUNDARY_MAX) * cellsPerUnit;
    s32 cellX = (s32) cellPosX;
    s32 cellZ = (s32) cellPosZ;

    // A (nearly) vertical ray, up or down, never leaves its cell: no DDA needed.
    if ((unitDir[1] >= NEAR_ONE) || (unitDir[1] <= -NEAR_ONE)) {
        find_surface_on_ray_cell(cellX, cellZ, orig, unitDir, rayLength, hit_surface, hit_pos, &nearest, flags);
        return;
    }

    // Number of DDA steps, based on the larger horizontal component of the ray.
    f32 absX = absf(dir[0]);
    f32 absZ = absf(dir[2]);
    f32 stepCount = (RAY_STEPS * ((absX >= absZ) ? absX : absZ)) * cellsPerUnit;

    // Per-step advance, in cell units.
    f32 stepX = (dir[0] / stepCount) * cellsPerUnit;
    f32 stepZ = (dir[2] / stepCount) * cellsPerUnit;

    for (i = 0; i < stepCount && *hit_surface == NULL; i++) {
        find_surface_on_ray_cell(cellX, cellZ, orig, unitDir, rayLength, hit_surface, hit_pos, &nearest, flags);

        // Advance and remember which cell we came from.
        s32 prevCellX = cellX;
        s32 prevCellZ = cellZ;
        cellPosX += stepX;
        cellPosZ += stepZ;
        cellX = (s32) cellPosX;
        cellZ = (s32) cellPosZ;

        // A diagonal move skips over two neighbouring cells; test those as well.
        if ((prevCellX != cellX) && (prevCellZ != cellZ)) {
            find_surface_on_ray_cell(cellX, prevCellZ, orig, unitDir, rayLength, hit_surface, hit_pos, &nearest, flags);
            find_surface_on_ray_cell(prevCellX, cellZ, orig, unitDir, rayLength, hit_surface, hit_pos, &nearest, flags);
        }
    }
}
// Constructs a float in registers, which can be faster than gcc's default of loading a float from rodata.
// Especially fast for halfword floats, which get loaded with a `lui` + `mtc1`.
static ALWAYS_INLINE float construct_float(const float f)
@@ -1333,40 +1556,63 @@ static ALWAYS_INLINE float construct_float(const float f)
return f_out;
}
// Single-precision multiply emitted directly as a `mul.s` instruction via inline asm.
// Returns a * b.
static ALWAYS_INLINE float mul_without_nop(float a, float b)
{
    float product;

    __asm__ ("mul.s %0, %1, %2" : "=f"(product) : "f"(a), "f"(b));

    return product;
}
// Emits a MIPS `swl` (store word left) of 'val' at 'offset' bytes past 'addr'.
// 'offset' must be a compile-time constant that fits the instruction's 16-bit immediate.
//
// Constraint notes:
//  - 'addr' and 'val' use "r": the instruction only accepts registers there, while
//    the previous "g" also allowed the compiler to substitute a memory or immediate
//    operand, which would not assemble.
//  - The "memory" clobber tells the compiler that this asm writes memory, so
//    surrounding stores to the same location are not reordered across it.
static ALWAYS_INLINE void swl(void* addr, s32 val, const int offset)
{
    __asm__ __volatile__ ("swl %1, %2(%0)"
                        :
                        : "r"(addr), "r"(val), "I"(offset)
                        : "memory");
}
// Converts a floating point matrix to a fixed point matrix
// Makes some assumptions about certain fields in the matrix, which will always be true for valid matrices.
__attribute__((optimize("Os")))
__attribute__((optimize("Os"))) __attribute__((aligned(32)))
void mtxf_to_mtx_fast(s16* dst, float* src)
{
int i;
float scale = construct_float(65536.0f / WORLD_SCALE);
// Iterate over pairs of values in the input matrix
for (int i = 0; i < 8; i++)
// Iterate over rows of values in the input matrix
for (i = 0; i < 4; i++)
{
// Read the first input in the current pair
float a = src[2 * i + 0];
// Read the three input in the current row (assume the fourth is zero)
float a = src[4 * i + 0];
float b = src[4 * i + 1];
float c = src[4 * i + 2];
float a_scaled = mul_without_nop(a,scale);
float b_scaled = mul_without_nop(b,scale);
float c_scaled = mul_without_nop(c,scale);
// Convert the first input to fixed
s32 a_int = (s32)(a * scale);
dst[2 * i + 0] = (s16)(a_int >> 16);
dst[2 * i + 16] = (s16)(a_int >> 0);
// Convert the three inputs to fixed
s32 a_int = (s32)a_scaled;
s32 b_int = (s32)b_scaled;
s32 c_int = (s32)c_scaled;
s32 c_high = c_int & 0xFFFF0000;
s32 c_low = c_int << 16;
// Write the integer part of a, as well as garbage into the next two bytes.
// Those two bytes will get overwritten by the integer part of b.
// This prevents needing to shift or mask the integer value of a.
*(s32*)(&dst[4 * i + 0]) = a_int;
// Write the fractional part of a
dst[4 * i + 16] = (s16)a_int;
// If this is the left half of the matrix, convert the second input to fixed
if ((i & 1) == 0)
{
// Read the second input in the current pair
float b = src[2 * i + 1];
s32 b_int = (s32)(b * scale);
dst[2 * i + 1] = (s16)(b_int >> 16);
dst[2 * i + 17] = (s16)(b_int >> 0);
}
// Otherwise, skip the second input because column 4 will always be zero
// Row 4 column 4 is handled after the loop.
else
{
dst[2 * i + 1] = 0;
dst[2 * i + 17] = 0;
}
// Write the integer part of b using swl to avoid needing to shift.
swl(dst + 4 * i, b_int, 2);
// Write the fractional part of b.
dst[4 * i + 17] = (s16)b_int;
// Write the integer part of c and two zeroes for the 4th column.
*(s32*)(&dst[4 * i + 2]) = c_high;
// Write the fractional part of c and two zeroes for the 4th column
*(s32*)(&dst[4 * i + 18]) = c_low;
}
// Write 1.0 to the bottom right entry in the output matrix
// The low half was already set to zero in the loop, so we only need

View File

@@ -212,16 +212,16 @@ extern f32 gSineTable[];
}
#define vec2_copy_roundf(dst, src) { \
(dst)[0] = lroundf((src)[0]); \
(dst)[1] = lroundf((src)[1]); \
(dst)[0] = roundf((src)[0]); \
(dst)[1] = roundf((src)[1]); \
}
#define vec3_copy_roundf(dst, src) { \
vec2_copy_roundf((dst), (src)); \
(dst)[2] = lroundf((src)[2]); \
(dst)[2] = roundf((src)[2]); \
}
#define vec4_copy_roundf(dst, src) { \
vec3_copy_roundf((dst), (src)); \
(dst)[3] = lroundf((src)[3]); \
(dst)[3] = roundf((src)[3]); \
}
#define vec2_copy_inverse(dst, src) { \
@@ -451,32 +451,23 @@ extern f32 gSineTable[];
#define ABS(x) (((x) > 0) ? (x) : -(x))
#ifdef TARGET_N64
ALWAYS_INLINE s32 lroundf(f32 in) {
/// From Wiseguy
ALWAYS_INLINE s32 roundf(f32 in) {
f32 tmp;
s32 out;
__asm__("round.w.s %0,%1" : "=f" (tmp) : "f" (in ));
__asm__("mfc1 %0,%1" : "=r" (out) : "f" (tmp));
return out;
}
#else
long lroundf(f32 in);
#endif
// backwards compatibility
#define round_float(in) lroundf(in)
#define round_float(in) roundf(in)
#ifdef TARGET_N64
/// Absolute value of a float, using the FPU's `abs.s` instruction.
ALWAYS_INLINE f32 absf(f32 in) {
    f32 out;
    __asm__("abs.s %0,%1" : "=f" (out) : "f" (in));
    return out;
}
#else
/// Absolute value of a float (non-N64 builds).
/// Uses the compiler builtin so that +0.0f stays +0.0f; the previous
/// `in > 0 ? in : -in` form returned -0.0f for a +0.0f input.
ALWAYS_INLINE f32 absf(f32 in) {
    return __builtin_fabsf(in);
}
#endif
/// Absolute value of a signed 32-bit integer.
ALWAYS_INLINE s32 absi(s32 in) {
    return (in > 0) ? in : -in;
}

View File

@@ -914,200 +914,3 @@ s32 unused_resolve_floor_or_ceil_collisions(s32 checkCeil, f32 *px, f32 *py, f32
return 0;
}
/**************************************************
* RAYCASTING *
**************************************************/
#define RAY_OFFSET 30.0f /* How many units to extrapolate surfaces when testing for a raycast */
#define RAY_STEPS 4 /* How many steps to do when casting rays, default to quartersteps. */
/**
* @brief Checks if a ray intersects a surface using Möller–Trumbore intersection algorithm.
*
* @param orig is the starting point of the ray.
* @param dir is the normalized ray direction.
* @param dir_length is the length of the ray.
* @param surface is the surface to check.
* @param hit_pos returns the position on the surface where the ray intersects it.
* @param length returns the distance from the starting point to the hit position.
* @return s32 TRUE if the ray intersects a surface.
*/
s32 ray_surface_intersect(Vec3f orig, Vec3f dir, f32 dir_length, struct Surface *surface, Vec3f hit_pos, f32 *length) {
    // Intangible surfaces and surfaces flagged as not colliding with the camera never block a ray.
    if ((surface->type == SURFACE_INTANGIBLE) || (surface->flags & SURFACE_FLAG_NO_CAM_COLLISION)) {
        return FALSE;
    }

    // Triangle corners as float vectors.
    Vec3f vtxA, vtxB, vtxC;
    vec3s_to_vec3f(vtxA, surface->vertex1);
    vec3s_to_vec3f(vtxB, surface->vertex2);
    vec3s_to_vec3f(vtxC, surface->vertex3);

    // Push the whole triangle out along its normal by RAY_OFFSET units.
    Vec3f pushOut;
    surface_normal_to_vec3f(pushOut, surface);
    vec3_mul_val(pushOut, RAY_OFFSET);
    vec3f_add(vtxA, pushOut);
    vec3f_add(vtxB, pushOut);
    vec3f_add(vtxC, pushOut);

    // Edges sharing vertex A.
    Vec3f edgeAB;
    vec3f_diff(edgeAB, vtxB, vtxA);
    Vec3f edgeAC;
    vec3f_diff(edgeAC, vtxC, vtxA);

    // pVec = dir x edgeAC; its dot product with edgeAB is the determinant of the system.
    Vec3f pVec;
    vec3f_cross(pVec, dir, edgeAC);
    f32 det = vec3f_dot(edgeAB, pVec);

    // A determinant near zero means the ray runs parallel to the triangle's plane.
    if ((det > -NEAR_ZERO) && (det < NEAR_ZERO)) {
        return FALSE;
    }
    f32 invDet = 1.0f / det;

    // Vector from vertex A to the ray origin.
    Vec3f toOrig;
    vec3f_diff(toOrig, orig, vtxA);

    // First barycentric coordinate; must lie in [0, 1].
    f32 baryU = invDet * vec3f_dot(toOrig, pVec);
    if ((baryU < 0.0f) || (baryU > 1.0f)) {
        return FALSE;
    }

    // qVec = toOrig x edgeAB, used for the second coordinate and the distance.
    Vec3f qVec;
    vec3f_cross(qVec, toOrig, edgeAB);

    // Second barycentric coordinate; the pair must stay inside the triangle.
    f32 baryV = invDet * vec3f_dot(dir, qVec);
    if ((baryV < 0.0f) || ((baryU + baryV) > 1.0f)) {
        return FALSE;
    }

    // Distance along the ray from 'orig' to the plane of the triangle.
    *length = invDet * vec3f_dot(edgeAC, qVec);

    // Reject hits behind (or at) the origin and hits beyond the end of the ray.
    if ((*length <= NEAR_ZERO) || (*length > dir_length)) {
        return FALSE;
    }

    // Hit: hit_pos = orig + dir * length.
    Vec3f travel;
    vec3_prod_val(travel, dir, *length);
    vec3f_sum(hit_pos, orig, travel);
    return TRUE;
}
/**
 * Tests the ray against every surface in 'list' and keeps the closest hit.
 * A hit is accepted only if its distance is <= *max_length; when accepted,
 * *hit_surface, hit_pos and *max_length are updated to describe it.
 * 'dir' is the normalized direction and 'dir_length' the length of the ray.
 */
void find_surface_on_ray_list(struct SurfaceNode *list, Vec3f orig, Vec3f dir, f32 dir_length, struct Surface **hit_surface, Vec3f hit_pos, f32 *max_length) {
    struct SurfaceNode *node;
    Vec3f candidatePos;
    f32 candidateDist;
    f32 top, bottom;
#if PUPPYPRINT_DEBUG
    OSTime startTime = osGetTime();
#endif

    // Vertical extent of the ray: its origin height and its end height, ordered.
    f32 endY = orig[1] + (dir[1] * dir_length);
    if (dir[1] >= 0.0f) {
        // Ray points upwards.
        top    = endY;
        bottom = orig[1];
    } else {
        // Ray points downwards.
        top    = orig[1];
        bottom = endY;
    }

    for (node = list; node != NULL; node = node->next) {
        struct Surface *surf = node->surface;

        // Cheap rejection: the surface lies entirely above or below the ray.
        if ((surf->lowerY > top) || (surf->upperY < bottom)) {
            continue;
        }

        // Full intersection test against this surface.
        if (!ray_surface_intersect(orig, dir, dir_length, surf, candidatePos, &candidateDist)) {
            continue;
        }

        // Keep the hit only if it is at least as close as the best so far.
        if (candidateDist <= *max_length) {
            *hit_surface = surf;
            vec3f_copy(hit_pos, candidatePos);
            *max_length = candidateDist;
        }
    }
#if PUPPYPRINT_DEBUG
    collisionTime[perfIteration] += osGetTime() - startTime;
#endif
}
/**
 * Raycasts against the static and dynamic surface lists of one spatial-partition cell.
 * 'flags' (RAYCAST_FIND_*) selects which partitions are searched. Ceilings are
 * skipped when the ray points almost straight down and floors when it points
 * almost straight up. Cells outside the grid are ignored.
 */
void find_surface_on_ray_cell(s32 cellX, s32 cellZ, Vec3f orig, Vec3f normalized_dir, f32 dir_length, struct Surface **hit_surface, Vec3f hit_pos, f32 *max_length, s32 flags) {
    // Nothing to search outside the partition grid.
    if ((cellX < 0) || (cellX > (NUM_CELLS - 1)) || (cellZ < 0) || (cellZ > (NUM_CELLS - 1))) {
        return;
    }

    // Ceilings.
    if ((normalized_dir[1] > -NEAR_ONE) && (flags & RAYCAST_FIND_CEIL)) {
        find_surface_on_ray_list(gStaticSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_CEILS].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
        find_surface_on_ray_list(gDynamicSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_CEILS].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
    }

    // Floors.
    if ((normalized_dir[1] < NEAR_ONE) && (flags & RAYCAST_FIND_FLOOR)) {
        find_surface_on_ray_list(gStaticSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_FLOORS].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
        find_surface_on_ray_list(gDynamicSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_FLOORS].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
    }

    // Walls.
    if (flags & RAYCAST_FIND_WALL) {
        find_surface_on_ray_list(gStaticSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WALLS].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
        find_surface_on_ray_list(gDynamicSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WALLS].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
    }

    // Water.
    if (flags & RAYCAST_FIND_WATER) {
        find_surface_on_ray_list(gStaticSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WATER].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
        find_surface_on_ray_list(gDynamicSurfacePartition[cellZ][cellX][SPATIAL_PARTITION_WATER].next,
                                 orig, normalized_dir, dir_length, hit_surface, hit_pos, max_length);
    }
}
/**
 * Casts a ray from 'orig' along 'dir' (whose length is the ray length) and reports
 * the surface it hits. *hit_surface is NULL and hit_pos is orig + dir when nothing
 * is hit. 'flags' (RAYCAST_FIND_*) selects the surface categories to test.
 * The cells crossed by the ray are visited with a DDA walk of RAY_STEPS steps per
 * cell, stopping as soon as a cell produced a hit.
 */
void find_surface_on_ray(Vec3f orig, Vec3f dir, struct Surface **hit_surface, Vec3f hit_pos, s32 flags) {
    const f32 cellsPerUnit = 1.0f / CELL_SIZE;
    Vec3f unitDir;
    s32 i;

    // Default result: no surface, ray ends at its full length.
    *hit_surface = NULL;
    vec3f_sum(hit_pos, orig, dir);

    // Ray length and unit direction.
    f32 rayLength = vec3_mag(dir);
    f32 nearest = rayLength;
    vec3f_copy(unitDir, dir);
    vec3f_normalize(unitDir);

    // Starting position in cell coordinates (fractional and truncated).
    f32 cellPosX = (orig[0] + LEVEL_BOUNDARY_MAX) * cellsPerUnit;
    f32 cellPosZ = (orig[2] + LEVEL_BOUNDARY_MAX) * cellsPerUnit;
    s32 cellX = (s32) cellPosX;
    s32 cellZ = (s32) cellPosZ;

    // A (nearly) vertical ray, up or down, never leaves its cell: no DDA needed.
    if ((unitDir[1] >= NEAR_ONE) || (unitDir[1] <= -NEAR_ONE)) {
        find_surface_on_ray_cell(cellX, cellZ, orig, unitDir, rayLength, hit_surface, hit_pos, &nearest, flags);
        return;
    }

    // Number of DDA steps, based on the larger horizontal component of the ray.
    f32 absX = absf(dir[0]);
    f32 absZ = absf(dir[2]);
    f32 stepCount = (RAY_STEPS * ((absX >= absZ) ? absX : absZ)) * cellsPerUnit;

    // Per-step advance, in cell units.
    f32 stepX = (dir[0] / stepCount) * cellsPerUnit;
    f32 stepZ = (dir[2] / stepCount) * cellsPerUnit;

    for (i = 0; i < stepCount && *hit_surface == NULL; i++) {
        find_surface_on_ray_cell(cellX, cellZ, orig, unitDir, rayLength, hit_surface, hit_pos, &nearest, flags);

        // Advance and remember which cell we came from.
        s32 prevCellX = cellX;
        s32 prevCellZ = cellZ;
        cellPosX += stepX;
        cellPosZ += stepZ;
        cellX = (s32) cellPosX;
        cellZ = (s32) cellPosZ;

        // A diagonal move skips over two neighbouring cells; test those as well.
        if ((prevCellX != cellX) && (prevCellZ != cellZ)) {
            find_surface_on_ray_cell(cellX, prevCellZ, orig, unitDir, rayLength, hit_surface, hit_pos, &nearest, flags);
            find_surface_on_ray_cell(prevCellX, cellZ, orig, unitDir, rayLength, hit_surface, hit_pos, &nearest, flags);
        }
    }
}

View File

@@ -1448,7 +1448,7 @@ void render_hud_cannon_reticle(void) {
gSPDisplayList(gDisplayListHead++, dl_draw_triangle);
gSPPopMatrix(gDisplayListHead++, G_MTX_MODELVIEW);
gSPPopMatrix(gDisplayListHead++, G_MTX_MODELVIEW);
// gSPPopMatrix(gDisplayListHead++, G_MTX_MODELVIEW);
}
void reset_red_coins_collected(void) {

View File

@@ -361,6 +361,16 @@ void save_file_load_all(void) {
}
#ifdef PUPPYCAM
void puppycam_check_save(void) {
if (gSaveBuffer.menuData.firstBoot != 4
|| gSaveBuffer.menuData.saveOptions.sensitivityX < 5
|| gSaveBuffer.menuData.saveOptions.sensitivityY < 5) {
wipe_main_menu_data();
gSaveBuffer.menuData.firstBoot = 4;
puppycam_default_config();
}
}
void puppycam_get_save(void) {
gPuppyCam.options = gSaveBuffer.menuData.saveOptions;
@@ -384,15 +394,6 @@ void puppycam_set_save(void) {
gMainMenuDataModified = TRUE;
save_main_menu_data();
}
/**
 * If the firstBoot marker in the stored menu data is not 4, wipes the main menu
 * data, sets the marker to 4, resets the Puppycam configuration to its defaults
 * and writes the result back through puppycam_set_save().
 */
void puppycam_check_save(void) {
    // Marker already present: the stored data is kept as it is.
    if (gSaveBuffer.menuData.firstBoot == 4) {
        return;
    }

    wipe_main_menu_data();
    gSaveBuffer.menuData.firstBoot = 4;
    puppycam_default_config();
    puppycam_set_save();
}
#endif
/**

View File

@@ -130,8 +130,8 @@ void make_tex_transition_vertex(Vtx *verts, s32 n, s8 fadeTimer, struct WarpTran
u16 zeroTimer = sTransitionTextureFadeCount[fadeTimer];
f32 centerX = texRadius1 * coss(zeroTimer) - texRadius2 * sins(zeroTimer) + centerTransX;
f32 centerY = texRadius1 * sins(zeroTimer) + texRadius2 * coss(zeroTimer) + centerTransY;
s16 x = lroundf(centerX);
s16 y = lroundf(centerY);
s16 x = roundf(centerX);
s16 y = roundf(centerY);
make_vertex(verts, n, x, y, -1, tx * 32, ty * 32, r, g, b, 255);
}

View File

@@ -161,7 +161,7 @@ s32 calculate_skybox_scaled_y(s8 player, UNUSED f32 fov) {
// Scale by 360 / fov
f32 degreesToScale = 360.0f * pitchInDegrees / 90.0f;
s32 roundedY = lroundf(degreesToScale);
s32 roundedY = roundf(degreesToScale);
// Since pitch can be negative, and the tile grid starts 1 octant above the camera's focus, add
// 5 octants to the y position