Submitting on behalf of Refik.Karic

ISPC templates integration
#rb alex.mcadams, jeff.rous

[CL 31018238 by alex mcadams in ue5-main branch]
This commit is contained in:
alex mcadams
2024-01-30 15:20:51 -05:00
parent 3c9e10eb06
commit 85516efd79
7 changed files with 1338 additions and 856 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -176,14 +176,8 @@ inline uniform FVector MatrixGetOrigin(const uniform FMatrix &M)
return SetVector(M.M[12], M.M[13], M.M[14]);
}
// Reads three 3-component vectors out of the matrix element array M.M and writes them to the
// output references: X from elements 0..2, Y from elements 4..6 and Z from elements 8..10.
// Elements 3, 7 and 11 are skipped, and nothing from index 12 onward is read.
// @param M Source matrix (uniform FMatrix44d); not modified.
// @param X Receives (M.M[0], M.M[1], M.M[2]).
// @param Y Receives (M.M[4], M.M[5], M.M[6]).
// @param Z Receives (M.M[8], M.M[9], M.M[10]).
inline void MatrixGetScaledAxes(const uniform FMatrix44d &M, uniform FVector3d &X, uniform FVector3d &Y, uniform FVector3d &Z)
{
X = SetVector(M.M[0], M.M[1], M.M[2]);
Y = SetVector(M.M[4], M.M[5], M.M[6]);
Z = SetVector(M.M[8], M.M[9], M.M[10]);
}
inline void MatrixGetScaledAxes(const uniform FMatrix44f &M, uniform FVector3f &X, uniform FVector3f &Y, uniform FVector3f &Z)
template <typename T, typename V>
inline void MatrixGetScaledAxes(const T& M, V &X, V &Y, V &Z)
{
X = SetVector(M.M[0], M.M[1], M.M[2]);
Y = SetVector(M.M[4], M.M[5], M.M[6]);
@@ -271,41 +265,27 @@ inline uniform FMatrix MatrixTranspose(const uniform FMatrix& M)
// We use __m128 to represent a 2x2 matrix as A = | A0 A1 |
//                                                 | A2 A3 |
// 2x2 row major Matrix multiply A*B
// Product A*B of two 2x2 row-major matrices, each packed into one 4-component vector
// as (A0, A1, A2, A3) for the matrix | A0 A1 |
//                                    | A2 A3 |.
// Assuming VectorSwizzle(V, i,j,k,l) yields (V[i], V[j], V[k], V[l]) (helper not visible here),
// the returned components are:
//   (A0*B0 + A1*B2, A1*B3 + A0*B1, A2*B0 + A3*B2, A3*B3 + A2*B1)
// @param vec1 Left matrix A (uniform FVector4d).
// @param vec2 Right matrix B (uniform FVector4d).
// @return The packed 2x2 product.
static inline uniform FVector4d Mat2Mul(const uniform FVector4d& vec1, const uniform FVector4d& vec2)
{
return
// First partial product: each element of A times the diagonal elements of B.
VectorAdd(VectorMultiply( vec1, VectorSwizzle(vec2, 0,3,0,3)),
// Second partial product: A with its columns swapped, times the off-diagonal elements of B.
VectorMultiply(VectorSwizzle(vec1, 1,0,3,2), VectorSwizzle(vec2, 2,1,2,1)));
}
static inline uniform FVector4f Mat2Mul(const uniform FVector4f& vec1, const uniform FVector4f& vec2)
// Generic form of the 2x2 row-major matrix product A*B, where each matrix is packed into one
// 4-component vector of type T as (A0, A1, A2, A3) for | A0 A1 |
//                                                      | A2 A3 |.
// Assuming VectorSwizzle(V, i,j,k,l) yields (V[i], V[j], V[k], V[l]) (helper not visible here),
// the returned components are:
//   (A0*B0 + A1*B2, A1*B3 + A0*B1, A2*B0 + A3*B2, A3*B3 + A2*B1)
// T must be a type for which VectorAdd, VectorMultiply and VectorSwizzle are available.
// @param vec1 Left matrix A.
// @param vec2 Right matrix B.
// @return The packed 2x2 product.
template <typename T>
static inline uniform T Mat2Mul(const uniform T& vec1, const uniform T& vec2)
{
return
// First partial product: each element of A times the diagonal elements of B.
VectorAdd(VectorMultiply( vec1, VectorSwizzle(vec2, 0,3,0,3)),
// Second partial product: A with its columns swapped, times the off-diagonal elements of B.
VectorMultiply(VectorSwizzle(vec1, 1,0,3,2), VectorSwizzle(vec2, 2,1,2,1)));
}
// 2x2 row major Matrix adjugate multiply (A#)*B
static inline uniform FVector4d Mat2AdjMul(const uniform FVector4d& vec1, const uniform FVector4d& vec2)
// Generic form of the 2x2 row-major product (A#)*B, where A# is the adjugate of A and each
// matrix is packed into one 4-component vector of type T as (A0, A1, A2, A3).
// Assuming VectorSwizzle(V, i,j,k,l) yields (V[i], V[j], V[k], V[l]) (helper not visible here),
// the returned components are:
//   (A3*B0 - A1*B2, A3*B1 - A1*B3, A0*B2 - A2*B0, A0*B3 - A2*B1)
// T must be a type for which VectorSubtract, VectorMultiply and VectorSwizzle are available.
// @param vec1 Matrix A whose adjugate is used.
// @param vec2 Right matrix B.
// @return The packed 2x2 result.
template <typename T>
static inline uniform T Mat2AdjMul(const uniform T& vec1, const uniform T& vec2)
{
return
// Diagonal of the adjugate (A3, A3, A0, A0) times B.
VectorSubtract(VectorMultiply(VectorSwizzle(vec1, 3,3,0,0), vec2),
// Off-diagonal of A (A1, A1, A2, A2) times B with its rows swapped; subtracted from the above.
VectorMultiply(VectorSwizzle(vec1, 1,1,2,2), VectorSwizzle(vec2, 2,3,0,1)));
}
// 2x2 row-major product (A#)*B for single-precision data, where A# is the adjugate of A and
// each matrix is packed into one FVector4f as (A0, A1, A2, A3).
// Assuming VectorSwizzle(V, i,j,k,l) yields (V[i], V[j], V[k], V[l]) (helper not visible here),
// the returned components are:
//   (A3*B0 - A1*B2, A3*B1 - A1*B3, A0*B2 - A2*B0, A0*B3 - A2*B1)
// @param vec1 Matrix A whose adjugate is used.
// @param vec2 Right matrix B.
// @return The packed 2x2 result.
static inline uniform FVector4f Mat2AdjMul(const uniform FVector4f& vec1, const uniform FVector4f& vec2)
{
return
// Diagonal of the adjugate (A3, A3, A0, A0) times B.
VectorSubtract(VectorMultiply(VectorSwizzle(vec1, 3,3,0,0), vec2),
// Off-diagonal of A (A1, A1, A2, A2) times B with its rows swapped; subtracted from the above.
VectorMultiply(VectorSwizzle(vec1, 1,1,2,2), VectorSwizzle(vec2, 2,3,0,1)));
}
// 2x2 row major Matrix multiply adjugate A*(B#)
// 2x2 row-major product A*(B#) for double-precision data, where B# is the adjugate of B and
// each matrix is packed into one FVector4d as (A0, A1, A2, A3).
// Assuming VectorSwizzle(V, i,j,k,l) yields (V[i], V[j], V[k], V[l]) (helper not visible here),
// the returned components are:
//   (A0*B3 - A1*B2, A1*B0 - A0*B1, A2*B3 - A3*B2, A3*B0 - A2*B1)
// @param vec1 Left matrix A.
// @param vec2 Matrix B whose adjugate is used.
// @return The packed 2x2 result.
static inline uniform FVector4d Mat2MulAdj(const uniform FVector4d& vec1, const uniform FVector4d& vec2)
{
return
// A times the diagonal of the adjugate of B (B3, B0, B3, B0).
VectorSubtract(VectorMultiply( vec1, VectorSwizzle(vec2, 3,0,3,0)),
// A with its columns swapped, times the off-diagonal elements of B; subtracted from the above.
VectorMultiply(VectorSwizzle(vec1, 1,0,3,2), VectorSwizzle(vec2, 2,1,2,1)));
}
static inline uniform FVector4f Mat2MulAdj(const uniform FVector4f& vec1, const uniform FVector4f& vec2)
template <typename T>
static inline uniform T Mat2MulAdj(const uniform T& vec1, const uniform T& vec2)
{
return
VectorSubtract(VectorMultiply( vec1, VectorSwizzle(vec2, 3,0,3,0)),
@@ -572,57 +552,10 @@ inline uniform FMatrix44f MatrixInverse(const uniform FMatrix44f& M)
return Result;
}
inline uniform FVector4d VectorTransformVector(const uniform FVector4d &VecP, const uniform FMatrix44d &M)
template <typename T, typename V>
inline T VectorTransformVector(const T& VecP, const V& M)
{
uniform FVector4d VTempX, VTempY, VTempZ, VTempW;
// Splat x,y,z and w
VTempX = VectorReplicate(VecP, 0);
VTempY = VectorReplicate(VecP, 1);
VTempZ = VectorReplicate(VecP, 2);
VTempW = VectorReplicate(VecP, 3);
// Mul by the matrix
VTempX = VectorMultiply(VTempX, SetVector4(M.M[0], M.M[1], M.M[2], M.M[3]));
VTempY = VectorMultiply(VTempY, SetVector4(M.M[4], M.M[5], M.M[6], M.M[7]));
VTempZ = VectorMultiply(VTempZ, SetVector4(M.M[8], M.M[9], M.M[10], M.M[11]));
VTempW = VectorMultiply(VTempW, SetVector4(M.M[12], M.M[13], M.M[14], M.M[15]));
// Add them all together
VTempX = VectorAdd(VTempX, VTempY);
VTempZ = VectorAdd(VTempZ, VTempW);
VTempX = VectorAdd(VTempX, VTempZ);
return VTempX;
}
// Transforms the 4-component vector VecP by the 4x4 matrix M (single precision, uniform).
// Each component of VecP is splatted across a vector and multiplied with one group of four
// consecutive matrix elements; the four products are then summed pairwise.
// @param VecP Vector to transform.
// @param M    Matrix whose 16 elements are read from M.M[0..15].
// @return (x * M[0..3] + y * M[4..7]) + (z * M[8..11] + w * M[12..15])
inline uniform FVector4f VectorTransformVector(const uniform FVector4f &VecP, const uniform FMatrix44f &M)
{
    // Splat x, y, z and w.
    const uniform FVector4f SplatX = VectorReplicate(VecP, 0);
    const uniform FVector4f SplatY = VectorReplicate(VecP, 1);
    const uniform FVector4f SplatZ = VectorReplicate(VecP, 2);
    const uniform FVector4f SplatW = VectorReplicate(VecP, 3);

    // Scale each group of four matrix elements by the matching splatted component.
    const uniform FVector4f TermX = VectorMultiply(SplatX, SetVector4(M.M[0], M.M[1], M.M[2], M.M[3]));
    const uniform FVector4f TermY = VectorMultiply(SplatY, SetVector4(M.M[4], M.M[5], M.M[6], M.M[7]));
    const uniform FVector4f TermZ = VectorMultiply(SplatZ, SetVector4(M.M[8], M.M[9], M.M[10], M.M[11]));
    const uniform FVector4f TermW = VectorMultiply(SplatW, SetVector4(M.M[12], M.M[13], M.M[14], M.M[15]));

    // Sum as (X + Y) + (Z + W) to keep the floating-point association unchanged.
    const uniform FVector4f SumXY = VectorAdd(TermX, TermY);
    const uniform FVector4f SumZW = VectorAdd(TermZ, TermW);
    return VectorAdd(SumXY, SumZW);
}
inline FVector4 VectorTransformVector(const FVector4 &VecP, const FMatrix &M)
{
FVector4 VTempX, VTempY, VTempZ, VTempW;
T VTempX, VTempY, VTempZ, VTempW;
// Splat x,y,z and w
VTempX = VectorReplicate(VecP, 0);
@@ -715,6 +648,30 @@ inline FVector3f MatrixTransformPosition(const FVector3f &P, const uniform FMatr
return VTempX;
}
// Homogeneous transform of a position by a 4x4 matrix, with the W component of the
// input treated as 1.0: the matrix elements M.M[12..14] are added unscaled.
// @param P Position to transform (components read from P.V[0..2]).
// @param M Matrix; only elements 0..2, 4..6, 8..10 and 12..14 are read.
// @return (x * M[0..2] + y * M[4..6]) + (z * M[8..10] + M[12..14])
inline uniform FVector3f MatrixTransformPosition(const uniform FVector3f& P, const uniform FMatrix44f& M)
{
    // Splat x, y and z.
    const uniform FVector3f SplatX = SetVector(P.V[0], P.V[0], P.V[0]);
    const uniform FVector3f SplatY = SetVector(P.V[1], P.V[1], P.V[1]);
    const uniform FVector3f SplatZ = SetVector(P.V[2], P.V[2], P.V[2]);

    // Scale each group of three matrix elements by the matching splatted component.
    const uniform FVector3f TermX = SplatX * SetVector(M.M[0], M.M[1], M.M[2]);
    const uniform FVector3f TermY = SplatY * SetVector(M.M[4], M.M[5], M.M[6]);
    const uniform FVector3f TermZ = SplatZ * SetVector(M.M[8], M.M[9], M.M[10]);

    // With W taken as 1.0 the last group of elements contributes as-is.
    const uniform FVector3f TermW = SetVector(M.M[12], M.M[13], M.M[14]);

    // Sum as (X + Y) + (Z + W) to keep the floating-point association unchanged.
    const uniform FVector3f SumXY = TermX + TermY;
    const uniform FVector3f SumZW = TermZ + TermW;
    return SumXY + SumZW;
}
// Calculate homogeneous transform. W component assumed to be 0.0
inline FVector MatrixTransformVector(const FVector &P, const FMatrix &M)
{
@@ -746,17 +703,8 @@ inline uniform FVector3f MatrixInverseTransformVector(const uniform FMatrix44f &
return SetVector(VectorTransformVector(SetVector4(V, FLOAT_ZERO), InvSelf));
}
// Collapses a varying FMatrix44d into a uniform one: reduce_add is applied to each of the
// 16 elements M.M[0..15], and the 16 results are repacked, four per SetVector4, through SetMatrix.
// @param M Varying matrix to reduce.
// @return Uniform matrix whose element i is reduce_add(M.M[i]).
inline uniform FMatrix44d MatrixReduceAdd(const varying FMatrix44d &M)
{
return SetMatrix(
SetVector4(reduce_add(M.M[0]), reduce_add(M.M[1]), reduce_add(M.M[2]), reduce_add(M.M[3])),
SetVector4(reduce_add(M.M[4]), reduce_add(M.M[5]), reduce_add(M.M[6]), reduce_add(M.M[7])),
SetVector4(reduce_add(M.M[8]), reduce_add(M.M[9]), reduce_add(M.M[10]), reduce_add(M.M[11])),
SetVector4(reduce_add(M.M[12]), reduce_add(M.M[13]), reduce_add(M.M[14]), reduce_add(M.M[15]))
);
}
inline uniform FMatrix44f MatrixReduceAdd(const varying FMatrix44f &M)
template <typename T>
inline uniform T MatrixReduceAdd(const varying T& M)
{
return SetMatrix(
SetVector4(reduce_add(M.M[0]), reduce_add(M.M[1]), reduce_add(M.M[2]), reduce_add(M.M[3])),

View File

@@ -80,19 +80,10 @@ inline uniform FVector4 MatrixToQuat(const uniform FMatrix &M)
* @param Quat2 Pointer to the second quaternion
* @return Quat1 * Quat2
*/
inline FVector4 VectorQuaternionMultiply2( const FVector4& Quat1, const FVector4& Quat2 )
template <typename T>
inline T VectorQuaternionMultiply2(const T& Quat1, const T& Quat2)
{
FVector4 Result = VectorReplicate(Quat1, 3) * Quat2;
Result = VectorMultiplyAdd((VectorReplicate(Quat1, 0) * VectorSwizzle(Quat2, 3,2,1,0)), QMULTI_SIGN_MASK0, Result);
Result = VectorMultiplyAdd((VectorReplicate(Quat1, 1) * VectorSwizzle(Quat2, 2,3,0,1)), QMULTI_SIGN_MASK1, Result);
Result = VectorMultiplyAdd((VectorReplicate(Quat1, 2) * VectorSwizzle(Quat2, 1,0,3,2)), QMULTI_SIGN_MASK2, Result);
return Result;
}
inline uniform FVector4 VectorQuaternionMultiply2( const uniform FVector4& Quat1, const uniform FVector4& Quat2 )
{
uniform FVector4 Result = VectorReplicate(Quat1, 3) * Quat2;
T Result = VectorReplicate(Quat1, 3) * Quat2;
Result = VectorMultiplyAdd((VectorReplicate(Quat1, 0) * VectorSwizzle(Quat2, 3,2,1,0)), QMULTI_SIGN_MASK0, Result);
Result = VectorMultiplyAdd((VectorReplicate(Quat1, 1) * VectorSwizzle(Quat2, 2,3,0,1)), QMULTI_SIGN_MASK1, Result);
Result = VectorMultiplyAdd((VectorReplicate(Quat1, 2) * VectorSwizzle(Quat2, 1,0,3,2)), QMULTI_SIGN_MASK2, Result);
@@ -131,28 +122,13 @@ inline uniform FVector4f QuatInverse(const uniform FVector4f &Quat)
return Quat * FLOAT_QINV_SIGN_MASK;
}
inline FVector4d QuatFastLerp(const FVector4d& A, const FVector4d& B, const double Alpha)
template <typename T, typename F>
inline T QuatFastLerp(const T& A, const T& B, const F Alpha)
{
// To ensure the 'shortest route', we make sure the dot product between the both rotations is positive.
const double DotResult = VectorDot(A, B);
const double Bias = select(DotResult >= 0.d, 1.d, -1.d);
return (B * Alpha) + (A * (Bias * (1.d - Alpha)));
}
// Linear blend of two quaternions (single precision) without normalization.
// @param A     Start quaternion, weighted by (1 - Alpha) and sign-corrected.
// @param B     End quaternion, weighted by Alpha.
// @param Alpha Blend factor.
// @return B * Alpha + A * (Sign * (1 - Alpha)), where Sign is +1 when dot(A, B) >= 0 and -1 otherwise.
inline FVector4f QuatFastLerp(const FVector4f& A, const FVector4f& B, const float Alpha)
{
    // To take the 'shortest route', flip A's contribution when the dot product of both rotations is negative.
    const float Sign = select(VectorDot(A, B) >= 0.f, 1.f, -1.f);
    const float WeightA = Sign * (1.f - Alpha);
    return (B * Alpha) + (A * WeightA);
}
inline uniform FVector4 QuatFastLerp(const uniform FVector4& A, const uniform FVector4& B, const uniform FReal Alpha)
{
// To ensure the 'shortest route', we make sure the dot product between the both rotations is positive.
const uniform FReal DotResult = VectorDot(A, B);
const uniform FReal Bias = select(DotResult >= ZERO, ONE, -ONE);
return (B * Alpha) + (A * (Bias * (ONE - Alpha)));
const F DotResult = VectorDot(A, B);
const F Bias = select(DotResult >= 0, 1, -1);
return (B * Alpha) + (A * (Bias * (1 - Alpha)));
}
// A and B are quaternions. The result is A + (|A.B| >= 0 ? 1 : -1) * B
@@ -176,7 +152,8 @@ inline uniform FVector4 VectorAccumulateQuaternionShortestPath(const uniform FVe
* @param VectorW0 Vector to rotate. W component must be zero.
* @return Vector after rotation by Quat.
*/
inline FVector4d VectorQuaternionRotateVector(const FVector4d& Quat, const FVector4d& VectorW0)
template<typename T, typename V>
inline V VectorQuaternionRotateVector(const T& Quat, const V& VectorW0)
{
// Q * V * Q.Inverse
//const VectorRegister InverseRotation = VectorQuaternionInverse(Quat);
@@ -191,136 +168,17 @@ inline FVector4d VectorQuaternionRotateVector(const FVector4d& Quat, const FVect
// T = 2(Q x V);
// V' = V + w*(T) + (Q x T)
const FVector4d QW = VectorReplicate(Quat, 3);
FVector4d T = VectorCross(Quat, VectorW0);
T = VectorAdd(T, T);
const FVector4d VTemp0 = VectorMultiplyAdd(QW, T, VectorW0);
const FVector4d VTemp1 = VectorCross(Quat, T);
const FVector4d Rotated = VectorAdd(VTemp0, VTemp1);
const V QW = VectorReplicate(Quat, 3);
V Q = VectorCross(Quat, VectorW0);
Q = VectorAdd(Q, Q);
const V VTemp0 = VectorMultiplyAdd(QW, Q, VectorW0);
const V VTemp1 = VectorCross(Quat, Q);
const V Rotated = VectorAdd(VTemp0, VTemp1);
return Rotated;
}
// Rotates VectorW0 by the quaternion Quat (FVector4f quaternion, FVector4f vector).
// Instead of evaluating Q * V * Q.Inverse with two quaternion multiplies, this uses the
// equivalent expansion from http://people.csail.mit.edu/bkph/articles/Quaternions.pdf :
//   V' = V + 2w(Q x V) + (2Q x (Q x V))
// refactored as
//   T  = 2(Q x V)
//   V' = V + w*T + (Q x T)
// @param Quat     Rotation quaternion; component 3 is used as w.
// @param VectorW0 Vector to rotate (the name indicates W is expected to be zero).
// @return The rotated vector.
inline FVector4f VectorQuaternionRotateVector(const FVector4f& Quat, const FVector4f& VectorW0)
{
    const FVector4f SplatW = VectorReplicate(Quat, 3);

    // T = 2(Q x V)
    const FVector4f CrossQV = VectorCross(Quat, VectorW0);
    const FVector4f Twice = VectorAdd(CrossQV, CrossQV);

    // V + w*T
    const FVector4f Linear = VectorMultiplyAdd(SplatW, Twice, VectorW0);

    // ... + (Q x T)
    const FVector4f CrossQT = VectorCross(Quat, Twice);
    return VectorAdd(Linear, CrossQT);
}
// Rotates VectorW0 by the uniform quaternion Quat (uniform FVector4 quaternion, FVector4 vector).
// Instead of evaluating Q * V * Q.Inverse with two quaternion multiplies, this uses the
// equivalent expansion from http://people.csail.mit.edu/bkph/articles/Quaternions.pdf :
//   V' = V + 2w(Q x V) + (2Q x (Q x V))
// refactored as
//   T  = 2(Q x V)
//   V' = V + w*T + (Q x T)
// @param Quat     Rotation quaternion; component 3 is used as w.
// @param VectorW0 Vector to rotate (the name indicates W is expected to be zero).
// @return The rotated vector.
inline FVector4 VectorQuaternionRotateVector(const uniform FVector4& Quat, const FVector4& VectorW0)
{
    // The splatted w stays uniform, exactly as the quaternion it comes from.
    const uniform FVector4 SplatW = VectorReplicate(Quat, 3);

    // T = 2(Q x V)
    const FVector4 CrossQV = VectorCross(Quat, VectorW0);
    const FVector4 Twice = VectorAdd(CrossQV, CrossQV);

    // V + w*T
    const FVector4 Linear = VectorMultiplyAdd(SplatW, Twice, VectorW0);

    // ... + (Q x T)
    const FVector4 CrossQT = VectorCross(Quat, Twice);
    return VectorAdd(Linear, CrossQT);
}
// Rotates VectorW0 by the uniform quaternion Quat (uniform FVector4f quaternion, FVector4f vector).
// Instead of evaluating Q * V * Q.Inverse with two quaternion multiplies, this uses the
// equivalent expansion from http://people.csail.mit.edu/bkph/articles/Quaternions.pdf :
//   V' = V + 2w(Q x V) + (2Q x (Q x V))
// refactored as
//   T  = 2(Q x V)
//   V' = V + w*T + (Q x T)
// @param Quat     Rotation quaternion; component 3 is used as w.
// @param VectorW0 Vector to rotate (the name indicates W is expected to be zero).
// @return The rotated vector.
inline FVector4f VectorQuaternionRotateVector(const uniform FVector4f& Quat, const FVector4f& VectorW0)
{
    // The splatted w stays uniform, exactly as the quaternion it comes from.
    const uniform FVector4f SplatW = VectorReplicate(Quat, 3);

    // T = 2(Q x V)
    const FVector4f CrossQV = VectorCross(Quat, VectorW0);
    const FVector4f Twice = VectorAdd(CrossQV, CrossQV);

    // V + w*T
    const FVector4f Linear = VectorMultiplyAdd(SplatW, Twice, VectorW0);

    // ... + (Q x T)
    const FVector4f CrossQT = VectorCross(Quat, Twice);
    return VectorAdd(Linear, CrossQT);
}
// Rotates VectorW0 by the quaternion Quat (all-uniform, double precision).
// Instead of evaluating Q * V * Q.Inverse with two quaternion multiplies, this uses the
// equivalent expansion from http://people.csail.mit.edu/bkph/articles/Quaternions.pdf :
//   V' = V + 2w(Q x V) + (2Q x (Q x V))
// refactored as
//   T  = 2(Q x V)
//   V' = V + w*T + (Q x T)
// @param Quat     Rotation quaternion; component 3 is used as w.
// @param VectorW0 Vector to rotate (the name indicates W is expected to be zero).
// @return The rotated vector.
inline uniform FVector4d VectorQuaternionRotateVector(const uniform FVector4d& Quat, const uniform FVector4d& VectorW0)
{
    const uniform FVector4d SplatW = VectorReplicate(Quat, 3);

    // T = 2(Q x V)
    const uniform FVector4d CrossQV = VectorCross(Quat, VectorW0);
    const uniform FVector4d Twice = VectorAdd(CrossQV, CrossQV);

    // V + w*T
    const uniform FVector4d Linear = VectorMultiplyAdd(SplatW, Twice, VectorW0);

    // ... + (Q x T)
    const uniform FVector4d CrossQT = VectorCross(Quat, Twice);
    return VectorAdd(Linear, CrossQT);
}
// Rotates VectorW0 by the quaternion Quat (all-uniform, single precision).
// Instead of evaluating Q * V * Q.Inverse with two quaternion multiplies, this uses the
// equivalent expansion from http://people.csail.mit.edu/bkph/articles/Quaternions.pdf :
//   V' = V + 2w(Q x V) + (2Q x (Q x V))
// refactored as
//   T  = 2(Q x V)
//   V' = V + w*T + (Q x T)
// @param Quat     Rotation quaternion; component 3 is used as w.
// @param VectorW0 Vector to rotate (the name indicates W is expected to be zero).
// @return The rotated vector.
inline uniform FVector4f VectorQuaternionRotateVector(const uniform FVector4f& Quat, const uniform FVector4f& VectorW0)
{
    const uniform FVector4f SplatW = VectorReplicate(Quat, 3);

    // T = 2(Q x V)
    const uniform FVector4f CrossQV = VectorCross(Quat, VectorW0);
    const uniform FVector4f Twice = VectorAdd(CrossQV, CrossQV);

    // V + w*T
    const uniform FVector4f Linear = VectorMultiplyAdd(SplatW, Twice, VectorW0);

    // ... + (Q x T)
    const uniform FVector4f CrossQT = VectorCross(Quat, Twice);
    return VectorAdd(Linear, CrossQT);
}
inline uniform FVector VectorQuaternionRotateVector(const uniform FVector4& Quat, const uniform FVector& V)
template<>
inline uniform FVector VectorQuaternionRotateVector<uniform FVector4, uniform FVector>(const uniform FVector4& Quat, const uniform FVector& V)
{
// http://people.csail.mit.edu/bkph/articles/Quaternions.pdf
// V' = V + 2w(Q x V) + (2Q x (Q x V))
@@ -335,7 +193,8 @@ inline uniform FVector VectorQuaternionRotateVector(const uniform FVector4& Quat
return Result;
}
inline FVector3d VectorQuaternionRotateVector(const FVector4d& Quat, const FVector3d& V)
template<>
inline FVector3d VectorQuaternionRotateVector<FVector4d, FVector3d>(const FVector4d& Quat, const FVector3d& V)
{
// http://people.csail.mit.edu/bkph/articles/Quaternions.pdf
// V' = V + 2w(Q x V) + (2Q x (Q x V))
@@ -350,7 +209,8 @@ inline FVector3d VectorQuaternionRotateVector(const FVector4d& Quat, const FVect
return Result;
}
inline FVector3f VectorQuaternionRotateVector(const FVector4f& Quat, const FVector3f& V)
template<>
inline FVector3f VectorQuaternionRotateVector<FVector4f, FVector3f>(const FVector4f& Quat, const FVector3f& V)
{
// http://people.csail.mit.edu/bkph/articles/Quaternions.pdf
// V' = V + 2w(Q x V) + (2Q x (Q x V))
@@ -365,7 +225,8 @@ inline FVector3f VectorQuaternionRotateVector(const FVector4f& Quat, const FVect
return Result;
}
inline FVector3d VectorQuaternionRotateVector(const uniform FVector4d& Quat, const FVector3d& V)
template<>
inline FVector3d VectorQuaternionRotateVector<uniform FVector4d, FVector3d>(const uniform FVector4d& Quat, const FVector3d& V)
{
// http://people.csail.mit.edu/bkph/articles/Quaternions.pdf
// V' = V + 2w(Q x V) + (2Q x (Q x V))
@@ -380,7 +241,8 @@ inline FVector3d VectorQuaternionRotateVector(const uniform FVector4d& Quat, con
return Result;
}
inline FVector3f VectorQuaternionRotateVector(const uniform FVector4f& Quat, const FVector3f& V)
template<>
inline FVector3f VectorQuaternionRotateVector<uniform FVector4f, FVector3f>(const uniform FVector4f& Quat, const FVector3f& V)
{
// http://people.csail.mit.edu/bkph/articles/Quaternions.pdf
// V' = V + 2w(Q x V) + (2Q x (Q x V))
@@ -395,7 +257,8 @@ inline FVector3f VectorQuaternionRotateVector(const uniform FVector4f& Quat, con
return Result;
}
inline uniform FVector8 VectorQuaternionRotateVector(const uniform FVector8& Quat, const uniform FVector8& VectorW0)
template<>
inline uniform FVector8 VectorQuaternionRotateVector<uniform FVector8, uniform FVector8>(const uniform FVector8& Quat, const uniform FVector8& VectorW0)
{
const uniform FVector8 QW = VectorReplicate(Quat, 3);
uniform FVector8 T = VectorCross(Quat, VectorW0);

File diff suppressed because it is too large Load Diff

View File

@@ -347,7 +347,7 @@ inline FVector TransformPosition(const uniform FTransform &T, const FVector& V)
const FVector4 ScaledVec = VectorMultiply(T.Scale3D, InputVectorW0);
const FVector4 RotatedVec = VectorQuaternionRotateVector(T.Rotation, ScaledVec);
const FVector4 TranslatedVec = VectorAdd(RotatedVec, T.Translation);
const FVector4 TranslatedVec = VectorAdd(RotatedVec, (const varying FVector4)T.Translation);
return SetVector(TranslatedVec);
}
@@ -363,7 +363,7 @@ inline FVector3f TransformPosition(const uniform FTransform3f &T, const FVector3
const FVector4f ScaledVec = VectorMultiply(T.Scale3D, InputVectorW0);
const FVector4f RotatedVec = VectorQuaternionRotateVector(T.Rotation, ScaledVec);
const FVector4f TranslatedVec = VectorAdd(RotatedVec, T.Translation);
const FVector4f TranslatedVec = VectorAdd(RotatedVec, (const varying FVector4f)SetVector4(T.Translation.V[0], T.Translation.V[1], T.Translation.V[2], 0.f));
return SetVector(TranslatedVec);
}

File diff suppressed because it is too large Load Diff

View File

@@ -86,22 +86,26 @@ unmasked inline uniform WideFVector4 VectorSwizzle(const uniform WideFVector4 &V
return Result;
}
unmasked inline uniform WideFVector4 VectorReplicate(const uniform WideFVector4 &Vec, const uniform int R)
template<>
inline uniform WideFVector4 VectorReplicate<uniform WideFVector4>(const uniform WideFVector4 &Vec, const uniform int R)
{
#if TARGET_WIDTH == 4
const varying int vPerm = { R, R, R, R };
#elif TARGET_WIDTH == 8
const varying int vPerm = { R, R, R, R, R+4, R+4, R+4, R+4 };
#elif TARGET_WIDTH == 16
const varying int vPerm = { R, R, R, R, R+4, R+4, R+4, R+4, R+8, R+8, R+8, R+8, R+12, R+12, R+12, R+12 };
#endif
unmasked
{
#if TARGET_WIDTH == 4
const varying int vPerm = { R, R, R, R };
#elif TARGET_WIDTH == 8
const varying int vPerm = { R, R, R, R, R+4, R+4, R+4, R+4 };
#elif TARGET_WIDTH == 16
const varying int vPerm = { R, R, R, R, R+4, R+4, R+4, R+4, R+8, R+8, R+8, R+8, R+12, R+12, R+12, R+12 };
#endif
const FReal V = Vec.V[programIndex];
const FReal S = shuffle(V, vPerm);
uniform WideFVector4 Result;
Result.V[programIndex] = S;
const FReal V = Vec.V[programIndex];
const FReal S = shuffle(V, vPerm);
uniform WideFVector4 Result;
Result.V[programIndex] = S;
return Result;
return Result;
}
}
unmasked inline uniform WideFVector4 VectorCompareGE(const uniform WideFVector4 &A, const uniform WideFVector4 &B)