[Soft-Float] - Implemented stop-gap Div rounding mode + implements so…

…ftfloat on some extra obscure VU ops and fixes a denormals check in the checkDivideByZero method in the FPU. Fixes: - Final Fantasy X (fully playable) - Klonoa 2 Partially fixes: - Mortal Kombat: Shaolin Monks - Gran Turismo 4 (a game patch will be necessary to skip the Licence Test CRC check). - Tourist Trophy (a game patch will be necessary to skip the Licence Test CRC check). The stop-gap Div measure is not yet enough to fully fix GT4/Tourist Trophy (they will need different rounding modes per licence). Currently this is bridged by the Div Rounding Mode setting.
PCSX2 · Dec 27, 2024 · 5b9f003 · 5b9f003
1 parent 5b94f53
commit 5b9f003
Show file tree

Hide file tree

Showing 5 changed files with 136 additions and 92 deletions.
diff --git a/common/BitUtils.h b/common/BitUtils.h
@@ -119,16 +119,6 @@ namespace Common
 	{
 		return msb[b];
 	}
-
-	__fi static s32 GetMostSignificantBitPosition(u32 value)
-	{
-		for (s32 i = 31; i >= 0; i--)
-		{
-			if (((value >> i) & 1) != 0)
-				return i;
-		}
-		return -1;
-	}
 } // namespace Common
 
 template <typename T>

diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp
@@ -140,13 +140,21 @@ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsT
 		PS2Float yMatrix = PS2Float(yDivisorReg);
 		PS2Float zMatrix = PS2Float(zDividendReg);
 
-		if (yMatrix.IsZero())
+		// Check Final Fantasy X controls and Klonoa 2 to test this code, they send a bunch of denormals which are often hack-fixed on the game code.
+		if (zMatrix.IsDenormalized() || yMatrix.IsDenormalized())
 		{
-			bool dividendZero = zMatrix.IsZero();
+			_ContVal_ |= 0;
+			xReg = PS2Float::SolveDivisionDenormalizedOperation(zMatrix, yMatrix).raw;
+			return true;
+		}
+
+		if (zMatrix.IsZero())
+		{
+			bool dividendZero = yMatrix.IsZero();
 
 			_ContVal_ |= dividendZero ? cFlagsToSet2 : cFlagsToSet1;
 
-			bool IsSigned = yMatrix.Sign() ^ zMatrix.Sign();
+			bool IsSigned = zMatrix.Sign() ^ yMatrix.Sign();
 
 			if (dividendZero)
 				xReg = IsSigned ? PS2Float::MIN_FLOATING_POINT_VALUE : PS2Float::MAX_FLOATING_POINT_VALUE;

diff --git a/pcsx2/PS2Float.cpp b/pcsx2/PS2Float.cpp
@@ -76,14 +76,18 @@ u64 PS2Float::MulMantissa(u32 a, u32 b)
 // Float Processor
 //****************************************************************
 
+PS2Float::PS2Float(s32 value) { raw = (u32)value; } // reuses the integer's bits as the raw pattern (a cast only, no numeric int-to-float conversion)
+
 PS2Float::PS2Float(u32 value) { raw = value; } // wraps an existing raw 32-bit pattern as-is
 
+PS2Float::PS2Float(float value) { raw = std::bit_cast<u32>(value); } // copies the host float's bit pattern verbatim into raw
+
 PS2Float::PS2Float(bool sign, u8 exponent, u32 mantissa) // assembles raw from separate sign / exponent / mantissa fields
 {
 	raw = 0;
 	raw |= (sign ? 1u : 0u) << 31; // sign goes in bit 31
 	raw |= (u32)(exponent << 23); // exponent is placed starting at bit 23
-	raw |= mantissa;
+	raw |= mantissa & 0x7FFFFF; // keep only the low 23 bits so an oversized mantissa cannot overwrite the exponent or sign bits
 }
 
 PS2Float PS2Float::Max()
@@ -285,6 +289,26 @@ PS2Float PS2Float::Rsqrt(PS2Float other)
 	return Div(other.Sqrt());
 }
 
+// Raises this value to an integer power by repeated PS2Float::Mul, applied one factor at a time.
+// Exponent 0 returns One(); a negative exponent returns One().Div() of the positive power.
+PS2Float PS2Float::Pow(s32 exponent)
+{
+	// Take the magnitude in unsigned arithmetic: abs() on the most negative s32 overflows (undefined behaviour).
+	const u32 magnitude = (exponent < 0) ? (0u - static_cast<u32>(exponent)) : static_cast<u32>(exponent);
+
+	PS2Float result = PS2Float::One(); // x^0 == 1, so a zero exponent simply skips the loop
+
+	// Keep the strictly sequential multiply order: each Mul rounds, so regrouping could change the result bits.
+	for (u32 i = 0; i < magnitude; i++)
+	{
+		result = result.Mul(*this);
+	}
+
+	if (exponent < 0)
+		return PS2Float::One().Div(result);
+
+	return result;
+}
+
 bool PS2Float::IsDenormalized()
 {
 	return Exponent() == 0;
@@ -312,11 +336,6 @@ PS2Float PS2Float::Negate()
 	return PS2Float(raw ^ 0x80000000);
 }
 
-PS2Float PS2Float::RoundTowardsZero()
-{
-	return PS2Float((u32)std::trunc((double)raw));
-}
-
 s32 PS2Float::CompareTo(PS2Float other)
 {
 	s32 selfTwoComplementVal = (s32)Abs();
@@ -486,47 +505,24 @@ PS2Float PS2Float::DoDiv(PS2Float other)
 	u32 resMantissa = (u32)(selfMantissa64 / otherMantissa);
 
 	if ((resMantissa & 0x3F) == 0)
-		resMantissa |= ((u64)(otherMantissa)*resMantissa != selfMantissa64) ? 1U : 0;
-
-	resMantissa = (resMantissa + 0x40U) >> 7;
-
-	if (resMantissa > 0)
-	{
-		s32 leadingBitPosition = Common::GetMostSignificantBitPosition(resMantissa);
-
-		while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS)
-		{
-			if (leadingBitPosition > IMPLICIT_LEADING_BIT_POS)
-			{
-				resMantissa >>= 1;
+		resMantissa |= ((u64)otherMantissa * resMantissa != selfMantissa64) ? 1U : 0;
 
-				s32 exp = resExponent + 1;
+	FPRoundMode roundingMode = EmuConfig.Cpu.FPUDivFPCR.GetRoundMode();
 
-				if (exp > 255)
-					return sign ? Min() : Max();
+	bool roundNearEven = roundingMode == FPRoundMode::Nearest;
+	u32 roundIncrement = (!roundNearEven) ? ((roundingMode == (sign ? FPRoundMode::NegativeInfinity : FPRoundMode::PositiveInfinity)) ? 0x7FU : 0) : 0x40U;
+	u32 roundBits = resMantissa & 0x7F;
 
-				resExponent = exp;
-
-				leadingBitPosition--;
-			}
-			else if (leadingBitPosition < IMPLICIT_LEADING_BIT_POS)
-			{
-				resMantissa <<= 1;
-
-				s32 exp = resExponent - 1;
-
-				if (exp <= 0)
-					return PS2Float(sign, 0, 0);
+	if (0x80000000 <= resMantissa + roundIncrement)
+		return sign ? Min() : Max();
 
-				resExponent = exp;
+	resMantissa = (resMantissa + roundIncrement) >> 7;
 
-				leadingBitPosition++;
-			}
-		}
-	}
+	resMantissa &= ~(((roundBits ^ 0x40) == 0 & roundNearEven) ? 1U : 0U);
+	if (resMantissa == 0)
+		resExponent = 0;
 
-	resMantissa &= 0x7FFFFF;
-	return PS2Float(sign, (u8)resExponent, resMantissa).RoundTowardsZero();
+	return PS2Float(sign, (u8)resExponent, resMantissa);
 }
 
 PS2Float PS2Float::SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add)

diff --git a/pcsx2/PS2Float.h b/pcsx2/PS2Float.h
@@ -43,8 +43,12 @@ class PS2Float
     constexpr u8 Exponent() const { return (raw >> 23) & 0xFF; }
     constexpr bool Sign() const { return ((raw >> 31) & 1) != 0; }
 
+    PS2Float(s32 value);
+
     PS2Float(u32 value);
 
+    PS2Float(float value);
+
     PS2Float(bool sign, u8 exponent, u32 mantissa);
 
     static PS2Float Max();
@@ -55,6 +59,16 @@ class PS2Float
 
     static PS2Float MinOne();
 
+    // Static Solve* helpers; SolveDivisionDenormalizedOperation is called from checkDivideByZero in FPU.cpp.
+    static PS2Float SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add);
+
+    static PS2Float SolveAbnormalMultiplicationOrDivisionOperation(PS2Float a, PS2Float b, bool mul);
+
+    static PS2Float SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add);
+
+    static PS2Float SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b);
+
+    static PS2Float SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b);
+
     PS2Float Add(PS2Float addend);
 
     PS2Float Sub(PS2Float subtrahend);
@@ -67,6 +81,8 @@ class PS2Float
 
     PS2Float Rsqrt(PS2Float other);
 
+    PS2Float Pow(s32 exponent);
+
     bool IsDenormalized();
 
     bool IsAbnormal();
@@ -77,8 +93,6 @@ class PS2Float
 
     PS2Float Negate();
 
-    PS2Float RoundTowardsZero();
-
     s32 CompareTo(PS2Float other);
 
     s32 CompareOperand(PS2Float other);
@@ -97,16 +111,6 @@ class PS2Float
 
     PS2Float DoDiv(PS2Float other);
 
-    static PS2Float SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add);
-
-    static PS2Float SolveAbnormalMultiplicationOrDivisionOperation(PS2Float a, PS2Float b, bool mul);
-
-    static PS2Float SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add);
-
-    static PS2Float SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b);
-
-    static PS2Float SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b);
-
     static bool DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b);
 
     static bool DetermineAdditionOperationSign(PS2Float a, PS2Float b);

diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp
@@ -2670,48 +2670,67 @@ static __ri void _vuERLENG(VURegs* VU)
 }
 
 
-static __ri float _vuCalculateEATAN(u32 inputvalue) {
-
-	float fvalue = vuDouble(inputvalue);
-
+// Evaluates the EATAN polynomial on the raw 32-bit operand `inputvalue` and returns the result as raw 32 bits:
+//   eatanconst[0]*x + eatanconst[1]*x^3 + ... + eatanconst[7]*x^15 + eatanconst[8]
+// When both CHECK_VU_SOFT_MULDIV and CHECK_VU_SOFT_ADDSUB are set for this unit (index 1 for VU1, 0 otherwise)
+// the terms are computed with PS2Float Mul/Add/Pow; otherwise with host floats, pow() and vuDouble().
+static __ri u32 _vuCalculateEATAN(VURegs* VU, u32 inputvalue)
+{
 	float eatanconst[9] = { 0.999999344348907f, -0.333298563957214f, 0.199465364217758f, -0.13085337519646f,
 							0.096420042216778f, -0.055909886956215f, 0.021861229091883f, -0.004054057877511f,
 							0.785398185253143f };
 
-	float result = (eatanconst[0] * fvalue) + (eatanconst[1] * pow(fvalue, 3)) + (eatanconst[2] * pow(fvalue, 5)) + (eatanconst[3] * pow(fvalue, 7))
-		+ (eatanconst[4] * pow(fvalue, 9)) + (eatanconst[5] * pow(fvalue, 11)) + (eatanconst[6] * pow(fvalue, 13)) + (eatanconst[7] * pow(fvalue, 15));
+	if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0))
+	{
+		// Soft-float path: every multiply/add goes through PS2Float, and the raw bits of the sum are returned.
+		PS2Float p = PS2Float(inputvalue);
+
+		return PS2Float(eatanconst[0]).Mul(p)
+			.Add(PS2Float(eatanconst[1]).Mul(p.Pow(3)))
+			.Add(PS2Float(eatanconst[2]).Mul(p.Pow(5)))
+			.Add(PS2Float(eatanconst[3]).Mul(p.Pow(7)))
+			.Add(PS2Float(eatanconst[4]).Mul(p.Pow(9)))
+			.Add(PS2Float(eatanconst[5]).Mul(p.Pow(11)))
+			.Add(PS2Float(eatanconst[6]).Mul(p.Pow(13)))
+			.Add(PS2Float(eatanconst[7]).Mul(p.Pow(15)))
+			.Add(PS2Float(eatanconst[8])).raw;
+	}
+	else
+	{
+		// Host-float path: same polynomial as before this change, now returned as raw bits instead of a float.
+		float fvalue = vuDouble(inputvalue);
+
+		float result = (eatanconst[0] * fvalue) + (eatanconst[1] * pow(fvalue, 3)) + (eatanconst[2] * pow(fvalue, 5)) + (eatanconst[3] * pow(fvalue, 7)) + (eatanconst[4] * pow(fvalue, 9)) + (eatanconst[5] * pow(fvalue, 11)) + (eatanconst[6] * pow(fvalue, 13)) + (eatanconst[7] * pow(fvalue, 15));
 
-	result += eatanconst[8];
+		result += eatanconst[8];
 
-	result = vuDouble(*(u32*)&result);
+		// std::bit_cast instead of *(u32*)&result: same bits, no pointer type-pun, and consistent with the return below.
+		result = vuDouble(std::bit_cast<u32>(result));
 
-	return result;
+		return std::bit_cast<u32>(result);
+	}
 }
 
 static __ri void _vuEATAN(VURegs* VU)
 {
-	float p = _vuCalculateEATAN(VU->VF[_Fs_].UL[_Fsf_]);
-	VU->p.F = p;
+	VU->p.UL = _vuCalculateEATAN(VU, VU->VF[_Fs_].UL[_Fsf_]); // helper now returns raw bits, so store them through p.UL rather than via a float
 }
 
 static __ri void _vuEATANxy(VURegs* VU)
 {
-	float p = 0;
 	if (!PS2Float(VU->VF[_Fs_].i.x).IsZero())
 	{
-		p = _vuCalculateEATAN(vuAccurateDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.x));
+		VU->p.UL = _vuCalculateEATAN(VU, vuAccurateDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.x)); // EATAN polynomial applied to the vuAccurateDiv result for the y and x fields
+	}
+	else
+	{
+		VU->p.UL = PS2Float(0).raw; // x is zero: skip the division and store an all-zero raw pattern
 	}
-	VU->p.F = p;
 }
 
 static __ri void _vuEATANxz(VURegs* VU)
 {
-	float p = 0;
 	if (!PS2Float(VU->VF[_Fs_].i.x).IsZero())
 	{
-		p = _vuCalculateEATAN(vuAccurateDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.x));
+		VU->p.UL = _vuCalculateEATAN(VU, vuAccurateDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.x)); // EATAN polynomial applied to the vuAccurateDiv result for the z and x fields
+	}
+	else
+	{
+		VU->p.UL = PS2Float(0).raw; // x is zero: skip the division and store an all-zero raw pattern
 	}
-	VU->p.F = p;
 }
 
 static __ri void _vuESUM(VURegs* VU)
@@ -2810,24 +2829,51 @@ static __ri void _vuERSQRT(VURegs* VU)
 static __ri void _vuESIN(VURegs* VU)
 {
 	float sinconsts[5] = {1.0f, -0.166666567325592f, 0.008333025500178f, -0.000198074136279f, 0.000002601886990f};
-	float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]);
 
-	p = (sinconsts[0] * p) + (sinconsts[1] * pow(p, 3)) + (sinconsts[2] * pow(p, 5)) + (sinconsts[3] * pow(p, 7)) + (sinconsts[4] * pow(p, 9));
-	VU->p.F = vuDouble(*(u32*)&p);
+	if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0))
+	{
+		PS2Float p = PS2Float(VU->VF[_Fs_].UL[_Fsf_]);
+
+		VU->p.UL = PS2Float(sinconsts[0]).Mul(p).Add(PS2Float(sinconsts[1]).Mul(p.Pow(3))).Add(PS2Float(sinconsts[2]).Mul(p.Pow(5))).Add(PS2Float(sinconsts[3]).Mul(p.Pow(7))).Add(PS2Float(sinconsts[4]).Mul(p.Pow(9))).raw;
+	}
+	else
+	{
+		float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]);
+
+		p = (sinconsts[0] * p) + (sinconsts[1] * pow(p, 3)) + (sinconsts[2] * pow(p, 5)) + (sinconsts[3] * pow(p, 7)) + (sinconsts[4] * pow(p, 9));
+		VU->p.F = vuDouble(*(u32*)&p);
+	}
 }
 
 static __ri void _vuEEXP(VURegs* VU)
 {
 	float consts[6] = {0.249998688697815f, 0.031257584691048f, 0.002591371303424f,
 						0.000171562001924f, 0.000005430199963f, 0.000000690600018f};
-	float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]);
 
-	p = 1.0f + (consts[0] * p) + (consts[1] * pow(p, 2)) + (consts[2] * pow(p, 3)) + (consts[3] * pow(p, 4)) + (consts[4] * pow(p, 5)) + (consts[5] * pow(p, 6));
-	p = pow(p, 4);
-	p = vuDouble(*(u32*)&p);
-	p = 1 / p;
+	if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0))
+	{
+		PS2Float p = PS2Float(VU->VF[_Fs_].UL[_Fsf_]);
+
+		VU->p.UL = PS2Float::One().Div(PS2Float::One()
+			.Add(PS2Float(consts[0]).Mul(p))
+			.Add(PS2Float(consts[1]).Mul(p.Pow(2)))
+			.Add(PS2Float(consts[2]).Mul(p.Pow(3)))
+			.Add(PS2Float(consts[3]).Mul(p.Pow(4)))
+			.Add(PS2Float(consts[4]).Mul(p.Pow(5)))
+			.Add(PS2Float(consts[5]).Mul(p.Pow(6)))
+			.Pow(4)).raw;
+	}
+	else
+	{
+		float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]);
 
-	VU->p.F = p;
+		p = 1.0f + (consts[0] * p) + (consts[1] * pow(p, 2)) + (consts[2] * pow(p, 3)) + (consts[3] * pow(p, 4)) + (consts[4] * pow(p, 5)) + (consts[5] * pow(p, 6));
+		p = pow(p, 4);
+		p = vuDouble(*(u32*)&p);
+		p = 1 / p;
+
+		VU->p.F = p;
+	}
 }
 
 static __ri void _vuXITOP(VURegs* VU)