diff --git a/include/universal/native/error_free_ops.hpp b/include/universal/native/error_free_ops.hpp index a3a0a17f9..2202f5d26 100644 --- a/include/universal/native/error_free_ops.hpp +++ b/include/universal/native/error_free_ops.hpp @@ -46,10 +46,8 @@ namespace sw { namespace universal { /// input /// reference to the residual /// the sum s - template - inline double quick_two_sum(volatile NativeFloat a, volatile NativeFloat b, volatile NativeFloat& r) - { - volatile NativeFloat s = a + b; + inline double quick_two_sum(double a, double b, volatile double& r) { + volatile double s = a + b; r = (std::isfinite(s) ? b - (s - a) : 0.0); return s; } @@ -61,9 +59,7 @@ namespace sw { namespace universal { /// input /// reference to the residual /// the sum s - template - inline double two_sum(volatile NativeFloat a, volatile NativeFloat b, volatile NativeFloat& r) - { + inline double two_sum(double a, double b, volatile double& r) { volatile double s = a + b; if (std::isfinite(s)) { volatile double bb = s - a; @@ -87,8 +83,7 @@ namespace sw { namespace universal { /// input /// reference to the residual /// the sum s - template - inline double quick_two_diff(volatile NativeFloat a, volatile NativeFloat b, volatile NativeFloat& r) { + inline double quick_two_diff(double a, double b, volatile double& r) { volatile double s = a - b; r = (std::isfinite(s) ? 
(a - s) - b : 0.0); return s; @@ -103,8 +98,7 @@ namespace sw { namespace universal { /// input /// reference to the residual /// the difference s - template - inline NativeFloat two_diff(volatile NativeFloat a, volatile NativeFloat b, volatile NativeFloat& r) { + inline double two_diff(double a, double b, volatile double& r) { volatile double s = a - b; if (std::isfinite(s)) { volatile double bb = s - a; @@ -124,8 +118,7 @@ namespace sw { namespace universal { /// input /// input /// input value, output residual - inline void three_sum(volatile double& a, volatile double& b, volatile double& c) - { + inline void three_sum(volatile double& a, volatile double& b, volatile double& c) { volatile double t1, t2, t3; t1 = two_sum(a, b, t2); @@ -138,8 +131,7 @@ namespace sw { namespace universal { #if !defined( QD_FMS ) /* Computes high word and lo word of a */ - inline void split(volatile double a, volatile double& hi, volatile double& lo) - { + inline void split(double a, volatile double& hi, volatile double& lo) { int const QD_BITS = (std::numeric_limits< double >::digits + 1) / 2; static double const QD_SPLITTER = std::ldexp(1.0, QD_BITS) + 1.0; static double const QD_SPLIT_THRESHOLD = std::ldexp((std::numeric_limits< double >::max)(), -QD_BITS - 1); @@ -173,7 +165,7 @@ namespace sw { namespace universal { /// input /// reference to the residual /// the product of a * b - inline double two_prod(volatile double a, volatile double b, volatile double& r) + inline double two_prod(double a, double b, volatile double& r) { volatile double p = a * b; if (std::isfinite(p)) { @@ -198,8 +190,7 @@ namespace sw { namespace universal { /// input /// reference to the residual /// the square product of a - inline double two_sqr(double a, double& r) - { + inline double two_sqr(double a, volatile double& r) { volatile double p = a * a; if (std::isfinite(p)) { diff --git a/include/universal/native/integers.hpp b/include/universal/native/integers.hpp index eb68fefc1..fecabe858 100644 
--- a/include/universal/native/integers.hpp +++ b/include/universal/native/integers.hpp @@ -146,7 +146,7 @@ inline int64_t fastipow(int64_t base, uint8_t exp) { template::value, Integer >::type > -inline std::string to_binary(const Integer& number, int nbits = 0, bool bNibbleMarker = true) { +inline std::string to_binary(const Integer& number, bool bNibbleMarker = true, int nbits = 0) { std::stringstream s; if (nbits == 0) nbits = 8*sizeof(number); s << "0b"; diff --git a/include/universal/number/dd/dd_impl.hpp b/include/universal/number/dd/dd_impl.hpp index 162246a24..dea0a3cbf 100644 --- a/include/universal/number/dd/dd_impl.hpp +++ b/include/universal/number/dd/dd_impl.hpp @@ -103,7 +103,6 @@ class dd { constexpr dd(unsigned long long iv) noexcept { *this = iv; } constexpr dd(float iv) noexcept : hi{ iv }, lo{ 0.0 } {} constexpr dd(double iv) noexcept : hi{ iv }, lo{ 0.0 } {} - dd(long double iv) noexcept { *this = iv; } // assignment operators for native types constexpr dd& operator=(signed char rhs) noexcept { return convert_signed(rhs); } @@ -118,7 +117,24 @@ class dd { constexpr dd& operator=(unsigned long long rhs) noexcept { return convert_unsigned(rhs); } constexpr dd& operator=(float rhs) noexcept { return convert_ieee754(rhs); } constexpr dd& operator=(double rhs) noexcept { return convert_ieee754(rhs); } - dd& operator=(long double rhs) noexcept { return convert_ieee754(rhs); } + + // conversion operators + explicit operator int() const noexcept { return convert_to_signed(); } + explicit operator long() const noexcept { return convert_to_signed(); } + explicit operator long long() const noexcept { return convert_to_signed(); } + explicit operator unsigned int() const noexcept { return convert_to_unsigned(); } + explicit operator unsigned long() const noexcept { return convert_to_unsigned(); } + explicit operator unsigned long long() const noexcept { return convert_to_unsigned(); } + explicit operator float() const noexcept { return 
convert_to_ieee754(); } + explicit operator double() const noexcept { return convert_to_ieee754(); } + + +#if LONG_DOUBLE_SUPPORT + // can't be constexpr as remainder calculation requires volatile designation + dd(long double iv) noexcept { *this = iv; } + dd& operator=(long double rhs) noexcept { return convert_ieee754(rhs); } + explicit operator long double() const noexcept { return convert_to_ieee754(); } +#endif // prefix operators constexpr dd operator-() const noexcept { @@ -128,10 +144,6 @@ class dd { return negated; } - // conversion operators - explicit operator float() const { return toNativeFloatingPoint(); } - explicit operator double() const { return toNativeFloatingPoint(); } - explicit operator long double() const { return toNativeFloatingPoint(); } // arithmetic operators dd& operator+=(const dd& rhs) { @@ -449,12 +461,7 @@ class dd { // if (fixed && (precision == 0) && (abs(*this) < 1.0)) { // if (abs(*this) >= 0.5) if (fixed && (precision == 0) && (std::abs(high()) < 1.0)) { - if (std::abs(high()) >= 0.5) - - s += '1'; - else - s += '0'; - + s += (std::abs(hi) >= 0.5) ? '1' : '0'; return s; } @@ -467,8 +474,7 @@ class dd { } } else { // default - - char* t; // = new char[d+1]; + char* t; if (fixed) { t = new char[d_with_extra + 1]; @@ -540,13 +546,13 @@ class dd { if (!fixed && !isinf()) { - /* Fill in exponent part */ + // construct the exponent s += uppercase ? 
'E' : 'e'; append_expn(s, e); } } - /* Fill in the blanks */ + // Fill int len = s.length(); if (len < width) { int delta = static_cast(width) - len; @@ -583,13 +589,7 @@ class dd { // HELPER methods - // convert to native floating-point, use conversion rules to cast down to float and double - template - NativeFloat toNativeFloatingPoint() const { - return NativeFloat(hi + lo); - } - - constexpr dd& convert_signed(int64_t v) { + constexpr dd& convert_signed(int64_t v) noexcept { if (0 == v) { setzero(); } @@ -600,7 +600,7 @@ class dd { return *this; } - constexpr dd& convert_unsigned(uint64_t v) { + constexpr dd& convert_unsigned(uint64_t v) noexcept { if (0 == v) { setzero(); } @@ -612,16 +612,17 @@ class dd { } // no need to SFINAE this as it is an internal method that we ONLY call when we know the argument type is a native float - constexpr dd& convert_ieee754(float rhs) { + constexpr dd& convert_ieee754(float rhs) noexcept { hi = double(rhs); lo = 0.0; return *this; } - constexpr dd& convert_ieee754(double rhs) { + constexpr dd& convert_ieee754(double rhs) noexcept { hi = rhs; lo = 0.0; return *this; } +#if LONG_DOUBLE_SUPPORT dd& convert_ieee754(long double rhs) { volatile long double truncated = static_cast(double(rhs)); volatile double remainder = static_cast(rhs - truncated); @@ -629,6 +630,31 @@ class dd { lo = remainder; return *this; } +#endif + + // convert to native unsigned integer, use C++ conversion rules to cast down to the requested unsigned type + template + Unsigned convert_to_unsigned() const noexcept { + uint64_t h = hi; + uint64_t l = lo; // TBD: lo could be negative + return Unsigned(h + l); + } + + // convert to native signed integer, use C++ conversion rules to cast down to the requested signed type + template + Signed convert_to_signed() const noexcept { + int64_t h = hi; + int64_t l = lo; + return Signed(h + l); + } + + // convert to native floating-point, use C++ conversion rules to cast down to float and double + template + Real convert_to_ieee754() 
const noexcept { + return Real(hi + lo); + } + + void to_digits(char* s, int& expn, int precision) const { int D = precision + 1; // number of digits to compute diff --git a/static/dd/api/api.cpp b/static/dd/api/api.cpp index 17b32b48e..421bedf61 100644 --- a/static/dd/api/api.cpp +++ b/static/dd/api/api.cpp @@ -1,4 +1,4 @@ -// api.cpp: application programming interface tests for doubledouble number system +// api.cpp: application programming interface tests for double-double (dd) number system // // Copyright (C) 2017 Stillwater Supercomputing, Inc. // SPDX-License-Identifier: MIT diff --git a/static/dd/conversion/conversion.cpp b/static/dd/conversion/conversion.cpp index 88c2bfe95..0e647460c 100644 --- a/static/dd/conversion/conversion.cpp +++ b/static/dd/conversion/conversion.cpp @@ -1,4 +1,4 @@ -// conversion.cpp: test suite runner for conversion operators for doubledouble +// conversion.cpp: test suite runner for conversion operators for double-double (dd) floating-point // // Copyright (C) 2017 Stillwater Supercomputing, Inc. 
// SPDX-License-Identifier: MIT @@ -41,6 +41,34 @@ try { #if MANUAL_TESTING + uint64_t u64; + int64_t i64; + + // check if we get all the bits of a 64-bit int + u64 = 0xFFFF'FFFF'FFFF'FFFFull; + i64 = 0x7FFF'FFFF'FFFF'FFFFll; + + { + std::cout << to_binary(u64, false, 64) << " : " << u64 << '\n'; + u64 = -1; + std::cout << to_binary(u64, false, 64) << " : " << u64 << '\n'; + uint64_t v{ u64 }; + double hi = static_cast(v); + uint64_t h = static_cast(hi); + std::cout << std::fixed << hi << '\n'; + std::cout << to_binary(h) << '\n'; + double lo = static_cast(v - h); // difference is always positive + + dd a(u64); + ReportValue(a, "0xFFFF'FFFF'FFFF'FFFF", 35, 32); + std::cout << to_pair(a) << '\n'; + uint64_t i = uint64_t(a); + ReportValue(i, "0xFFFF'FFFF'FFFF'FFFF", 35, 32); + } + { + dd a(i64); + ReportValue(a, "0x7FFF'FFFF'FFFF'FFFF", 35, 32); + } ReportTestSuiteResults(test_suite, nrOfFailedTestCases); return EXIT_SUCCESS; // ignore failures