diff --git a/include/universal/native/error_free_ops.hpp b/include/universal/native/error_free_ops.hpp
index a3a0a17f9..2202f5d26 100644
--- a/include/universal/native/error_free_ops.hpp
+++ b/include/universal/native/error_free_ops.hpp
@@ -46,10 +46,8 @@ namespace sw { namespace universal {
/// input
/// reference to the residual
/// the sum s
- template
- inline double quick_two_sum(volatile NativeFloat a, volatile NativeFloat b, volatile NativeFloat& r)
- {
- volatile NativeFloat s = a + b;
+ inline double quick_two_sum(double a, double b, volatile double& r) {
+ volatile double s = a + b;
r = (std::isfinite(s) ? b - (s - a) : 0.0);
return s;
}
@@ -61,9 +59,7 @@ namespace sw { namespace universal {
/// input
/// reference to the residual
/// the sum s
- template
- inline double two_sum(volatile NativeFloat a, volatile NativeFloat b, volatile NativeFloat& r)
- {
+ inline double two_sum(double a, double b, volatile double& r) {
volatile double s = a + b;
if (std::isfinite(s)) {
volatile double bb = s - a;
@@ -87,8 +83,7 @@ namespace sw { namespace universal {
/// input
/// reference to the residual
/// the sum s
- template
- inline double quick_two_diff(volatile NativeFloat a, volatile NativeFloat b, volatile NativeFloat& r) {
+ inline double quick_two_diff(double a, double b, volatile double& r) {
volatile double s = a - b;
r = (std::isfinite(s) ? (a - s) - b : 0.0);
return s;
@@ -103,8 +98,7 @@ namespace sw { namespace universal {
/// input
/// reference to the residual
/// the difference s
- template
- inline NativeFloat two_diff(volatile NativeFloat a, volatile NativeFloat b, volatile NativeFloat& r) {
+ inline double two_diff(double a, double b, volatile double& r) {
volatile double s = a - b;
if (std::isfinite(s)) {
volatile double bb = s - a;
@@ -124,8 +118,7 @@ namespace sw { namespace universal {
/// input
/// input
/// input value, output residual
- inline void three_sum(volatile double& a, volatile double& b, volatile double& c)
- {
+ inline void three_sum(volatile double& a, volatile double& b, volatile double& c) {
volatile double t1, t2, t3;
t1 = two_sum(a, b, t2);
@@ -138,8 +131,7 @@ namespace sw { namespace universal {
#if !defined( QD_FMS )
/* Computes high word and lo word of a */
- inline void split(volatile double a, volatile double& hi, volatile double& lo)
- {
+ inline void split(double a, volatile double& hi, volatile double& lo) {
int const QD_BITS = (std::numeric_limits< double >::digits + 1) / 2;
static double const QD_SPLITTER = std::ldexp(1.0, QD_BITS) + 1.0;
static double const QD_SPLIT_THRESHOLD = std::ldexp((std::numeric_limits< double >::max)(), -QD_BITS - 1);
@@ -173,7 +165,7 @@ namespace sw { namespace universal {
/// input
/// reference to the residual
/// the product of a * b
- inline double two_prod(volatile double a, volatile double b, volatile double& r)
+ inline double two_prod(double a, double b, volatile double& r)
{
volatile double p = a * b;
if (std::isfinite(p)) {
@@ -198,8 +190,7 @@ namespace sw { namespace universal {
/// input
/// reference to the residual
/// the square product of a
- inline double two_sqr(double a, double& r)
- {
+ inline double two_sqr(double a, volatile double& r) {
volatile double p = a * a;
if (std::isfinite(p))
{
diff --git a/include/universal/native/integers.hpp b/include/universal/native/integers.hpp
index eb68fefc1..fecabe858 100644
--- a/include/universal/native/integers.hpp
+++ b/include/universal/native/integers.hpp
@@ -146,7 +146,7 @@ inline int64_t fastipow(int64_t base, uint8_t exp) {
template::value, Integer >::type
>
-inline std::string to_binary(const Integer& number, int nbits = 0, bool bNibbleMarker = true) {
+inline std::string to_binary(const Integer& number, bool bNibbleMarker = true, int nbits = 0) {
std::stringstream s;
if (nbits == 0) nbits = 8*sizeof(number);
s << "0b";
diff --git a/include/universal/number/dd/dd_impl.hpp b/include/universal/number/dd/dd_impl.hpp
index 162246a24..dea0a3cbf 100644
--- a/include/universal/number/dd/dd_impl.hpp
+++ b/include/universal/number/dd/dd_impl.hpp
@@ -103,7 +103,6 @@ class dd {
constexpr dd(unsigned long long iv) noexcept { *this = iv; }
constexpr dd(float iv) noexcept : hi{ iv }, lo{ 0.0 } {}
constexpr dd(double iv) noexcept : hi{ iv }, lo{ 0.0 } {}
- dd(long double iv) noexcept { *this = iv; }
// assignment operators for native types
constexpr dd& operator=(signed char rhs) noexcept { return convert_signed(rhs); }
@@ -118,7 +117,24 @@ class dd {
constexpr dd& operator=(unsigned long long rhs) noexcept { return convert_unsigned(rhs); }
constexpr dd& operator=(float rhs) noexcept { return convert_ieee754(rhs); }
constexpr dd& operator=(double rhs) noexcept { return convert_ieee754(rhs); }
- dd& operator=(long double rhs) noexcept { return convert_ieee754(rhs); }
+
+ // conversion operators
+ explicit operator int() const noexcept { return convert_to_signed(); }
+ explicit operator long() const noexcept { return convert_to_signed(); }
+ explicit operator long long() const noexcept { return convert_to_signed(); }
+ explicit operator unsigned int() const noexcept { return convert_to_unsigned(); }
+ explicit operator unsigned long() const noexcept { return convert_to_unsigned(); }
+ explicit operator unsigned long long() const noexcept { return convert_to_unsigned(); }
+ explicit operator float() const noexcept { return convert_to_ieee754(); }
+ explicit operator double() const noexcept { return convert_to_ieee754(); }
+
+
+#if LONG_DOUBLE_SUPPORT
+ // can't be constexpr as remainder calculation requires volatile designation
+ dd(long double iv) noexcept { *this = iv; }
+ dd& operator=(long double rhs) noexcept { return convert_ieee754(rhs); }
+ explicit operator long double() const noexcept { return convert_to_ieee754(); }
+#endif
// prefix operators
constexpr dd operator-() const noexcept {
@@ -128,10 +144,6 @@ class dd {
return negated;
}
- // conversion operators
- explicit operator float() const { return toNativeFloatingPoint(); }
- explicit operator double() const { return toNativeFloatingPoint(); }
- explicit operator long double() const { return toNativeFloatingPoint(); }
// arithmetic operators
dd& operator+=(const dd& rhs) {
@@ -449,12 +461,7 @@ class dd {
// if (fixed && (precision == 0) && (abs(*this) < 1.0)) {
// if (abs(*this) >= 0.5)
if (fixed && (precision == 0) && (std::abs(high()) < 1.0)) {
- if (std::abs(high()) >= 0.5)
-
- s += '1';
- else
- s += '0';
-
+ s += (std::abs(hi) >= 0.5) ? '1' : '0';
return s;
}
@@ -467,8 +474,7 @@ class dd {
}
}
else { // default
-
- char* t; // = new char[d+1];
+ char* t;
if (fixed) {
t = new char[d_with_extra + 1];
@@ -540,13 +546,13 @@ class dd {
if (!fixed && !isinf()) {
- /* Fill in exponent part */
+ // construct the exponent
s += uppercase ? 'E' : 'e';
append_expn(s, e);
}
}
- /* Fill in the blanks */
+ // Fill in the blanks: pad the string when it is shorter than the requested width
int len = s.length();
if (len < width) {
int delta = static_cast(width) - len;
@@ -583,13 +589,7 @@ class dd {
// HELPER methods
- // convert to native floating-point, use conversion rules to cast down to float and double
- template
- NativeFloat toNativeFloatingPoint() const {
- return NativeFloat(hi + lo);
- }
-
- constexpr dd& convert_signed(int64_t v) {
+ constexpr dd& convert_signed(int64_t v) noexcept {
if (0 == v) {
setzero();
}
@@ -600,7 +600,7 @@ class dd {
return *this;
}
- constexpr dd& convert_unsigned(uint64_t v) {
+ constexpr dd& convert_unsigned(uint64_t v) noexcept {
if (0 == v) {
setzero();
}
@@ -612,16 +612,17 @@ class dd {
}
// no need to SFINAE this as it is an internal method that we ONLY call when we know the argument type is a native float
- constexpr dd& convert_ieee754(float rhs) {
+ constexpr dd& convert_ieee754(float rhs) noexcept {
hi = double(rhs);
lo = 0.0;
return *this;
}
- constexpr dd& convert_ieee754(double rhs) {
+ constexpr dd& convert_ieee754(double rhs) noexcept {
hi = rhs;
lo = 0.0;
return *this;
}
+#if LONG_DOUBLE_SUPPORT
dd& convert_ieee754(long double rhs) {
volatile long double truncated = static_cast(double(rhs));
volatile double remainder = static_cast(rhs - truncated);
@@ -629,6 +630,31 @@ class dd {
lo = remainder;
return *this;
}
+#endif
+
+ // convert to a native unsigned integer: hi and lo are each converted to uint64_t, summed, and cast to the requested Unsigned type
+ template
+ Unsigned convert_to_unsigned() const noexcept {
+ uint64_t h = hi; // fraction is truncated toward zero; NOTE(review): undefined behavior if the truncated value does not fit in uint64_t (negative, or >= 2^64) - confirm callers
+ uint64_t l = lo; // TBD: lo could be negative; converting a double <= -1.0 to an unsigned type is undefined behavior
+ return Unsigned(h + l);
+ }
+
+ // convert to a native signed integer: hi and lo are each converted to int64_t, summed, and cast to the requested Signed type
+ template
+ Signed convert_to_signed() const noexcept {
+ int64_t h = hi; // fraction is truncated toward zero, independently of lo
+ int64_t l = lo; // truncated on its own as well, so a fractional lo never adjusts h
+ return Signed(h + l);
+ }
+
+ // convert to native floating-point: add the high and low words, then use C++ conversion rules to cast the sum to the requested Real type
+ template
+ Real convert_to_ieee754() const noexcept {
+ return Real(hi + lo); // NOTE(review): the sum is evaluated before the cast; assuming hi and lo are double, a long double result gains no extra precision from lo - confirm
+ }
+
+
void to_digits(char* s, int& expn, int precision) const {
int D = precision + 1; // number of digits to compute
diff --git a/static/dd/api/api.cpp b/static/dd/api/api.cpp
index 17b32b48e..421bedf61 100644
--- a/static/dd/api/api.cpp
+++ b/static/dd/api/api.cpp
@@ -1,4 +1,4 @@
-// api.cpp: application programming interface tests for doubledouble number system
+// api.cpp: application programming interface tests for double-double (dd) number system
//
// Copyright (C) 2017 Stillwater Supercomputing, Inc.
// SPDX-License-Identifier: MIT
diff --git a/static/dd/conversion/conversion.cpp b/static/dd/conversion/conversion.cpp
index 88c2bfe95..0e647460c 100644
--- a/static/dd/conversion/conversion.cpp
+++ b/static/dd/conversion/conversion.cpp
@@ -1,4 +1,4 @@
-// conversion.cpp: test suite runner for conversion operators for doubledouble
+// conversion.cpp: test suite runner for conversion operators for double-double (dd) floating-point
//
// Copyright (C) 2017 Stillwater Supercomputing, Inc.
// SPDX-License-Identifier: MIT
@@ -41,6 +41,34 @@ try {
#if MANUAL_TESTING
+ uint64_t u64;
+ int64_t i64;
+
+ // check if we get all the bits of a 64-bit int
+ u64 = 0xFFFF'FFFF'FFFF'FFFFull;
+ i64 = 0x7FFF'FFFF'FFFF'FFFFll;
+
+ {
+ std::cout << to_binary(u64, false, 64) << " : " << u64 << '\n';
+ u64 = -1;
+ std::cout << to_binary(u64, false, 64) << " : " << u64 << '\n';
+ uint64_t v{ u64 };
+ double hi = static_cast(v);
+ uint64_t h = static_cast(hi);
+ std::cout << std::fixed << hi << '\n';
+ std::cout << to_binary(h) << '\n';
+ double lo = static_cast(v - h); // difference is always positive
+
+ dd a(u64);
+ ReportValue(a, "0xFFFF'FFFF'FFFF'FFFF", 35, 32);
+ std::cout << to_pair(a) << '\n';
+ uint64_t i = uint64_t(a);
+ ReportValue(i, "0xFFFF'FFFF'FFFF'FFFF", 35, 32);
+ }
+ {
+ dd a(i64);
+ ReportValue(a, "0x7FFF'FFFF'FFFF'FFFF", 35, 32);
+ }
ReportTestSuiteResults(test_suite, nrOfFailedTestCases);
return EXIT_SUCCESS; // ignore failures