diff --git a/icicle/backend/cpu/src/field/cpu_vec_ops.cpp b/icicle/backend/cpu/src/field/cpu_vec_ops.cpp index 22c257023..09e3d8095 100644 --- a/icicle/backend/cpu/src/field/cpu_vec_ops.cpp +++ b/icicle/backend/cpu/src/field/cpu_vec_ops.cpp @@ -803,12 +803,19 @@ REGISTER_SLICE_EXT_FIELD_BACKEND("CPU", cpu_slice); /*********************************** Highest non-zero idx ***********************************/ template -eIcicleError cpu_highest_non_zero_idx( - const Device& device, const T* input, uint64_t size, const VecOpsConfig& config, int64_t* out_idx /*OUT*/) +eIcicleError cpu_highest_non_zero_idx_internal( + const Device& device, + const T* input, + uint64_t size, + const VecOpsConfig& config, + int64_t* out_idx /*OUT*/, + int32_t idx_in_batch_to_calc) { ICICLE_ASSERT(input && out_idx && size != 0) << "Error: Invalid argument"; uint64_t stride = config.columns_batch ? config.batch_size : 1; - for (uint64_t idx_in_batch = 0; idx_in_batch < config.batch_size; ++idx_in_batch) { + uint32_t start_idx = (idx_in_batch_to_calc == -1) ? 0 : idx_in_batch_to_calc; + uint32_t end_idx = (idx_in_batch_to_calc == -1) ? config.batch_size : idx_in_batch_to_calc + 1; + for (uint64_t idx_in_batch = start_idx; idx_in_batch < end_idx; ++idx_in_batch) { out_idx[idx_in_batch] = -1; // zero vector is considered '-1' since 0 would be zero in vec[0] const T* curr_input = config.columns_batch ? input + idx_in_batch : input + idx_in_batch * size; // Pointer to the current vector @@ -822,6 +829,13 @@ eIcicleError cpu_highest_non_zero_idx( return eIcicleError::SUCCESS; } +template +eIcicleError cpu_highest_non_zero_idx( + const Device& device, const T* input, uint64_t size, const VecOpsConfig& config, int64_t* out_idx /*OUT*/) +{ + return cpu_highest_non_zero_idx_internal(device, input, size, config, out_idx, -1); +} + REGISTER_HIGHEST_NON_ZERO_IDX_BACKEND("CPU", cpu_highest_non_zero_idx); /*********************************** Polynomial evaluation ***********************************/ @@ -887,13 +901,24 @@ eIcicleError cpu_poly_divide( T* r_out /*OUT*/, uint64_t r_size) { - if (config.batch_size != 1 && config.columns_batch) { - ICICLE_LOG_ERROR << "polynomial division is not implemented for column batch. Planned for v3.2"; - return eIcicleError::API_NOT_IMPLEMENTED; - } - uint32_t stride = config.columns_batch ? config.batch_size : 1; + auto numerator_deg = std::make_unique(config.batch_size); + auto denominator_deg = std::make_unique(config.batch_size); + auto deg_r = std::make_unique(config.batch_size); + cpu_highest_non_zero_idx(device, numerator, numerator_size, config, numerator_deg.get()); + cpu_highest_non_zero_idx(device, denominator, denominator_size, config, denominator_deg.get()); + memset(r_out, 0, sizeof(T) * (r_size * config.batch_size)); + memcpy(r_out, numerator, sizeof(T) * (numerator_size * config.batch_size)); + for (uint64_t idx_in_batch = 0; idx_in_batch < config.batch_size; ++idx_in_batch) { + ICICLE_ASSERT(r_size >= numerator_deg[idx_in_batch] + 1) + << "polynomial division expects r(x) size to be similar to numerator size and higher than numerator " + "degree(x).\nr_size = " + << r_size << ", numerator_deg[" << idx_in_batch << "] = " << numerator_deg[idx_in_batch]; + ICICLE_ASSERT(q_size >= (numerator_deg[idx_in_batch] - denominator_deg[idx_in_batch] + 1)) + << "polynomial division expects q(x) size to be at least deg(numerator)-deg(denominator)+1.\nq_size = " << q_size + << ", numerator_deg[" << idx_in_batch << "] = " << numerator_deg[idx_in_batch] << ", denominator_deg[" + << idx_in_batch << "] = " << denominator_deg[idx_in_batch]; const T* curr_numerator = config.columns_batch ? numerator + idx_in_batch : numerator + idx_in_batch * numerator_size; const T* curr_denominator = @@ -901,25 +926,15 @@ eIcicleError cpu_poly_divide( T* curr_q_out = config.columns_batch ? q_out + idx_in_batch : q_out + idx_in_batch * q_size; T* curr_r_out = config.columns_batch ? r_out + idx_in_batch : r_out + idx_in_batch * r_size; - int64_t numerator_deg, denominator_deg; - cpu_highest_non_zero_idx(device, curr_numerator, numerator_size, default_vec_ops_config(), &numerator_deg); - cpu_highest_non_zero_idx(device, curr_denominator, denominator_size, default_vec_ops_config(), &denominator_deg); - ICICLE_ASSERT(r_size >= numerator_deg + 1) - << "polynomial division expects r(x) size to be similar to numerator size and higher than numerator degree(x)"; - ICICLE_ASSERT(q_size >= (numerator_deg - denominator_deg + 1)) - << "polynomial division expects q(x) size to be at least deg(numerator)-deg(denominator)+1"; - - memset(curr_r_out, 0, sizeof(T) * r_size); - memcpy(curr_r_out, curr_numerator, sizeof(T) * (numerator_deg + 1)); - // invert largest coeff of b - const T& lc_b_inv = T::inverse(curr_denominator[denominator_deg * stride]); - int64_t deg_r = numerator_deg; - while (deg_r >= denominator_deg) { + const T& lc_b_inv = T::inverse(curr_denominator[denominator_deg[idx_in_batch] * stride]); + deg_r[idx_in_batch] = numerator_deg[idx_in_batch]; + while (deg_r[idx_in_batch] >= denominator_deg[idx_in_batch]) { // each iteration is removing the largest monomial in r until deg(r)::random_samples(in_a.get(), total_size); // Reference implementation @@ -797,42 +763,53 @@ TEST_F(FieldApiTestBase, polynomialEval) TEST_F(FieldApiTestBase, polynomialDivision) { - const uint64_t numerator_size = 1 << 4; - const uint64_t denominator_size = 1 << 3; + int seed = time(0); + srand(seed); + const uint64_t numerator_size = 1 << (rand() % 3 + 5); + const uint64_t denominator_size = 1 << (rand() % 2 + 3); const uint64_t q_size = numerator_size - denominator_size + 1; const uint64_t r_size = numerator_size; const int batch_size = 10 + rand() % 10; // basically we compute q(x),r(x) for a(x)=q(x)b(x)+r(x) by dividing a(x)/b(x) - // randomize matrix with rows/cols as polynomials auto numerator = std::make_unique(numerator_size * batch_size); auto denominator = std::make_unique(denominator_size * batch_size); - scalar_t::rand_host_many(numerator.get(), numerator_size * batch_size); - scalar_t::rand_host_many(denominator.get(), denominator_size * batch_size); - - // Add padding to each row so that the degree is lower than the size - const int zero_pad_length = 5; - for (int i = 0; i < batch_size; ++i) { - for (int j = 0; j < zero_pad_length; ++j) { - numerator[i * numerator_size + numerator_size - zero_pad_length + j] = scalar_t::zero(); - denominator[i * denominator_size + denominator_size - zero_pad_length + j] = scalar_t::zero(); - } - } for (auto device : s_registered_devices) { ICICLE_CHECK(icicle_set_device(device)); for (int columns_batch = 0; columns_batch <= 1; columns_batch++) { - ICICLE_LOG_DEBUG << "testing polynomial division on device " << device << " [column_batch=" << columns_batch - << "]"; + ICICLE_LOG_INFO << "testing polynomial division on device " << device << " [column_batch=" << columns_batch + << "]"; + + // randomize matrix with rows/cols as polynomials + scalar_t::rand_host_many(numerator.get(), numerator_size * batch_size); + scalar_t::rand_host_many(denominator.get(), denominator_size * batch_size); + + // Add padding to each vector so that the degree is lower than the size + const int zero_pad_length = 1; + if (columns_batch) { + for (int i = 0; i < batch_size * zero_pad_length; i++) { + numerator[batch_size * numerator_size - batch_size * zero_pad_length + i] = scalar_t::zero(); + denominator[batch_size * denominator_size - batch_size * zero_pad_length + i] = scalar_t::zero(); + } + } else { + for (int i = 0; i < batch_size; ++i) { + for (int j = 0; j < zero_pad_length; ++j) { + numerator[i * numerator_size + numerator_size - zero_pad_length + j] = scalar_t::zero(); + denominator[i * denominator_size + denominator_size - zero_pad_length + j] = scalar_t::zero(); + } + } + } + auto q = std::make_unique(q_size * batch_size); auto r = std::make_unique(r_size * batch_size); auto config = default_vec_ops_config(); - config.batch_size = columns_batch ? batch_size - zero_pad_length : batch_size; // skip the zero cols + config.batch_size = batch_size; config.columns_batch = columns_batch; // TODO v3.2 support column batch for this API - if (columns_batch) { + if (columns_batch && device == "CUDA") { ICICLE_LOG_INFO << "Skipping polynomial division column batch"; continue; } @@ -853,7 +830,6 @@ TEST_F(FieldApiTestBase, polynomialDivision) polynomial_eval(r.get(), r_size, &rand_x, 1, config, rx.get()); for (int i = 0; i < config.batch_size; ++i) { - // ICICLE_LOG_DEBUG << "ax=" << ax[i] << ", bx=" << bx[i] << ", qx=" << qx[i] << ", rx=" << rx[i]; ASSERT_EQ(ax[i], qx[i] * bx[i] + rx[i]); } } diff --git a/icicle/tests/test_polynomial_api.cpp b/icicle/tests/test_polynomial_api.cpp index c859cefd4..124eee26e 100644 --- a/icicle/tests/test_polynomial_api.cpp +++ b/icicle/tests/test_polynomial_api.cpp @@ -47,12 +47,6 @@ class PolynomialTest : public ::testing::Test auto init_domain_config = default_ntt_init_domain_config(); ntt_init_domain(scalar_t::omega(MAX_NTT_LOG_SIZE), init_domain_config); } - - // choose random device for testing - srand(time(NULL)); - const int dev_idx = rand() % s_registered_devices.size(); - icicle_set_device(s_registered_devices.at(dev_idx)); - ICICLE_LOG_INFO << "setting device " << s_registered_devices.at(dev_idx) << " for polynomial tests"; } static void TearDownTestSuite() {} @@ -137,395 +131,446 @@ class PolynomialTest : public ::testing::Test TEST_F(PolynomialTest, evaluation) { - const scalar_t coeffs[3] = {one, two, three}; - auto f = Polynomial_t::from_coefficients(coeffs, 3); - scalar_t x = scalar_t::rand_host(); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const scalar_t coeffs[3] = {one, two, three}; + auto f = Polynomial_t::from_coefficients(coeffs, 3); + scalar_t x = scalar_t::rand_host(); - auto f_x = f(x); // evaluation + auto f_x = f(x); // evaluation - auto expected_f_x = one + two * x + three * x * x; + auto expected_f_x = one + two * x + three * x * x; - EXPECT_EQ(f_x, expected_f_x); + EXPECT_EQ(f_x, expected_f_x); + } } TEST_F(PolynomialTest, evaluationOnDomain) { - int size = 1 << 5; - auto f = PolynomialTest::randomize_polynomial(size); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + int size = 1 << 5; + auto f = PolynomialTest::randomize_polynomial(size); - size *= 2; // evaluating on a larger domain - scalar_t w; - ICICLE_CHECK(get_root_of_unity_from_domain((int)log2(size), &w)); + size *= 2; // evaluating on a larger domain + scalar_t w; + ICICLE_CHECK(get_root_of_unity_from_domain((int)log2(size), &w)); - // construct domain as rou - scalar_t x = one; - auto domain = std::make_unique(size); - for (int i = 0; i < size; ++i) { - domain[i] = x; - x = x * w; - } + // construct domain as rou + scalar_t x = one; + auto domain = std::make_unique(size); + for (int i = 0; i < size; ++i) { + domain[i] = x; + x = x * w; + } - // evaluate f on the domain (equivalent to NTT) - auto evaluations = std::make_unique(size); - f.evaluate_on_domain(domain.get(), size, evaluations.get()); + // evaluate f on the domain (equivalent to NTT) + auto evaluations = std::make_unique(size); + f.evaluate_on_domain(domain.get(), size, evaluations.get()); - // construct g from the evaluations of f - auto g = Polynomial_t::from_rou_evaluations(evaluations.get(), size); + // construct g from the evaluations of f + auto g = Polynomial_t::from_rou_evaluations(evaluations.get(), size); - // check that f==g - ASSERT_EQ((f - g).degree(), -1); + // check that f==g + ASSERT_EQ((f - g).degree(), -1); + } } TEST_F(PolynomialTest, evaluateOnRouDomain) { - const int logsize = 8; - const int size = 1 << logsize; - auto f = randomize_polynomial(size); - auto g = randomize_polynomial(size, true, true /*from_evals*/); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const int logsize = 8; + const int size = 1 << logsize; + auto f = randomize_polynomial(size); + auto g = randomize_polynomial(size, true, true /*from_evals*/); + + // build domain + auto test = [&](auto& p, int domain_logsize) { + const int domain_size = 1 << domain_logsize; + + scalar_t w; + ICICLE_CHECK(get_root_of_unity_from_domain(domain_logsize, &w)); + + auto domain = std::make_unique(domain_size); + domain[0] = scalar_t::one(); + for (int i = 1; i < domain_size; ++i) { + domain[i] = domain[i - 1] * w; + } + + // evaluation on domain + auto evals_naive = std::make_unique(domain_size); + START_TIMER(naive_evals); + p.evaluate_on_domain(domain.get(), domain_size, evals_naive.get()); + END_TIMER(naive_evals, "naive evals took", MEASURE); + + // evaluate on rou domain + auto evals_rou_domain = std::make_unique(domain_size); + START_TIMER(rou_domain_evals); + p.evaluate_on_rou_domain(domain_logsize, evals_rou_domain.get()); + END_TIMER(rou_domain_evals, "evals on rou domain took", MEASURE); + + ASSERT_EQ(0, memcmp(evals_naive.get(), evals_rou_domain.get(), domain_size * sizeof(scalar_t))); + }; - // build domain - auto test = [&](auto& p, int domain_logsize) { - const int domain_size = 1 << domain_logsize; + // test f (in coeffs state) + test(f, logsize + 2); // evaluate on larger domain + test(f, logsize - 3); // evaluate on smaller domain + test(f, logsize); // evaluate on domain with size like poly + // test g (in evals state) + test(g, logsize + 2); // evaluate on larger domain + test(g, logsize - 3); // evaluate on smaller domain + test(g, logsize); // evaluate on domain with size like poly + + // test f*f (in reversed evals state) + auto f_squared = f * f; + auto new_logsize = logsize + 1; // f_squared is twice the degree and size of f + test(f_squared, new_logsize + 2); // evaluate on larger domain + test(f_squared, new_logsize - 3); // evaluate on smaller domain + test(f_squared, new_logsize); // evaluate on domain with size like poly + } +} - scalar_t w; - ICICLE_CHECK(get_root_of_unity_from_domain(domain_logsize, &w)); +TEST_F(PolynomialTest, fromEvaluations) +{ + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const int size = 100; + const int log_size = (int)ceil(log2(size)); + const int nof_evals = 1 << log_size; + auto f = randomize_polynomial(size); - auto domain = std::make_unique(domain_size); - domain[0] = scalar_t::one(); - for (int i = 1; i < domain_size; ++i) { - domain[i] = domain[i - 1] * w; + // evaluate f on roots of unity + scalar_t omega = scalar_t::omega(log_size); + auto evals = std::make_unique(nof_evals); + scalar_t x = one; + for (int i = 0; i < nof_evals; ++i) { + evals[i] = f(x); + x = x * omega; } - // evaluation on domain - auto evals_naive = std::make_unique(domain_size); - START_TIMER(naive_evals); - p.evaluate_on_domain(domain.get(), domain_size, evals_naive.get()); - END_TIMER(naive_evals, "naive evals took", MEASURE); - - // evaluate on rou domain - auto evals_rou_domain = std::make_unique(domain_size); - START_TIMER(rou_domain_evals); - p.evaluate_on_rou_domain(domain_logsize, evals_rou_domain.get()); - END_TIMER(rou_domain_evals, "evals on rou domain took", MEASURE); - - ASSERT_EQ(0, memcmp(evals_naive.get(), evals_rou_domain.get(), domain_size * sizeof(scalar_t))); - }; - - // test f (in coeffs state) - test(f, logsize + 2); // evaluate on larger domain - test(f, logsize - 3); // evaluate on smaller domain - test(f, logsize); // evaluate on domain with size like poly - // test g (in evals state) - test(g, logsize + 2); // evaluate on larger domain - test(g, logsize - 3); // evaluate on smaller domain - test(g, logsize); // evaluate on domain with size like poly - - // test f*f (in reversed evals state) - auto f_squared = f * f; - auto new_logsize = logsize + 1; // f_squared is twice the degree and size of f - test(f_squared, new_logsize + 2); // evaluate on larger domain - test(f_squared, new_logsize - 3); // evaluate on smaller domain - test(f_squared, new_logsize); // evaluate on domain with size like poly -} + // construct g from f's evaluations + auto g = Polynomial_t::from_rou_evaluations(evals.get(), nof_evals); -TEST_F(PolynomialTest, fromEvaluations) -{ - const int size = 100; - const int log_size = (int)ceil(log2(size)); - const int nof_evals = 1 << log_size; - auto f = randomize_polynomial(size); - - // evaluate f on roots of unity - scalar_t omega = scalar_t::omega(log_size); - auto evals = std::make_unique(nof_evals); - scalar_t x = one; - for (int i = 0; i < nof_evals; ++i) { - evals[i] = f(x); - x = x * omega; - } - - // construct g from f's evaluations - auto g = Polynomial_t::from_rou_evaluations(evals.get(), nof_evals); - - // make sure they are equal, that is f-g=0 - auto h = f - g; - EXPECT_EQ(h.degree(), -1); // degree -1 is the zero polynomial + // make sure they are equal, that is f-g=0 + auto h = f - g; + EXPECT_EQ(h.degree(), -1); // degree -1 is the zero polynomial + } } TEST_F(PolynomialTest, fromEvaluationsNotPowerOfTwo) { - const int size = 100; - const int log_size = (int)ceil(log2(size)); - const int nof_evals = size; - auto f = randomize_polynomial(size); - - // evaluate f on roots of unity - scalar_t omega = scalar_t::omega(log_size); - scalar_t evals[nof_evals] = {0}; - scalar_t x = one; - for (int i = 0; i < nof_evals; ++i) { - evals[i] = f(x); - x = x * omega; - } - - // construct g from f's evaluations - auto g = Polynomial_t::from_rou_evaluations(evals, nof_evals); - - // since NTT works on a power of two (therefore the extra elements are arbitrary), f!=g but they should evaluate to - // the same values on the roots of unity due to construction. - x = one; - for (int i = 0; i < nof_evals; ++i) { - EXPECT_EQ(f(x), g(x)); - x = x * omega; + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const int size = 100; + const int log_size = (int)ceil(log2(size)); + const int nof_evals = size; + auto f = randomize_polynomial(size); + + // evaluate f on roots of unity + scalar_t omega = scalar_t::omega(log_size); + scalar_t evals[nof_evals] = {0}; + scalar_t x = one; + for (int i = 0; i < nof_evals; ++i) { + evals[i] = f(x); + x = x * omega; + } + + // construct g from f's evaluations + auto g = Polynomial_t::from_rou_evaluations(evals, nof_evals); + + // since NTT works on a power of two (therefore the extra elements are arbitrary), f!=g but they should evaluate to + // the same values on the roots of unity due to construction. + x = one; + for (int i = 0; i < nof_evals; ++i) { + EXPECT_EQ(f(x), g(x)); + x = x * omega; + } } } TEST_F(PolynomialTest, addition) { - const int size_0 = 12, size_1 = 17; - auto f = randomize_polynomial(size_0); - auto g = randomize_polynomial(size_1); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const int size_0 = 12, size_1 = 17; + auto f = randomize_polynomial(size_0); + auto g = randomize_polynomial(size_1); - scalar_t x = scalar_t::rand_host(); - auto f_x = f(x); - auto g_x = g(x); - auto fx_plus_gx = f_x + g_x; + scalar_t x = scalar_t::rand_host(); + auto f_x = f(x); + auto g_x = g(x); + auto fx_plus_gx = f_x + g_x; - auto s = f + g; - auto s_x = s(x); + auto s = f + g; + auto s_x = s(x); - EXPECT_EQ(fx_plus_gx, s_x); + EXPECT_EQ(fx_plus_gx, s_x); + } } TEST_F(PolynomialTest, addition_inplace) { - const int size_0 = 2, size_1 = 2; - auto f = randomize_polynomial(size_0); - auto g = randomize_polynomial(size_1); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const int size_0 = 2, size_1 = 2; + auto f = randomize_polynomial(size_0); + auto g = randomize_polynomial(size_1); - scalar_t x = scalar_t::rand_host(); - auto f_x = f(x); - auto g_x = g(x); - auto fx_plus_gx = f_x + g_x; + scalar_t x = scalar_t::rand_host(); + auto f_x = f(x); + auto g_x = g(x); + auto fx_plus_gx = f_x + g_x; - f += g; - auto s_x = f(x); + f += g; + auto s_x = f(x); - EXPECT_EQ(fx_plus_gx, s_x); + EXPECT_EQ(fx_plus_gx, s_x); + } } TEST_F(PolynomialTest, multiplication) { - const int size_0 = 1 << 15, size_1 = 1 << 12; - auto f = randomize_polynomial(size_0); - auto g = randomize_polynomial(size_1); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const int size_0 = 1 << 15, size_1 = 1 << 12; + auto f = randomize_polynomial(size_0); + auto g = randomize_polynomial(size_1); - scalar_t x = scalar_t::rand_host(); - auto f_x = f(x); - auto g_x = g(x); - auto fx_mul_gx = f_x * g_x; + scalar_t x = scalar_t::rand_host(); + auto f_x = f(x); + auto g_x = g(x); + auto fx_mul_gx = f_x * g_x; - START_TIMER(poly_mult_start); - auto m = f * g; - END_TIMER(poly_mult_start, "Polynomial multiplication took", MEASURE); + START_TIMER(poly_mult_start); + auto m = f * g; + END_TIMER(poly_mult_start, "Polynomial multiplication took", MEASURE); - auto m_x = m(x); + auto m_x = m(x); - EXPECT_EQ(fx_mul_gx, m_x); + EXPECT_EQ(fx_mul_gx, m_x); + } } TEST_F(PolynomialTest, multiplicationScalar) { - const int size = 1 << 15; - auto f = randomize_polynomial(size); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const int size = 1 << 15; + auto f = randomize_polynomial(size); - auto g = two * f; - auto h = f * three; + auto g = two * f; + auto h = f * three; - scalar_t x = scalar_t::rand_host(); - auto f_x = f(x); - auto g_x = g(x); - auto h_x = h(x); + scalar_t x = scalar_t::rand_host(); + auto f_x = f(x); + auto g_x = g(x); + auto h_x = h(x); - EXPECT_EQ(g_x, f_x * two); - EXPECT_EQ(h_x, f_x * three); + EXPECT_EQ(g_x, f_x * two); + EXPECT_EQ(h_x, f_x * three); - EXPECT_EQ(g.degree(), f.degree()); - EXPECT_EQ(h.degree(), f.degree()); + EXPECT_EQ(g.degree(), f.degree()); + EXPECT_EQ(h.degree(), f.degree()); + } } TEST_F(PolynomialTest, monomials) { - const scalar_t coeffs[3] = {one, zero, two}; // 1+2x^2 - auto f = Polynomial_t::from_coefficients(coeffs, 3); - const auto x = three; - const auto expected_f_x = one + two * x * x; - auto f_x = f(x); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const scalar_t coeffs[3] = {one, zero, two}; // 1+2x^2 + auto f = Polynomial_t::from_coefficients(coeffs, 3); + const auto x = three; + const auto expected_f_x = one + two * x * x; + auto f_x = f(x); - EXPECT_EQ(f_x, expected_f_x); + EXPECT_EQ(f_x, expected_f_x); - f.add_monomial_inplace(three, 1); // add 3x - const auto expected_addmonmon_f_x = f_x + three * x; - const auto addmonom_f_x = f(x); + f.add_monomial_inplace(three, 1); // add 3x + const auto expected_addmonmon_f_x = f_x + three * x; + const auto addmonom_f_x = f(x); - EXPECT_EQ(addmonom_f_x, expected_addmonmon_f_x); + EXPECT_EQ(addmonom_f_x, expected_addmonmon_f_x); - f.sub_monomial_inplace(one); // subtract 1. equivalent to 'f-1' - const auto expected_submonom_f_x = addmonom_f_x - one; - const auto submonom_f_x = f(x); + f.sub_monomial_inplace(one); // subtract 1. equivalent to 'f-1' + const auto expected_submonom_f_x = addmonom_f_x - one; + const auto submonom_f_x = f(x); - EXPECT_EQ(submonom_f_x, expected_submonom_f_x); + EXPECT_EQ(submonom_f_x, expected_submonom_f_x); + } } TEST_F(PolynomialTest, ReadCoeffsToHost) { - const scalar_t coeffs_f[3] = {zero, one, two}; // x+2x^2 - auto f = Polynomial_t::from_coefficients(coeffs_f, 3); - const scalar_t coeffs_g[3] = {one, one, one}; // 1+x+x^2 - auto g = Polynomial_t::from_coefficients(coeffs_g, 3); - - auto h = f + g; // 1+2x+3x^3 - const auto h0 = h.get_coeff(0); - const auto h1 = h.get_coeff(1); - const auto h2 = h.get_coeff(2); - EXPECT_EQ(h0, one); - EXPECT_EQ(h1, two); - EXPECT_EQ(h2, three); - - scalar_t h_coeffs[3] = {0}; - auto nof_coeffs = h.copy_coeffs(h_coeffs, 0, 2); // read the coefficients - EXPECT_EQ(nof_coeffs, 3); // expecting 3 due to specified indices - - scalar_t expected_h_coeffs[3] = {one, two, three}; - for (int i = 0; i < nof_coeffs; ++i) { - EXPECT_EQ(expected_h_coeffs[i], h_coeffs[i]); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const scalar_t coeffs_f[3] = {zero, one, two}; // x+2x^2 + auto f = Polynomial_t::from_coefficients(coeffs_f, 3); + const scalar_t coeffs_g[3] = {one, one, one}; // 1+x+x^2 + auto g = Polynomial_t::from_coefficients(coeffs_g, 3); + + auto h = f + g; // 1+2x+3x^3 + const auto h0 = h.get_coeff(0); + const auto h1 = h.get_coeff(1); + const auto h2 = h.get_coeff(2); + EXPECT_EQ(h0, one); + EXPECT_EQ(h1, two); + EXPECT_EQ(h2, three); + + scalar_t h_coeffs[3] = {0}; + auto nof_coeffs = h.copy_coeffs(h_coeffs, 0, 2); // read the coefficients + EXPECT_EQ(nof_coeffs, 3); // expecting 3 due to specified indices + + scalar_t expected_h_coeffs[3] = {one, two, three}; + for (int i = 0; i < nof_coeffs; ++i) { + EXPECT_EQ(expected_h_coeffs[i], h_coeffs[i]); + } } } TEST_F(PolynomialTest, divisionSimple) { - const scalar_t coeffs_a[4] = {five, zero, four, three}; // 3x^3+4x^2+5 - const scalar_t coeffs_b[3] = {minus_one, zero, one}; // x^2-1 - auto a = Polynomial_t::from_coefficients(coeffs_a, 4); - auto b = Polynomial_t::from_coefficients(coeffs_b, 3); - - auto [q, r] = a.divide(b); - scalar_t q_coeffs[2] = {0}; // 3x+4 - scalar_t r_coeffs[2] = {0}; // 3x+9 - const auto q_nof_coeffs = q.copy_coeffs(q_coeffs, 0, 1); - const auto r_nof_coeffs = r.copy_coeffs(r_coeffs, 0, 1); - - ASSERT_EQ(q_nof_coeffs, 2); - ASSERT_EQ(r_nof_coeffs, 2); - ASSERT_EQ(q_coeffs[0], scalar_t::from(4)); - ASSERT_EQ(q_coeffs[1], scalar_t::from(3)); - ASSERT_EQ(r_coeffs[0], scalar_t::from(9)); - ASSERT_EQ(r_coeffs[1], scalar_t::from(3)); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const scalar_t coeffs_a[4] = {five, zero, four, three}; // 3x^3+4x^2+5 + const scalar_t coeffs_b[3] = {minus_one, zero, one}; // x^2-1 + auto a = Polynomial_t::from_coefficients(coeffs_a, 4); + auto b = Polynomial_t::from_coefficients(coeffs_b, 3); + + auto [q, r] = a.divide(b); + scalar_t q_coeffs[2] = {0}; // 3x+4 + scalar_t r_coeffs[2] = {0}; // 3x+9 + const auto q_nof_coeffs = q.copy_coeffs(q_coeffs, 0, 1); + const auto r_nof_coeffs = r.copy_coeffs(r_coeffs, 0, 1); + + ASSERT_EQ(q_nof_coeffs, 2); + ASSERT_EQ(r_nof_coeffs, 2); + ASSERT_EQ(q_coeffs[0], scalar_t::from(4)); + ASSERT_EQ(q_coeffs[1], scalar_t::from(3)); + ASSERT_EQ(r_coeffs[0], scalar_t::from(9)); + ASSERT_EQ(r_coeffs[1], scalar_t::from(3)); + } } TEST_F(PolynomialTest, divisionLarge) { - const int size_0 = 1 << 12, size_1 = 1 << 2; - auto a = randomize_polynomial(size_0); - auto b = randomize_polynomial(size_1); - - START_TIMER(poly_mult_start); - auto [q, r] = a.divide(b); - END_TIMER(poly_mult_start, "Polynomial division took", MEASURE); - - scalar_t x = scalar_t::rand_host(); - auto a_x = a(x); - auto b_x = b(x); - auto q_x = q(x); - auto r_x = r(x); - - // a(x) = b(x)*q(x)+r(x) - EXPECT_EQ(a_x, b_x * q_x + r_x); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const int size_0 = 1 << 12, size_1 = 1 << 2; + auto a = randomize_polynomial(size_0); + auto b = randomize_polynomial(size_1); + + START_TIMER(poly_mult_start); + auto [q, r] = a.divide(b); + END_TIMER(poly_mult_start, "Polynomial division took", MEASURE); + + scalar_t x = scalar_t::rand_host(); + auto a_x = a(x); + auto b_x = b(x); + auto q_x = q(x); + auto r_x = r(x); + + // a(x) = b(x)*q(x)+r(x) + EXPECT_EQ(a_x, b_x * q_x + r_x); + } } TEST_F(PolynomialTest, divideByVanishingPolynomial) { - const scalar_t coeffs_v[5] = {minus_one, zero, zero, zero, one}; // x^4-1 vanishes on 4th roots of unity - auto v = Polynomial_t::from_coefficients(coeffs_v, 5); - auto h = randomize_polynomial(1 << 11, false); - auto hv = h * v; - - START_TIMER(poly_div_vanishing); - auto h_div_by_vanishing = hv.divide_by_vanishing_polynomial(4); - END_TIMER(poly_div_vanishing, "Polynomial division by vanishing (fast) took", MEASURE); - assert_equal(h_div_by_vanishing, h); - - START_TIMER(poly_div_long); - auto [h_div, R] = hv.divide(v); - END_TIMER(poly_div_long, "Polynomial division by vanishing (long division) took", MEASURE); - assert_equal(h_div, h); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const scalar_t coeffs_v[5] = {minus_one, zero, zero, zero, one}; // x^4-1 vanishes on 4th roots of unity + auto v = Polynomial_t::from_coefficients(coeffs_v, 5); + auto h = randomize_polynomial(1 << 11, false); + auto hv = h * v; + + START_TIMER(poly_div_vanishing); + auto h_div_by_vanishing = hv.divide_by_vanishing_polynomial(4); + END_TIMER(poly_div_vanishing, "Polynomial division by vanishing (fast) took", MEASURE); + assert_equal(h_div_by_vanishing, h); + + START_TIMER(poly_div_long); + auto [h_div, R] = hv.divide(v); + END_TIMER(poly_div_long, "Polynomial division by vanishing (long division) took", MEASURE); + assert_equal(h_div, h); + } } TEST_F(PolynomialTest, clone) { - const int size = 1 << 10; - auto f = randomize_polynomial(size); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const int size = 1 << 10; + auto f = randomize_polynomial(size); - const auto x = scalar_t::rand_host(); - const auto f_x = f(x); + const auto x = scalar_t::rand_host(); + const auto f_x = f(x); - Polynomial_t g; - g = f.clone(); // operator=(&&) - g += f; + Polynomial_t g; + g = f.clone(); // operator=(&&) + g += f; - auto h = g.clone(); // move constructor + auto h = g.clone(); // move constructor - ASSERT_EQ(g(x), two * f_x); - ASSERT_EQ(h(x), g(x)); + ASSERT_EQ(g(x), two * f_x); + ASSERT_EQ(h(x), g(x)); + } } TEST_F(PolynomialTest, View) { - const int size = 1 << 6; + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const int size = 1 << 6; - auto f = randomize_polynomial(size); - auto [d_coeff, N] = f.get_coefficients_view(); + auto f = randomize_polynomial(size); + auto [d_coeff, N] = f.get_coefficients_view(); - EXPECT_EQ(d_coeff.isValid(), true); - auto g = f + f; - // expecting the view to remain valid in that case - EXPECT_EQ(d_coeff.isValid(), true); + EXPECT_EQ(d_coeff.isValid(), true); + auto g = f + f; + // expecting the view to remain valid in that case + EXPECT_EQ(d_coeff.isValid(), true); - f += f; - // expecting view to be invalidated since f is modified - EXPECT_EQ(d_coeff.isValid(), false); + f += f; + // expecting view to be invalidated since f is modified + EXPECT_EQ(d_coeff.isValid(), false); + } } TEST_F(PolynomialTest, slicing) { - auto body = [](int size) { - const bool is_odd_size = size % 2 == 1; - auto f = randomize_polynomial(size); - const auto x = scalar_t::rand_host(); - - // computing e(x) and o(x) directly - auto expected_even = scalar_t::zero(); - auto expected_odd = scalar_t::zero(); - for (int i = size - 1; i >= 0; --i) { - if (i % 2 == 0) - expected_even = expected_even * x + f.get_coeff(i); - else - expected_odd = expected_odd * x + f.get_coeff(i); - } - - auto e = f.even(); - auto o = f.odd(); - - ASSERT_EQ(o.degree(), size / 2 - 1); - ASSERT_EQ(e.degree(), is_odd_size ? size / 2 : size / 2 - 1); - - // compute even and odd polynomials and compute them at x - ASSERT_EQ(f.even()(x), expected_even); - ASSERT_EQ(f.odd()(x), expected_odd); - }; + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + auto body = [](int size) { + const bool is_odd_size = size % 2 == 1; + auto f = randomize_polynomial(size); + const auto x = scalar_t::rand_host(); + + // computing e(x) and o(x) directly + auto expected_even = scalar_t::zero(); + auto expected_odd = scalar_t::zero(); + for (int i = size - 1; i >= 0; --i) { + if (i % 2 == 0) + expected_even = expected_even * x + f.get_coeff(i); + else + expected_odd = expected_odd * x + f.get_coeff(i); + } + + auto e = f.even(); + auto o = f.odd(); + + ASSERT_EQ(o.degree(), size / 2 - 1); + ASSERT_EQ(e.degree(), is_odd_size ? size / 2 : size / 2 - 1); + + // compute even and odd polynomials and compute them at x + ASSERT_EQ(f.even()(x), expected_even); + ASSERT_EQ(f.odd()(x), expected_odd); + }; - body(1 << 10); // test even size - body((1 << 10) - 1); // test odd size + body(1 << 10); // test even size + body((1 << 10) - 1); // test odd size + } } #ifdef MSM @@ -899,106 +944,124 @@ class Groth16Example TEST_F(PolynomialTest, QAP) { - // (1) construct R1CS and QAP for circuit with N inputs - Groth16Example QAP(300 /*=N*/); - - // (2) compute witness: randomize inputs and compute other entries [1,out,...N inputs..., ... intermediate values...] - auto witness = QAP.random_witness_inputs(); - QAP.compute_witness(witness); - - // (3) compute L(x),R(x),O(x) using the witness - const int nof_cols = QAP.witness_size; - - Polynomial_t Lx = QAP.L_QAP[0].clone(); - Polynomial_t Rx = QAP.R_QAP[0].clone(); - Polynomial_t Ox = QAP.O_QAP[0].clone(); - for (int col = 1; col < nof_cols; ++col) { - Lx += witness[col] * QAP.L_QAP[col]; - Rx += witness[col] * QAP.R_QAP[col]; - Ox += witness[col] * QAP.O_QAP[col]; - } - - const int nof_constraints = - QAP.nof_inputs - 1; // multiplying N numbers yields N-1 constraints for this circuit construction - const int vanishing_poly_deg = ceil_to_power_of_two(nof_constraints); - // (4) sanity check: verify AB=C at the evaluation points - { - scalar_t w; - ICICLE_CHECK(get_root_of_unity_from_domain((int)ceil(log2(nof_constraints)), &w)); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + // (1) construct R1CS and QAP for circuit with N inputs + Groth16Example QAP(300 /*=N*/); + + // (2) compute witness: randomize inputs and compute other entries [1,out,...N inputs..., ... intermediate + // values...] + auto witness = QAP.random_witness_inputs(); + QAP.compute_witness(witness); + + // (3) compute L(x),R(x),O(x) using the witness + const int nof_cols = QAP.witness_size; + + Polynomial_t Lx = QAP.L_QAP[0].clone(); + Polynomial_t Rx = QAP.R_QAP[0].clone(); + Polynomial_t Ox = QAP.O_QAP[0].clone(); + for (int col = 1; col < nof_cols; ++col) { + Lx += witness[col] * QAP.L_QAP[col]; + Rx += witness[col] * QAP.R_QAP[col]; + Ox += witness[col] * QAP.O_QAP[col]; + } - auto x = scalar_t::one(); - for (int i = 0; i < vanishing_poly_deg; ++i) { - ASSERT_EQ(Lx(x) * Rx(x), Ox(x)); - x = x * w; + const int nof_constraints = + QAP.nof_inputs - 1; // multiplying N numbers yields N-1 constraints for this circuit construction + const int vanishing_poly_deg = ceil_to_power_of_two(nof_constraints); + // (4) sanity check: verify AB=C at the evaluation points + { + scalar_t w; + ICICLE_CHECK(get_root_of_unity_from_domain((int)ceil(log2(nof_constraints)), &w)); + + auto x = scalar_t::one(); + for (int i = 0; i < vanishing_poly_deg; ++i) { + ASSERT_EQ(Lx(x) * Rx(x), Ox(x)); + x = x * w; + } } - } - // (5) compute h(x) as '(L(x)R(x)-O(x)) / t(x)' - Polynomial_t h = (Lx * Rx - Ox).divide_by_vanishing_polynomial(vanishing_poly_deg); + // (5) compute h(x) as '(L(x)R(x)-O(x)) / t(x)' + Polynomial_t h = (Lx * Rx - Ox).divide_by_vanishing_polynomial(vanishing_poly_deg); - // (6) sanity check: vanishing-polynomial divides (LR-O) without remainder - { - auto [h_long_div, r] = (Lx * Rx - Ox).divide(vanishing_polynomial(vanishing_poly_deg)); - EXPECT_EQ(r.degree(), -1); // zero polynomial (expecting division without remainder) - assert_equal(h, h_long_div); + // (6) sanity check: vanishing-polynomial divides (LR-O) without remainder + { + auto temp = Lx * Rx - Ox; + auto v = vanishing_polynomial(vanishing_poly_deg); + auto [q, r] = temp.divide(v); + auto rand_x = scalar_t::rand_host(); + EXPECT_EQ(temp(rand_x), q(rand_x) * v(rand_x) + r(rand_x)); + EXPECT_EQ(r.degree(), -1); // zero polynomial (expecting division without remainder) + assert_equal(h, q); + } } } #ifdef MSM TEST_F(PolynomialTest, commitMSM) { - const int size = 1 << 6; - auto f = randomize_polynomial(size); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + const int size = 1 << 6; + auto f = randomize_polynomial(size); - auto [d_coeff, N] = f.get_coefficients_view(); - auto msm_config = default_msm_config(); - msm_config.are_scalars_on_device = true; + auto [d_coeff, N] = f.get_coefficients_view(); + auto msm_config = default_msm_config(); + msm_config.are_scalars_on_device = true; - auto points = std::make_unique(size); - projective_t result; + auto points = std::make_unique(size); + projective_t result; - auto tau = scalar_t::rand_host(); - projective_t g = projective_t::rand_host(); - compute_powers_of_tau(g, tau, points.get(), size); + auto tau = scalar_t::rand_host(); + projective_t g = projective_t::rand_host(); + compute_powers_of_tau(g, tau, points.get(), size); - EXPECT_EQ(d_coeff.isValid(), true); - ICICLE_CHECK(msm(d_coeff.get(), points.get(), size, msm_config, &result)); + EXPECT_EQ(d_coeff.isValid(), true); + ICICLE_CHECK(msm(d_coeff.get(), points.get(), size, msm_config, &result)); - EXPECT_EQ(result, f(tau) * g); + EXPECT_EQ(result, f(tau) * g); - f += f; // this is invalidating the d_coeff integrity-pointer + f += f; // this is invalidating the d_coeff integrity-pointer - EXPECT_EQ(d_coeff.isValid(), false); + EXPECT_EQ(d_coeff.isValid(), false); + } } TEST_F(PolynomialTest, DummyGroth16) { - // (1) construct R1CS and QAP for circuit with N inputs - Groth16Example groth16_example(30 /*=N*/); - - // (2) compute witness: randomize inputs and compute other entries [1,out,...N inputs..., ... intermediate values...] - auto witness = groth16_example.random_witness_inputs(); - groth16_example.compute_witness(witness); - - groth16_example.setup(); - auto proof = groth16_example.prove(witness); - ASSERT_EQ(groth16_example.dummy_verify(proof, witness), true); + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + // (1) construct R1CS and QAP for circuit with N inputs + Groth16Example groth16_example(30 /*=N*/); + + // (2) compute witness: randomize inputs and compute other entries [1,out,...N inputs..., ... intermediate + // values...] + auto witness = groth16_example.random_witness_inputs(); + groth16_example.compute_witness(witness); + + groth16_example.setup(); + auto proof = groth16_example.prove(witness); + ASSERT_EQ(groth16_example.dummy_verify(proof, witness), true); + } } #ifdef G2 TEST_F(PolynomialTest, Groth16) { - // (1) construct R1CS and QAP for circuit with N inputs - Groth16Example groth16_example(30 /*=N*/); - - // (2) compute witness: randomize inputs and compute other entries [1,out,...N inputs..., ... intermediate - // values...] - auto witness = groth16_example.random_witness_inputs(); - groth16_example.compute_witness(witness); - - groth16_example.setup(); - auto proof = groth16_example.prove(witness); - // groth16_example.verify(proof); // cannot implement without pairing + for (auto device : s_registered_devices) { + ICICLE_CHECK(icicle_set_device(device)); + // (1) construct R1CS and QAP for circuit with N inputs + Groth16Example groth16_example(30 /*=N*/); + + // (2) compute witness: randomize inputs and compute other entries [1,out,...N inputs..., ... intermediate + // values...] + auto witness = groth16_example.random_witness_inputs(); + groth16_example.compute_witness(witness); + + groth16_example.setup(); + auto proof = groth16_example.prove(witness); + // groth16_example.verify(proof); // cannot implement without pairing + } } #endif // G2