Refactor size_t to std::size_t and include cstddef everywhere it is used

Maintains stylistic consistency and removes reliance on the implementation exposing the unqualified size_t in the global namespace.
ndgrigorian committed Jan 5, 2025
1 parent 3a1a7c5 commit dd53397
Showing 150 changed files with 1,857 additions and 1,718 deletions.
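
A minimal sketch of the motivation (illustrative only, not part of the commit; the function name bytes_for is made up): the C++ standard guarantees that <cstddef> declares std::size_t, while the unqualified size_t is guaranteed only by <stddef.h>; spelling size_t without including either header works only when some other header happens to pull it in.

#include <cstddef> // guarantees std::size_t in namespace std

// Portable spelling: always available once <cstddef> is included.
std::size_t bytes_for(std::size_t count, std::size_t elem_size)
{
    return count * elem_size;
}

// Fragile spelling: whether the unqualified size_t is visible here depends on
// <cstddef> also injecting it into the global namespace (unspecified) or on
// another header, e.g. <sycl/sycl.hpp>, happening to include <stddef.h>.
// size_t bytes_for(size_t count, size_t elem_size);
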
1 change: 1 addition & 0 deletions dpctl/_host_task_util.hpp
@@ -31,6 +31,7 @@

#pragma once
#include <exception>
#include <stddef.h>
#include <sycl/sycl.hpp>

#include "Python.h"
6 changes: 4 additions & 2 deletions dpctl/apis/include/dpctl4pybind11.hpp
@@ -27,8 +27,10 @@

#include "dpctl_capi.h"
#include <complex>
#include <cstddef> // for std::size_t for C++ linkage
#include <memory>
#include <pybind11/pybind11.h>
#include <stddef.h> // for size_t for C linkage
#include <stdexcept>
#include <sycl/sycl.hpp>
#include <utility>
@@ -759,7 +761,7 @@ class usm_memory : public py::object
* lifetime of the USM allocation.
*/
usm_memory(void *usm_ptr,
size_t nbytes,
std::size_t nbytes,
const sycl::queue &q,
std::shared_ptr<void> shptr)
{
@@ -819,7 +821,7 @@
return reinterpret_cast<char *>(MRef);
}

size_t get_nbytes() const
std::size_t get_nbytes() const
{
auto const &api = ::dpctl::detail::dpctl_capi::get();
Py_MemoryObject *mem_obj = reinterpret_cast<Py_MemoryObject *>(m_ptr);
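
A hedged sketch of the dual-include pattern at the top of this file's diff (illustrative only; the names hypothetical_alloc_shared and round_up_pow2 are invented): a C++ header that also declares functions shared with a C API keeps <stddef.h> for the plain size_t used in those declarations, and <cstddef> for std::size_t in its C++-only code.

#include <cstddef>  // std::size_t for the C++ helper below
#include <stddef.h> // plain size_t for the C-facing declaration

extern "C" {
// Shared with a C API, so the parameter is spelled with the plain size_t
// that <stddef.h> guarantees.
void *hypothetical_alloc_shared(size_t nbytes);
}

// C++-only code in the same header uses the namespaced spelling.
inline std::size_t round_up_pow2(std::size_t n, std::size_t align)
{
    return (n + align - 1) & ~(align - 1);
}
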
1 change: 1 addition & 0 deletions dpctl/tensor/libtensor/include/kernels/accumulators.hpp
@@ -24,6 +24,7 @@

#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <new>
2 changes: 1 addition & 1 deletion dpctl/tensor/libtensor/include/kernels/alignment.hpp
@@ -30,7 +30,7 @@ namespace kernels
namespace alignment_utils
{

static constexpr size_t required_alignment = 64UL;
static constexpr std::size_t required_alignment = 64UL;

template <std::uintptr_t alignment, typename Ptr> bool is_aligned(Ptr p)
{
@@ -23,6 +23,7 @@
//===---------------------------------------------------------------------===//

#pragma once
#include <cstddef>
#include <cstdint>
#include <limits>
#include <sycl/sycl.hpp>
@@ -55,7 +56,7 @@ struct MaskedExtractStridedFunctor
MaskedExtractStridedFunctor(const dataT *src_data_p,
const indT *cumsum_data_p,
dataT *dst_data_p,
size_t masked_iter_size,
std::size_t masked_iter_size,
const OrthogIndexerT &orthog_src_dst_indexer_,
const MaskedSrcIndexerT &masked_src_indexer_,
const MaskedDstIndexerT &masked_dst_indexer_,
@@ -81,7 +82,7 @@ struct MaskedExtractStridedFunctor

const std::size_t max_offset = masked_nelems + 1;
for (std::uint32_t i = l_i; i < lacc.size(); i += lws) {
const size_t offset = masked_block_start + i;
const std::size_t offset = masked_block_start + i;
lacc[i] = (offset == 0) ? indT(0)
: (offset < max_offset) ? cumsum[offset - 1]
: cumsum[masked_nelems - 1] + 1;
@@ -99,9 +100,10 @@
if (mask_set && (masked_i < masked_nelems)) {
const auto &orthog_offsets = orthog_src_dst_indexer(orthog_i);

const size_t total_src_offset = masked_src_indexer(masked_i) +
orthog_offsets.get_first_offset();
const size_t total_dst_offset =
const std::size_t total_src_offset =
masked_src_indexer(masked_i) +
orthog_offsets.get_first_offset();
const std::size_t total_dst_offset =
masked_dst_indexer(current_running_count - 1) +
orthog_offsets.get_second_offset();

@@ -113,7 +115,7 @@
const dataT *src = nullptr;
const indT *cumsum = nullptr;
dataT *dst = nullptr;
const size_t masked_nelems = 0;
const std::size_t masked_nelems = 0;
// has nd, shape, src_strides, dst_strides for
// dimensions that ARE NOT masked
const OrthogIndexerT orthog_src_dst_indexer;
@@ -136,7 +138,7 @@ struct MaskedPlaceStridedFunctor
MaskedPlaceStridedFunctor(dataT *dst_data_p,
const indT *cumsum_data_p,
const dataT *rhs_data_p,
size_t masked_iter_size,
std::size_t masked_iter_size,
const OrthogIndexerT &orthog_dst_rhs_indexer_,
const MaskedDstIndexerT &masked_dst_indexer_,
const MaskedRhsIndexerT &masked_rhs_indexer_,
@@ -157,12 +159,12 @@
const std::uint32_t l_i = ndit.get_local_id(1);
const std::uint32_t lws = ndit.get_local_range(1);

const size_t masked_i = ndit.get_global_id(1);
const size_t masked_block_start = masked_i - l_i;
const std::size_t masked_i = ndit.get_global_id(1);
const std::size_t masked_block_start = masked_i - l_i;

const std::size_t max_offset = masked_nelems + 1;
for (std::uint32_t i = l_i; i < lacc.size(); i += lws) {
const size_t offset = masked_block_start + i;
const std::size_t offset = masked_block_start + i;
lacc[i] = (offset == 0) ? indT(0)
: (offset < max_offset) ? cumsum[offset - 1]
: cumsum[masked_nelems - 1] + 1;
@@ -180,9 +182,10 @@
if (mask_set && (masked_i < masked_nelems)) {
const auto &orthog_offsets = orthog_dst_rhs_indexer(orthog_i);

const size_t total_dst_offset = masked_dst_indexer(masked_i) +
orthog_offsets.get_first_offset();
const size_t total_rhs_offset =
const std::size_t total_dst_offset =
masked_dst_indexer(masked_i) +
orthog_offsets.get_first_offset();
const std::size_t total_rhs_offset =
masked_rhs_indexer(current_running_count - 1) +
orthog_offsets.get_second_offset();

@@ -194,7 +197,7 @@
dataT *dst = nullptr;
const indT *cumsum = nullptr;
const dataT *rhs = nullptr;
const size_t masked_nelems = 0;
const std::size_t masked_nelems = 0;
// has nd, shape, dst_strides, rhs_strides for
// dimensions that ARE NOT masked
const OrthogIndexerT orthog_dst_rhs_indexer;
@@ -450,8 +453,8 @@ sycl::event masked_extract_some_slices_strided_impl(

const std::size_t lws = get_lws(masked_extent);

const size_t n_groups = ((masked_extent + lws - 1) / lws);
const size_t orthog_extent = static_cast<size_t>(orthog_nelems);
const std::size_t n_groups = ((masked_extent + lws - 1) / lws);
const std::size_t orthog_extent = static_cast<std::size_t>(orthog_nelems);

sycl::range<2> gRange{orthog_extent, n_groups * lws};
sycl::range<2> lRange{1, lws};
@@ -809,7 +812,7 @@ sycl::event non_zero_indexes_impl(sycl::queue &exec_q,
const std::size_t masked_block_start = group_i * lws;

for (std::uint32_t i = l_i; i < lacc.size(); i += lws) {
const size_t offset = masked_block_start + i;
const std::size_t offset = masked_block_start + i;
lacc[i] = (offset == 0) ? indT1(0)
: (offset - 1 < masked_extent)
? cumsum_data[offset - 1]
35 changes: 18 additions & 17 deletions dpctl/tensor/libtensor/include/kernels/clip.hpp
@@ -25,6 +25,7 @@
#pragma once
#include <algorithm>
#include <complex>
#include <cstddef>
#include <cstdint>
#include <sycl/sycl.hpp>
#include <type_traits>
@@ -85,14 +86,14 @@ template <typename T,
class ClipContigFunctor
{
private:
size_t nelems = 0;
std::size_t nelems = 0;
const T *x_p = nullptr;
const T *min_p = nullptr;
const T *max_p = nullptr;
T *dst_p = nullptr;

public:
ClipContigFunctor(size_t nelems_,
ClipContigFunctor(std::size_t nelems_,
const T *x_p_,
const T *min_p_,
const T *max_p_,
@@ -110,30 +111,30 @@ class ClipContigFunctor
if constexpr (is_complex<T>::value || !enable_sg_loadstore) {
const std::uint16_t sgSize =
ndit.get_sub_group().get_local_range()[0];
const size_t gid = ndit.get_global_linear_id();
const std::size_t gid = ndit.get_global_linear_id();
const uint16_t nelems_per_sg = sgSize * nelems_per_wi;

const size_t start =
const std::size_t start =
(gid / sgSize) * (nelems_per_sg - sgSize) + gid;
const size_t end = std::min(nelems, start + nelems_per_sg);
const std::size_t end = std::min(nelems, start + nelems_per_sg);

for (size_t offset = start; offset < end; offset += sgSize) {
for (std::size_t offset = start; offset < end; offset += sgSize) {
dst_p[offset] = clip(x_p[offset], min_p[offset], max_p[offset]);
}
}
else {
auto sg = ndit.get_sub_group();
const std::uint16_t sgSize = sg.get_max_local_range()[0];

const size_t base =
const std::size_t base =
nelems_per_wi * (ndit.get_group(0) * ndit.get_local_range(0) +
sg.get_group_id()[0] * sgSize);

if (base + nelems_per_wi * sgSize < nelems) {
sycl::vec<T, vec_sz> dst_vec;
#pragma unroll
for (std::uint8_t it = 0; it < n_vecs * vec_sz; it += vec_sz) {
const size_t idx = base + it * sgSize;
const std::size_t idx = base + it * sgSize;
auto x_multi_ptr = sycl::address_space_cast<
sycl::access::address_space::global_space,
sycl::access::decorated::yes>(&x_p[idx]);
@@ -162,8 +163,8 @@ class ClipContigFunctor
}
}
else {
const size_t lane_id = sg.get_local_id()[0];
for (size_t k = base + lane_id; k < nelems; k += sgSize) {
const std::size_t lane_id = sg.get_local_id()[0];
for (std::size_t k = base + lane_id; k < nelems; k += sgSize) {
dst_p[k] = clip(x_p[k], min_p[k], max_p[k]);
}
}
@@ -175,7 +176,7 @@ template <typename T, int vec_sz, int n_vecs> class clip_contig_kernel;

typedef sycl::event (*clip_contig_impl_fn_ptr_t)(
sycl::queue &,
size_t,
std::size_t,
const char *,
const char *,
const char *,
@@ -184,7 +185,7 @@ typedef sycl::event (*clip_contig_impl_fn_ptr_t)(

template <typename T>
sycl::event clip_contig_impl(sycl::queue &q,
size_t nelems,
std::size_t nelems,
const char *x_cp,
const char *min_cp,
const char *max_cp,
@@ -199,10 +200,10 @@ sycl::event clip_contig_impl(sycl::queue &q,
sycl::event clip_ev = q.submit([&](sycl::handler &cgh) {
cgh.depends_on(depends);

size_t lws = 64;
std::size_t lws = 64;
constexpr std::uint8_t vec_sz = 4;
constexpr std::uint8_t n_vecs = 2;
const size_t n_groups =
const std::size_t n_groups =
((nelems + lws * n_vecs * vec_sz - 1) / (lws * n_vecs * vec_sz));
const auto gws_range = sycl::range<1>(n_groups * lws);
const auto lws_range = sycl::range<1>(lws);
@@ -258,7 +259,7 @@ template <typename T, typename IndexerT> class ClipStridedFunctor

void operator()(sycl::id<1> id) const
{
size_t gid = id[0];
std::size_t gid = id[0];
auto offsets = indexer(static_cast<ssize_t>(gid));
dst_p[offsets.get_fourth_offset()] = clip(
x_p[offsets.get_first_offset()], min_p[offsets.get_second_offset()],
@@ -270,7 +271,7 @@ template <typename T, typename IndexerT> class clip_strided_kernel;

typedef sycl::event (*clip_strided_impl_fn_ptr_t)(
sycl::queue &,
size_t,
std::size_t,
int,
const char *,
const char *,
@@ -285,7 +286,7 @@ typedef sycl::event (*clip_strided_impl_fn_ptr_t)(

template <typename T>
sycl::event clip_strided_impl(sycl::queue &q,
size_t nelems,
std::size_t nelems,
int nd,
const char *x_cp,
const char *min_cp,
17 changes: 9 additions & 8 deletions dpctl/tensor/libtensor/include/kernels/constructors.hpp
@@ -29,6 +29,7 @@
#include "utils/strided_iters.hpp"
#include "utils/type_utils.hpp"
#include <complex>
#include <cstddef>
#include <sycl/sycl.hpp>

namespace dpctl
@@ -97,7 +98,7 @@ template <typename Ty> class LinearSequenceStepFunctor
*/
template <typename Ty>
sycl::event lin_space_step_impl(sycl::queue &exec_q,
size_t nelems,
std::size_t nelems,
Ty start_v,
Ty step_v,
char *array_data,
@@ -123,10 +124,10 @@ template <typename Ty, typename wTy> class LinearSequenceAffineFunctor
Ty *p = nullptr;
Ty start_v;
Ty end_v;
size_t n;
std::size_t n;

public:
LinearSequenceAffineFunctor(char *dst_p, Ty v0, Ty v1, size_t den)
LinearSequenceAffineFunctor(char *dst_p, Ty v0, Ty v1, std::size_t den)
: p(reinterpret_cast<Ty *>(dst_p)), start_v(v0), end_v(v1),
n((den == 0) ? 1 : den)
{
@@ -188,7 +189,7 @@ template <typename Ty, typename wTy> class LinearSequenceAffineFunctor
*/
template <typename Ty>
sycl::event lin_space_affine_impl(sycl::queue &exec_q,
size_t nelems,
std::size_t nelems,
Ty start_v,
Ty end_v,
bool include_endpoint,
@@ -238,7 +239,7 @@ sycl::event lin_space_affine_impl(sycl::queue &exec_q,
*/
template <typename dstTy>
sycl::event full_contig_impl(sycl::queue &q,
size_t nelems,
std::size_t nelems,
dstTy fill_v,
char *dst_p,
const std::vector<sycl::event> &depends)
@@ -294,7 +295,7 @@ template <typename Ty, typename IndexerT> class FullStridedFunctor
template <typename dstTy>
sycl::event full_strided_impl(sycl::queue &q,
int nd,
size_t nelems,
std::size_t nelems,
const ssize_t *shape_strides,
dstTy fill_v,
char *dst_p,
@@ -321,7 +322,7 @@ sycl::event full_strided_impl(sycl::queue &q,
/* ================ Eye ================== */

typedef sycl::event (*eye_fn_ptr_t)(sycl::queue &,
size_t nelems, // num_elements
std::size_t nelems, // num_elements
ssize_t start,
ssize_t end,
ssize_t step,
@@ -375,7 +376,7 @@ template <typename Ty> class EyeFunctor
*/
template <typename Ty>
sycl::event eye_impl(sycl::queue &exec_q,
size_t nelems,
std::size_t nelems,
const ssize_t start,
const ssize_t end,
const ssize_t step,