Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Partially support data parallel for_loop #6608

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,31 @@ namespace hpx::parallel::util::detail {
}
};

// Loop-step helper selected when the loop "iterator" is a plain integral
// counter: the counter value itself (not a dereferenced element) is packed
// and handed to the callable.
template <typename I>
struct datapar_loop_step<I, std::enable_if_t<std::is_integral_v<I>>>
{
    using V1 = traits::vector_pack_type_t<I, 1>;
    using V = traits::vector_pack_type_t<I>;

    // Scalar step: construct a V1 from the current counter, invoke f with
    // it, then advance the counter by one.
    template <typename F>
    HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr void call1(F&& f, I& i)
    {
        V1 single(i);
        HPX_INVOKE(f, single);
        ++i;
    }

    // Vector step: fill a V with the consecutive values i, i + 1, ...,
    // invoke f with it, then advance the counter by the pack size of V.
    template <typename F>
    HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr void callv(F&& f, I& i)
    {
        V pack;
        std::size_t lane = 0;
        while (lane != traits::size(pack))
        {
            // the sum is formed in std::size_t and narrowed back to I
            traits::set(pack, lane, static_cast<I>(i + lane));
            ++lane;
        }
        HPX_INVOKE(f, pack);
        i += traits::vector_pack_size_v<V>;
    }
};

///////////////////////////////////////////////////////////////////////////
template <typename Iter, typename Enable = void>
struct datapar_loop_pred_step
Expand Down
77 changes: 67 additions & 10 deletions libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2007-2023 Hartmut Kaiser
// Copyright (c) 2007-2025 Hartmut Kaiser
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
Expand All @@ -15,6 +15,7 @@
#include <hpx/execution/traits/vector_pack_load_store.hpp>
#include <hpx/execution/traits/vector_pack_type.hpp>
#include <hpx/executors/datapar/execution_policy.hpp>
#include <hpx/iterator_support/traits/is_iterator.hpp>
#include <hpx/parallel/datapar/iterator_helpers.hpp>
#include <hpx/parallel/util/loop.hpp>

Expand Down Expand Up @@ -238,8 +239,12 @@ namespace hpx::parallel::util {
};

///////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Enable = void>
struct datapar_loop_n;

template <typename Iterator>
struct datapar_loop_n
struct datapar_loop_n<Iterator,
std::enable_if_t<hpx::traits::is_iterator_v<Iterator>>>
{
using iterator_type = std::decay_t<Iterator>;
using value_type =
Expand All @@ -258,8 +263,9 @@ namespace hpx::parallel::util {
{
std::size_t len = count;

// clang-format off
for (/* */; !detail::is_data_aligned(first) && len != 0;
--len)
--len)
{
datapar_loop_step<InIter>::call1(f, first);
}
Expand All @@ -268,16 +274,18 @@ namespace hpx::parallel::util {

for (auto len_v =
static_cast<std::int64_t>(len - (size + 1));
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
{
datapar_loop_step<InIter>::callv(f, first);
}
// clang-format on

for (/* */; len != 0; --len)
{
datapar_loop_step<InIter>::call1(f, first);
}

return first;
}
else
Expand All @@ -302,6 +310,51 @@ namespace hpx::parallel::util {
}
};

// Counted loop over a plain integral counter (instead of an iterator).
// The range is processed as: scalar prologue, vectorized main loop, scalar
// epilogue.
template <typename I>
struct datapar_loop_n<I, std::enable_if_t<std::is_integral_v<I>>>
{
    using V = traits::vector_pack_type_t<I>;

    // Invokes f for count consecutive counter values starting at first.
    // Returns first after the steps have been applied to it (the step
    // helpers take it by reference).
    template <typename Iter, typename F>
    HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter call(
        Iter first, std::size_t count, F&& f)
    {
        std::size_t len = count;
        constexpr std::size_t size = traits::vector_pack_size_v<V>;

        // Scalar prologue: step until first is a multiple of size.
        // The distance to the next multiple is (size - first % size) % size;
        // using first % size itself as the step count would stop at a
        // non-multiple (e.g. first == 1, size == 4 would stop at 2).
        // NOTE(review): for a negative first the remainder is taken after
        // conversion to std::size_t; this matches the true residue only
        // when size is a power of two -- confirm.
        std::size_t const offset = first % size;
        for (std::size_t i = (offset == 0 ? 0 : size - offset);
             i != 0 && len != 0; --i, --len)
        {
            datapar_loop_step<Iter>::call1(f, first);
        }

        // Vectorized main loop: len_v is len - (size + 1) reinterpreted as
        // a signed value, so this loop runs only while more than size + 1
        // elements remain.
        // clang-format off
        for (auto len_v = static_cast<std::int64_t>(len - (size + 1));
             len_v > 0;
             len_v -= static_cast<std::int64_t>(size), len -= size)
        {
            datapar_loop_step<Iter>::callv(f, first);
        }
        // clang-format on

        // Scalar epilogue: whatever the vector loop left over.
        for (/* */; len != 0; --len)
        {
            datapar_loop_step<Iter>::call1(f, first);
        }
        return first;
    }

    // Overload taking a cancellation token: the token is queried once,
    // before any work is done; if it reports cancellation, first is
    // returned unchanged.
    template <typename Iter, typename CancelToken, typename F>
    HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Iter call(
        Iter first, std::size_t count, CancelToken& tok, F&& f)
    {
        // check at the start of a partition only
        if (tok.was_cancelled())
            return first;

        return call(first, count, HPX_FORWARD(F, f));
    }
};

///////////////////////////////////////////////////////////////////////
template <typename Iterator>
struct datapar_loop_n_ind
Expand All @@ -323,8 +376,9 @@ namespace hpx::parallel::util {
{
std::size_t len = count;

// clang-format off
for (/* */; !detail::is_data_aligned(first) && len != 0;
--len)
--len)
{
datapar_loop_step_ind<InIter>::call1(f, first);
}
Expand All @@ -333,11 +387,12 @@ namespace hpx::parallel::util {

for (auto len_v =
static_cast<std::int64_t>(len - (size + 1));
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
{
datapar_loop_step_ind<InIter>::callv(f, first);
}
// clang-format on

for (/* */; len != 0; --len)
{
Expand Down Expand Up @@ -381,14 +436,16 @@ namespace hpx::parallel::util {

constexpr std::size_t size = traits::vector_pack_size_v<V>;

// clang-format off
for (auto len_v = static_cast<std::int64_t>(len - (size + 1));
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
len_v > 0;
len_v -= static_cast<std::int64_t>(size), len -= size)
{
datapar_loop_idx_step<Iter>::callv(f, it, base_idx);
std::advance(it, size);
base_idx += size;
}
// clang-format on

for (/* */; len != 0; --len)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ if(HPX_WITH_DATAPAR)
foreach_datapar
foreach_datapar_zipiter
foreachn_datapar
for_loop_datapar
generate_datapar
generaten_datapar
mismatch_binary_datapar
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright (c) 2016-2025 Hartmut Kaiser
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <hpx/algorithm.hpp>
#include <hpx/datapar.hpp>
#include <hpx/init.hpp>
#include <hpx/modules/testing.hpp>

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <numeric>
#include <random>
#include <string>
#include <utility>
#include <vector>

///////////////////////////////////////////////////////////////////////////////
// Seed for the test's random number generator. Defaults to a value drawn from
// std::random_device; hpx_main overwrites it when the "seed" option is given.
unsigned int seed = std::random_device{}();
// Generator used to pick the initial contents of the test vectors; re-seeded
// in hpx_main once the final seed value is known.
std::mt19937 gen(seed);

///////////////////////////////////////////////////////////////////////////////
// Runs an index-based for_loop over [0, c.size()) with the given (synchronous)
// policy and checks that every element of the vector was overwritten with 42.
template <typename ExPolicy>
void test_for_loop_idx(ExPolicy&& policy)
{
    static_assert(hpx::is_execution_policy_v<ExPolicy>,
        "hpx::is_execution_policy_v<ExPolicy>");

    // start from seed-dependent contents so the overwrite is observable
    std::vector<std::size_t> c(10007);
    std::iota(std::begin(c), std::end(c), gen());

    // the loop body receives a pack of indices; store 42 at each of them
    hpx::experimental::for_loop(
        std::forward<ExPolicy>(policy), 0, int(c.size()), [&c](auto i) {
            std::size_t e = 0;
            while (e < hpx::parallel::traits::size(i))
            {
                c[hpx::parallel::traits::get(i, e)] = 42;
                ++e;
            }
        });

    // verify values
    std::size_t count = 0;
    for (std::size_t const v : c)
    {
        HPX_TEST_EQ(v, std::size_t(42));
        ++count;
    }
    HPX_TEST_EQ(count, c.size());
}

// Asynchronous variant: runs the index-based for_loop with a task policy,
// waits on the returned object, then checks that every element is 42.
template <typename ExPolicy>
void test_for_loop_idx_async(ExPolicy&& p)
{
    // start from seed-dependent contents so the overwrite is observable
    std::vector<std::size_t> c(10007);
    std::iota(std::begin(c), std::end(c), gen());

    // the loop body receives a pack of indices; store 42 at each of them
    auto f = hpx::experimental::for_loop(
        std::forward<ExPolicy>(p), 0, int(c.size()), [&c](auto i) {
            std::size_t e = 0;
            while (e < hpx::parallel::traits::size(i))
            {
                c[hpx::parallel::traits::get(i, e)] = 42;
                ++e;
            }
        });
    f.wait();

    // verify values
    std::size_t count = 0;
    for (std::size_t const v : c)
    {
        HPX_TEST_EQ(v, std::size_t(42));
        ++count;
    }
    HPX_TEST_EQ(count, c.size());
}

void for_loop_test_idx()
{
using namespace hpx::execution;

test_for_loop_idx(simd);
test_for_loop_idx(par_simd);

test_for_loop_idx_async(simd(task));
test_for_loop_idx_async(par_simd(task));
}

///////////////////////////////////////////////////////////////////////////////
// HPX entry point: fixes the random seed (taking it from the command line if
// given), prints it, runs the tests and finalizes the runtime.
int hpx_main(hpx::program_options::variables_map& vm)
{
    // a seed given on the command line replaces the random default
    if (vm.count("seed") != 0)
    {
        seed = vm["seed"].as<unsigned int>();
    }

    // report the seed that is actually used, then re-seed the generator
    std::cout << "using seed: " << seed << std::endl;
    gen.seed(seed);

    for_loop_test_idx();

    return hpx::local::finalize();
}

int main(int argc, char* argv[])
{
// add command line option which controls the random number generator seed
using namespace hpx::program_options;
options_description desc_commandline(
"Usage: " HPX_APPLICATION_STRING " [options]");

desc_commandline.add_options()("seed,s", value<unsigned int>(),
"the random number generator seed to use for this run");

// By default this test should run on all available cores
std::vector<std::string> const cfg = {"hpx.os_threads=all"};

// Initialize and run HPX
hpx::local::init_params init_args;
init_args.desc_cmdline = desc_commandline;
init_args.cfg = cfg;

HPX_TEST_EQ_MSG(hpx::local::init(hpx_main, argc, argv, init_args), 0,
"HPX main exited with non-zero status");

return hpx::util::report_errors();
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,27 @@
#include <hpx/config.hpp>

#if defined(HPX_HAVE_DATAPAR_EVE)
#include <hpx/concepts/concepts.hpp>
#include <hpx/execution/traits/vector_pack_alignment_size.hpp>

#include <cstddef>

namespace hpx::parallel::traits {

///////////////////////////////////////////////////////////////////////
template <typename Vector>
template <typename Vector,
HPX_CONCEPT_REQUIRES_(
is_vector_pack_v<Vector> || is_scalar_vector_pack_v<Vector>)>
HPX_HOST_DEVICE HPX_FORCEINLINE auto get(
Vector& vec, std::size_t index) noexcept
{
return vec.get(index);
}

///////////////////////////////////////////////////////////////////////
template <typename Vector, typename T>
template <typename Vector, typename T,
HPX_CONCEPT_REQUIRES_(
is_vector_pack_v<Vector> || is_scalar_vector_pack_v<Vector>)>
HPX_HOST_DEVICE HPX_FORCEINLINE auto set(
Vector& vec, std::size_t index, T val) noexcept
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,28 @@

#if defined(HPX_HAVE_DATAPAR_EXPERIMENTAL_SIMD)

#include <hpx/concepts/concepts.hpp>
#include <hpx/execution/traits/detail/simd/vector_pack_simd.hpp>
#include <hpx/execution/traits/vector_pack_alignment_size.hpp>

#include <cstddef>

namespace hpx::parallel::traits {

///////////////////////////////////////////////////////////////////////
template <typename Vector>
template <typename Vector,
HPX_CONCEPT_REQUIRES_(
is_vector_pack_v<Vector> || is_scalar_vector_pack_v<Vector>)>
HPX_HOST_DEVICE HPX_FORCEINLINE auto get(
Vector& vec, std::size_t index) noexcept
{
return vec[index];
}

///////////////////////////////////////////////////////////////////////
template <typename Vector, typename T>
template <typename Vector, typename T,
HPX_CONCEPT_REQUIRES_(
is_vector_pack_v<Vector> || is_scalar_vector_pack_v<Vector>)>
HPX_HOST_DEVICE HPX_FORCEINLINE auto set(
Vector& vec, std::size_t index, T val) noexcept
{
Expand Down
Loading
Loading