|
| 1 | +/*************************************************************************** |
| 2 | + * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * |
| 3 | + * Martin Renou * |
| 4 | + * Copyright (c) QuantStack * |
| 5 | + * Copyright (c) Serge Guelton * |
| 6 | + * * |
| 7 | + * Distributed under the terms of the BSD 3-Clause License. * |
| 8 | + * * |
| 9 | + * The full license is in the file LICENSE, distributed with this software. * |
| 10 | + ****************************************************************************/ |
| 11 | + |
| 12 | +#ifndef XSIMD_ALGORITHMS_REDUCE_HPP |
| 13 | +#define XSIMD_ALGORITHMS_REDUCE_HPP |
| 14 | + |
| 15 | +#include <array> |
| 16 | +#include <cstddef> |
| 17 | +#include <iterator> |
| 18 | +#include <type_traits> |
| 19 | + |
| 20 | +#include "xsimd/xsimd.hpp" |
| 21 | + |
| 22 | +namespace xsimd |
| 23 | +{ |
| 24 | + // TODO: Remove this once we drop C++11 support |
namespace detail
{
    /**
     * Transparent addition functor used as the default binary operation
     * of the reduction below.
     *
     * The call operator accepts any pair of operands for which `x + y`
     * is well-formed and returns exactly the type of that expression.
     * It is const-qualified so that the (stateless) functor can also be
     * invoked through a const object or a const reference.
     *
     * Note: the operator is unconditionally `noexcept`; if the underlying
     * `operator+` throws, the program terminates.
     */
    struct plus
    {
        template <class X, class Y>
        auto operator()(X&& x, Y&& y) const noexcept -> decltype(x + y)
        {
            // Operands are deliberately used as lvalues (no forwarding),
            // which matches the expression named in the trailing return type.
            return x + y;
        }
    };
}
| 33 | + |
| 34 | + template <class Arch = default_arch, class Iterator1, class Iterator2, class Init, class BinaryFunction = detail::plus> |
| 35 | + Init reduce(Iterator1 first, Iterator2 last, Init init, BinaryFunction&& binfun = detail::plus {}) noexcept |
| 36 | + { |
| 37 | + using value_type = typename std::decay<decltype(*first)>::type; |
| 38 | + using batch_type = batch<value_type, Arch>; |
| 39 | + |
| 40 | + std::size_t size = static_cast<std::size_t>(std::distance(first, last)); |
| 41 | + constexpr std::size_t simd_size = batch_type::size; |
| 42 | + |
| 43 | + if (size < simd_size) |
| 44 | + { |
| 45 | + while (first != last) |
| 46 | + { |
| 47 | + init = binfun(init, *first++); |
| 48 | + } |
| 49 | + return init; |
| 50 | + } |
| 51 | + |
| 52 | + const auto* const ptr_begin = &(*first); |
| 53 | + |
| 54 | + std::size_t align_begin = xsimd::get_alignment_offset(ptr_begin, size, simd_size); |
| 55 | + std::size_t align_end = align_begin + ((size - align_begin) & ~(simd_size - 1)); |
| 56 | + |
| 57 | + // reduce initial unaligned part |
| 58 | + for (std::size_t i = 0; i < align_begin; ++i) |
| 59 | + { |
| 60 | + init = binfun(init, first[i]); |
| 61 | + } |
| 62 | + |
| 63 | + // reduce aligned part |
| 64 | + auto ptr = ptr_begin + align_begin; |
| 65 | + batch_type batch_init = batch_type::load_aligned(ptr); |
| 66 | + ptr += simd_size; |
| 67 | + for (auto const end = ptr_begin + align_end; ptr < end; ptr += simd_size) |
| 68 | + { |
| 69 | + batch_type batch = batch_type::load_aligned(ptr); |
| 70 | + batch_init = binfun(batch_init, batch); |
| 71 | + } |
| 72 | + |
| 73 | + // reduce across batch |
| 74 | + alignas(batch_type) std::array<value_type, simd_size> arr; |
| 75 | + xsimd::store_aligned(arr.data(), batch_init); |
| 76 | + for (auto x : arr) |
| 77 | + init = binfun(init, x); |
| 78 | + |
| 79 | + // reduce final unaligned part |
| 80 | + for (std::size_t i = align_end; i < size; ++i) |
| 81 | + { |
| 82 | + init = binfun(init, first[i]); |
| 83 | + } |
| 84 | + |
| 85 | + return init; |
| 86 | + } |
| 87 | + |
| 88 | +} |
| 89 | + |
| 90 | +#endif |
0 commit comments