Skip to content

Commit

Permalink
Implement sumWithSIMD using xsimd
Browse files Browse the repository at this point in the history
  • Loading branch information
Vika-F committed Mar 4, 2025
1 parent 7cc1c9b commit 6f073c4
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 3 deletions.
4 changes: 4 additions & 0 deletions cpp/daal/src/data_management/finiteness_checker_avx2_impl.i
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#ifndef __FINITENESS_CHECKER_AVX2_IMPL_I__
#define __FINITENESS_CHECKER_AVX2_IMPL_I__

#ifndef ONEDAL_XSIMD_ENABLED

/*
// Computes sum of the elements of input array of type `float` with AVX2 instructions.
*/
Expand Down Expand Up @@ -76,6 +78,8 @@ double sumWithSIMD<double, avx2>(size_t n, const double * dataPtr)
return sum;
}

#endif // ONEDAL_XSIMD_ENABLED

template <>
float computeSum<float, avx2>(size_t nDataPtrs, size_t nElementsPerPtr, const float ** dataPtrs)
{
Expand Down
4 changes: 4 additions & 0 deletions cpp/daal/src/data_management/finiteness_checker_avx512_impl.i
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#ifndef __FINITENESS_CHECKER_AVX512_IMPL_I__
#define __FINITENESS_CHECKER_AVX512_IMPL_I__

#ifndef ONEDAL_XSIMD_ENABLED

/*
// Computes sum of the elements of input array of type `float` with AVX512 instructions.
*/
Expand Down Expand Up @@ -60,6 +62,8 @@ double sumWithSIMD<double, avx512>(size_t n, const double * dataPtr)
return sum;
}

#endif // ONEDAL_XSIMD_ENABLED

template <>
float computeSum<float, avx512>(size_t nDataPtrs, size_t nElementsPerPtr, const float ** dataPtrs)
{
Expand Down
58 changes: 58 additions & 0 deletions cpp/daal/src/data_management/finiteness_checker_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@

#include "finiteness_checker_impl.i"

#ifdef ONEDAL_XSIMD_ENABLED

// Short alias for the xsimd namespace; it is only introduced when the build
// defines ONEDAL_XSIMD_ENABLED.
namespace xs = xsimd;

#endif // ONEDAL_XSIMD_ENABLED

namespace daal
{
namespace data_management
Expand Down Expand Up @@ -58,6 +64,57 @@ DataType getInf()
/*
// Declaration of the SIMD-based sum of the elements of an array.
// Only declared here; definitions are supplied as explicit specializations
// for particular (DataType, cpu) combinations.
//
// @tparam DataType Data type of the input array
// @tparam cpu      CPU type the specialization is written for
//
// @param[in] n       Number of elements in the input array
// @param[in] dataPtr Pointer to the input array
//
// @return Sum of the elements of the input array
*/
template <typename DataType, daal::CpuType cpu>
DataType sumWithSIMD(size_t n, const DataType * dataPtr);

#ifdef ONEDAL_XSIMD_ENABLED

/*
// Computes sum of the elements of input array using XSIMD.
//
// @tparam DataType Data type of the input array
// @tparam XSIMDArch XSIMD CPU architecture
//
// @param[in] n Number of elements in the input array
// @param[in] dataPtr Pointer to the input array
//
// @return Sum of the elements of the input array
*/
template <typename DataType, typename XSIMDArch>
DataType sumWithXSIMD(size_t n, const DataType * dataPtr)
{
constexpr size_t nPerInstr = xs::batch<DataType, XSIMDArch>::size;
DataType sum;

xs::batch<DataType, XSIMDArch> xs_sums(0.0);
const DataType * curDataPtr = dataPtr;
const size_t iEnd = n / nPerInstr;
for (size_t i = 0; i < iEnd; i++, curDataPtr += nPerInstr)
{
xs::batch<DataType, XSIMDArch> xs_data = xs::load_unaligned(curDataPtr);
xs_sums += xs_data;
}
sum = xs::reduce_add(xs_sums);

for (size_t i = iEnd * nPerInstr; i < n; ++i) sum += dataPtr[i];

return sum;
}


#if (__CPUID__(DAAL_CPU) != __sse2__)

/*
// `float` specialization of sumWithSIMD for the CPU selected by DAAL_CPU:
// forwards to the XSIMD-based implementation instantiated for ONEDAL_XSIMD_ARCH.
*/
template <>
float sumWithSIMD<float, DAAL_CPU>(size_t n, const float * dataPtr)
{
    const float total = sumWithXSIMD<float, ONEDAL_XSIMD_ARCH>(n, dataPtr);
    return total;
}

/*
// `double` specialization of sumWithSIMD for the CPU selected by DAAL_CPU:
// forwards to the XSIMD-based implementation instantiated for ONEDAL_XSIMD_ARCH.
*/
template <>
double sumWithSIMD<double, DAAL_CPU>(size_t n, const double * dataPtr)
{
    const double total = sumWithXSIMD<double, ONEDAL_XSIMD_ARCH>(n, dataPtr);
    return total;
}

#endif

#endif

/*
// Computes multi-threaded sum of a numeric table via summation using SIMD calls
*/
Expand Down Expand Up @@ -266,6 +323,7 @@ bool checkFinitenessSOASIMD(NumericTable & table, bool allowNaN, services::Statu

return valuesAreFinite;
}

#if (__CPUID__(DAAL_CPU) == __avx512__)

#include "finiteness_checker_avx512_impl.i"
Expand Down
4 changes: 4 additions & 0 deletions cpp/daal/src/data_management/finiteness_checker_sve_impl.i
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

#include <arm_sve.h>

#ifndef ONEDAL_XSIMD_ENABLED

/*
// Computes sum of the elements of input array of type `float` with sve instructions.
*/
Expand Down Expand Up @@ -80,6 +82,8 @@ double sumWithSIMD<double, sve>(size_t n, const double * dataPtr)
return sum;
}

#endif // ONEDAL_XSIMD_ENABLED

template <>
float computeSum<float, sve>(size_t nDataPtrs, size_t nElementsPerPtr, const float ** dataPtrs)
{
Expand Down
38 changes: 38 additions & 0 deletions cpp/daal/src/services/service_defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@
#include <stdint.h>
#include "services/env_detect.h"


#ifdef ONEDAL_XSIMD_ENABLED

#include "xsimd/xsimd.hpp"

#endif

DAAL_EXPORT int __daal_serv_cpu_detect(int);

void run_cpuid(uint32_t eax, uint32_t ecx, uint32_t * abcd);
Expand Down Expand Up @@ -155,6 +162,37 @@ enum DataFormat
#define __CPUID__(cpu) __GLUE__(CPU_, cpu)
#define __FPTYPE__(type) __GLUE__(FPTYPE_, type)


// Selects the xsimd architecture tag (ONEDAL_XSIMD_ARCH) that corresponds to the
// CPU this translation unit is compiled for (DAAL_CPU). Only active when the
// build defines ONEDAL_XSIMD_ENABLED.
// Note: there is no fallback branch, so ONEDAL_XSIMD_ARCH stays undefined when
// DAAL_CPU matches none of the cases listed below.
#ifdef ONEDAL_XSIMD_ENABLED

#if (__CPUID__(DAAL_CPU) == __avx512__)

#define ONEDAL_XSIMD_ARCH xsimd::avx512bw

#elif (__CPUID__(DAAL_CPU) == __avx2__)

#define ONEDAL_XSIMD_ARCH xsimd::fma3<xsimd::avx2>

#elif (__CPUID__(DAAL_CPU) == __sse42__)

#define ONEDAL_XSIMD_ARCH xsimd::sse4_2

#elif (__CPUID__(DAAL_CPU) == __sse2__)

#define ONEDAL_XSIMD_ARCH xsimd::sse2

#elif (__CPUID__(DAAL_CPU) == __sve__)

// For SVE the tag additionally depends on the floating-point type selected by
// DAAL_DATA_TYPE.
// NOTE(review): the template argument is 16 for float and 8 for double; confirm
// against the xsimd SVE architecture definition which unit this argument is
// expected in and that xsimd::sve accepts a template argument at all.
#if (__FPTYPE__(DAAL_DATA_TYPE) == __float__)
#define ONEDAL_XSIMD_ARCH xsimd::sve<16>
#else
#define ONEDAL_XSIMD_ARCH xsimd::sve<8>
#endif

#endif // __CPUID__(DAAL_CPU)

#endif // ONEDAL_XSIMD_ENABLED

/*
// Set of macro definitions
// for FP values bit fields easy access
Expand Down
13 changes: 10 additions & 3 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,13 @@ ifeq ($(REQPROFILE), yes)
VTUNESDK.LIBS_A := $(if $(OS_is_lnx), $(VTUNESDK.libia)/libittnotify.a,)
endif

#================================= XSIMD folders ========================================

# XSIMD support is opt-in. With REQXSIMD=yes this section defines:
#   -DXSIMD_ENABLED  - compiler option that defines ONEDAL_XSIMD_ENABLED
#   XSIMD.include    - include directory under $(XSIMD_DIR)
# Both variables stay empty otherwise, so referencing them is a no-op.
# NOTE(review): XSIMD_DIR is not validated here - confirm it is set/checked elsewhere.
ifeq ($(REQXSIMD), yes)
-DXSIMD_ENABLED := -DONEDAL_XSIMD_ENABLED
XSIMD.include := $(XSIMD_DIR)/include
endif

#===============================================================================
# Release library names
#===============================================================================
Expand Down Expand Up @@ -447,7 +454,7 @@ CORE.srcdirs := $(CORE.SERV.srcdir) $(CORE.srcdir) \
$(CPPDIR.daal)/src/data_management

CORE.incdirs.common := $(RELEASEDIR.include) $(CPPDIR.daal) $(WORKDIR)
CORE.incdirs.thirdp := $(daaldep.math_backend.incdir) $(VTUNESDK.include) $(TBBDIR.include)
CORE.incdirs.thirdp := $(daaldep.math_backend.incdir) $(VTUNESDK.include) $(TBBDIR.include) $(XSIMD.include)
CORE.incdirs := $(CORE.incdirs.common) $(CORE.incdirs.thirdp)

$(info CORE.incdirs: $(CORE.incdirs))
Expand Down Expand Up @@ -505,7 +512,7 @@ $(WORKDIR.lib)/$(core_y): $(daaldep.math_backend.ext) $(VTUNES
$(CORE.tmpdir_y)/$(core_y:%.$y=%_link.txt) ; $(LINK.DYNAMIC) ; $(LINK.DYNAMIC.POST)

$(CORE.objs_a): $(CORE.tmpdir_a)/inc_a_folders.txt
$(CORE.objs_a): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-DMKL_ILP64) $(-DPROFILER)
$(CORE.objs_a): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-DMKL_ILP64) $(-DPROFILER) $(-DXSIMD_ENABLED)
$(CORE.objs_a): COPT += -D__TBB_NO_IMPLICIT_LINKAGE -DDAAL_NOTHROW_EXCEPTIONS \
-DDAAL_HIDE_DEPRECATED -DTBB_USE_ASSERT=0 -D_ENABLE_ATOMIC_ALIGNMENT_FIX \
$(if $(CHECK_DLL_SIG),-DDAAL_CHECK_DLL_SIG)
Expand All @@ -514,7 +521,7 @@ $(CORE.objs_a): COPT += @$(CORE.tmpdir_a)/inc_a_folders.txt
$(eval $(call append_uarch_copt,$(CORE.objs_a)))

$(CORE.objs_y): $(CORE.tmpdir_y)/inc_y_folders.txt
$(CORE.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-DMKL_ILP64) $(-DPROFILER)
$(CORE.objs_y): COPT += $(-fPIC) $(-cxx17) $(-Zl) $(-DEBC) $(-DMKL_ILP64) $(-DPROFILER) $(-DXSIMD_ENABLED)
$(CORE.objs_y): COPT += -D__DAAL_IMPLEMENTATION \
-D__TBB_NO_IMPLICIT_LINKAGE -DDAAL_NOTHROW_EXCEPTIONS \
-DDAAL_HIDE_DEPRECATED -DTBB_USE_ASSERT=0 -D_ENABLE_ATOMIC_ALIGNMENT_FIX \
Expand Down

0 comments on commit 6f073c4

Please sign in to comment.