Skip to content

Commit

Permalink
[enhancement] SVE Implementation of sumWithSIMD (#3079)
Browse files Browse the repository at this point in the history
* sve implementation of sumWithSIMD

* compile fix

* add missing file

* clang-fix

* Update cpp/daal/src/data_management/finiteness_checker_sve_impl.i

Co-authored-by: Victoriya Fedotova <viktoria.nn@gmail.com>

* Update cpp/daal/src/data_management/finiteness_checker_sve_impl.i

Co-authored-by: Victoriya Fedotova <viktoria.nn@gmail.com>

* Update cpp/daal/src/data_management/finiteness_checker_sve_impl.i

Co-authored-by: Victoriya Fedotova <viktoria.nn@gmail.com>

---------

Co-authored-by: Victoriya Fedotova <viktoria.nn@gmail.com>
  • Loading branch information
rakshithgb-fujitsu and Vika-F authored Feb 28, 2025
1 parent 7884821 commit 7cc1c9b
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 3 deletions.
17 changes: 14 additions & 3 deletions cpp/daal/src/data_management/finiteness_checker_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ namespace internal
{
using namespace daal::internal;

#if defined(DAAL_INTEL_CPP_COMPILER)
#if defined(DAAL_INTEL_CPP_COMPILER) || (__CPUID__(DAAL_CPU) == __sve__)

const size_t BLOCK_SIZE = 8192;
const size_t THREADING_BORDER = 262144;
Expand All @@ -51,7 +51,7 @@ DataType getInf()
return inf;
}

// These functions are used for both AVX2 and AVX512
// These functions are used for AVX2, AVX512 and SVE
// and are therefore outside of their separate
// implementations

Expand Down Expand Up @@ -84,6 +84,7 @@ DataType computeSumSIMD(size_t nDataPtrs, size_t nElementsPerPtr, const DataType
size_t end = blockIdxInPtr == nBlocksPerPtr - 1 ? start + nPerBlock + nSurplus : start + nPerBlock;

//sumWithSIMD defined for AVX2 and AVX512 in finiteness_checker_avx2_impl.i and finiteness_checker_avx512_impl.i
//sumWithSIMD defined for SVE in finiteness_checker_sve_impl.i
pSums[iBlock] = sumWithSIMD<DataType, cpu>(end - start, dataPtrs[ptrIdx] + start);
});

Expand Down Expand Up @@ -161,6 +162,17 @@ double computeSumSOASIMD(NumericTable & table, bool & sumIsFinite, services::Sta

return sum;
}
#if defined(TARGET_ARM)
#if (__CPUID__(DAAL_CPU) == __sve__)

#include "finiteness_checker_sve_impl.i"

#endif // __CPUID__(DAAL_CPU) == __sve__
#endif

#endif

#if defined(DAAL_INTEL_CPP_COMPILER)

template <daal::CpuType cpu>
services::Status checkFinitenessInBlocks(const float ** dataPtrs, bool inParallel, size_t nTotalBlocks, size_t nBlocksPerPtr, size_t nPerBlock,
Expand Down Expand Up @@ -254,7 +266,6 @@ bool checkFinitenessSOASIMD(NumericTable & table, bool allowNaN, services::Statu

return valuesAreFinite;
}

#if (__CPUID__(DAAL_CPU) == __avx512__)

#include "finiteness_checker_avx512_impl.i"
Expand Down
106 changes: 106 additions & 0 deletions cpp/daal/src/data_management/finiteness_checker_sve_impl.i
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*******************************************************************************
* Copyright contributors to the oneDAL project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
/*
* Contains the SVE implementation of sumWithSIMD and the SVE specializations
* of computeSum and computeSumSOA.
*/

#ifndef __FINITENESS_CHECKER_SVE_IMPL_I__
#define __FINITENESS_CHECKER_SVE_IMPL_I__

#include <arm_sve.h>

/*
// Computes sum of the elements of input array of type `float` with SVE instructions.
//
// n       - number of elements to accumulate; when n is 0 the loop body never runs
//           and the reduction of the zero-initialized accumulator is returned
// dataPtr - pointer to the first of the `n` elements to be summed
//
// Returns the horizontal sum of the per-lane partial sums.
*/
template <>
float sumWithSIMD<float, sve>(size_t n, const float * dataPtr)
{
    // Per-lane partial sums, every lane starts at zero
    svfloat32_t sums = svdup_f32(0.0f);

    // Number of 32-bit lanes in one vector; does not change inside the loop
    const size_t step = svcntw();

    // Single predicated loop that handles both full vectors and the remainder
    size_t i      = 0;
    svbool_t pg = svwhilelt_b32(i, n);
    while (svptest_any(svptrue_b32(), pg))
    {
        const svfloat32_t data = svld1_f32(pg, &dataPtr[i]);

        // Merging (_m) predication: lanes that are inactive in `pg` keep the value
        // already stored in `sums`. The "don't care" (_x) form leaves those lanes
        // unspecified, which matters on the tail iteration because the final
        // reduction below reads every lane of the accumulator.
        sums = svadd_f32_m(pg, sums, data);

        i += step;
        pg = svwhilelt_b32(i, n);
    }

    // Horizontal sum across all lanes
    return svaddv_f32(svptrue_b32(), sums);
}

/*
// Computes sum of the elements of input array of type `double` with SVE instructions.
//
// n       - number of elements to accumulate; when n is 0 the loop body never runs
//           and the reduction of the zero-initialized accumulator is returned
// dataPtr - pointer to the first of the `n` elements to be summed
//
// Returns the horizontal sum of the per-lane partial sums.
*/
template <>
double sumWithSIMD<double, sve>(size_t n, const double * dataPtr)
{
    // Per-lane partial sums, every lane starts at zero
    svfloat64_t sums = svdup_f64(0.0);

    // Number of 64-bit lanes in one vector; does not change inside the loop
    const size_t step = svcntd();

    // Single predicated loop that handles both full vectors and the remainder
    size_t i      = 0;
    svbool_t pg = svwhilelt_b64(i, n);
    while (svptest_any(svptrue_b64(), pg))
    {
        const svfloat64_t data = svld1_f64(pg, &dataPtr[i]);

        // Merging (_m) predication: lanes that are inactive in `pg` keep the value
        // already stored in `sums`. The "don't care" (_x) form leaves those lanes
        // unspecified, which matters on the tail iteration because the final
        // reduction below reads every lane of the accumulator.
        sums = svadd_f64_m(pg, sums, data);

        i += step;
        pg = svwhilelt_b64(i, n);
    }

    // Horizontal sum across all lanes
    return svaddv_f64(svptrue_b64(), sums);
}

// SVE specialization of computeSum for `float`: forwards all arguments
// unchanged to the shared computeSumSIMD helper (finiteness_checker_cpu.cpp).
template <>
float computeSum<float, sve>(size_t nDataPtrs, size_t nElementsPerPtr, const float ** dataPtrs)
{
    const float total = computeSumSIMD<float, sve>(nDataPtrs, nElementsPerPtr, dataPtrs);
    return total;
}

// SVE specialization of computeSum for `double`: forwards all arguments
// unchanged to the shared computeSumSIMD helper (finiteness_checker_cpu.cpp).
template <>
double computeSum<double, sve>(size_t nDataPtrs, size_t nElementsPerPtr, const double ** dataPtrs)
{
    const double total = computeSumSIMD<double, sve>(nDataPtrs, nElementsPerPtr, dataPtrs);
    return total;
}

// SVE specialization of computeSumSOA: forwards the table, the finiteness flag
// and the status object to computeSumSOASIMD (finiteness_checker_cpu.cpp).
template <>
double computeSumSOA<sve>(NumericTable & table, bool & sumIsFinite, services::Status & st)
{
    const double total = computeSumSOASIMD<sve>(table, sumIsFinite, st);
    return total;
}

//TODO: Implement checkFinitenessInBlocks()

#endif // __FINITENESS_CHECKER_SVE_IMPL_I__

0 comments on commit 7cc1c9b

Please sign in to comment.