From 38d74fd078afe2a8ba50365219bed20a1012a92a Mon Sep 17 00:00:00 2001
From: Pablo Marquez Tello
Date: Wed, 4 Mar 2026 10:50:52 +0000
Subject: [PATCH] fix: Lack of epsilon aware comparison in NETopKV for Fp32 data type.

Align NETopKV FP32 comparison semantics with the scalar reference

Resolves COMPMID-8829

Change-Id: I995a07e0f38587b69b0ac08ee2c85949704f0e60
Signed-off-by: Pablo Marquez Tello
---
 src/cpu/kernels/topkv/generic/neon/fp32.cpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/cpu/kernels/topkv/generic/neon/fp32.cpp b/src/cpu/kernels/topkv/generic/neon/fp32.cpp
index afe49f45b0..d11fa928e3 100644
--- a/src/cpu/kernels/topkv/generic/neon/fp32.cpp
+++ b/src/cpu/kernels/topkv/generic/neon/fp32.cpp
@@ -26,6 +26,7 @@
 #include "src/cpu/kernels/topkv/generic/neon/impl.h"
 
 #include
+#include <limits>
 
 namespace arm_compute
 {
@@ -44,21 +45,24 @@ static inline uint32_t reduce_u32x4(uint32x4_t v)
 #endif
 }
 
-// Explicit specialization for float: may use float32x4_t etc (only in this TU)
+// Explicit specialization for float: may use float32x4_t
 template <>
 uint32_t count_gt_block(const float *ptr, float threshold)
 {
     using Tag = wrapper::traits::neon_bitvector_tag_t;
 
-    const auto thr_vec = wrapper::vdup_n(threshold, Tag{});
-    const auto v = wrapper::vloadq(ptr);
-    const auto mask = wrapper::vcgt(v, thr_vec); // underlying uint32x4_t
+    const auto v = wrapper::vloadq(ptr);
+
+    // epsilon-aware compare: treat a > b only when (a - b) > epsilon
+    const float eps_val = std::numeric_limits<float>::epsilon();
+    const float thr_with_eps = threshold + eps_val;
+    const auto thr_eps_vec = wrapper::vdup_n(thr_with_eps, Tag{});
+    const auto mask = wrapper::vcgt(v, thr_eps_vec); // new: v > (threshold + eps)
 
     const uint32x4_t m = mask;
     const uint32x4_t b = vshrq_n_u32(m, 31);
     return reduce_u32x4(b);
 }
-
 } // namespace detail
 
 void topkv_fp32_neon(const ITensor *predictions, const ITensor *targets, ITensor *out, uint32_t k, const Window &win)