Added Vector max abs SIMD function

master
Xavier Arteaga 7 years ago
parent e18ba937dc
commit f01f7b4945

@ -500,6 +500,25 @@ static inline simd_f_t srslte_simd_f_neg_mask(simd_f_t a, simd_f_t mask) {
#endif /* LV_HAVE_AVX512 */ #endif /* LV_HAVE_AVX512 */
} }
static inline simd_f_t srslte_simd_f_abs(simd_f_t a) {
#ifdef LV_HAVE_AVX512
return _mm512_andnot_ps(_mm512_set1_ps(-0.0f), a);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
return _mm256_andnot_ps(_mm256_set1_ps(-0.0f), a);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
return _mm_andnot_ps(_mm_set1_ps(-0.0f), a);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
return vqabsq_s32(a);
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
}
#endif /* SRSLTE_SIMD_F_SIZE */ #endif /* SRSLTE_SIMD_F_SIZE */

@ -136,6 +136,8 @@ SRSLTE_API void srslte_vec_apply_cfo_simd(const cf_t *x, float cfo, cf_t *z, int
/* SIMD Find Max functions */ /* SIMD Find Max functions */
SRSLTE_API uint32_t srslte_vec_max_fi_simd(const float *x, const int len); SRSLTE_API uint32_t srslte_vec_max_fi_simd(const float *x, const int len);
SRSLTE_API uint32_t srslte_vec_max_abs_fi_simd(const float *x, const int len);
SRSLTE_API uint32_t srslte_vec_max_ci_simd(const cf_t *x, const int len); SRSLTE_API uint32_t srslte_vec_max_ci_simd(const cf_t *x, const int len);
#ifdef __cplusplus #ifdef __cplusplus

@ -48,8 +48,7 @@ bool verbose = false;
#define MAX_FUNCTIONS (64) #define MAX_FUNCTIONS (64)
#define MAX_BLOCKS (16) #define MAX_BLOCKS (16)
#define RANDOM_F() (((float) rand()) / ((float) RAND_MAX) * 2.0f - 1.0f)
#define RANDOM_F() ((float)rand())/((float)RAND_MAX)
#define RANDOM_S() ((int16_t)(rand() & 0x800F)) #define RANDOM_S() ((int16_t)(rand() & 0x800F))
#define RANDOM_B() ((int8_t)(rand() & 0x8008)) #define RANDOM_B() ((int8_t)(rand() & 0x8008))
#define RANDOM_CF() (RANDOM_F() + _Complex_I*RANDOM_F()) #define RANDOM_CF() (RANDOM_F() + _Complex_I*RANDOM_F())
@ -705,6 +704,29 @@ TEST(srslte_vec_max_fi,
free(x); free(x);
) )
TEST(srslte_vec_max_abs_fi,
MALLOC(float, x);
for (int i = 0; i < block_size; i++) {
x[i] = RANDOM_F();
}
uint32_t max_index = 0;
TEST_CALL(max_index = srslte_vec_max_abs_fi(x, block_size);)
float gold_value = -INFINITY;
uint32_t gold_index = 0;
for (int i = 0; i < block_size; i++) {
if (gold_value < fabsf(x[i])) {
gold_value = fabsf(x[i]);
gold_index = i;
}
}
mse = (gold_index != max_index) ? 1:0;
free(x);
)
TEST(srslte_vec_max_abs_ci, TEST(srslte_vec_max_abs_ci,
MALLOC(cf_t, x); MALLOC(cf_t, x);
@ -899,6 +921,9 @@ int main(int argc, char **argv) {
passed[func_count][size_count] = test_srslte_vec_max_fi(func_names[func_count], &timmings[func_count][size_count], block_size); passed[func_count][size_count] = test_srslte_vec_max_fi(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++; func_count++;
passed[func_count][size_count] = test_srslte_vec_max_abs_fi(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
passed[func_count][size_count] = test_srslte_vec_max_abs_ci(func_names[func_count], &timmings[func_count][size_count], block_size); passed[func_count][size_count] = test_srslte_vec_max_abs_ci(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++; func_count++;

@ -369,7 +369,7 @@ uint32_t srslte_vec_max_fi(const float *x, const uint32_t len) {
} }
uint32_t srslte_vec_max_abs_fi(const float *x, const uint32_t len) { uint32_t srslte_vec_max_abs_fi(const float *x, const uint32_t len) {
return srslte_vec_max_fi_simd(x, len); return srslte_vec_max_abs_fi_simd(x, len);
} }
// CP autocorr // CP autocorr

@ -1092,6 +1092,63 @@ uint32_t srslte_vec_max_fi_simd(const float *x, const int len) {
return max_index; return max_index;
} }
uint32_t srslte_vec_max_abs_fi_simd(const float *x, const int len) {
int i = 0;
float max_value = -INFINITY;
uint32_t max_index = 0;
#if SRSLTE_SIMD_I_SIZE
__attribute__ ((aligned (SRSLTE_SIMD_I_SIZE*sizeof(int)))) int indexes_buffer[SRSLTE_SIMD_I_SIZE] = {0};
__attribute__ ((aligned (SRSLTE_SIMD_I_SIZE*sizeof(float)))) float values_buffer[SRSLTE_SIMD_I_SIZE] = {0};
for (int k = 0; k < SRSLTE_SIMD_I_SIZE; k++) indexes_buffer[k] = k;
simd_i_t simd_inc = srslte_simd_i_set1(SRSLTE_SIMD_I_SIZE);
simd_i_t simd_indexes = srslte_simd_i_load(indexes_buffer);
simd_i_t simd_max_indexes = srslte_simd_i_set1(0);
simd_f_t simd_max_values = srslte_simd_f_set1(-INFINITY);
if (SRSLTE_IS_ALIGNED(x)) {
for (; i < len - SRSLTE_SIMD_I_SIZE + 1; i += SRSLTE_SIMD_I_SIZE) {
simd_f_t a = srslte_simd_f_abs(srslte_simd_f_load(&x[i]));
simd_sel_t res = srslte_simd_f_max(a, simd_max_values);
simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res);
simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) a, res);
simd_indexes = srslte_simd_i_add(simd_indexes, simd_inc);
}
} else {
for (; i < len - SRSLTE_SIMD_I_SIZE + 1; i += SRSLTE_SIMD_I_SIZE) {
simd_f_t a = srslte_simd_f_abs(srslte_simd_f_loadu(&x[i]));
simd_sel_t res = srslte_simd_f_max(a, simd_max_values);
simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res);
simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) a, res);
simd_indexes = srslte_simd_i_add(simd_indexes, simd_inc);
}
}
srslte_simd_i_store(indexes_buffer, simd_max_indexes);
srslte_simd_f_store(values_buffer, simd_max_values);
for (int k = 0; k < SRSLTE_SIMD_I_SIZE; k++) {
if (values_buffer[k] > max_value) {
max_value = values_buffer[k];
max_index = (uint32_t) indexes_buffer[k];
}
}
#endif /* SRSLTE_SIMD_I_SIZE */
for (; i < len; i++) {
float a = fabsf(x[i]);
if (a > max_value) {
max_value = a;
max_index = (uint32_t)i;
}
}
return max_index;
}
uint32_t srslte_vec_max_ci_simd(const cf_t *x, const int len) { uint32_t srslte_vec_max_ci_simd(const cf_t *x, const int len) {
int i = 0; int i = 0;

Loading…
Cancel
Save