Renamed module algebra (it is now named mat) and moved usefl math SIMD macros to simd.h

master
Xavier Arteaga 7 years ago
parent 0947173fc1
commit d933f1b817

@ -1,161 +0,0 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#ifndef SRSLTE_ALGEBRA_H
#define SRSLTE_ALGEBRA_H
#include "srslte/config.h"
/*
* Generic Macros
*/
#define RANDOM_CF() (((float)rand())/((float)RAND_MAX) + _Complex_I*((float)rand())/((float)RAND_MAX))
/*
* SSE Macros
*/
#ifdef LV_HAVE_SSE
#define _MM_SWAP(X) ((__m128)_mm_shuffle_ps(X, X, _MM_SHUFFLE(2,3,0,1)))
#define _MM_PERM(X) ((__m128)_mm_shuffle_ps(X, X, _MM_SHUFFLE(2,1,3,0)))
#define _MM_MULJ_PS(X) _MM_SWAP(_MM_CONJ_PS(X))
#define _MM_CONJ_PS(X) (_mm_xor_ps(X, _mm_set_ps(-0.0f, 0.0f, -0.0f, 0.0f)))
#define _MM_SQMOD_PS(X) _MM_PERM(_mm_hadd_ps(_mm_mul_ps(X,X), _mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f)))
#define _MM_PROD_PS(a, b) _mm_addsub_ps(_mm_mul_ps(a,_mm_moveldup_ps(b)),_mm_mul_ps(\
_mm_shuffle_ps(a,a,0xB1),_mm_movehdup_ps(b)))
#endif /* LV_HAVE_SSE */
/*
* AVX Macros
*/
#ifdef LV_HAVE_AVX
#define _MM256_MULJ_PS(X) _mm256_permute_ps(_MM256_CONJ_PS(X), 0b10110001)
#define _MM256_CONJ_PS(X) (_mm256_xor_ps(X, _mm256_set_ps(-0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f)))
#ifdef LV_HAVE_FMA
#define _MM256_SQMOD_PS(A, B) _mm256_permute_ps(_mm256_hadd_ps(_mm256_fmadd_ps(A, A, _mm256_mul_ps(B,B)), \
_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100)
#define _MM256_PROD_PS(a, b) _mm256_fmaddsub_ps(a,_mm256_moveldup_ps(b),\
_mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b)))
#else
#define _MM256_SQMOD_PS(A, B) _mm256_permute_ps(_mm256_hadd_ps(_mm256_add_ps(_mm256_mul_ps(A,A), _mm256_mul_ps(B,B)), \
_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100)
#define _MM256_PROD_PS(a, b) _mm256_addsub_ps(_mm256_mul_ps(a,_mm256_moveldup_ps(b)),\
_mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b)))
#endif /* LV_HAVE_FMA */
#endif /* LV_HAVE_AVX */
/*
* AVX extension with FMA Macros
*/
#ifdef LV_HAVE_FMA
#define _MM256_SQMOD_ADD_PS(A, B, C) _mm256_permute_ps(_mm256_hadd_ps(_mm256_fmadd_ps(A, A, _mm256_fmadd_ps(B, B, C)),\
_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100)
#define _MM256_PROD_ADD_PS(A, B, C) _mm256_fmaddsub_ps(A,_mm256_moveldup_ps(B),\
_mm256_fmaddsub_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C))
#define _MM256_PROD_SUB_PS(A, B, C) _mm256_fmaddsub_ps(A,_mm256_moveldup_ps(B),\
_mm256_fmsubadd_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C))
#endif /* LV_HAVE_FMA */
/* Generic implementation for complex reciprocal */
SRSLTE_API cf_t srslte_algebra_cf_recip_gen(cf_t a);
/* Generic implementation for 2x2 determinant */
SRSLTE_API cf_t srslte_algebra_2x2_det_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11);
/* Generic implementation for 2x2 Matrix Inversion */
SRSLTE_API void srslte_algebra_2x2_inv_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11,
cf_t *r00, cf_t *r01, cf_t *r10, cf_t *r11);
/* Generic implementation for Zero Forcing (ZF) solver */
SRSLTE_API void srslte_algebra_2x2_zf_gen(cf_t y0, cf_t y1,
cf_t h00, cf_t h01, cf_t h10, cf_t h11,
cf_t *x0, cf_t *x1,
float norm);
/* Generic implementation for Minimum Mean Squared Error (MMSE) solver */
SRSLTE_API void srslte_algebra_2x2_mmse_gen(cf_t y0, cf_t y1,
cf_t h00, cf_t h01, cf_t h10, cf_t h11,
cf_t *x0, cf_t *x1,
float noise_estimate,
float norm);
SRSLTE_API float srslte_algebra_2x2_cn(cf_t h00,
cf_t h01,
cf_t h10,
cf_t h11);
#ifdef LV_HAVE_SSE
/* SSE implementation for complex reciprocal */
SRSLTE_API __m128 srslte_algebra_cf_recip_sse(__m128 a);
/* SSE implementation for 2x2 determinant */
SRSLTE_API __m128 srslte_algebra_2x2_det_sse(__m128 a00, __m128 a01, __m128 a10, __m128 a11);
/* SSE implementation for Zero Forcing (ZF) solver */
SRSLTE_API void srslte_algebra_2x2_zf_sse(__m128 y0, __m128 y1,
__m128 h00, __m128 h01, __m128 h10, __m128 h11,
__m128 *x0, __m128 *x1,
float norm);
/* SSE implementation for Minimum Mean Squared Error (MMSE) solver */
SRSLTE_API void srslte_algebra_2x2_mmse_sse(__m128 y0, __m128 y1,
__m128 h00, __m128 h01, __m128 h10, __m128 h11,
__m128 *x0, __m128 *x1,
float noise_estimate, float norm);
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_AVX
/* AVX implementation for complex reciprocal */
SRSLTE_API __m256 srslte_algebra_cf_recip_avx(__m256 a);
/* AVX implementation for 2x2 determinant */
SRSLTE_API __m256 srslte_algebra_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m256 a11);
/* AVX implementation for Zero Forcing (ZF) solver */
SRSLTE_API void srslte_algebra_2x2_zf_avx(__m256 y0, __m256 y1,
__m256 h00, __m256 h01, __m256 h10, __m256 h11,
__m256 *x0, __m256 *x1,
float norm);
/* AVX implementation for Minimum Mean Squared Error (MMSE) solver */
SRSLTE_API void srslte_algebra_2x2_mmse_avx(__m256 y0, __m256 y1,
__m256 h00, __m256 h01, __m256 h10, __m256 h11,
__m256 *x0, __m256 *x1,
float noise_estimate, float norm);
#endif /* LV_HAVE_AVX */
#endif //SRSLTE_ALGEBRA_H

@ -0,0 +1,111 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#ifndef SRSLTE_MAT_H
#define SRSLTE_MAT_H
#include "srslte/phy/utils/simd.h"
#include "srslte/config.h"
/*
* Generic Macros
*/
#define RANDOM_CF() (((float)rand())/((float)RAND_MAX) + _Complex_I*((float)rand())/((float)RAND_MAX))
/* Generic implementation for complex reciprocal */
SRSLTE_API cf_t srslte_mat_cf_recip_gen(cf_t a);
/* Generic implementation for 2x2 determinant */
SRSLTE_API cf_t srslte_mat_2x2_det_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11);
/* Generic implementation for 2x2 Matrix Inversion */
SRSLTE_API void srslte_mat_2x2_inv_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11,
cf_t *r00, cf_t *r01, cf_t *r10, cf_t *r11);
/* Generic implementation for Zero Forcing (ZF) solver */
SRSLTE_API void srslte_mat_2x2_zf_gen(cf_t y0, cf_t y1,
cf_t h00, cf_t h01, cf_t h10, cf_t h11,
cf_t *x0, cf_t *x1,
float norm);
/* Generic implementation for Minimum Mean Squared Error (MMSE) solver */
SRSLTE_API void srslte_mat_2x2_mmse_gen(cf_t y0, cf_t y1,
cf_t h00, cf_t h01, cf_t h10, cf_t h11,
cf_t *x0, cf_t *x1,
float noise_estimate,
float norm);
SRSLTE_API float srslte_mat_2x2_cn(cf_t h00,
cf_t h01,
cf_t h10,
cf_t h11);
#ifdef LV_HAVE_SSE
/* SSE implementation for complex reciprocal */
SRSLTE_API __m128 srslte_mat_cf_recip_sse(__m128 a);
/* SSE implementation for 2x2 determinant */
SRSLTE_API __m128 srslte_mat_2x2_det_sse(__m128 a00, __m128 a01, __m128 a10, __m128 a11);
/* SSE implementation for Zero Forcing (ZF) solver */
SRSLTE_API void srslte_mat_2x2_zf_sse(__m128 y0, __m128 y1,
__m128 h00, __m128 h01, __m128 h10, __m128 h11,
__m128 *x0, __m128 *x1,
float norm);
/* SSE implementation for Minimum Mean Squared Error (MMSE) solver */
SRSLTE_API void srslte_mat_2x2_mmse_sse(__m128 y0, __m128 y1,
__m128 h00, __m128 h01, __m128 h10, __m128 h11,
__m128 *x0, __m128 *x1,
float noise_estimate, float norm);
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_AVX
/* AVX implementation for complex reciprocal */
SRSLTE_API __m256 srslte_mat_cf_recip_avx(__m256 a);
/* AVX implementation for 2x2 determinant */
SRSLTE_API __m256 srslte_mat_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m256 a11);
/* AVX implementation for Zero Forcing (ZF) solver */
SRSLTE_API void srslte_mat_2x2_zf_avx(__m256 y0, __m256 y1,
__m256 h00, __m256 h01, __m256 h10, __m256 h11,
__m256 *x0, __m256 *x1,
float norm);
/* AVX implementation for Minimum Mean Squared Error (MMSE) solver */
SRSLTE_API void srslte_mat_2x2_mmse_avx(__m256 y0, __m256 y1,
__m256 h00, __m256 h01, __m256 h10, __m256 h11,
__m256 *x0, __m256 *x1,
float noise_estimate, float norm);
#endif /* LV_HAVE_AVX */
#endif /* SRSLTE_MAT_H */

@ -0,0 +1,81 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 Software Radio Systems Limited
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#ifndef SRSLTE_SIMD_H_H
#define SRSLTE_SIMD_H_H
/*
* SSE Macros
*/
#ifdef LV_HAVE_SSE
#define _MM_SWAP(X) ((__m128)_mm_shuffle_ps(X, X, _MM_SHUFFLE(2,3,0,1)))
#define _MM_PERM(X) ((__m128)_mm_shuffle_ps(X, X, _MM_SHUFFLE(2,1,3,0)))
#define _MM_MULJ_PS(X) _MM_SWAP(_MM_CONJ_PS(X))
#define _MM_CONJ_PS(X) (_mm_xor_ps(X, _mm_set_ps(-0.0f, 0.0f, -0.0f, 0.0f)))
#define _MM_SQMOD_PS(X) _MM_PERM(_mm_hadd_ps(_mm_mul_ps(X,X), _mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f)))
#define _MM_PROD_PS(a, b) _mm_addsub_ps(_mm_mul_ps(a,_mm_moveldup_ps(b)),_mm_mul_ps(\
_mm_shuffle_ps(a,a,0xB1),_mm_movehdup_ps(b)))
#endif /* LV_HAVE_SSE */
/*
* AVX Macros
*/
#ifdef LV_HAVE_AVX
#define _MM256_MULJ_PS(X) _mm256_permute_ps(_MM256_CONJ_PS(X), 0b10110001)
#define _MM256_CONJ_PS(X) (_mm256_xor_ps(X, _mm256_set_ps(-0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f)))
#ifdef LV_HAVE_FMA
#define _MM256_SQMOD_PS(A, B) _mm256_permute_ps(_mm256_hadd_ps(_mm256_fmadd_ps(A, A, _mm256_mul_ps(B,B)), \
_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100)
#define _MM256_PROD_PS(a, b) _mm256_fmaddsub_ps(a,_mm256_moveldup_ps(b),\
_mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b)))
#else
#define _MM256_SQMOD_PS(A, B) _mm256_permute_ps(_mm256_hadd_ps(_mm256_add_ps(_mm256_mul_ps(A,A), _mm256_mul_ps(B,B)), \
_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100)
#define _MM256_PROD_PS(a, b) _mm256_addsub_ps(_mm256_mul_ps(a,_mm256_moveldup_ps(b)),\
_mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b)))
#endif /* LV_HAVE_FMA */
#endif /* LV_HAVE_AVX */
/*
* AVX extension with FMA Macros
*/
#ifdef LV_HAVE_FMA
#define _MM256_SQMOD_ADD_PS(A, B, C) _mm256_permute_ps(_mm256_hadd_ps(_mm256_fmadd_ps(A, A, _mm256_fmadd_ps(B, B, C)),\
_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100)
#define _MM256_PROD_ADD_PS(A, B, C) _mm256_fmaddsub_ps(A,_mm256_moveldup_ps(B),\
_mm256_fmaddsub_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C))
#define _MM256_PROD_SUB_PS(A, B, C) _mm256_fmaddsub_ps(A,_mm256_moveldup_ps(B),\
_mm256_fmsubadd_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C))
#endif /* LV_HAVE_FMA */
#endif //SRSLTE_SIMD_H_H

@ -36,14 +36,14 @@
#ifdef LV_HAVE_SSE
#include <immintrin.h>
#include "srslte/phy/utils/algebra.h"
#include "srslte/phy/utils/mat.h"
int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate);
int srslte_predecoding_diversity2_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS], int nof_rxant, int nof_symbols);
#endif
#ifdef LV_HAVE_AVX
#include <immintrin.h>
#include "srslte/phy/utils/algebra.h"
#include "srslte/phy/utils/mat.h"
int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate);
#endif
@ -597,7 +597,7 @@ int srslte_predecoding_ccd_2x2_zf_avx(cf_t *y[SRSLTE_MAX_PORTS],
__m256 x0, x1;
srslte_algebra_2x2_zf_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, 2.0f);
srslte_mat_2x2_zf_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, 2.0f);
_mm256_store_ps((float *) &x[0][i], x0);
_mm256_store_ps((float *) &x[1][i], x1);
@ -634,7 +634,7 @@ int srslte_predecoding_ccd_2x2_zf_sse(cf_t *y[SRSLTE_MAX_PORTS],
__m128 x0, x1;
srslte_algebra_2x2_zf_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, 2.0f);
srslte_mat_2x2_zf_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, 2.0f);
_mm_store_ps((float *) &x[0][i], x0);
_mm_store_ps((float *) &x[1][i], x1);
@ -731,7 +731,7 @@ int srslte_predecoding_ccd_2x2_mmse_avx(cf_t *y[SRSLTE_MAX_PORTS],
__m256 x0, x1;
srslte_algebra_2x2_mmse_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, 2.0f);
srslte_mat_2x2_mmse_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, 2.0f);
_mm256_store_ps((float *) &x[0][i], x0);
_mm256_store_ps((float *) &x[1][i], x1);
@ -768,7 +768,7 @@ int srslte_predecoding_ccd_2x2_mmse_sse(cf_t *y[SRSLTE_MAX_PORTS],
__m128 x0, x1;
srslte_algebra_2x2_mmse_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, 2.0f);
srslte_mat_2x2_mmse_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, 2.0f);
_mm_store_ps((float *) &x[0][i], x0);
_mm_store_ps((float *) &x[1][i], x1);
@ -789,7 +789,7 @@ int srslte_predecoding_ccd_2x2_mmse_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLT
h10 = +h[0][1][i] + h[1][1][i];
h01 = +h[0][0][i] - h[1][0][i];
h11 = +h[0][1][i] - h[1][1][i];
srslte_algebra_2x2_mmse_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], noise_estimate, 2.0f);
srslte_mat_2x2_mmse_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], noise_estimate, 2.0f);
i++;
@ -798,7 +798,7 @@ int srslte_predecoding_ccd_2x2_mmse_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLT
h10 = h[0][1][i] - h[1][1][i];
h01 = h[0][0][i] + h[1][0][i];
h11 = h[0][1][i] + h[1][1][i];
srslte_algebra_2x2_mmse_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], noise_estimate, 2.0f);
srslte_mat_2x2_mmse_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], noise_estimate, 2.0f);
}
return SRSLTE_SUCCESS;
}
@ -886,7 +886,7 @@ int srslte_predecoding_multiplex_2x2_zf_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[S
__m256 x0, x1;
srslte_algebra_2x2_zf_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, norm);
srslte_mat_2x2_zf_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, norm);
_mm256_store_ps((float *) &x[0][i], x0);
_mm256_store_ps((float *) &x[1][i], x1);
@ -954,7 +954,7 @@ int srslte_predecoding_multiplex_2x2_zf_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[S
__m128 x0, x1;
srslte_algebra_2x2_zf_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, norm);
srslte_mat_2x2_zf_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, norm);
_mm_store_ps((float *) &x[0][i], x0);
_mm_store_ps((float *) &x[1][i], x1);
@ -1078,7 +1078,7 @@ int srslte_predecoding_multiplex_2x2_mmse_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h
__m256 x0, x1;
srslte_algebra_2x2_mmse_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, norm);
srslte_mat_2x2_mmse_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, norm);
_mm256_store_ps((float *) &x[0][i], x0);
_mm256_store_ps((float *) &x[1][i], x1);
@ -1148,7 +1148,7 @@ int srslte_predecoding_multiplex_2x2_mmse_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h
__m128 x0, x1;
srslte_algebra_2x2_mmse_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, norm);
srslte_mat_2x2_mmse_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, norm);
_mm_store_ps((float *) &x[0][i], x0);
_mm_store_ps((float *) &x[1][i], x1);
@ -1205,7 +1205,7 @@ int srslte_predecoding_multiplex_2x2_mmse_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h
return SRSLTE_ERROR;
}
srslte_algebra_2x2_mmse_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], noise_estimate, norm);
srslte_mat_2x2_mmse_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], noise_estimate, norm);
}
return SRSLTE_SUCCESS;
}
@ -2295,8 +2295,8 @@ int srslte_precoding_pmi_select_2l_sse(cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORT
c11 = _mm_add_ps(c11, sse_noise_estimate);
/* 5. detC */
__m128 detC = srslte_algebra_2x2_det_sse(c00, c01, c10, c11);
__m128 inv_detC = srslte_algebra_cf_recip_sse(detC);
__m128 detC = srslte_mat_2x2_det_sse(c00, c01, c10, c11);
__m128 inv_detC = srslte_mat_cf_recip_sse(detC);
inv_detC = _mm_mul_ps(sse_noise_estimate, inv_detC);
__m128 den0 = _MM_PROD_PS(c00, inv_detC);
@ -2442,8 +2442,8 @@ int srslte_precoding_pmi_select_2l_avx(cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORT
c11 = _mm256_add_ps(c11, avx_noise_estimate);
/* 5. detC */
__m256 detC = srslte_algebra_2x2_det_avx(c00, c01, c10, c11);
__m256 inv_detC = srslte_algebra_cf_recip_avx(detC);
__m256 detC = srslte_mat_2x2_det_avx(c00, c01, c10, c11);
__m256 inv_detC = srslte_mat_cf_recip_avx(detC);
inv_detC = _mm256_mul_ps(avx_noise_estimate, inv_detC);
__m256 den0 = _MM256_PROD_PS(c00, inv_detC);
@ -2530,7 +2530,7 @@ float srslte_precoding_2x2_cn_gen(cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], u
cf_t h10 = h[0][1][i];
cf_t h11 = h[1][1][i];
cn_avg += srslte_algebra_2x2_cn(h00, h01, h10, h11);
cn_avg += srslte_mat_2x2_cn(h00, h01, h10, h11);
count++;
}

@ -28,23 +28,23 @@
#include <immintrin.h>
#include <math.h>
#include "srslte/phy/utils/algebra.h"
#include "srslte/phy/utils/mat.h"
/* Generic implementation for complex reciprocal */
inline cf_t srslte_algebra_cf_recip_gen(cf_t a) {
inline cf_t srslte_mat_cf_recip_gen(cf_t a) {
return conjf(a) / (crealf(a) * crealf(a) + cimagf(a) * cimagf(a));
}
/* Generic implementation for 2x2 determinant */
inline cf_t srslte_algebra_2x2_det_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11) {
inline cf_t srslte_mat_2x2_det_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11) {
return a00 * a11 - a01 * a10;
}
/* 2x2 Matrix inversion, generic implementation */
inline void srslte_algebra_2x2_inv_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11,
inline void srslte_mat_2x2_inv_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11,
cf_t *r00, cf_t *r01, cf_t *r10, cf_t *r11) {
cf_t div = srslte_algebra_cf_recip_gen(srslte_algebra_2x2_det_gen(a00, a01, a10, a11));
cf_t div = srslte_mat_cf_recip_gen(srslte_mat_2x2_det_gen(a00, a01, a10, a11));
*r00 = a11 * div;
*r01 = -a01 * div;
*r10 = -a10 * div;
@ -52,15 +52,15 @@ inline void srslte_algebra_2x2_inv_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11,
}
/* Generic implementation for Zero Forcing (ZF) solver */
inline void srslte_algebra_2x2_zf_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h10, cf_t h11,
inline void srslte_mat_2x2_zf_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h10, cf_t h11,
cf_t *x0, cf_t *x1, float norm) {
cf_t _norm = srslte_algebra_cf_recip_gen(srslte_algebra_2x2_det_gen(h00, h01, h10, h11)) * norm;
cf_t _norm = srslte_mat_cf_recip_gen(srslte_mat_2x2_det_gen(h00, h01, h10, h11)) * norm;
*x0 = (y0 * h11 - h01 * y1) * _norm;
*x1 = (y1 * h00 - h10 * y0) * _norm;
}
/* Generic implementation for Minimum Mean Squared Error (MMSE) solver */
inline void srslte_algebra_2x2_mmse_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h10, cf_t h11,
inline void srslte_mat_2x2_mmse_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h10, cf_t h11,
cf_t *x0, cf_t *x1, float noise_estimate, float norm) {
/* Create conjugated matrix */
cf_t _h00 = conjf(h00);
@ -79,7 +79,7 @@ inline void srslte_algebra_2x2_mmse_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf
cf_t b01 = -a01;
cf_t b10 = -a10;
cf_t b11 = a00;
cf_t _norm = norm * srslte_algebra_cf_recip_gen(srslte_algebra_2x2_det_gen(a00, a01, a10, a11));
cf_t _norm = norm * srslte_mat_cf_recip_gen(srslte_mat_2x2_det_gen(a00, a01, a10, a11));
/* 3. W = inv(H' x H + No) x H' = B x H' */
@ -93,7 +93,7 @@ inline void srslte_algebra_2x2_mmse_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf
*x1 = (y0 * w10 + y1 * w11) * _norm;
}
inline float srslte_algebra_2x2_cn(cf_t h00, cf_t h01, cf_t h10, cf_t h11) {
inline float srslte_mat_2x2_cn(cf_t h00, cf_t h01, cf_t h10, cf_t h11) {
/* 1. A = H * H' (A = A') */
float a00 =
crealf(h00) * crealf(h00) + crealf(h01) * crealf(h01) + cimagf(h00) * cimagf(h00) + cimagf(h01) * cimagf(h01);
@ -118,7 +118,7 @@ inline float srslte_algebra_2x2_cn(cf_t h00, cf_t h01, cf_t h10, cf_t h11) {
#ifdef LV_HAVE_SSE
/* SSE implementation for complex reciprocal */
inline __m128 srslte_algebra_cf_recip_sse(__m128 a) {
inline __m128 srslte_mat_cf_recip_sse(__m128 a) {
__m128 conj = _MM_CONJ_PS(a);
__m128 sqabs = _mm_mul_ps(a, a);
sqabs = _mm_add_ps(_mm_movehdup_ps(sqabs), _mm_moveldup_ps(sqabs));
@ -129,25 +129,25 @@ inline __m128 srslte_algebra_cf_recip_sse(__m128 a) {
}
/* SSE implementation for 2x2 determinant */
inline __m128 srslte_algebra_2x2_det_sse(__m128 a00, __m128 a01, __m128 a10, __m128 a11) {
inline __m128 srslte_mat_2x2_det_sse(__m128 a00, __m128 a01, __m128 a10, __m128 a11) {
return _mm_sub_ps(_MM_PROD_PS(a00, a11), _MM_PROD_PS(a01, a10));
}
/* SSE implementation for Zero Forcing (ZF) solver */
inline void srslte_algebra_2x2_zf_sse(__m128 y0, __m128 y1, __m128 h00, __m128 h01, __m128 h10, __m128 h11,
inline void srslte_mat_2x2_zf_sse(__m128 y0, __m128 y1, __m128 h00, __m128 h01, __m128 h10, __m128 h11,
__m128 *x0, __m128 *x1, float norm) {
__m128 detmult1 = _MM_PROD_PS(h00, h11);
__m128 detmult2 = _MM_PROD_PS(h01, h10);
__m128 det = _mm_sub_ps(detmult1, detmult2);
__m128 detrec = _mm_mul_ps(srslte_algebra_cf_recip_sse(det), _mm_set1_ps(norm));
__m128 detrec = _mm_mul_ps(srslte_mat_cf_recip_sse(det), _mm_set1_ps(norm));
*x0 = _MM_PROD_PS(_mm_sub_ps(_MM_PROD_PS(h11, y0), _MM_PROD_PS(h01, y1)), detrec);
*x1 = _MM_PROD_PS(_mm_sub_ps(_MM_PROD_PS(h00, y1), _MM_PROD_PS(h10, y0)), detrec);
}
/* SSE implementation for Minimum Mean Squared Error (MMSE) solver */
inline void srslte_algebra_2x2_mmse_sse(__m128 y0, __m128 y1, __m128 h00, __m128 h01, __m128 h10, __m128 h11,
inline void srslte_mat_2x2_mmse_sse(__m128 y0, __m128 y1, __m128 h00, __m128 h01, __m128 h10, __m128 h11,
__m128 *x0, __m128 *x1, float noise_estimate, float norm) {
__m128 _noise_estimate = _mm_set_ps(0.0f, noise_estimate, 0.0f, noise_estimate);
__m128 _norm = _mm_set1_ps(norm);
@ -169,7 +169,7 @@ inline void srslte_algebra_2x2_mmse_sse(__m128 y0, __m128 y1, __m128 h00, __m128
__m128 b01 = _mm_xor_ps(a01, _mm_set1_ps(-0.0f));
__m128 b10 = _mm_xor_ps(a10, _mm_set1_ps(-0.0f));
__m128 b11 = a00;
_norm = _mm_mul_ps(_norm, srslte_algebra_cf_recip_sse(srslte_algebra_2x2_det_sse(a00, a01, a10, a11)));
_norm = _mm_mul_ps(_norm, srslte_mat_cf_recip_sse(srslte_mat_2x2_det_sse(a00, a01, a10, a11)));
/* 3. W = inv(H' x H + No) x H' = B x H' */
@ -188,7 +188,7 @@ inline void srslte_algebra_2x2_mmse_sse(__m128 y0, __m128 y1, __m128 h00, __m128
#ifdef LV_HAVE_AVX
/* AVX implementation for complex reciprocal */
inline __m256 srslte_algebra_cf_recip_avx(__m256 a) {
inline __m256 srslte_mat_cf_recip_avx(__m256 a) {
__m256 conj = _MM256_CONJ_PS(a);
__m256 sqabs = _mm256_mul_ps(a, a);
sqabs = _mm256_add_ps(_mm256_movehdup_ps(sqabs), _mm256_moveldup_ps(sqabs));
@ -199,7 +199,7 @@ inline __m256 srslte_algebra_cf_recip_avx(__m256 a) {
}
/* AVX implementation for 2x2 determinant */
inline __m256 srslte_algebra_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m256 a11) {
inline __m256 srslte_mat_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m256 a11) {
#ifdef LV_HAVE_FMA
return _MM256_PROD_SUB_PS(a00, a11, _MM256_PROD_PS(a01, a10));
#else
@ -208,11 +208,11 @@ inline __m256 srslte_algebra_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m
}
/* AVX implementation for Zero Forcing (ZF) solver */
inline void srslte_algebra_2x2_zf_avx(__m256 y0, __m256 y1, __m256 h00, __m256 h01, __m256 h10, __m256 h11,
inline void srslte_mat_2x2_zf_avx(__m256 y0, __m256 y1, __m256 h00, __m256 h01, __m256 h10, __m256 h11,
__m256 *x0, __m256 *x1, float norm) {
__m256 det = srslte_algebra_2x2_det_avx(h00, h01, h10, h11);
__m256 detrec = _mm256_mul_ps(srslte_algebra_cf_recip_avx(det), _mm256_set1_ps(norm));
__m256 det = srslte_mat_2x2_det_avx(h00, h01, h10, h11);
__m256 detrec = _mm256_mul_ps(srslte_mat_cf_recip_avx(det), _mm256_set1_ps(norm));
#ifdef LV_HAVE_FMA
*x0 = _MM256_PROD_PS(_MM256_PROD_SUB_PS(h11, y0, _MM256_PROD_PS(h01, y1)), detrec);
@ -224,7 +224,7 @@ inline void srslte_algebra_2x2_zf_avx(__m256 y0, __m256 y1, __m256 h00, __m256 h
}
/* AVX implementation for Minimum Mean Squared Error (MMSE) solver */
inline void srslte_algebra_2x2_mmse_avx(__m256 y0, __m256 y1, __m256 h00, __m256 h01, __m256 h10, __m256 h11,
inline void srslte_mat_2x2_mmse_avx(__m256 y0, __m256 y1, __m256 h00, __m256 h01, __m256 h10, __m256 h11,
__m256 *x0, __m256 *x1, float noise_estimate, float norm) {
__m256 _noise_estimate = _mm256_set_ps(0.0f, noise_estimate, 0.0f, noise_estimate,
0.0f, noise_estimate, 0.0f, noise_estimate);
@ -254,7 +254,7 @@ inline void srslte_algebra_2x2_mmse_avx(__m256 y0, __m256 y1, __m256 h00, __m256
__m256 b01 = _mm256_xor_ps(a01, _mm256_set1_ps(-0.0f));
__m256 b10 = _mm256_xor_ps(a10, _mm256_set1_ps(-0.0f));
__m256 b11 = a00;
_norm = _mm256_mul_ps(_norm, srslte_algebra_cf_recip_avx(srslte_algebra_2x2_det_avx(a00, a01, a10, a11)));
_norm = _mm256_mul_ps(_norm, srslte_mat_cf_recip_avx(srslte_mat_2x2_det_avx(a00, a01, a10, a11)));
/* 3. W = inv(H' x H + No) x H' = B x H' */

@ -37,7 +37,7 @@ add_test(dft_odd_dc dft_test -N 255 -b -d) # Odd-length, backwards first, handle
# Algebra TEST
########################################################################
add_executable(algebra_test algebra_test.c)
add_executable(algebra_test mat_test.c)
target_link_libraries(algebra_test srslte_phy)
add_test(algebra_2x2_zf_solver_test algebra_test -z)

@ -32,7 +32,7 @@
#include <immintrin.h>
#include <sys/time.h>
#include "srslte/phy/utils/algebra.h"
#include "srslte/phy/utils/mat.h"
bool zf_solver = false;
@ -104,7 +104,7 @@ bool test_zf_solver_gen(void) {
cf_t y0 = x0_gold * h00 + x1_gold * h01;
cf_t y1 = x0_gold * h10 + x1_gold * h11;
srslte_algebra_2x2_zf_gen(y0, y1, h00, h01, h10, h11, &x0, &x1, 1.0f);
srslte_mat_2x2_zf_gen(y0, y1, h00, h01, h10, h11, &x0, &x1, 1.0f);
cf_error0 = x0 - x0_gold;
cf_error1 = x1 - x1_gold;
@ -127,7 +127,7 @@ bool test_mmse_solver_gen(void) {
cf_t y0 = x0_gold * h00 + x1_gold * h01;
cf_t y1 = x0_gold * h10 + x1_gold * h11;
srslte_algebra_2x2_mmse_gen(y0, y1, h00, h01, h10, h11, &x0, &x1, 0.0f, 1.0f);
srslte_mat_2x2_mmse_gen(y0, y1, h00, h01, h10, h11, &x0, &x1, 0.0f, 1.0f);
cf_error0 = x0 - x0_gold;
cf_error1 = x1 - x1_gold;
@ -171,7 +171,7 @@ bool test_zf_solver_sse(void) {
__m128 _x0, _x1;
srslte_algebra_2x2_zf_sse(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 1.0f);
srslte_mat_2x2_zf_sse(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 1.0f);
__attribute__((aligned(128))) cf_t x0[2];
@ -225,7 +225,7 @@ bool test_mmse_solver_sse(void) {
__m128 _x0, _x1;
srslte_algebra_2x2_mmse_sse(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 0.0f, 1.0f);
srslte_mat_2x2_mmse_sse(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 0.0f, 1.0f);
__attribute__((aligned(128))) cf_t x0[2];
@ -289,7 +289,7 @@ bool test_zf_solver_avx(void) {
__m256 _x0, _x1;
srslte_algebra_2x2_zf_avx(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 1.0f);
srslte_mat_2x2_zf_avx(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 1.0f);
__attribute__((aligned(256))) cf_t x0[4];
@ -349,7 +349,7 @@ bool test_mmse_solver_avx(void) {
__m256 _x0, _x1;
srslte_algebra_2x2_mmse_avx(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 0.0f, 1.0f);
srslte_mat_2x2_mmse_avx(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 0.0f, 1.0f);
__attribute__((aligned(256))) cf_t x0[4];
Loading…
Cancel
Save