diff --git a/lib/include/srslte/phy/utils/algebra.h b/lib/include/srslte/phy/utils/algebra.h deleted file mode 100644 index ee681d558..000000000 --- a/lib/include/srslte/phy/utils/algebra.h +++ /dev/null @@ -1,161 +0,0 @@ -/** - * - * \section COPYRIGHT - * - * Copyright 2013-2015 Software Radio Systems Limited - * - * \section LICENSE - * - * This file is part of the srsLTE library. - * - * srsLTE is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as - * published by the Free Software Foundation, either version 3 of - * the License, or (at your option) any later version. - * - * srsLTE is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * A copy of the GNU Affero General Public License can be found in - * the LICENSE file in the top-level directory of this distribution - * and at http://www.gnu.org/licenses/. - * - */ - -#ifndef SRSLTE_ALGEBRA_H -#define SRSLTE_ALGEBRA_H - -#include "srslte/config.h" - -/* - * Generic Macros - */ -#define RANDOM_CF() (((float)rand())/((float)RAND_MAX) + _Complex_I*((float)rand())/((float)RAND_MAX)) - -/* - * SSE Macros - */ -#ifdef LV_HAVE_SSE -#define _MM_SWAP(X) ((__m128)_mm_shuffle_ps(X, X, _MM_SHUFFLE(2,3,0,1))) -#define _MM_PERM(X) ((__m128)_mm_shuffle_ps(X, X, _MM_SHUFFLE(2,1,3,0))) -#define _MM_MULJ_PS(X) _MM_SWAP(_MM_CONJ_PS(X)) -#define _MM_CONJ_PS(X) (_mm_xor_ps(X, _mm_set_ps(-0.0f, 0.0f, -0.0f, 0.0f))) -#define _MM_SQMOD_PS(X) _MM_PERM(_mm_hadd_ps(_mm_mul_ps(X,X), _mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f))) -#define _MM_PROD_PS(a, b) _mm_addsub_ps(_mm_mul_ps(a,_mm_moveldup_ps(b)),_mm_mul_ps(\ - _mm_shuffle_ps(a,a,0xB1),_mm_movehdup_ps(b))) - -#endif /* LV_HAVE_SSE */ - - -/* - * AVX Macros - */ -#ifdef LV_HAVE_AVX - -#define _MM256_MULJ_PS(X) _mm256_permute_ps(_MM256_CONJ_PS(X), 0b10110001) -#define _MM256_CONJ_PS(X) (_mm256_xor_ps(X, _mm256_set_ps(-0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f))) - -#ifdef LV_HAVE_FMA -#define _MM256_SQMOD_PS(A, B) _mm256_permute_ps(_mm256_hadd_ps(_mm256_fmadd_ps(A, A, _mm256_mul_ps(B,B)), \ - _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100) -#define _MM256_PROD_PS(a, b) _mm256_fmaddsub_ps(a,_mm256_moveldup_ps(b),\ - _mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b))) -#else -#define _MM256_SQMOD_PS(A, B) _mm256_permute_ps(_mm256_hadd_ps(_mm256_add_ps(_mm256_mul_ps(A,A), _mm256_mul_ps(B,B)), \ - _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100) -#define _MM256_PROD_PS(a, b) _mm256_addsub_ps(_mm256_mul_ps(a,_mm256_moveldup_ps(b)),\ - _mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b))) -#endif /* LV_HAVE_FMA */ -#endif /* LV_HAVE_AVX */ - -/* - * AVX extension with FMA Macros - */ -#ifdef LV_HAVE_FMA - -#define _MM256_SQMOD_ADD_PS(A, B, C) _mm256_permute_ps(_mm256_hadd_ps(_mm256_fmadd_ps(A, A, _mm256_fmadd_ps(B, B, C)),\ - _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100) - -#define _MM256_PROD_ADD_PS(A, B, C) _mm256_fmaddsub_ps(A,_mm256_moveldup_ps(B),\ - _mm256_fmaddsub_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C)) - -#define _MM256_PROD_SUB_PS(A, B, C) _mm256_fmaddsub_ps(A,_mm256_moveldup_ps(B),\ - _mm256_fmsubadd_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C)) -#endif /* LV_HAVE_FMA */ - -/* Generic implementation for complex reciprocal */ -SRSLTE_API cf_t srslte_algebra_cf_recip_gen(cf_t a); - -/* Generic implementation for 2x2 determinant */ -SRSLTE_API cf_t srslte_algebra_2x2_det_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11); - -/* Generic implementation for 2x2 Matrix Inversion */ -SRSLTE_API void srslte_algebra_2x2_inv_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11, - cf_t *r00, cf_t *r01, cf_t *r10, cf_t *r11); - -/* Generic implementation for Zero Forcing (ZF) solver */ -SRSLTE_API void srslte_algebra_2x2_zf_gen(cf_t y0, cf_t y1, - cf_t h00, cf_t h01, cf_t h10, cf_t h11, - cf_t *x0, cf_t *x1, - float norm); - -/* Generic implementation for Minimum Mean Squared Error (MMSE) solver */ -SRSLTE_API void srslte_algebra_2x2_mmse_gen(cf_t y0, cf_t y1, - cf_t h00, cf_t h01, cf_t h10, cf_t h11, - cf_t *x0, cf_t *x1, - float noise_estimate, - float norm); - -SRSLTE_API float srslte_algebra_2x2_cn(cf_t h00, - cf_t h01, - cf_t h10, - cf_t h11); - - -#ifdef LV_HAVE_SSE - -/* SSE implementation for complex reciprocal */ -SRSLTE_API __m128 srslte_algebra_cf_recip_sse(__m128 a); - -/* SSE implementation for 2x2 determinant */ -SRSLTE_API __m128 srslte_algebra_2x2_det_sse(__m128 a00, __m128 a01, __m128 a10, __m128 a11); - -/* SSE implementation for Zero Forcing (ZF) solver */ -SRSLTE_API void srslte_algebra_2x2_zf_sse(__m128 y0, __m128 y1, - __m128 h00, __m128 h01, __m128 h10, __m128 h11, - __m128 *x0, __m128 *x1, - float norm); - -/* SSE implementation for Minimum Mean Squared Error (MMSE) solver */ -SRSLTE_API void srslte_algebra_2x2_mmse_sse(__m128 y0, __m128 y1, - __m128 h00, __m128 h01, __m128 h10, __m128 h11, - __m128 *x0, __m128 *x1, - float noise_estimate, float norm); - -#endif /* LV_HAVE_SSE */ - -#ifdef LV_HAVE_AVX - -/* AVX implementation for complex reciprocal */ -SRSLTE_API __m256 srslte_algebra_cf_recip_avx(__m256 a); - -/* AVX implementation for 2x2 determinant */ -SRSLTE_API __m256 srslte_algebra_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m256 a11); - -/* AVX implementation for Zero Forcing (ZF) solver */ -SRSLTE_API void srslte_algebra_2x2_zf_avx(__m256 y0, __m256 y1, - __m256 h00, __m256 h01, __m256 h10, __m256 h11, - __m256 *x0, __m256 *x1, - float norm); - -/* AVX implementation for Minimum Mean Squared Error (MMSE) solver */ -SRSLTE_API void srslte_algebra_2x2_mmse_avx(__m256 y0, __m256 y1, - __m256 h00, __m256 h01, __m256 h10, __m256 h11, - __m256 *x0, __m256 *x1, - float noise_estimate, float norm); - -#endif /* LV_HAVE_AVX */ - -#endif //SRSLTE_ALGEBRA_H diff --git a/lib/include/srslte/phy/utils/mat.h b/lib/include/srslte/phy/utils/mat.h new file mode 100644 index 000000000..48d3b776d --- /dev/null +++ b/lib/include/srslte/phy/utils/mat.h @@ -0,0 +1,111 @@ +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +#ifndef SRSLTE_MAT_H +#define SRSLTE_MAT_H + +#include "srslte/phy/utils/simd.h" +#include "srslte/config.h" + +/* + * Generic Macros + */ +#define RANDOM_CF() (((float)rand())/((float)RAND_MAX) + _Complex_I*((float)rand())/((float)RAND_MAX)) + +/* Generic implementation for complex reciprocal */ +SRSLTE_API cf_t srslte_mat_cf_recip_gen(cf_t a); + +/* Generic implementation for 2x2 determinant */ +SRSLTE_API cf_t srslte_mat_2x2_det_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11); + +/* Generic implementation for 2x2 Matrix Inversion */ +SRSLTE_API void srslte_mat_2x2_inv_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11, + cf_t *r00, cf_t *r01, cf_t *r10, cf_t *r11); + +/* Generic implementation for Zero Forcing (ZF) solver */ +SRSLTE_API void srslte_mat_2x2_zf_gen(cf_t y0, cf_t y1, + cf_t h00, cf_t h01, cf_t h10, cf_t h11, + cf_t *x0, cf_t *x1, + float norm); + +/* Generic implementation for Minimum Mean Squared Error (MMSE) solver */ +SRSLTE_API void srslte_mat_2x2_mmse_gen(cf_t y0, cf_t y1, + cf_t h00, cf_t h01, cf_t h10, cf_t h11, + cf_t *x0, cf_t *x1, + float noise_estimate, + float norm); + +SRSLTE_API float srslte_mat_2x2_cn(cf_t h00, + cf_t h01, + cf_t h10, + cf_t h11); + + +#ifdef LV_HAVE_SSE + +/* SSE implementation for complex reciprocal */ +SRSLTE_API __m128 srslte_mat_cf_recip_sse(__m128 a); + +/* SSE implementation for 2x2 determinant */ +SRSLTE_API __m128 srslte_mat_2x2_det_sse(__m128 a00, __m128 a01, __m128 a10, __m128 a11); + +/* SSE implementation for Zero Forcing (ZF) solver */ +SRSLTE_API void srslte_mat_2x2_zf_sse(__m128 y0, __m128 y1, + __m128 h00, __m128 h01, __m128 h10, __m128 h11, + __m128 *x0, __m128 *x1, + float norm); + +/* SSE implementation for Minimum Mean Squared Error (MMSE) solver */ +SRSLTE_API void srslte_mat_2x2_mmse_sse(__m128 y0, __m128 y1, + __m128 h00, __m128 h01, __m128 h10, __m128 h11, + __m128 *x0, __m128 *x1, + float noise_estimate, float norm); + +#endif /* LV_HAVE_SSE */ + +#ifdef LV_HAVE_AVX + +/* AVX implementation for complex reciprocal */ +SRSLTE_API __m256 srslte_mat_cf_recip_avx(__m256 a); + +/* AVX implementation for 2x2 determinant */ +SRSLTE_API __m256 srslte_mat_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m256 a11); + +/* AVX implementation for Zero Forcing (ZF) solver */ +SRSLTE_API void srslte_mat_2x2_zf_avx(__m256 y0, __m256 y1, + __m256 h00, __m256 h01, __m256 h10, __m256 h11, + __m256 *x0, __m256 *x1, + float norm); + +/* AVX implementation for Minimum Mean Squared Error (MMSE) solver */ +SRSLTE_API void srslte_mat_2x2_mmse_avx(__m256 y0, __m256 y1, + __m256 h00, __m256 h01, __m256 h10, __m256 h11, + __m256 *x0, __m256 *x1, + float noise_estimate, float norm); + +#endif /* LV_HAVE_AVX */ + +#endif /* SRSLTE_MAT_H */ diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h new file mode 100644 index 000000000..420d07213 --- /dev/null +++ b/lib/include/srslte/phy/utils/simd.h @@ -0,0 +1,81 @@ +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +#ifndef SRSLTE_SIMD_H_H +#define SRSLTE_SIMD_H_H + +/* + * SSE Macros + */ +#ifdef LV_HAVE_SSE +#define _MM_SWAP(X) ((__m128)_mm_shuffle_ps(X, X, _MM_SHUFFLE(2,3,0,1))) +#define _MM_PERM(X) ((__m128)_mm_shuffle_ps(X, X, _MM_SHUFFLE(2,1,3,0))) +#define _MM_MULJ_PS(X) _MM_SWAP(_MM_CONJ_PS(X)) +#define _MM_CONJ_PS(X) (_mm_xor_ps(X, _mm_set_ps(-0.0f, 0.0f, -0.0f, 0.0f))) +#define _MM_SQMOD_PS(X) _MM_PERM(_mm_hadd_ps(_mm_mul_ps(X,X), _mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f))) +#define _MM_PROD_PS(a, b) _mm_addsub_ps(_mm_mul_ps(a,_mm_moveldup_ps(b)),_mm_mul_ps(\ + _mm_shuffle_ps(a,a,0xB1),_mm_movehdup_ps(b))) + +#endif /* LV_HAVE_SSE */ + +/* + * AVX Macros + */ +#ifdef LV_HAVE_AVX + +#define _MM256_MULJ_PS(X) _mm256_permute_ps(_MM256_CONJ_PS(X), 0b10110001) +#define _MM256_CONJ_PS(X) (_mm256_xor_ps(X, _mm256_set_ps(-0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f))) + +#ifdef LV_HAVE_FMA +#define _MM256_SQMOD_PS(A, B) _mm256_permute_ps(_mm256_hadd_ps(_mm256_fmadd_ps(A, A, _mm256_mul_ps(B,B)), \ + _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100) +#define _MM256_PROD_PS(a, b) _mm256_fmaddsub_ps(a,_mm256_moveldup_ps(b),\ + _mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b))) +#else +#define _MM256_SQMOD_PS(A, B) _mm256_permute_ps(_mm256_hadd_ps(_mm256_add_ps(_mm256_mul_ps(A,A), _mm256_mul_ps(B,B)), \ + _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100) +#define _MM256_PROD_PS(a, b) _mm256_addsub_ps(_mm256_mul_ps(a,_mm256_moveldup_ps(b)),\ + _mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b))) +#endif /* LV_HAVE_FMA */ +#endif /* LV_HAVE_AVX */ + + +/* + * AVX extension with FMA Macros + */ +#ifdef LV_HAVE_FMA + +#define _MM256_SQMOD_ADD_PS(A, B, C) _mm256_permute_ps(_mm256_hadd_ps(_mm256_fmadd_ps(A, A, _mm256_fmadd_ps(B, B, C)),\ + _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100) + +#define _MM256_PROD_ADD_PS(A, B, C) _mm256_fmaddsub_ps(A,_mm256_moveldup_ps(B),\ + _mm256_fmaddsub_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C)) + +#define _MM256_PROD_SUB_PS(A, B, C) _mm256_fmaddsub_ps(A,_mm256_moveldup_ps(B),\ + _mm256_fmsubadd_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C)) +#endif /* LV_HAVE_FMA */ + +#endif //SRSLTE_SIMD_H_H diff --git a/lib/src/phy/mimo/precoding.c b/lib/src/phy/mimo/precoding.c index 0e898448a..02faa6e1c 100644 --- a/lib/src/phy/mimo/precoding.c +++ b/lib/src/phy/mimo/precoding.c @@ -36,14 +36,14 @@ #ifdef LV_HAVE_SSE #include -#include "srslte/phy/utils/algebra.h" +#include "srslte/phy/utils/mat.h" int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate); int srslte_predecoding_diversity2_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS], int nof_rxant, int nof_symbols); #endif #ifdef LV_HAVE_AVX #include -#include "srslte/phy/utils/algebra.h" +#include "srslte/phy/utils/mat.h" int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate); #endif @@ -597,7 +597,7 @@ int srslte_predecoding_ccd_2x2_zf_avx(cf_t *y[SRSLTE_MAX_PORTS], __m256 x0, x1; - srslte_algebra_2x2_zf_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, 2.0f); + srslte_mat_2x2_zf_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, 2.0f); _mm256_store_ps((float *) &x[0][i], x0); _mm256_store_ps((float *) &x[1][i], x1); @@ -634,7 +634,7 @@ int srslte_predecoding_ccd_2x2_zf_sse(cf_t *y[SRSLTE_MAX_PORTS], __m128 x0, x1; - srslte_algebra_2x2_zf_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, 2.0f); + srslte_mat_2x2_zf_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, 2.0f); _mm_store_ps((float *) &x[0][i], x0); _mm_store_ps((float *) &x[1][i], x1); @@ -731,7 +731,7 @@ int srslte_predecoding_ccd_2x2_mmse_avx(cf_t *y[SRSLTE_MAX_PORTS], __m256 x0, x1; - srslte_algebra_2x2_mmse_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, 2.0f); + srslte_mat_2x2_mmse_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, 2.0f); _mm256_store_ps((float *) &x[0][i], x0); _mm256_store_ps((float *) &x[1][i], x1); @@ -768,7 +768,7 @@ int srslte_predecoding_ccd_2x2_mmse_sse(cf_t *y[SRSLTE_MAX_PORTS], __m128 x0, x1; - srslte_algebra_2x2_mmse_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, 2.0f); + srslte_mat_2x2_mmse_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, 2.0f); _mm_store_ps((float *) &x[0][i], x0); _mm_store_ps((float *) &x[1][i], x1); @@ -789,7 +789,7 @@ int srslte_predecoding_ccd_2x2_mmse_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLT h10 = +h[0][1][i] + h[1][1][i]; h01 = +h[0][0][i] - h[1][0][i]; h11 = +h[0][1][i] - h[1][1][i]; - srslte_algebra_2x2_mmse_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], noise_estimate, 2.0f); + srslte_mat_2x2_mmse_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], noise_estimate, 2.0f); i++; @@ -798,7 +798,7 @@ int srslte_predecoding_ccd_2x2_mmse_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLT h10 = h[0][1][i] - h[1][1][i]; h01 = h[0][0][i] + h[1][0][i]; h11 = h[0][1][i] + h[1][1][i]; - srslte_algebra_2x2_mmse_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], noise_estimate, 2.0f); + srslte_mat_2x2_mmse_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], noise_estimate, 2.0f); } return SRSLTE_SUCCESS; } @@ -886,7 +886,7 @@ int srslte_predecoding_multiplex_2x2_zf_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[S __m256 x0, x1; - srslte_algebra_2x2_zf_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, norm); + srslte_mat_2x2_zf_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, norm); _mm256_store_ps((float *) &x[0][i], x0); _mm256_store_ps((float *) &x[1][i], x1); @@ -954,7 +954,7 @@ int srslte_predecoding_multiplex_2x2_zf_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[S __m128 x0, x1; - srslte_algebra_2x2_zf_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, norm); + srslte_mat_2x2_zf_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, norm); _mm_store_ps((float *) &x[0][i], x0); _mm_store_ps((float *) &x[1][i], x1); @@ -1078,7 +1078,7 @@ int srslte_predecoding_multiplex_2x2_mmse_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h __m256 x0, x1; - srslte_algebra_2x2_mmse_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, norm); + srslte_mat_2x2_mmse_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, norm); _mm256_store_ps((float *) &x[0][i], x0); _mm256_store_ps((float *) &x[1][i], x1); @@ -1148,7 +1148,7 @@ int srslte_predecoding_multiplex_2x2_mmse_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h __m128 x0, x1; - srslte_algebra_2x2_mmse_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, norm); + srslte_mat_2x2_mmse_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, norm); _mm_store_ps((float *) &x[0][i], x0); _mm_store_ps((float *) &x[1][i], x1); @@ -1205,7 +1205,7 @@ int srslte_predecoding_multiplex_2x2_mmse_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h return SRSLTE_ERROR; } - srslte_algebra_2x2_mmse_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], noise_estimate, norm); + srslte_mat_2x2_mmse_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], noise_estimate, norm); } return SRSLTE_SUCCESS; } @@ -2295,8 +2295,8 @@ int srslte_precoding_pmi_select_2l_sse(cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORT c11 = _mm_add_ps(c11, sse_noise_estimate); /* 5. detC */ - __m128 detC = srslte_algebra_2x2_det_sse(c00, c01, c10, c11); - __m128 inv_detC = srslte_algebra_cf_recip_sse(detC); + __m128 detC = srslte_mat_2x2_det_sse(c00, c01, c10, c11); + __m128 inv_detC = srslte_mat_cf_recip_sse(detC); inv_detC = _mm_mul_ps(sse_noise_estimate, inv_detC); __m128 den0 = _MM_PROD_PS(c00, inv_detC); @@ -2442,8 +2442,8 @@ int srslte_precoding_pmi_select_2l_avx(cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORT c11 = _mm256_add_ps(c11, avx_noise_estimate); /* 5. detC */ - __m256 detC = srslte_algebra_2x2_det_avx(c00, c01, c10, c11); - __m256 inv_detC = srslte_algebra_cf_recip_avx(detC); + __m256 detC = srslte_mat_2x2_det_avx(c00, c01, c10, c11); + __m256 inv_detC = srslte_mat_cf_recip_avx(detC); inv_detC = _mm256_mul_ps(avx_noise_estimate, inv_detC); __m256 den0 = _MM256_PROD_PS(c00, inv_detC); @@ -2530,7 +2530,7 @@ float srslte_precoding_2x2_cn_gen(cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], u cf_t h10 = h[0][1][i]; cf_t h11 = h[1][1][i]; - cn_avg += srslte_algebra_2x2_cn(h00, h01, h10, h11); + cn_avg += srslte_mat_2x2_cn(h00, h01, h10, h11); count++; } diff --git a/lib/src/phy/utils/algebra.c b/lib/src/phy/utils/mat.c similarity index 82% rename from lib/src/phy/utils/algebra.c rename to lib/src/phy/utils/mat.c index 943ae57dd..3c81d4a13 100644 --- a/lib/src/phy/utils/algebra.c +++ b/lib/src/phy/utils/mat.c @@ -28,23 +28,23 @@ #include #include -#include "srslte/phy/utils/algebra.h" +#include "srslte/phy/utils/mat.h" /* Generic implementation for complex reciprocal */ -inline cf_t srslte_algebra_cf_recip_gen(cf_t a) { +inline cf_t srslte_mat_cf_recip_gen(cf_t a) { return conjf(a) / (crealf(a) * crealf(a) + cimagf(a) * cimagf(a)); } /* Generic implementation for 2x2 determinant */ -inline cf_t srslte_algebra_2x2_det_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11) { +inline cf_t srslte_mat_2x2_det_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11) { return a00 * a11 - a01 * a10; } /* 2x2 Matrix inversion, generic implementation */ -inline void srslte_algebra_2x2_inv_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11, +inline void srslte_mat_2x2_inv_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11, cf_t *r00, cf_t *r01, cf_t *r10, cf_t *r11) { - cf_t div = srslte_algebra_cf_recip_gen(srslte_algebra_2x2_det_gen(a00, a01, a10, a11)); + cf_t div = srslte_mat_cf_recip_gen(srslte_mat_2x2_det_gen(a00, a01, a10, a11)); *r00 = a11 * div; *r01 = -a01 * div; *r10 = -a10 * div; @@ -52,15 +52,15 @@ inline void srslte_algebra_2x2_inv_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11, } /* Generic implementation for Zero Forcing (ZF) solver */ -inline void srslte_algebra_2x2_zf_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h10, cf_t h11, +inline void srslte_mat_2x2_zf_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h10, cf_t h11, cf_t *x0, cf_t *x1, float norm) { - cf_t _norm = srslte_algebra_cf_recip_gen(srslte_algebra_2x2_det_gen(h00, h01, h10, h11)) * norm; + cf_t _norm = srslte_mat_cf_recip_gen(srslte_mat_2x2_det_gen(h00, h01, h10, h11)) * norm; *x0 = (y0 * h11 - h01 * y1) * _norm; *x1 = (y1 * h00 - h10 * y0) * _norm; } /* Generic implementation for Minimum Mean Squared Error (MMSE) solver */ -inline void srslte_algebra_2x2_mmse_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h10, cf_t h11, +inline void srslte_mat_2x2_mmse_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h10, cf_t h11, cf_t *x0, cf_t *x1, float noise_estimate, float norm) { /* Create conjugated matrix */ cf_t _h00 = conjf(h00); @@ -79,7 +79,7 @@ inline void srslte_algebra_2x2_mmse_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf cf_t b01 = -a01; cf_t b10 = -a10; cf_t b11 = a00; - cf_t _norm = norm * srslte_algebra_cf_recip_gen(srslte_algebra_2x2_det_gen(a00, a01, a10, a11)); + cf_t _norm = norm * srslte_mat_cf_recip_gen(srslte_mat_2x2_det_gen(a00, a01, a10, a11)); /* 3. W = inv(H' x H + No) x H' = B x H' */ @@ -93,7 +93,7 @@ inline void srslte_algebra_2x2_mmse_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf *x1 = (y0 * w10 + y1 * w11) * _norm; } -inline float srslte_algebra_2x2_cn(cf_t h00, cf_t h01, cf_t h10, cf_t h11) { +inline float srslte_mat_2x2_cn(cf_t h00, cf_t h01, cf_t h10, cf_t h11) { /* 1. A = H * H' (A = A') */ float a00 = crealf(h00) * crealf(h00) + crealf(h01) * crealf(h01) + cimagf(h00) * cimagf(h00) + cimagf(h01) * cimagf(h01); @@ -118,7 +118,7 @@ inline float srslte_algebra_2x2_cn(cf_t h00, cf_t h01, cf_t h10, cf_t h11) { #ifdef LV_HAVE_SSE /* SSE implementation for complex reciprocal */ -inline __m128 srslte_algebra_cf_recip_sse(__m128 a) { +inline __m128 srslte_mat_cf_recip_sse(__m128 a) { __m128 conj = _MM_CONJ_PS(a); __m128 sqabs = _mm_mul_ps(a, a); sqabs = _mm_add_ps(_mm_movehdup_ps(sqabs), _mm_moveldup_ps(sqabs)); @@ -129,25 +129,25 @@ inline __m128 srslte_algebra_cf_recip_sse(__m128 a) { } /* SSE implementation for 2x2 determinant */ -inline __m128 srslte_algebra_2x2_det_sse(__m128 a00, __m128 a01, __m128 a10, __m128 a11) { +inline __m128 srslte_mat_2x2_det_sse(__m128 a00, __m128 a01, __m128 a10, __m128 a11) { return _mm_sub_ps(_MM_PROD_PS(a00, a11), _MM_PROD_PS(a01, a10)); } /* SSE implementation for Zero Forcing (ZF) solver */ -inline void srslte_algebra_2x2_zf_sse(__m128 y0, __m128 y1, __m128 h00, __m128 h01, __m128 h10, __m128 h11, +inline void srslte_mat_2x2_zf_sse(__m128 y0, __m128 y1, __m128 h00, __m128 h01, __m128 h10, __m128 h11, __m128 *x0, __m128 *x1, float norm) { __m128 detmult1 = _MM_PROD_PS(h00, h11); __m128 detmult2 = _MM_PROD_PS(h01, h10); __m128 det = _mm_sub_ps(detmult1, detmult2); - __m128 detrec = _mm_mul_ps(srslte_algebra_cf_recip_sse(det), _mm_set1_ps(norm)); + __m128 detrec = _mm_mul_ps(srslte_mat_cf_recip_sse(det), _mm_set1_ps(norm)); *x0 = _MM_PROD_PS(_mm_sub_ps(_MM_PROD_PS(h11, y0), _MM_PROD_PS(h01, y1)), detrec); *x1 = _MM_PROD_PS(_mm_sub_ps(_MM_PROD_PS(h00, y1), _MM_PROD_PS(h10, y0)), detrec); } /* SSE implementation for Minimum Mean Squared Error (MMSE) solver */ -inline void srslte_algebra_2x2_mmse_sse(__m128 y0, __m128 y1, __m128 h00, __m128 h01, __m128 h10, __m128 h11, +inline void srslte_mat_2x2_mmse_sse(__m128 y0, __m128 y1, __m128 h00, __m128 h01, __m128 h10, __m128 h11, __m128 *x0, __m128 *x1, float noise_estimate, float norm) { __m128 _noise_estimate = _mm_set_ps(0.0f, noise_estimate, 0.0f, noise_estimate); __m128 _norm = _mm_set1_ps(norm); @@ -169,7 +169,7 @@ inline void srslte_algebra_2x2_mmse_sse(__m128 y0, __m128 y1, __m128 h00, __m128 __m128 b01 = _mm_xor_ps(a01, _mm_set1_ps(-0.0f)); __m128 b10 = _mm_xor_ps(a10, _mm_set1_ps(-0.0f)); __m128 b11 = a00; - _norm = _mm_mul_ps(_norm, srslte_algebra_cf_recip_sse(srslte_algebra_2x2_det_sse(a00, a01, a10, a11))); + _norm = _mm_mul_ps(_norm, srslte_mat_cf_recip_sse(srslte_mat_2x2_det_sse(a00, a01, a10, a11))); /* 3. W = inv(H' x H + No) x H' = B x H' */ @@ -188,7 +188,7 @@ inline void srslte_algebra_2x2_mmse_sse(__m128 y0, __m128 y1, __m128 h00, __m128 #ifdef LV_HAVE_AVX /* AVX implementation for complex reciprocal */ -inline __m256 srslte_algebra_cf_recip_avx(__m256 a) { +inline __m256 srslte_mat_cf_recip_avx(__m256 a) { __m256 conj = _MM256_CONJ_PS(a); __m256 sqabs = _mm256_mul_ps(a, a); sqabs = _mm256_add_ps(_mm256_movehdup_ps(sqabs), _mm256_moveldup_ps(sqabs)); @@ -199,7 +199,7 @@ inline __m256 srslte_algebra_cf_recip_avx(__m256 a) { } /* AVX implementation for 2x2 determinant */ -inline __m256 srslte_algebra_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m256 a11) { +inline __m256 srslte_mat_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m256 a11) { #ifdef LV_HAVE_FMA return _MM256_PROD_SUB_PS(a00, a11, _MM256_PROD_PS(a01, a10)); #else @@ -208,11 +208,11 @@ inline __m256 srslte_algebra_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m } /* AVX implementation for Zero Forcing (ZF) solver */ -inline void srslte_algebra_2x2_zf_avx(__m256 y0, __m256 y1, __m256 h00, __m256 h01, __m256 h10, __m256 h11, +inline void srslte_mat_2x2_zf_avx(__m256 y0, __m256 y1, __m256 h00, __m256 h01, __m256 h10, __m256 h11, __m256 *x0, __m256 *x1, float norm) { - __m256 det = srslte_algebra_2x2_det_avx(h00, h01, h10, h11); - __m256 detrec = _mm256_mul_ps(srslte_algebra_cf_recip_avx(det), _mm256_set1_ps(norm)); + __m256 det = srslte_mat_2x2_det_avx(h00, h01, h10, h11); + __m256 detrec = _mm256_mul_ps(srslte_mat_cf_recip_avx(det), _mm256_set1_ps(norm)); #ifdef LV_HAVE_FMA *x0 = _MM256_PROD_PS(_MM256_PROD_SUB_PS(h11, y0, _MM256_PROD_PS(h01, y1)), detrec); @@ -224,7 +224,7 @@ inline void srslte_algebra_2x2_zf_avx(__m256 y0, __m256 y1, __m256 h00, __m256 h } /* AVX implementation for Minimum Mean Squared Error (MMSE) solver */ -inline void srslte_algebra_2x2_mmse_avx(__m256 y0, __m256 y1, __m256 h00, __m256 h01, __m256 h10, __m256 h11, +inline void srslte_mat_2x2_mmse_avx(__m256 y0, __m256 y1, __m256 h00, __m256 h01, __m256 h10, __m256 h11, __m256 *x0, __m256 *x1, float noise_estimate, float norm) { __m256 _noise_estimate = _mm256_set_ps(0.0f, noise_estimate, 0.0f, noise_estimate, 0.0f, noise_estimate, 0.0f, noise_estimate); @@ -254,7 +254,7 @@ inline void srslte_algebra_2x2_mmse_avx(__m256 y0, __m256 y1, __m256 h00, __m256 __m256 b01 = _mm256_xor_ps(a01, _mm256_set1_ps(-0.0f)); __m256 b10 = _mm256_xor_ps(a10, _mm256_set1_ps(-0.0f)); __m256 b11 = a00; - _norm = _mm256_mul_ps(_norm, srslte_algebra_cf_recip_avx(srslte_algebra_2x2_det_avx(a00, a01, a10, a11))); + _norm = _mm256_mul_ps(_norm, srslte_mat_cf_recip_avx(srslte_mat_2x2_det_avx(a00, a01, a10, a11))); /* 3. W = inv(H' x H + No) x H' = B x H' */ diff --git a/lib/src/phy/utils/test/CMakeLists.txt b/lib/src/phy/utils/test/CMakeLists.txt index 1f516da8f..4dccbf2a0 100644 --- a/lib/src/phy/utils/test/CMakeLists.txt +++ b/lib/src/phy/utils/test/CMakeLists.txt @@ -37,7 +37,7 @@ add_test(dft_odd_dc dft_test -N 255 -b -d) # Odd-length, backwards first, handle # Algebra TEST ######################################################################## -add_executable(algebra_test algebra_test.c) +add_executable(algebra_test mat_test.c) target_link_libraries(algebra_test srslte_phy) add_test(algebra_2x2_zf_solver_test algebra_test -z) diff --git a/lib/src/phy/utils/test/algebra_test.c b/lib/src/phy/utils/test/mat_test.c similarity index 96% rename from lib/src/phy/utils/test/algebra_test.c rename to lib/src/phy/utils/test/mat_test.c index a5bc7e773..49be5c9ae 100644 --- a/lib/src/phy/utils/test/algebra_test.c +++ b/lib/src/phy/utils/test/mat_test.c @@ -32,7 +32,7 @@ #include #include -#include "srslte/phy/utils/algebra.h" +#include "srslte/phy/utils/mat.h" bool zf_solver = false; @@ -104,7 +104,7 @@ bool test_zf_solver_gen(void) { cf_t y0 = x0_gold * h00 + x1_gold * h01; cf_t y1 = x0_gold * h10 + x1_gold * h11; - srslte_algebra_2x2_zf_gen(y0, y1, h00, h01, h10, h11, &x0, &x1, 1.0f); + srslte_mat_2x2_zf_gen(y0, y1, h00, h01, h10, h11, &x0, &x1, 1.0f); cf_error0 = x0 - x0_gold; cf_error1 = x1 - x1_gold; @@ -127,7 +127,7 @@ bool test_mmse_solver_gen(void) { cf_t y0 = x0_gold * h00 + x1_gold * h01; cf_t y1 = x0_gold * h10 + x1_gold * h11; - srslte_algebra_2x2_mmse_gen(y0, y1, h00, h01, h10, h11, &x0, &x1, 0.0f, 1.0f); + srslte_mat_2x2_mmse_gen(y0, y1, h00, h01, h10, h11, &x0, &x1, 0.0f, 1.0f); cf_error0 = x0 - x0_gold; cf_error1 = x1 - x1_gold; @@ -171,7 +171,7 @@ bool test_zf_solver_sse(void) { __m128 _x0, _x1; - srslte_algebra_2x2_zf_sse(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 1.0f); + srslte_mat_2x2_zf_sse(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 1.0f); __attribute__((aligned(128))) cf_t x0[2]; @@ -225,7 +225,7 @@ bool test_mmse_solver_sse(void) { __m128 _x0, _x1; - srslte_algebra_2x2_mmse_sse(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 0.0f, 1.0f); + srslte_mat_2x2_mmse_sse(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 0.0f, 1.0f); __attribute__((aligned(128))) cf_t x0[2]; @@ -289,7 +289,7 @@ bool test_zf_solver_avx(void) { __m256 _x0, _x1; - srslte_algebra_2x2_zf_avx(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 1.0f); + srslte_mat_2x2_zf_avx(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 1.0f); __attribute__((aligned(256))) cf_t x0[4]; @@ -349,7 +349,7 @@ bool test_mmse_solver_avx(void) { __m256 _x0, _x1; - srslte_algebra_2x2_mmse_avx(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 0.0f, 1.0f); + srslte_mat_2x2_mmse_avx(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 0.0f, 1.0f); __attribute__((aligned(256))) cf_t x0[4];