srsRAN_4G/lib/include/srslte/phy/utils/algebra.h

/**
 *
 * \section COPYRIGHT
 *
 * Copyright 2013-2015 Software Radio Systems Limited
 *
 * \section LICENSE
 *
 * This file is part of the srsLTE library.
 *
 * srsLTE is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * srsLTE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * A copy of the GNU Affero General Public License can be found in
 * the LICENSE file in the top-level directory of this distribution
 * and at http://www.gnu.org/licenses/.
 *
 */

#ifndef SRSLTE_ALGEBRA_H
#define SRSLTE_ALGEBRA_H

#include "srslte/config.h"

/*
 * Generic Macros
 */
#define RANDOM_CF() (((float)rand())/((float)RAND_MAX) + _Complex_I*((float)rand())/((float)RAND_MAX))

/*
 * SSE Macros
 */
#ifdef LV_HAVE_SSE
#define _MM_SWAP(X) ((__m128)_mm_shuffle_ps(X, X, _MM_SHUFFLE(2,3,0,1)))
#define _MM_PERM(X) ((__m128)_mm_shuffle_ps(X, X, _MM_SHUFFLE(2,1,3,0)))
#define _MM_MULJ_PS(X) _MM_SWAP(_MM_CONJ_PS(X))
#define _MM_CONJ_PS(X) (_mm_xor_ps(X, _mm_set_ps(-0.0f, 0.0f, -0.0f, 0.0f)))
#define _MM_SQMOD_PS(X) _MM_PERM(_mm_hadd_ps(_mm_mul_ps(X,X), _mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f)))
#define _MM_PROD_PS(a, b) _mm_addsub_ps(_mm_mul_ps(a,_mm_moveldup_ps(b)),_mm_mul_ps(\
                            _mm_shuffle_ps(a,a,0xB1),_mm_movehdup_ps(b)))

#endif /* LV_HAVE_SSE */


/*
 * AVX Macros
 */
#ifdef LV_HAVE_AVX

#define _MM256_MULJ_PS(X) _mm256_permute_ps(_MM256_CONJ_PS(X), 0b10110001)
#define _MM256_CONJ_PS(X) (_mm256_xor_ps(X, _mm256_set_ps(-0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f)))

#ifdef LV_HAVE_FMA
#define _MM256_SQMOD_PS(A, B) _mm256_permute_ps(_mm256_hadd_ps(_mm256_fmadd_ps(A, A, _mm256_mul_ps(B,B)), \
                                           _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100)
#define _MM256_PROD_PS(a, b) _mm256_fmaddsub_ps(a,_mm256_moveldup_ps(b),\
                              _mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b)))
#else
#define _MM256_SQMOD_PS(A, B) _mm256_permute_ps(_mm256_hadd_ps(_mm256_add_ps(_mm256_mul_ps(A,A), _mm256_mul_ps(B,B)), \
                                           _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100)
#define _MM256_PROD_PS(a, b) _mm256_addsub_ps(_mm256_mul_ps(a,_mm256_moveldup_ps(b)),\
                              _mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b)))
#endif /* LV_HAVE_FMA */
#endif /* LV_HAVE_AVX */

/*
 * AVX extension with FMA Macros
 */
#ifdef LV_HAVE_FMA

#define _MM256_SQMOD_ADD_PS(A, B, C) _mm256_permute_ps(_mm256_hadd_ps(_mm256_fmadd_ps(A, A, _mm256_fmadd_ps(B, B, C)),\
                                           _mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100)

#define _MM256_PROD_ADD_PS(A, B, C) _mm256_fmaddsub_ps(A,_mm256_moveldup_ps(B),\
                              _mm256_fmaddsub_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C))

#define _MM256_PROD_SUB_PS(A, B, C) _mm256_fmaddsub_ps(A,_mm256_moveldup_ps(B),\
                              _mm256_fmsubadd_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C))
#endif /* LV_HAVE_FMA */

/* Generic implementation for complex reciprocal */
SRSLTE_API cf_t srslte_algebra_cf_recip_gen(cf_t a);

/* Generic implementation for 2x2 determinant */
SRSLTE_API cf_t srslte_algebra_2x2_det_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11);

/* Generic implementation for 2x2 Matrix Inversion */
SRSLTE_API void srslte_algebra_2x2_inv_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11,
                                           cf_t *r00, cf_t *r01, cf_t *r10, cf_t *r11);

/* Generic implementation for Zero Forcing (ZF) solver */
SRSLTE_API void srslte_algebra_2x2_zf_gen(cf_t y0, cf_t y1,
                                          cf_t h00, cf_t h01, cf_t h10, cf_t h11,
                                          cf_t *x0, cf_t *x1,
                                          float norm);

/* Generic implementation for Minimum Mean Squared Error (MMSE) solver */
SRSLTE_API void srslte_algebra_2x2_mmse_gen(cf_t y0, cf_t y1,
                                            cf_t h00, cf_t h01, cf_t h10, cf_t h11,
                                            cf_t *x0, cf_t *x1,
                                            float noise_estimate,
                                            float norm);

#ifdef LV_HAVE_SSE

/* SSE implementation for complex reciprocal */
SRSLTE_API __m128 srslte_algebra_cf_recip_sse(__m128 a);

/* SSE implementation for 2x2 determinant */
SRSLTE_API __m128 srslte_algebra_2x2_det_sse(__m128 a00, __m128 a01, __m128 a10, __m128 a11);

/* SSE implementation for Zero Forcing (ZF) solver */
SRSLTE_API void srslte_algebra_2x2_zf_sse(__m128 y0, __m128 y1,
                                          __m128 h00, __m128 h01, __m128 h10, __m128 h11,
                                          __m128 *x0, __m128 *x1,
                                          float norm);

/* SSE implementation for Minimum Mean Squared Error (MMSE) solver */
SRSLTE_API void srslte_algebra_2x2_mmse_sse(__m128 y0, __m128 y1,
                                            __m128 h00, __m128 h01, __m128 h10, __m128 h11,
                                            __m128 *x0, __m128 *x1,
                                            float noise_estimate, float norm);

#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_AVX

/* AVX implementation for complex reciprocal */
SRSLTE_API __m256 srslte_algebra_cf_recip_avx(__m256 a);

/* AVX implementation for 2x2 determinant */
SRSLTE_API __m256 srslte_algebra_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m256 a11);

/* AVX implementation for Zero Forcing (ZF) solver */
SRSLTE_API void srslte_algebra_2x2_zf_avx(__m256 y0, __m256 y1,
                                          __m256 h00, __m256 h01, __m256 h10, __m256 h11,
                                          __m256 *x0, __m256 *x1,
                                          float norm);

/* AVX implementation for Minimum Mean Squared Error (MMSE) solver */
SRSLTE_API void srslte_algebra_2x2_mmse_avx(__m256 y0, __m256 y1,
                                            __m256 h00, __m256 h01, __m256 h10, __m256 h11,
                                            __m256 *x0, __m256 *x1,
                                            float noise_estimate, float norm);

#endif /* LV_HAVE_AVX */

#endif //SRSLTE_ALGEBRA_H
Merge with next_with_matlab 7 years ago			`/**`
			`*`
			`* \section COPYRIGHT`
			`*`
			`* Copyright 2013-2015 Software Radio Systems Limited`
			`*`
			`* \section LICENSE`
			`*`
			`* This file is part of the srsLTE library.`
			`*`
			`* srsLTE is free software: you can redistribute it and/or modify`
			`* it under the terms of the GNU Affero General Public License as`
			`* published by the Free Software Foundation, either version 3 of`
			`* the License, or (at your option) any later version.`
			`*`
			`* srsLTE is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU Affero General Public License for more details.`
			`*`
			`* A copy of the GNU Affero General Public License can be found in`
			`* the LICENSE file in the top-level directory of this distribution`
			`* and at http://www.gnu.org/licenses/.`
			`*`
			`*/`

			`#ifndef SRSLTE_ALGEBRA_H`
			`#define SRSLTE_ALGEBRA_H`

			`#include "srslte/config.h"`

Added algebra SSE, AVX (+FMA) implementatiokns with test 7 years ago			`/*`
			`* Generic Macros`
			`*/`
			`#define RANDOM_CF() (((float)rand())/((float)RAND_MAX) + _Complex_I*((float)rand())/((float)RAND_MAX))`
Merge with next_with_matlab 7 years ago
Added algebra SSE, AVX (+FMA) implementatiokns with test 7 years ago			`/*`
			`* SSE Macros`
			`*/`
			`#ifdef LV_HAVE_SSE`
			`#define _MM_SWAP(X) ((__m128)_mm_shuffle_ps(X, X, _MM_SHUFFLE(2,3,0,1)))`
			`#define _MM_PERM(X) ((__m128)_mm_shuffle_ps(X, X, _MM_SHUFFLE(2,1,3,0)))`
			`#define _MM_MULJ_PS(X) _MM_SWAP(_MM_CONJ_PS(X))`
			`#define _MM_CONJ_PS(X) (_mm_xor_ps(X, _mm_set_ps(-0.0f, 0.0f, -0.0f, 0.0f)))`
			`#define _MM_SQMOD_PS(X) _MM_PERM(_mm_hadd_ps(_mm_mul_ps(X,X), _mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f)))`
Merge with next_with_matlab 7 years ago			`#define _MM_PROD_PS(a, b) _mm_addsub_ps(_mm_mul_ps(a,_mm_moveldup_ps(b)),_mm_mul_ps(\`
			`_mm_shuffle_ps(a,a,0xB1),_mm_movehdup_ps(b)))`

			`#endif /* LV_HAVE_SSE */`

Added algebra SSE, AVX (+FMA) implementatiokns with test 7 years ago
			`/*`
			`* AVX Macros`
			`*/`
Merge with next_with_matlab 7 years ago			`#ifdef LV_HAVE_AVX`

			`#define _MM256_MULJ_PS(X) _mm256_permute_ps(_MM256_CONJ_PS(X), 0b10110001)`
Added algebra SSE, AVX (+FMA) implementatiokns with test 7 years ago			`#define _MM256_CONJ_PS(X) (_mm256_xor_ps(X, _mm256_set_ps(-0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f)))`

			`#ifdef LV_HAVE_FMA`
			`#define _MM256_SQMOD_PS(A, B) _mm256_permute_ps(_mm256_hadd_ps(_mm256_fmadd_ps(A, A, _mm256_mul_ps(B,B)), \`
			`_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100)`
			`#define _MM256_PROD_PS(a, b) _mm256_fmaddsub_ps(a,_mm256_moveldup_ps(b),\`
			`_mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b)))`
			`#else`
			`#define _MM256_SQMOD_PS(A, B) _mm256_permute_ps(_mm256_hadd_ps(_mm256_add_ps(_mm256_mul_ps(A,A), _mm256_mul_ps(B,B)), \`
			`_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100)`
Merge with next_with_matlab 7 years ago			`#define _MM256_PROD_PS(a, b) _mm256_addsub_ps(_mm256_mul_ps(a,_mm256_moveldup_ps(b)),\`
			`_mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b)))`
Added algebra SSE, AVX (+FMA) implementatiokns with test 7 years ago			`#endif /* LV_HAVE_FMA */`
			`#endif /* LV_HAVE_AVX */`

			`/*`
			`* AVX extension with FMA Macros`
			`*/`
			`#ifdef LV_HAVE_FMA`

			`#define _MM256_SQMOD_ADD_PS(A, B, C) _mm256_permute_ps(_mm256_hadd_ps(_mm256_fmadd_ps(A, A, _mm256_fmadd_ps(B, B, C)),\`
			`_mm256_set_ps(0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f)), 0b11011100)`

			`#define _MM256_PROD_ADD_PS(A, B, C) _mm256_fmaddsub_ps(A,_mm256_moveldup_ps(B),\`
			`_mm256_fmaddsub_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C))`

			`#define _MM256_PROD_SUB_PS(A, B, C) _mm256_fmaddsub_ps(A,_mm256_moveldup_ps(B),\`
			`_mm256_fmsubadd_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C))`
			`#endif /* LV_HAVE_FMA */`

			`/* Generic implementation for complex reciprocal */`
			`SRSLTE_API cf_t srslte_algebra_cf_recip_gen(cf_t a);`

			`/* Generic implementation for 2x2 determinant */`
			`SRSLTE_API cf_t srslte_algebra_2x2_det_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11);`
Merge with next_with_matlab 7 years ago
Added algebra SSE, AVX (+FMA) implementatiokns with test 7 years ago			`/* Generic implementation for 2x2 Matrix Inversion */`
			`SRSLTE_API void srslte_algebra_2x2_inv_gen(cf_t a00, cf_t a01, cf_t a10, cf_t a11,`
			`cf_t r00, cf_t r01, cf_t r10, cf_t r11);`

			`/* Generic implementation for Zero Forcing (ZF) solver */`
			`SRSLTE_API void srslte_algebra_2x2_zf_gen(cf_t y0, cf_t y1,`
			`cf_t h00, cf_t h01, cf_t h10, cf_t h11,`
			`cf_t x0, cf_t x1,`
			`float norm);`

			`/* Generic implementation for Minimum Mean Squared Error (MMSE) solver */`
			`SRSLTE_API void srslte_algebra_2x2_mmse_gen(cf_t y0, cf_t y1,`
			`cf_t h00, cf_t h01, cf_t h10, cf_t h11,`
			`cf_t x0, cf_t x1,`
			`float noise_estimate,`
			`float norm);`

			`#ifdef LV_HAVE_SSE`

			`/* SSE implementation for complex reciprocal */`
			`SRSLTE_API __m128 srslte_algebra_cf_recip_sse(__m128 a);`

			`/* SSE implementation for 2x2 determinant */`
			`SRSLTE_API __m128 srslte_algebra_2x2_det_sse(__m128 a00, __m128 a01, __m128 a10, __m128 a11);`

			`/* SSE implementation for Zero Forcing (ZF) solver */`
			`SRSLTE_API void srslte_algebra_2x2_zf_sse(__m128 y0, __m128 y1,`
			`__m128 h00, __m128 h01, __m128 h10, __m128 h11,`
			`__m128 x0, __m128 x1,`
			`float norm);`

			`/* SSE implementation for Minimum Mean Squared Error (MMSE) solver */`
			`SRSLTE_API void srslte_algebra_2x2_mmse_sse(__m128 y0, __m128 y1,`
			`__m128 h00, __m128 h01, __m128 h10, __m128 h11,`
			`__m128 x0, __m128 x1,`
			`float noise_estimate, float norm);`

			`#endif /* LV_HAVE_SSE */`

			`#ifdef LV_HAVE_AVX`

			`/* AVX implementation for complex reciprocal */`
			`SRSLTE_API __m256 srslte_algebra_cf_recip_avx(__m256 a);`

			`/* AVX implementation for 2x2 determinant */`
			`SRSLTE_API __m256 srslte_algebra_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m256 a11);`

			`/* AVX implementation for Zero Forcing (ZF) solver */`
			`SRSLTE_API void srslte_algebra_2x2_zf_avx(__m256 y0, __m256 y1,`
			`__m256 h00, __m256 h01, __m256 h10, __m256 h11,`
			`__m256 x0, __m256 x1,`
Merge with next_with_matlab 7 years ago			`float norm);`

Added algebra SSE, AVX (+FMA) implementatiokns with test 7 years ago			`/* AVX implementation for Minimum Mean Squared Error (MMSE) solver */`
			`SRSLTE_API void srslte_algebra_2x2_mmse_avx(__m256 y0, __m256 y1,`
			`__m256 h00, __m256 h01, __m256 h10, __m256 h11,`
			`__m256 x0, __m256 x1,`
			`float noise_estimate, float norm);`

Merge with next_with_matlab 7 years ago			`#endif /* LV_HAVE_AVX */`

			`#endif //SRSLTE_ALGEBRA_H`