|
|
|
@ -37,13 +37,13 @@
|
|
|
|
|
#ifdef LV_HAVE_SSE
|
|
|
|
|
#include <xmmintrin.h>
|
|
|
|
|
#include <pmmintrin.h>
|
|
|
|
|
int srslte_predecoding_single_sse(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate);
|
|
|
|
|
int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_RXANT], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate);
|
|
|
|
|
int srslte_predecoding_diversity2_sse(cf_t *y, cf_t *h[SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS], int nof_symbols);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef LV_HAVE_AVX
|
|
|
|
|
#include <immintrin.h>
|
|
|
|
|
int srslte_predecoding_single_avx(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate);
|
|
|
|
|
int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_RXANT], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -58,40 +58,75 @@ int srslte_predecoding_single_avx(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, fl
|
|
|
|
|
|
|
|
|
|
#define PROD(a,b) _mm_addsub_ps(_mm_mul_ps(a,_mm_moveldup_ps(b)),_mm_mul_ps(_mm_shuffle_ps(a,a,0xB1),_mm_movehdup_ps(b)))
|
|
|
|
|
|
|
|
|
|
int srslte_predecoding_single_sse(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate) {
|
|
|
|
|
int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_RXANT], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate) {
|
|
|
|
|
|
|
|
|
|
float *xPtr = (float*) x;
|
|
|
|
|
const float *hPtr = (const float*) h;
|
|
|
|
|
const float *yPtr = (const float*) y;
|
|
|
|
|
const float *hPtr1 = (const float*) h[0];
|
|
|
|
|
const float *yPtr1 = (const float*) y[0];
|
|
|
|
|
const float *hPtr2 = (const float*) h[1];
|
|
|
|
|
const float *yPtr2 = (const float*) y[1];
|
|
|
|
|
|
|
|
|
|
__m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
|
|
|
|
|
|
|
|
|
|
__m128 noise = _mm_set1_ps(noise_estimate);
|
|
|
|
|
__m128 h1Val, h2Val, y1Val, y2Val, h12square, h1square, h2square, h1conj, h2conj, x1Val, x2Val;
|
|
|
|
|
__m128 h1Val1, h2Val1, y1Val1, y2Val1;
|
|
|
|
|
__m128 h1Val2, h2Val2, y1Val2, y2Val2;
|
|
|
|
|
__m128 h12square1, h1square1, h2square1, h1conj1, h2conj1, x1Val1, x2Val1;
|
|
|
|
|
__m128 h12square2, h1square2, h2square2, h1conj2, h2conj2, x1Val2, x2Val2;
|
|
|
|
|
|
|
|
|
|
for (int i=0;i<nof_symbols/4;i++) {
|
|
|
|
|
y1Val = _mm_load_ps(yPtr); yPtr+=4;
|
|
|
|
|
y2Val = _mm_load_ps(yPtr); yPtr+=4;
|
|
|
|
|
h1Val = _mm_load_ps(hPtr); hPtr+=4;
|
|
|
|
|
h2Val = _mm_load_ps(hPtr); hPtr+=4;
|
|
|
|
|
y1Val1 = _mm_load_ps(yPtr1); yPtr1+=4;
|
|
|
|
|
y2Val1 = _mm_load_ps(yPtr1); yPtr1+=4;
|
|
|
|
|
h1Val1 = _mm_load_ps(hPtr1); hPtr1+=4;
|
|
|
|
|
h2Val1 = _mm_load_ps(hPtr1); hPtr1+=4;
|
|
|
|
|
|
|
|
|
|
if (nof_rxant == 2) {
|
|
|
|
|
y1Val2 = _mm_load_ps(yPtr2); yPtr2+=4;
|
|
|
|
|
y2Val2 = _mm_load_ps(yPtr2); yPtr2+=4;
|
|
|
|
|
h1Val2 = _mm_load_ps(hPtr2); hPtr2+=4;
|
|
|
|
|
h2Val2 = _mm_load_ps(hPtr2); hPtr2+=4;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
h12square = _mm_hadd_ps(_mm_mul_ps(h1Val, h1Val), _mm_mul_ps(h2Val, h2Val));
|
|
|
|
|
h12square1 = _mm_hadd_ps(_mm_mul_ps(h1Val1, h1Val1), _mm_mul_ps(h2Val1, h2Val1));
|
|
|
|
|
if (nof_rxant == 2) {
|
|
|
|
|
h12square2 = _mm_hadd_ps(_mm_mul_ps(h1Val2, h1Val2), _mm_mul_ps(h2Val2, h2Val2));
|
|
|
|
|
h12square1 = _mm_add_ps(h12square1, h12square2);
|
|
|
|
|
}
|
|
|
|
|
if (noise_estimate > 0) {
|
|
|
|
|
h12square = _mm_add_ps(h12square, noise);
|
|
|
|
|
h12square1 = _mm_add_ps(h12square1, noise);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
h1square = _mm_shuffle_ps(h12square, h12square, _MM_SHUFFLE(1, 1, 0, 0));
|
|
|
|
|
h2square = _mm_shuffle_ps(h12square, h12square, _MM_SHUFFLE(3, 3, 2, 2));
|
|
|
|
|
h1square1 = _mm_shuffle_ps(h12square1, h12square1, _MM_SHUFFLE(1, 1, 0, 0));
|
|
|
|
|
h2square1 = _mm_shuffle_ps(h12square1, h12square1, _MM_SHUFFLE(3, 3, 2, 2));
|
|
|
|
|
|
|
|
|
|
if (nof_rxant == 2) {
|
|
|
|
|
h1square2 = _mm_shuffle_ps(h12square2, h12square2, _MM_SHUFFLE(1, 1, 0, 0));
|
|
|
|
|
h2square2 = _mm_shuffle_ps(h12square2, h12square2, _MM_SHUFFLE(3, 3, 2, 2));
|
|
|
|
|
|
|
|
|
|
h1square1 = _mm_add_ps(h1square1, h1square2);
|
|
|
|
|
h2square1 = _mm_add_ps(h2square1, h2square2);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Conjugate channel */
|
|
|
|
|
h1conj = _mm_xor_ps(h1Val, conjugator);
|
|
|
|
|
h2conj = _mm_xor_ps(h2Val, conjugator);
|
|
|
|
|
h1conj1 = _mm_xor_ps(h1Val1, conjugator);
|
|
|
|
|
h2conj1 = _mm_xor_ps(h2Val1, conjugator);
|
|
|
|
|
|
|
|
|
|
if (nof_rxant == 2) {
|
|
|
|
|
h1conj2 = _mm_xor_ps(h1Val2, conjugator);
|
|
|
|
|
h2conj2 = _mm_xor_ps(h2Val2, conjugator);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Complex product */
|
|
|
|
|
x1Val = PROD(y1Val, h1conj);
|
|
|
|
|
x2Val = PROD(y2Val, h2conj);
|
|
|
|
|
x1Val1 = PROD(y1Val1, h1conj1);
|
|
|
|
|
x2Val1 = PROD(y2Val1, h2conj1);
|
|
|
|
|
|
|
|
|
|
x1Val = _mm_div_ps(x1Val, h1square);
|
|
|
|
|
x2Val = _mm_div_ps(x2Val, h2square);
|
|
|
|
|
if (nof_rxant == 2) {
|
|
|
|
|
x1Val2 = PROD(y1Val2, h1conj2);
|
|
|
|
|
x2Val2 = PROD(y2Val2, h2conj2);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
x1Val1 = _mm_div_ps(x1Val1, h1square1);
|
|
|
|
|
x2Val1 = _mm_div_ps(x2Val1, h2square1);
|
|
|
|
|
|
|
|
|
|
_mm_store_ps(xPtr, x1Val); xPtr+=4;
|
|
|
|
|
_mm_store_ps(xPtr, x2Val); xPtr+=4;
|
|
|
|
@ -110,7 +145,7 @@ int srslte_predecoding_single_sse(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, fl
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int srslte_predecoding_single_avx(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate) {
|
|
|
|
|
int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_RXANT], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate) {
|
|
|
|
|
|
|
|
|
|
float *xPtr = (float*) x;
|
|
|
|
|
const float *hPtr = (const float*) h;
|
|
|
|
@ -160,15 +195,28 @@ int srslte_predecoding_single_avx(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, fl
|
|
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
int srslte_predecoding_single_gen(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate) {
|
|
|
|
|
int srslte_predecoding_single_gen(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_RXANT], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate) {
|
|
|
|
|
for (int i=0;i<nof_symbols;i++) {
|
|
|
|
|
x[i] = y[i]*conj(h[i])/(conj(h[i])*h[i]+noise_estimate);
|
|
|
|
|
cf_t r = 0;
|
|
|
|
|
cf_t hh = 0;
|
|
|
|
|
for (int p=0;p<nof_rxant;p++) {
|
|
|
|
|
r += y[p][i]*conj(h[p][i]);
|
|
|
|
|
hh += conj(h[p][i])*h[p][i];
|
|
|
|
|
}
|
|
|
|
|
x[i] = r/(hh+noise_estimate);
|
|
|
|
|
}
|
|
|
|
|
return nof_symbols;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* ZF/MMSE SISO equalizer x=y(h'h+no)^(-1)h' (ZF if n0=0.0)*/
|
|
|
|
|
int srslte_predecoding_single(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate) {
|
|
|
|
|
int srslte_predecoding_single(cf_t *y_, cf_t *h_, cf_t *x, int nof_symbols, float noise_estimate) {
|
|
|
|
|
|
|
|
|
|
cf_t *y[SRSLTE_MAX_RXANT];
|
|
|
|
|
cf_t *h[SRSLTE_MAX_RXANT];
|
|
|
|
|
y[0] = y_;
|
|
|
|
|
h[0] = h_;
|
|
|
|
|
int nof_rxant = 1;
|
|
|
|
|
|
|
|
|
|
#ifdef LV_HAVE_AVX
|
|
|
|
|
if (nof_symbols > 32) {
|
|
|
|
|
return srslte_predecoding_single_avx(y, h, x, nof_symbols, noise_estimate);
|
|
|
|
@ -183,7 +231,28 @@ int srslte_predecoding_single(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float
|
|
|
|
|
return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);
|
|
|
|
|
#endif
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* ZF/MMSE SISO equalizer x=y(h'h+no)^(-1)h' (ZF if n0=0.0)*/
|
|
|
|
|
int srslte_predecoding_single_multi(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_RXANT], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate) {
|
|
|
|
|
#ifdef LV_HAVE_AVX
|
|
|
|
|
if (nof_symbols > 32) {
|
|
|
|
|
return srslte_predecoding_single_avx(y, h, x, nof_symbols, noise_estimate);
|
|
|
|
|
} else {
|
|
|
|
|
return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
#ifdef LV_HAVE_SSE
|
|
|
|
|
if (nof_symbols > 32) {
|
|
|
|
|
return srslte_predecoding_single_sse(y, h, x, nof_symbols, noise_estimate);
|
|
|
|
|
} else {
|
|
|
|
|
return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);
|
|
|
|
|
#endif
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|