Added DL CSI decoding to TM2 and TM3

Branch: master
Author: Xavier Arteaga, 7 years ago
Parent: 5ad6ef4d1d
Commit: 0bc3be7abb

@@ -87,7 +87,7 @@ SRSLTE_API int srslte_predecoding_single(cf_t *y,
SRSLTE_API int srslte_predecoding_single_multi(cf_t *y[SRSLTE_MAX_PORTS],
cf_t *h[SRSLTE_MAX_PORTS],
cf_t *x,
-float *csi,
+float *csi[SRSLTE_MAX_CODEWORDS],
int nof_rxant,
int nof_symbols,
float scaling,
@@ -102,7 +102,8 @@ SRSLTE_API int srslte_predecoding_diversity(cf_t *y,
SRSLTE_API int srslte_predecoding_diversity_multi(cf_t *y[SRSLTE_MAX_PORTS],
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
cf_t *x[SRSLTE_MAX_LAYERS],
+float *csi[SRSLTE_MAX_LAYERS],
int nof_rxant,
int nof_ports,
int nof_symbols,
@@ -113,7 +114,7 @@ SRSLTE_API void srslte_predecoding_set_mimo_decoder (srslte_mimo_decoder_t _mimo
SRSLTE_API int srslte_predecoding_type(cf_t *y[SRSLTE_MAX_PORTS],
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
cf_t *x[SRSLTE_MAX_LAYERS],
-float *csi,
+float *csi[SRSLTE_MAX_CODEWORDS],
int nof_rxant,
int nof_ports,
int nof_layers,

@@ -53,12 +53,17 @@ SRSLTE_API void srslte_mat_2x2_mmse_gen(cf_t y0, cf_t y1,
float noise_estimate,
float norm);
+SRSLTE_API void srslte_mat_2x2_mmse_csi_gen(cf_t y0, cf_t y1,
+cf_t h00, cf_t h01, cf_t h10, cf_t h11,
+cf_t *x0, cf_t *x1, float *csi0, float *csi1,
+float noise_estimate,
+float norm);
SRSLTE_API float srslte_mat_2x2_cn(cf_t h00,
cf_t h01,
cf_t h10,
cf_t h11);
#ifdef LV_HAVE_SSE
/* SSE implementation for complex reciprocal */
@@ -103,4 +108,114 @@ SRSLTE_API void srslte_mat_2x2_mmse_avx(__m256 y0, __m256 y1,
#endif /* LV_HAVE_AVX */
-#endif // SRSLTE_MAT_H
+#if SRSLTE_SIMD_CF_SIZE != 0
/* Generic SIMD implementation for 2x2 determinant */
static inline simd_cf_t srslte_mat_2x2_det_simd(simd_cf_t a00, simd_cf_t a01, simd_cf_t a10, simd_cf_t a11) {
return srslte_simd_cf_sub(srslte_simd_cf_prod(a00, a11), srslte_simd_cf_prod(a01, a10));
}
/* Generic SIMD implementation for Zero Forcing (ZF) solver */
static inline void srslte_mat_2x2_zf_csi_simd(simd_cf_t y0,
simd_cf_t y1,
simd_cf_t h00,
simd_cf_t h01,
simd_cf_t h10,
simd_cf_t h11,
simd_cf_t *x0,
simd_cf_t *x1,
simd_f_t *csi0,
simd_f_t *csi1,
float norm) {
simd_cf_t det = srslte_mat_2x2_det_simd(h00, h01, h10, h11);
simd_cf_t detrec = srslte_simd_cf_mul(srslte_simd_cf_rcp(det), srslte_simd_f_set1(norm));
*x0 = srslte_simd_cf_prod(srslte_simd_cf_sub(srslte_simd_cf_prod(h11, y0), srslte_simd_cf_prod(h01, y1)), detrec);
*x1 = srslte_simd_cf_prod(srslte_simd_cf_sub(srslte_simd_cf_prod(h00, y1), srslte_simd_cf_prod(h10, y0)), detrec);
*csi0 = srslte_simd_f_set1(1.0f);
*csi1 = srslte_simd_f_set1(1.0f);
}
static inline void srslte_mat_2x2_zf_simd(simd_cf_t y0,
simd_cf_t y1,
simd_cf_t h00,
simd_cf_t h01,
simd_cf_t h10,
simd_cf_t h11,
simd_cf_t *x0,
simd_cf_t *x1,
float norm) {
simd_f_t csi1, csi2;
srslte_mat_2x2_zf_csi_simd(y0, y1, h00, h01, h10, h11, x0, x1, &csi1, &csi2, norm);
}
/* Generic SIMD implementation for Minimum Mean Squared Error (MMSE) solver */
static inline void srslte_mat_2x2_mmse_csi_simd(simd_cf_t y0,
simd_cf_t y1,
simd_cf_t h00,
simd_cf_t h01,
simd_cf_t h10,
simd_cf_t h11,
simd_cf_t *x0,
simd_cf_t *x1,
simd_f_t *csi0,
simd_f_t *csi1,
float noise_estimate,
float norm) {
simd_cf_t _noise_estimate;
simd_f_t _norm = srslte_simd_f_set1(norm);
_noise_estimate.re = srslte_simd_f_set1(noise_estimate);
_noise_estimate.im = srslte_simd_f_zero();
/* 1. A = H' x H + No*/
simd_cf_t a00 =
srslte_simd_cf_add(srslte_simd_cf_add(srslte_simd_cf_conjprod(h00, h00), srslte_simd_cf_conjprod(h10, h10)),
_noise_estimate);
simd_cf_t a01 = srslte_simd_cf_add(srslte_simd_cf_conjprod(h01, h00), srslte_simd_cf_conjprod(h11, h10));
simd_cf_t a10 = srslte_simd_cf_add(srslte_simd_cf_conjprod(h00, h01), srslte_simd_cf_conjprod(h10, h11));
simd_cf_t a11 =
srslte_simd_cf_add(srslte_simd_cf_add(srslte_simd_cf_conjprod(h01, h01), srslte_simd_cf_conjprod(h11, h11)),
_noise_estimate);
simd_cf_t a_det_rcp = srslte_simd_cf_rcp(srslte_mat_2x2_det_simd(a00, a01, a10, a11));
/* 2. B = inv(H' x H + No) = inv(A) */
simd_cf_t _norm2 = srslte_simd_cf_mul(a_det_rcp, _norm);
simd_cf_t b00 = srslte_simd_cf_prod(a11, _norm2);
simd_cf_t b01 = srslte_simd_cf_prod(srslte_simd_cf_neg(a01), _norm2);
simd_cf_t b10 = srslte_simd_cf_prod(srslte_simd_cf_neg(a10), _norm2);
simd_cf_t b11 = srslte_simd_cf_prod(a00, _norm2);
/* 3. W = inv(H' x H + No) x H' = B x H' */
simd_cf_t w00 = srslte_simd_cf_add(srslte_simd_cf_conjprod(b00, h00), srslte_simd_cf_conjprod(b01, h01));
simd_cf_t w01 = srslte_simd_cf_add(srslte_simd_cf_conjprod(b00, h10), srslte_simd_cf_conjprod(b01, h11));
simd_cf_t w10 = srslte_simd_cf_add(srslte_simd_cf_conjprod(b10, h00), srslte_simd_cf_conjprod(b11, h01));
simd_cf_t w11 = srslte_simd_cf_add(srslte_simd_cf_conjprod(b10, h10), srslte_simd_cf_conjprod(b11, h11));
/* 4. X = W x Y */
*x0 = srslte_simd_cf_add(srslte_simd_cf_prod(y0, w00), srslte_simd_cf_prod(y1, w01));
*x1 = srslte_simd_cf_add(srslte_simd_cf_prod(y0, w10), srslte_simd_cf_prod(y1, w11));
/* 5. Extract CSI */
*csi0 = srslte_simd_f_rcp(srslte_simd_cf_re(b00));
*csi1 = srslte_simd_f_rcp(srslte_simd_cf_re(b11));
}
static inline void srslte_mat_2x2_mmse_simd(simd_cf_t y0,
simd_cf_t y1,
simd_cf_t h00,
simd_cf_t h01,
simd_cf_t h10,
simd_cf_t h11,
simd_cf_t *x0,
simd_cf_t *x1,
float noise_estimate,
float norm) {
simd_f_t csi0, csi1;
srslte_mat_2x2_mmse_csi_simd(y0, y1, h00, h01, h10, h11, x0, x1, &csi0, &csi1, noise_estimate, norm);
}
#endif /* SRSLTE_SIMD_CF_SIZE != 0 */
#endif /* SRSLTE_MAT_H */
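
As a cross-check of the arithmetic above, here is a minimal scalar sketch of the same 2x2 MMSE-with-CSI computation in plain C99 complex math. The standalone function name and form are illustrative only (not part of the srsLTE API); it mirrors srslte_mat_2x2_mmse_csi_simd() step by step.

#include <complex.h>

typedef float _Complex cf_t; /* same element type as srsLTE's cf_t */

/* Illustrative scalar reference of the 2x2 MMSE equalizer with CSI output. */
static void mmse_2x2_csi_ref(cf_t y0, cf_t y1,
                             cf_t h00, cf_t h01, cf_t h10, cf_t h11,
                             cf_t *x0, cf_t *x1, float *csi0, float *csi1,
                             float noise_estimate, float norm) {
  /* 1. A = H' x H + No */
  cf_t a00 = conjf(h00) * h00 + conjf(h10) * h10 + noise_estimate;
  cf_t a01 = conjf(h00) * h01 + conjf(h10) * h11;
  cf_t a10 = conjf(h01) * h00 + conjf(h11) * h10;
  cf_t a11 = conjf(h01) * h01 + conjf(h11) * h11 + noise_estimate;

  /* 2. B = norm * inv(A), via the 2x2 determinant */
  cf_t det = a00 * a11 - a01 * a10;
  cf_t s   = norm / det;
  cf_t b00 = a11 * s, b01 = -a01 * s, b10 = -a10 * s, b11 = a00 * s;

  /* 3. W = B x H' */
  cf_t w00 = b00 * conjf(h00) + b01 * conjf(h01);
  cf_t w01 = b00 * conjf(h10) + b01 * conjf(h11);
  cf_t w10 = b10 * conjf(h00) + b11 * conjf(h01);
  cf_t w11 = b10 * conjf(h10) + b11 * conjf(h11);

  /* 4. X = W x Y */
  *x0 = y0 * w00 + y1 * w01;
  *x1 = y0 * w10 + y1 * w11;

  /* 5. CSI: reciprocal of the real diagonal of B, exactly as in the SIMD kernel above */
  *csi0 = 1.0f / crealf(b00);
  *csi1 = 1.0f / crealf(b11);
}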

@@ -197,7 +197,7 @@ static inline simd_f_t srslte_simd_f_loadu(const float *ptr) {
#ifdef LV_HAVE_AVX512
return _mm512_loadu_ps(ptr);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
return _mm256_loadu_ps(ptr);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
@@ -233,7 +233,7 @@ static inline void srslte_simd_f_storeu(float *ptr, simd_f_t simdreg) {
#ifdef LV_HAVE_AVX512
_mm512_storeu_ps(ptr, simdreg);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
_mm256_storeu_ps(ptr, simdreg);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
@@ -360,7 +360,7 @@ static inline simd_f_t srslte_simd_f_add(simd_f_t a, simd_f_t b) {
#ifdef LV_HAVE_AVX2
return _mm256_add_ps(a, b);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
return _mm_add_ps(a, b);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
@@ -376,9 +376,9 @@ static inline simd_f_t srslte_simd_f_zero (void) {
return _mm512_setzero_ps();
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
return _mm256_setzero_ps();
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
return _mm_setzero_ps();
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
@@ -401,7 +401,7 @@ static inline simd_f_t srslte_simd_f_swap(simd_f_t a) {
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
return vcombine_f32(vrev64_f32(vget_low_f32(a)), vrev64_f32(vget_high_f32(a)));
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
@@ -443,7 +443,7 @@ static inline simd_f_t srslte_simd_f_sqrt(simd_f_t a) {
#ifdef LV_HAVE_AVX512
return _mm512_sqrt_ps(a);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
return _mm256_sqrt_ps(a);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
@@ -458,7 +458,43 @@ static inline simd_f_t srslte_simd_f_sqrt(simd_f_t a) {
float32x4_t zeros = vmovq_n_f32(0); /* Zero vector */
uint32x4_t mask = vceqq_f32(a, zeros); /* Zero vector mask */
return vbslq_f32(mask, zeros, result); /* Force zero results and return */
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
}
static inline simd_f_t srslte_simd_f_neg(simd_f_t a) {
#ifdef LV_HAVE_AVX512
return _mm512_xor_ps(_mm512_set1_ps(-0.0f), a);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
return _mm256_xor_ps(_mm256_set1_ps(-0.0f), a);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
return _mm_xor_ps(_mm_set1_ps(-0.0f), a);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
return vnegq_f32(a);
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
}
static inline simd_f_t srslte_simd_f_neg_mask(simd_f_t a, simd_f_t mask) {
#ifdef LV_HAVE_AVX512
return _mm512_xor_ps(mask, a);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
return _mm256_xor_ps(mask, a);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
return _mm_xor_ps(mask, a);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
return (float32x4_t) veorq_s32((int32x4_t) a, (int32x4_t) mask);
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
@@ -470,12 +506,11 @@ static inline simd_f_t srslte_simd_f_sqrt(simd_f_t a) {
#if SRSLTE_SIMD_CF_SIZE
#ifdef HAVE_NEON
typedef float32x4x2_t simd_cf_t;
#else
typedef struct {
simd_f_t re;
simd_f_t im;
} simd_cf_t;
#endif
@@ -667,8 +702,8 @@ static inline void srslte_simd_cf_store(float *re, float *im, simd_cf_t simdreg)
_mm512_store_ps(im, simdreg.im);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
-_mm256_store_ps((float *) re, simdreg.re);
-_mm256_store_ps((float *) im, simdreg.im);
+_mm256_store_ps(re, simdreg.re);
+_mm256_store_ps(im, simdreg.im);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_SSE
_mm_store_ps((float *) re, simdreg.re);
@@ -689,8 +724,8 @@ static inline void srslte_simd_cf_storeu(float *re, float *im, simd_cf_t simdreg
_mm512_storeu_ps(im, simdreg.im);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
-_mm256_storeu_ps((float *) re, simdreg.re);
-_mm256_storeu_ps((float *) im, simdreg.im);
+_mm256_storeu_ps(re, simdreg.re);
+_mm256_storeu_ps(im, simdreg.im);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_SSE
_mm_storeu_ps((float *) re, simdreg.re);
@@ -833,8 +868,32 @@ static inline simd_cf_t srslte_simd_cf_add (simd_cf_t a, simd_cf_t b) {
return ret;
}
static inline simd_cf_t srslte_simd_cf_sub (simd_cf_t a, simd_cf_t b) {
simd_cf_t ret;
#ifdef LV_HAVE_AVX512
ret.re = _mm512_sub_ps(a.re, b.re);
ret.im = _mm512_sub_ps(a.im, b.im);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
ret.re = _mm256_sub_ps(a.re, b.re);
ret.im = _mm256_sub_ps(a.im, b.im);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
ret.re = _mm_sub_ps(a.re, b.re);
ret.im = _mm_sub_ps(a.im, b.im);
#else /* LV_HAVE_SSE */
#ifdef HAVE_NEON
ret.val[0] = vsubq_f32(a.val[0],b.val[0]);
ret.val[1] = vsubq_f32(a.val[1],b.val[1]);
#endif /* HAVE_NEON */
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
return ret;
}
static inline simd_cf_t srslte_simd_cf_mul (simd_cf_t a, simd_f_t b) {
simd_cf_t ret;
#ifdef LV_HAVE_AVX512
ret.re = _mm512_mul_ps(a.re, b);
ret.im = _mm512_mul_ps(a.im, b);
@@ -855,7 +914,7 @@ static inline simd_cf_t srslte_simd_cf_mul (simd_cf_t a, simd_f_t b) {
#endif /* LV_HAVE_SSE */
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
return ret;
}
static inline simd_cf_t srslte_simd_cf_rcp (simd_cf_t a) {
@@ -902,6 +961,59 @@ static inline simd_cf_t srslte_simd_cf_rcp (simd_cf_t a) {
return ret;
}
static inline simd_cf_t srslte_simd_cf_neg (simd_cf_t a) {
simd_cf_t ret;
#if LV_HAVE_NEON
ret.val[0] = srslte_simd_f_neg(a.val[0]);
ret.val[1] = srslte_simd_f_neg(a.val[1]);
#else /* LV_HAVE_NEON */
ret.re = srslte_simd_f_neg(a.re);
ret.im = srslte_simd_f_neg(a.im);
#endif /* LV_HAVE_NEON */
return ret;
}
static inline simd_cf_t srslte_simd_cf_neg_mask (simd_cf_t a, simd_f_t mask) {
simd_cf_t ret;
#ifndef LV_HAVE_AVX512
#ifdef LV_HAVE_AVX2
mask = _mm256_permutevar8x32_ps(mask, _mm256_setr_epi32(0,4,1,5,2,6,3,7));
#endif /* LV_HAVE_AVX2 */
#endif /* LV_HAVE_AVX512 */
#if LV_HAVE_NEON
ret.val[0] = srslte_simd_f_neg_mask(a.val[0], mask);
ret.val[1] = srslte_simd_f_neg_mask(a.val[1], mask);
#else /* LV_HAVE_NEON */
ret.re = srslte_simd_f_neg_mask(a.re, mask);
ret.im = srslte_simd_f_neg_mask(a.im, mask);
#endif /* LV_HAVE_NEON */
return ret;
}
static inline simd_cf_t srslte_simd_cf_conj (simd_cf_t a) {
simd_cf_t ret;
#if LV_HAVE_NEON
ret.val[0] = a.val[0];
ret.val[1] = srslte_simd_f_neg(a.val[1]);
#else /* LV_HAVE_NEON */
ret.re = a.re;
ret.im = srslte_simd_f_neg(a.im);
#endif /* LV_HAVE_NEON */
return ret;
}
static inline simd_cf_t srslte_simd_cf_mulj (simd_cf_t a) {
simd_cf_t ret;
#if LV_HAVE_NEON
ret.val[0] = srslte_simd_f_neg(a.val[1]);
ret.val[1] = a.val[0];
#else /* LV_HAVE_NEON */
ret.re = srslte_simd_f_neg(a.im);
ret.im = a.re;
#endif /* LV_HAVE_NEON */
return ret;
}
static inline simd_cf_t srslte_simd_cf_zero (void) {
simd_cf_t ret;
#ifdef LV_HAVE_AVX512
@@ -1057,7 +1169,7 @@ static inline simd_i_t srslte_simd_i_select(simd_i_t a, simd_i_t b, simd_sel_t s
int* sel = (int*) &selector;
int* c_ptr = (int*) &ret;
for(int i = 0;i<4;i++)
{
if(sel[i] == -1){
c_ptr[i] = b_ptr[i];
}else{
@@ -1115,7 +1227,7 @@ static inline simd_s_t srslte_simd_s_loadu(const int16_t *ptr) {
#ifdef LV_HAVE_AVX512
return _mm512_loadu_si512(ptr);
#else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2
return _mm256_loadu_si256((__m256i*) ptr);
#else /* LV_HAVE_AVX2 */
#ifdef LV_HAVE_SSE
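
The srslte_simd_f_neg() and srslte_simd_f_neg_mask() helpers added above rely on the IEEE-754 sign-bit trick: XOR with -0.0f flips the sign of a float, while XOR with +0.0f leaves it unchanged, which is what lets a vector of signed zeros act as a per-lane negation mask. A standalone scalar illustration (plain C, not srsLTE code):

#include <stdio.h>
#include <string.h>

/* Scalar equivalent of the per-lane XOR used by srslte_simd_f_neg_mask():
 * flips the sign bit of 'a' wherever 'mask' carries -0.0f. */
static float neg_mask_scalar(float a, float mask) {
  unsigned int ua, um;
  memcpy(&ua, &a, sizeof(ua));
  memcpy(&um, &mask, sizeof(um));
  ua ^= um;                      /* XOR of the raw bit patterns */
  memcpy(&a, &ua, sizeof(a));
  return a;
}

int main(void) {
  printf("%f %f\n", neg_mask_scalar(1.5f, -0.0f), neg_mask_scalar(1.5f, +0.0f));
  /* prints: -1.500000 1.500000 */
  return 0;
}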

@@ -54,7 +54,7 @@ int srslte_layermap_multiplex(cf_t *d[SRSLTE_MAX_CODEWORDS], cf_t *x[SRSLTE_MAX_
int nof_symbols[SRSLTE_MAX_CODEWORDS]) {
if (nof_cw == nof_layers) {
for (int i = 0; i < nof_cw; i++) {
-srs_vec_cf_cpy(x[i], d[i], (uint32_t) nof_symbols[0]);
+srs_vec_cf_cpy(d[i], x[i], (uint32_t) nof_symbols[0]);
}
return nof_symbols[0];
} else if (nof_cw == 1) {

@@ -283,13 +283,13 @@ int srslte_predecoding_single_csi(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_
for (; i < nof_symbols; i++) {
cf_t r = 0;
float hh = 0;
-float _scaling = 1.0f / scaling;
+float norm = 1.0f / scaling;
for (int p = 0; p < nof_rxant; p++) {
r += y[p][i] * conj(h[p][i]);
hh += (__real__ h[p][i] * __real__ h[p][i]) + (__imag__ h[p][i] * __imag__ h[p][i]);
}
csi[i] = hh + noise_estimate;
-x[i] = r * _scaling / csi[i];
+x[i] = r * norm / csi[i];
}
return nof_symbols;
}
@@ -327,10 +327,10 @@ int srslte_predecoding_single(cf_t *y_, cf_t *h_, cf_t *x, float *csi, int nof_s
}
/* ZF/MMSE SISO equalizer x=y(h'h+no)^(-1)h' (ZF if n0=0.0)*/
-int srslte_predecoding_single_multi(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS], cf_t *x, float *csi,
+int srslte_predecoding_single_multi(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS], cf_t *x, float *csi[SRSLTE_MAX_CODEWORDS],
int nof_rxant, int nof_symbols, float scaling, float noise_estimate) {
-if (csi) {
-return srslte_predecoding_single_csi(y, h, x, csi, nof_rxant, nof_symbols, scaling, noise_estimate);
+if (csi && csi[0]) {
+return srslte_predecoding_single_csi(y, h, x, csi[0], nof_rxant, nof_symbols, scaling, noise_estimate);
}
#ifdef LV_HAVE_AVX
@@ -566,18 +566,123 @@ int srslte_predecoding_diversity(cf_t *y_, cf_t *h_[SRSLTE_MAX_PORTS], cf_t *x[S
#endif
}
int srslte_predecoding_diversity_csi(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
cf_t *x[SRSLTE_MAX_LAYERS], float *csi[SRSLTE_MAX_CODEWORDS],
int nof_rxant, int nof_ports, int nof_symbols, float scaling) {
int i;
if (nof_ports == 2) {
cf_t h00, h01, h10, h11, r0, r1;
for (i = 0; i < nof_symbols / 2; i++) {
float hh = 0;
cf_t x0 = 0;
cf_t x1 = 0;
for (int p=0;p<nof_rxant;p++) {
h00 = h[0][p][2 * i];
h01 = h[0][p][2 * i+1];
h10 = h[1][p][2 * i];
h11 = h[1][p][2 * i+1];
hh += crealf(h00) * crealf(h00) + cimagf(h00) * cimagf(h00)
+ crealf(h11) * crealf(h11) + cimagf(h11) * cimagf(h11);
r0 = y[p][2 * i];
r1 = y[p][2 * i + 1];
if (hh == 0) {
hh = 1e-4;
}
x0 += (conjf(h00) * r0 + h11 * conjf(r1));
x1 += (-h10 * conj(r0) + conj(h01) * r1);
}
csi[0][2*i + 0] = hh;
csi[0][2*i + 1] = hh;
hh *= scaling;
x[0][i] = x0 / hh * sqrt(2);
x[1][i] = x1 / hh * sqrt(2);
}
return i;
} else if (nof_ports == 4) {
int m_ap = (nof_symbols % 4) ? ((nof_symbols - 2) / 4) : nof_symbols / 4;
for (i = 0; i < m_ap; i++) {
cf_t x0 = 0, x1 = 0, x2 = 0, x3 = 0;
float a0 = 0, a1 = 0, a2 = 0, a3 = 0;
cf_t r0, r1, r2, r3;
cf_t h00, h01, h10, h11;
for (int p=0;p<nof_rxant;p++) {
h00 = h[0][p][4 * i + 0];
h01 = h[2][p][4 * i + 0];
h10 = h[0][p][4 * i + 1];
h11 = h[2][p][4 * i + 1];
a0 += __real__ h00 * __real__ h00 + __imag__ h00 * __imag__ h00
+ __real__ h11 * __real__ h11 + __imag__ h11 * __imag__ h11;
a1 += __real__ h10 * __real__ h10 + __imag__ h10 * __imag__ h10
+ __real__ h01 * __real__ h01 + __imag__ h01 * __imag__ h01;
r0 = y[p][4 * i];
r1 = y[p][4 * i + 1];
x0 += (conjf(h00) * r0 + h11 * conjf(r1));
x1 += (-h01 * conjf(r0) + conjf(h10) * r1);
h00 = h[1][p][4 * i + 2];
h01 = h[3][p][4 * i + 2];
h10 = h[1][p][4 * i + 3];
h11 = h[3][p][4 * i + 3];
a2 += __real__ h00 * __real__ h00 + __imag__ h00 * __imag__ h00
+ __real__ h11 * __real__ h11 + __imag__ h11 * __imag__ h11;
a3 += __real__ h10 * __real__ h10 + __imag__ h10 * __imag__ h10
+ __real__ h01 * __real__ h01 + __imag__ h01 * __imag__ h01;
r2 = y[p][4 * i + 2];
r3 = y[p][4 * i + 3];
x2 += (conjf(h00) * r2 + h11 * conjf(r3));
x3 += (-h01 * conjf(r2) + conjf(h10) * r3);
}
a0 *= scaling;
a1 *= scaling;
a2 *= scaling;
a3 *= scaling;
csi[0][4 * i + 0] = a0 / nof_rxant;
csi[0][4 * i + 1] = a1 / nof_rxant;
csi[0][4 * i + 2] = a2 / nof_rxant;
csi[0][4 * i + 3] = a3 / nof_rxant;
x[0][i] = x0 / a0 * sqrtf(2.0f);
x[1][i] = x1 / a1 * sqrtf(2.0f);
x[2][i] = x2 / a2 * sqrtf(2.0f);
x[3][i] = x3 / a3 * sqrtf(2.0f);
}
return i;
} else {
fprintf(stderr, "Number of ports must be 2 or 4 for transmit diversity (nof_ports=%d)\n", nof_ports);
return -1;
}
}
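
For reference, the two-port branch of the scalar loop above is the standard Alamouti (SFBC) combiner. With illustrative notation (not from the source), where $h_{p,k}$ is the estimate for transmit port $p$ at subcarrier $k$ of the pair on a given receive antenna, $r_0, r_1$ the received pair, $\gamma$ the scaling argument, and the sums running over receive antennas:

\hat{x}_0 = \frac{\sqrt{2}}{\gamma\,h_\Sigma}\sum_{\mathrm{rx}}\big(h_{0,0}^{*}\,r_0 + h_{1,1}\,r_1^{*}\big),\qquad
\hat{x}_1 = \frac{\sqrt{2}}{\gamma\,h_\Sigma}\sum_{\mathrm{rx}}\big(-h_{1,0}\,r_0^{*} + h_{0,1}^{*}\,r_1\big),\qquad
h_\Sigma = \sum_{\mathrm{rx}}\big(|h_{0,0}|^2 + |h_{1,1}|^2\big)

The value stored in csi[0] for both symbols of the pair is $h_\Sigma$, taken before the scaling multiply.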
int srslte_predecoding_diversity_multi(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS],
-int nof_rxant, int nof_ports, int nof_symbols, float scaling)
+float *csi[SRSLTE_MAX_CODEWORDS], int nof_rxant, int nof_ports, int nof_symbols, float scaling)
{
-#ifdef LV_HAVE_SSE
-if (nof_symbols > 32 && nof_ports == 2) {
-return srslte_predecoding_diversity2_sse(y, h, x, nof_rxant, nof_symbols, scaling);
+if (csi && csi[0]) {
+return srslte_predecoding_diversity_csi(y, h, x, csi, nof_rxant, nof_ports, nof_symbols, scaling);
} else {
+#ifdef LV_HAVE_SSE
+if (nof_symbols > 32 && nof_ports == 2) {
+return srslte_predecoding_diversity2_sse(y, h, x, nof_rxant, nof_symbols, scaling);
+} else {
+return srslte_predecoding_diversity_gen(y, h, x, nof_rxant, nof_ports, nof_symbols, scaling);
+}
+#else
return srslte_predecoding_diversity_gen(y, h, x, nof_rxant, nof_ports, nof_symbols, scaling);
+#endif
}
-#else
-return srslte_predecoding_diversity_gen(y, h, x, nof_rxant, nof_ports, nof_symbols, scaling);
-#endif
}
int srslte_precoding_mimo_2x2_gen(cf_t W[2][2], cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS],
@@ -614,102 +719,164 @@ int srslte_precoding_mimo_2x2_gen(cf_t W[2][2], cf_t *y[SRSLTE_MAX_PORTS], cf_t
return SRSLTE_SUCCESS;
}
// AVX implementation of ZF 2x2 CCD equalizer static int srslte_predecoding_ccd_2x2_zf_csi(cf_t *y[SRSLTE_MAX_PORTS],
#ifdef LV_HAVE_AVX cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
cf_t *x[SRSLTE_MAX_LAYERS],
int srslte_predecoding_ccd_2x2_zf_avx(cf_t *y[SRSLTE_MAX_PORTS], float *csi[SRSLTE_MAX_CODEWORDS],
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], int nof_symbols,
cf_t *x[SRSLTE_MAX_LAYERS], float scaling) {
uint32_t nof_symbols,
float scaling) {
uint32_t i = 0; uint32_t i = 0;
float norm = 2.0f / scaling;
#if SRSLTE_SIMD_CF_SIZE != 0
#if SRSLTE_SIMD_CF_SIZE == 16
float _mask1[SRSLTE_SIMD_CF_SIZE] = {+0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f,
+0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f};
float _mask2[SRSLTE_SIMD_CF_SIZE] = {-0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f,
-0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f};
#elif SRSLTE_SIMD_CF_SIZE == 8
float _mask1[SRSLTE_SIMD_CF_SIZE] = {+0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f};
float _mask2[SRSLTE_SIMD_CF_SIZE] = {-0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f};
#elif SRSLTE_SIMD_CF_SIZE == 4
float _mask1[SRSLTE_SIMD_CF_SIZE] = {+0.0f, +0.0f, -0.0f, -0.0f};
float _mask2[SRSLTE_SIMD_CF_SIZE] = {-0.0f, -0.0f, +0.0f, +0.0f};
#endif
__m256 mask0 = _mm256_setr_ps(+0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f); simd_f_t mask1 = srslte_simd_f_loadu(_mask1);
__m256 mask1 = _mm256_setr_ps(-0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f); simd_f_t mask2 = srslte_simd_f_loadu(_mask2);
for (i = 0; i < nof_symbols - 3; i += 4) { for (; i < nof_symbols - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
/* Load channel */ /* Load channel */
__m256 h00i = _mm256_load_ps((float *) &h[0][0][i]); simd_cf_t h00i = srslte_simd_cfi_load(&h[0][0][i]);
__m256 h01i = _mm256_load_ps((float *) &h[0][1][i]); simd_cf_t h01i = srslte_simd_cfi_load(&h[0][1][i]);
__m256 h10i = _mm256_load_ps((float *) &h[1][0][i]); simd_cf_t h10i = srslte_simd_cfi_load(&h[1][0][i]);
__m256 h11i = _mm256_load_ps((float *) &h[1][1][i]); simd_cf_t h11i = srslte_simd_cfi_load(&h[1][1][i]);
/* Apply precoding */ /* Apply precoding */
__m256 h00 = _mm256_add_ps(h00i, _mm256_xor_ps(h10i, mask0)); simd_cf_t h00, h01, h10, h11;
__m256 h10 = _mm256_add_ps(h01i, _mm256_xor_ps(h11i, mask0)); h00 = srslte_simd_cf_add(h00i, srslte_simd_cf_neg_mask(h10i, mask1));
__m256 h01 = _mm256_add_ps(h00i, _mm256_xor_ps(h10i, mask1)); h10 = srslte_simd_cf_add(h01i, srslte_simd_cf_neg_mask(h11i, mask1));
__m256 h11 = _mm256_add_ps(h01i, _mm256_xor_ps(h11i, mask1)); h01 = srslte_simd_cf_add(h00i, srslte_simd_cf_neg_mask(h10i, mask2));
h11 = srslte_simd_cf_add(h01i, srslte_simd_cf_neg_mask(h11i, mask2));
__m256 y0 = _mm256_load_ps((float *) &y[0][i]); simd_cf_t y0 = srslte_simd_cfi_load(&y[0][i]);
__m256 y1 = _mm256_load_ps((float *) &y[1][i]); simd_cf_t y1 = srslte_simd_cfi_load(&y[1][i]);
__m256 x0, x1; simd_cf_t x0, x1;
simd_f_t csi0, csi1;
srslte_mat_2x2_zf_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, 2.0f / scaling); srslte_mat_2x2_zf_csi_simd(y0, y1, h00, h01, h10, h11, &x0, &x1, &csi0, &csi1, norm);
_mm256_store_ps((float *) &x[0][i], x0); srslte_simd_cfi_store(&x[0][i], x0);
_mm256_store_ps((float *) &x[1][i], x1); srslte_simd_cfi_store(&x[1][i], x1);
srslte_simd_f_store(&csi[0][i], csi0);
srslte_simd_f_store(&csi[1][i], csi1);
} }
#endif /* SRSLTE_SIMD_CF_SIZE != 0 */
return nof_symbols; cf_t h00, h01, h10, h11, det;
} for (; i < nof_symbols; i++) {
#endif /* LV_HAVE_AVX */
// SSE implementation of ZF 2x2 CCD equalizer // Even precoder
#ifdef LV_HAVE_SSE h00 = +h[0][0][i] + h[1][0][i];
h10 = +h[0][1][i] + h[1][1][i];
h01 = +h[0][0][i] - h[1][0][i];
h11 = +h[0][1][i] - h[1][1][i];
det = (h00 * h11 - h01 * h10);
det = conjf(det) * (norm / (crealf(det) * crealf(det) + cimagf(det) * cimagf(det)));
x[0][i] = (+h11 * y[0][i] - h01 * y[1][i]) * det;
x[1][i] = (-h10 * y[0][i] + h00 * y[1][i]) * det;
csi[0][i] = 1.0f;
csi[1][i] = 1.0f;
i++;
// Odd precoder
h00 = h[0][0][i] - h[1][0][i];
h10 = h[0][1][i] - h[1][1][i];
h01 = h[0][0][i] + h[1][0][i];
h11 = h[0][1][i] + h[1][1][i];
det = (h00 * h11 - h01 * h10);
det = conjf(det) * (norm / (crealf(det) * crealf(det) + cimagf(det) * cimagf(det)));
x[0][i] = (+h11 * y[0][i] - h01 * y[1][i]) * det;
x[1][i] = (-h10 * y[0][i] + h00 * y[1][i]) * det;
int srslte_predecoding_ccd_2x2_zf_sse(cf_t *y[SRSLTE_MAX_PORTS], csi[0][i] = 1.0f;
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], csi[1][i] = 1.0f;
cf_t *x[SRSLTE_MAX_LAYERS], }
uint32_t nof_symbols, return SRSLTE_SUCCESS;
float scaling) { }
static int srslte_predecoding_ccd_2x2_zf(cf_t *y[SRSLTE_MAX_PORTS],
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
cf_t *x[SRSLTE_MAX_LAYERS],
int nof_symbols,
float scaling) {
uint32_t i = 0; uint32_t i = 0;
float norm = 2.0f / scaling;
#if SRSLTE_SIMD_CF_SIZE != 0
#if SRSLTE_SIMD_CF_SIZE == 16
float _mask1[SRSLTE_SIMD_CF_SIZE] = {+0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f,
+0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f};
float _mask2[SRSLTE_SIMD_CF_SIZE] = {-0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f,
-0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f};
#elif SRSLTE_SIMD_CF_SIZE == 8
float _mask1[SRSLTE_SIMD_CF_SIZE] = {+0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f};
float _mask2[SRSLTE_SIMD_CF_SIZE] = {-0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f};
#elif SRSLTE_SIMD_CF_SIZE == 4
float _mask1[SRSLTE_SIMD_CF_SIZE] = {+0.0f, +0.0f, -0.0f, -0.0f};
float _mask2[SRSLTE_SIMD_CF_SIZE] = {-0.0f, -0.0f, +0.0f, +0.0f};
#endif
simd_f_t mask1 = srslte_simd_f_loadu(_mask1);
simd_f_t mask2 = srslte_simd_f_loadu(_mask2);
for (i = 0; i < nof_symbols - 1; i += 2) { for (; i < nof_symbols - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
/* Load channel */ /* Load channel */
__m128 h00i = _mm_load_ps((float *) &h[0][0][i]); simd_cf_t h00i = srslte_simd_cfi_load(&h[0][0][i]);
__m128 h01i = _mm_load_ps((float *) &h[0][1][i]); simd_cf_t h01i = srslte_simd_cfi_load(&h[0][1][i]);
__m128 h10i = _mm_load_ps((float *) &h[1][0][i]); simd_cf_t h10i = srslte_simd_cfi_load(&h[1][0][i]);
__m128 h11i = _mm_load_ps((float *) &h[1][1][i]); simd_cf_t h11i = srslte_simd_cfi_load(&h[1][1][i]);
/* Apply precoding */ /* Apply precoding */
__m128 h00 = _mm_add_ps(h00i, _mm_xor_ps(h10i, _mm_setr_ps(+0.0f, +0.0f, -0.0f, -0.0f))); simd_cf_t h00, h01, h10, h11;
__m128 h10 = _mm_add_ps(h01i, _mm_xor_ps(h11i, _mm_setr_ps(+0.0f, +0.0f, -0.0f, -0.0f))); h00 = srslte_simd_cf_add(h00i, srslte_simd_cf_neg_mask(h10i, mask1));
__m128 h01 = _mm_add_ps(h00i, _mm_xor_ps(h10i, _mm_setr_ps(-0.0f, -0.0f, +0.0f, +0.0f))); h10 = srslte_simd_cf_add(h01i, srslte_simd_cf_neg_mask(h11i, mask1));
__m128 h11 = _mm_add_ps(h01i, _mm_xor_ps(h11i, _mm_setr_ps(-0.0f, -0.0f, +0.0f, +0.0f))); h01 = srslte_simd_cf_add(h00i, srslte_simd_cf_neg_mask(h10i, mask2));
h11 = srslte_simd_cf_add(h01i, srslte_simd_cf_neg_mask(h11i, mask2));
__m128 y0 = _mm_load_ps((float *) &y[0][i]); simd_cf_t y0 = srslte_simd_cfi_load(&y[0][i]);
__m128 y1 = _mm_load_ps((float *) &y[1][i]); simd_cf_t y1 = srslte_simd_cfi_load(&y[1][i]);
__m128 x0, x1; simd_cf_t x0, x1;
srslte_mat_2x2_zf_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, 2.0f / scaling); srslte_mat_2x2_zf_simd(y0, y1, h00, h01, h10, h11, &x0, &x1, norm);
_mm_store_ps((float *) &x[0][i], x0); srslte_simd_cfi_store(&x[0][i], x0);
_mm_store_ps((float *) &x[1][i], x1); srslte_simd_cfi_store(&x[1][i], x1);
} }
#endif /* SRSLTE_SIMD_CF_SIZE != 0 */
return nof_symbols; cf_t h00, h01, h10, h11, det;
} for (; i < nof_symbols; i++) {
#endif
// Generic implementation of ZF 2x2 CCD equalizer
int srslte_predecoding_ccd_2x2_zf_gen(cf_t *y[SRSLTE_MAX_PORTS],
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
cf_t *x[SRSLTE_MAX_LAYERS],
int nof_symbols,
float scaling) {
cf_t h00, h01, h10, h11;
for (int i = 0; i < nof_symbols; i++) {
// Even precoder // Even precoder
h00 = +h[0][0][i] + h[1][0][i]; h00 = +h[0][0][i] + h[1][0][i];
h10 = +h[0][1][i] + h[1][1][i]; h10 = +h[0][1][i] + h[1][1][i];
h01 = +h[0][0][i] - h[1][0][i]; h01 = +h[0][0][i] - h[1][0][i];
h11 = +h[0][1][i] - h[1][1][i]; h11 = +h[0][1][i] - h[1][1][i];
det = (h00 * h11 - h01 * h10);
det = conjf(det) * (norm / (crealf(det) * crealf(det) + cimagf(det) * cimagf(det)));
srslte_mat_2x2_zf_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], 2.0f / scaling); x[0][i] = (+h11 * y[0][i] - h01 * y[1][i]) * det;
x[1][i] = (-h10 * y[0][i] + h00 * y[1][i]) * det;
i++; i++;
@ -718,30 +885,35 @@ int srslte_predecoding_ccd_2x2_zf_gen(cf_t *y[SRSLTE_MAX_PORTS],
h10 = h[0][1][i] - h[1][1][i]; h10 = h[0][1][i] - h[1][1][i];
h01 = h[0][0][i] + h[1][0][i]; h01 = h[0][0][i] + h[1][0][i];
h11 = h[0][1][i] + h[1][1][i]; h11 = h[0][1][i] + h[1][1][i];
det = (h00 * h11 - h01 * h10);
det = conjf(det) * (norm / (crealf(det) * crealf(det) + cimagf(det) * cimagf(det)));
srslte_mat_2x2_zf_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], 2.0f / scaling); x[0][i] = (+h11 * y[0][i] - h01 * y[1][i]) * det;
x[1][i] = (-h10 * y[0][i] + h00 * y[1][i]) * det;
} }
return SRSLTE_SUCCESS; return SRSLTE_SUCCESS;
} }
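
For reference, the per-symbol (non-SIMD) tail of the ZF path above assembles the following effective 2x2 channels before inversion, alternating between even and odd resource elements (read directly from the code; $h_{p,r}$ denotes the estimate for transmit port $p$ at receive antenna $r$, and the factor 2/scaling is carried in norm):

H_{\mathrm{even}} = \begin{pmatrix} h_{0,0}+h_{1,0} & h_{0,0}-h_{1,0} \\ h_{0,1}+h_{1,1} & h_{0,1}-h_{1,1} \end{pmatrix},\qquad
H_{\mathrm{odd}} = \begin{pmatrix} h_{0,0}-h_{1,0} & h_{0,0}+h_{1,0} \\ h_{0,1}-h_{1,1} & h_{0,1}+h_{1,1} \end{pmatrix},\qquad
\begin{pmatrix} x_0 \\ x_1 \end{pmatrix} = \mathrm{norm}\cdot H^{-1}\begin{pmatrix} y_0 \\ y_1 \end{pmatrix}

The CSI-producing ZF variant stores a constant 1.0f per layer, while the MMSE variant further below derives the CSI from the diagonal of the regularized inverse.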
-int srslte_predecoding_ccd_zf(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS],
-int nof_rxant, int nof_ports, int nof_layers, int nof_symbols, float scaling)
-{
+static int srslte_predecoding_ccd_zf(cf_t *y[SRSLTE_MAX_PORTS],
+cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
+cf_t *x[SRSLTE_MAX_LAYERS],
+float *csi[SRSLTE_MAX_CODEWORDS],
+int nof_rxant,
+int nof_ports,
+int nof_layers,
+int nof_symbols,
+float scaling) {
if (nof_ports == 2 && nof_rxant == 2) {
if (nof_layers == 2) {
-#ifdef LV_HAVE_AVX
-return srslte_predecoding_ccd_2x2_zf_avx(y, h, x, nof_symbols, scaling);
-#else
-#ifdef LV_HAVE_SSE
-return srslte_predecoding_ccd_2x2_zf_sse(y, h, x, nof_symbols, scaling);
-#else
-return srslte_predecoding_ccd_2x2_zf_gen(y, h, x, nof_symbols, scaling);
-#endif /* LV_HAVE_SSE */
-#endif /* LV_HAVE_AVX */
+if (csi && csi[0]) {
+return srslte_predecoding_ccd_2x2_zf_csi(y, h, x, csi, nof_symbols, scaling);
+} else {
+return srslte_predecoding_ccd_2x2_zf(y, h, x, nof_symbols, scaling);
+}
} else {
DEBUG("Error predecoding CCD: Invalid number of layers %d\n", nof_layers);
return -1;
}
} else if (nof_ports == 4) {
DEBUG("Error predecoding CCD: Only 2 ports supported\n");
} else {
@@ -750,86 +922,155 @@ int srslte_predecoding_ccd_zf(cf_t *y[SRSLTE_MAX_PORT
return SRSLTE_ERROR;
}
// AVX implementation of MMSE 2x2 CCD equalizer static int srslte_predecoding_ccd_2x2_mmse_csi(cf_t *y[SRSLTE_MAX_PORTS],
#ifdef LV_HAVE_AVX cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
cf_t *x[SRSLTE_MAX_LAYERS],
float *csi[SRSLTE_MAX_CODEWORDS],
int nof_symbols, float scaling, float noise_estimate) {
int i = 0;
float norm = 2.0f / scaling;
#if SRSLTE_SIMD_CF_SIZE != 0
#if SRSLTE_SIMD_CF_SIZE == 16
float _mask1[SRSLTE_SIMD_CF_SIZE] = {+0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f,
+0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f};
float _mask2[SRSLTE_SIMD_CF_SIZE] = {-0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f,
-0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f};
#elif SRSLTE_SIMD_CF_SIZE == 8
float _mask1[SRSLTE_SIMD_CF_SIZE] = {+0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f};
float _mask2[SRSLTE_SIMD_CF_SIZE] = {-0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f};
#elif SRSLTE_SIMD_CF_SIZE == 4
float _mask1[SRSLTE_SIMD_CF_SIZE] = {+0.0f, -0.0f, +0.0f, -0.0f};
float _mask2[SRSLTE_SIMD_CF_SIZE] = {-0.0f, +0.0f, -0.0f, +0.0f};
#endif
int srslte_predecoding_ccd_2x2_mmse_avx(cf_t *y[SRSLTE_MAX_PORTS], simd_f_t mask1 = srslte_simd_f_loadu(_mask1);
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], simd_f_t mask2 = srslte_simd_f_loadu(_mask2);
cf_t *x[SRSLTE_MAX_LAYERS],
uint32_t nof_symbols, float scaling, float noise_estimate) {
uint32_t i = 0;
for (i = 0; i < nof_symbols - 3; i += 4) { for (; i < nof_symbols - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
/* Load channel */ /* Load channel */
__m256 h00i = _mm256_load_ps((float *) &h[0][0][i]); simd_cf_t h00i = srslte_simd_cfi_load(&h[0][0][i]);
__m256 h01i = _mm256_load_ps((float *) &h[0][1][i]); simd_cf_t h01i = srslte_simd_cfi_load(&h[0][1][i]);
__m256 h10i = _mm256_load_ps((float *) &h[1][0][i]); simd_cf_t h10i = srslte_simd_cfi_load(&h[1][0][i]);
__m256 h11i = _mm256_load_ps((float *) &h[1][1][i]); simd_cf_t h11i = srslte_simd_cfi_load(&h[1][1][i]);
/* Apply precoding */ /* Apply precoding */
__m256 h00 = _mm256_add_ps(h00i, _mm256_xor_ps(h10i, _mm256_setr_ps(+0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f))); simd_cf_t h00, h01, h10, h11;
__m256 h10 = _mm256_add_ps(h01i, _mm256_xor_ps(h11i, _mm256_setr_ps(+0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f))); h00 = srslte_simd_cf_add(h00i, srslte_simd_cf_neg_mask(h10i, mask1));
__m256 h01 = _mm256_add_ps(h00i, _mm256_xor_ps(h10i, _mm256_setr_ps(-0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f))); h10 = srslte_simd_cf_add(h01i, srslte_simd_cf_neg_mask(h11i, mask1));
__m256 h11 = _mm256_add_ps(h01i, _mm256_xor_ps(h11i, _mm256_setr_ps(-0.0f, -0.0f, +0.0f, +0.0f, -0.0f, -0.0f, +0.0f, +0.0f))); h01 = srslte_simd_cf_add(h00i, srslte_simd_cf_neg_mask(h10i, mask2));
h11 = srslte_simd_cf_add(h01i, srslte_simd_cf_neg_mask(h11i, mask2));
__m256 y0 = _mm256_load_ps((float *) &y[0][i]); simd_cf_t y0 = srslte_simd_cfi_load(&y[0][i]);
__m256 y1 = _mm256_load_ps((float *) &y[1][i]); simd_cf_t y1 = srslte_simd_cfi_load(&y[1][i]);
__m256 x0, x1; simd_cf_t x0, x1;
simd_f_t csi0, csi1;
srslte_mat_2x2_mmse_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, 2.0f / scaling); srslte_mat_2x2_mmse_csi_simd(y0, y1, h00, h01, h10, h11, &x0, &x1, &csi0, &csi1, noise_estimate, norm);
_mm256_store_ps((float *) &x[0][i], x0); srslte_simd_cfi_store(&x[0][i], x0);
_mm256_store_ps((float *) &x[1][i], x1); srslte_simd_cfi_store(&x[1][i], x1);
srslte_simd_f_store(&csi[0][i], csi0);
srslte_simd_f_store(&csi[1][i], csi1);
} }
#endif /* SRSLTE_SIMD_CF_SIZE != 0 */
return nof_symbols; cf_t h00, h01, h10, h11;
for (; i < nof_symbols; i++) {
// Even precoder
h00 = +h[0][0][i] + h[1][0][i];
h10 = +h[0][1][i] + h[1][1][i];
h01 = +h[0][0][i] - h[1][0][i];
h11 = +h[0][1][i] - h[1][1][i];
srslte_mat_2x2_mmse_csi_gen(y[0][i],
y[1][i],
h00,
h01,
h10,
h11,
&x[0][i],
&x[1][i],
&csi[0][i],
&csi[1][i],
noise_estimate,
norm);
i++;
// Odd precoder
h00 = h[0][0][i] - h[1][0][i];
h10 = h[0][1][i] - h[1][1][i];
h01 = h[0][0][i] + h[1][0][i];
h11 = h[0][1][i] + h[1][1][i];
srslte_mat_2x2_mmse_csi_gen(y[0][i],
y[1][i],
h00,
h01,
h10,
h11,
&x[0][i],
&x[1][i],
&csi[0][i],
&csi[1][i],
noise_estimate,
norm);
}
return SRSLTE_SUCCESS;
} }
#endif /* LV_HAVE_AVX */
// SSE implementation of ZF 2x2 CCD equalizer static int srslte_predecoding_ccd_2x2_mmse(cf_t *y[SRSLTE_MAX_PORTS],
#ifdef LV_HAVE_SSE cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
cf_t *x[SRSLTE_MAX_LAYERS],
int nof_symbols, float scaling, float noise_estimate) {
int i = 0;
float norm = 2.0f / scaling;
#if SRSLTE_SIMD_CF_SIZE != 0
#if SRSLTE_SIMD_CF_SIZE == 16
float _mask1[SRSLTE_SIMD_CF_SIZE] = {+0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f,
+0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f};
float _mask2[SRSLTE_SIMD_CF_SIZE] = {-0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f,
-0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f};
#elif SRSLTE_SIMD_CF_SIZE == 8
float _mask1[SRSLTE_SIMD_CF_SIZE] = {+0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f};
float _mask2[SRSLTE_SIMD_CF_SIZE] = {-0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f, -0.0f, +0.0f};
#elif SRSLTE_SIMD_CF_SIZE == 4
float _mask1[SRSLTE_SIMD_CF_SIZE] = {+0.0f, -0.0f, +0.0f, -0.0f};
float _mask2[SRSLTE_SIMD_CF_SIZE] = {-0.0f, +0.0f, -0.0f, +0.0f};
#endif
int srslte_predecoding_ccd_2x2_mmse_sse(cf_t *y[SRSLTE_MAX_PORTS], simd_f_t mask1 = srslte_simd_f_loadu(_mask1);
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], simd_f_t mask2 = srslte_simd_f_loadu(_mask2);
cf_t *x[SRSLTE_MAX_LAYERS],
uint32_t nof_symbols, float scaling, float noise_estimate) {
uint32_t i = 0;
for (i = 0; i < nof_symbols - 1; i += 2) { for (; i < nof_symbols - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
/* Load channel */ /* Load channel */
__m128 h00i = _mm_load_ps((float *) &h[0][0][i]); simd_cf_t h00i = srslte_simd_cfi_load(&h[0][0][i]);
__m128 h01i = _mm_load_ps((float *) &h[0][1][i]); simd_cf_t h01i = srslte_simd_cfi_load(&h[0][1][i]);
__m128 h10i = _mm_load_ps((float *) &h[1][0][i]); simd_cf_t h10i = srslte_simd_cfi_load(&h[1][0][i]);
__m128 h11i = _mm_load_ps((float *) &h[1][1][i]); simd_cf_t h11i = srslte_simd_cfi_load(&h[1][1][i]);
/* Apply precoding */ /* Apply precoding */
__m128 h00 = _mm_add_ps(h00i, _mm_xor_ps(h10i, _mm_setr_ps(+0.0f, +0.0f, -0.0f, -0.0f))); simd_cf_t h00, h01, h10, h11;
__m128 h10 = _mm_add_ps(h01i, _mm_xor_ps(h11i, _mm_setr_ps(+0.0f, +0.0f, -0.0f, -0.0f))); h00 = srslte_simd_cf_add(h00i, srslte_simd_cf_neg_mask(h10i, mask1));
__m128 h01 = _mm_add_ps(h00i, _mm_xor_ps(h10i, _mm_setr_ps(-0.0f, -0.0f, +0.0f, +0.0f))); h10 = srslte_simd_cf_add(h01i, srslte_simd_cf_neg_mask(h11i, mask1));
__m128 h11 = _mm_add_ps(h01i, _mm_xor_ps(h11i, _mm_setr_ps(-0.0f, -0.0f, +0.0f, +0.0f))); h01 = srslte_simd_cf_add(h00i, srslte_simd_cf_neg_mask(h10i, mask2));
h11 = srslte_simd_cf_add(h01i, srslte_simd_cf_neg_mask(h11i, mask2));
__m128 y0 = _mm_load_ps((float *) &y[0][i]); simd_cf_t y0 = srslte_simd_cfi_load(&y[0][i]);
__m128 y1 = _mm_load_ps((float *) &y[1][i]); simd_cf_t y1 = srslte_simd_cfi_load(&y[1][i]);
__m128 x0, x1; simd_cf_t x0, x1;
srslte_mat_2x2_mmse_simd(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, norm);
srslte_mat_2x2_mmse_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, 2.0f / scaling); srslte_simd_cfi_store(&x[0][i], x0);
srslte_simd_cfi_store(&x[1][i], x1);
_mm_store_ps((float *) &x[0][i], x0);
_mm_store_ps((float *) &x[1][i], x1);
} }
#endif /* SRSLTE_SIMD_CF_SIZE != 0 */
return nof_symbols;
}
#endif /* LV_HAVE_SSE */
// Generic implementation of ZF 2x2 CCD equalizer
int srslte_predecoding_ccd_2x2_mmse_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS],
int nof_symbols, float scaling, float noise_estimate) {
cf_t h00, h01, h10, h11; cf_t h00, h01, h10, h11;
for (i = 0; i < nof_symbols; i++) {
for (int i = 0; i < nof_symbols; i++) {
// Even precoder // Even precoder
h00 = +h[0][0][i] + h[1][0][i]; h00 = +h[0][0][i] + h[1][0][i];
@@ -850,21 +1091,23 @@ int srslte_predecoding_ccd_2x2_mmse_gen(cf_t *y[SRSLT
return SRSLTE_SUCCESS;
}
-int srslte_predecoding_ccd_mmse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS],
-int nof_rxant, int nof_ports, int nof_layers, int nof_symbols, float scaling, float noise_estimate)
-{
+int srslte_predecoding_ccd_mmse(cf_t *y[SRSLTE_MAX_PORTS],
+cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
+cf_t *x[SRSLTE_MAX_LAYERS],
+float *csi[SRSLTE_MAX_CODEWORDS],
+int nof_rxant,
+int nof_ports,
+int nof_layers,
+int nof_symbols,
+float scaling,
+float noise_estimate) {
if (nof_ports == 2 && nof_rxant == 2) {
if (nof_layers == 2) {
-#ifdef LV_HAVE_AVX
-return srslte_predecoding_ccd_2x2_mmse_avx(y, h, x, nof_symbols, scaling, noise_estimate);
-#else
-#ifdef LV_HAVE_SSE
-return srslte_predecoding_ccd_2x2_mmse_sse(y, h, x, nof_symbols, scaling, noise_estimate);
-#else
-return srslte_predecoding_ccd_2x2_mmse_gen(y, h, x, nof_symbols, scaling, noise_estimate);
-#endif /* LV_HAVE_SSE */
-#endif /* LV_HAVE_AVX */
+if (csi && csi[0])
+return srslte_predecoding_ccd_2x2_mmse_csi(y, h, x, csi, nof_symbols, scaling, noise_estimate);
+else {
+return srslte_predecoding_ccd_2x2_mmse(y, h, x, nof_symbols, scaling, noise_estimate);
+}
} else {
DEBUG("Error predecoding CCD: Invalid number of layers %d\n", nof_layers);
return -1;
@@ -1468,7 +1711,7 @@ void srslte_predecoding_set_mimo_decoder (srslte_mimo_decoder_t _mimo_decoder) {
/* 36.211 v10.3.0 Section 6.3.4 */
int srslte_predecoding_type(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
-cf_t *x[SRSLTE_MAX_LAYERS], float *csi, int nof_rxant, int nof_ports, int nof_layers,
+cf_t *x[SRSLTE_MAX_LAYERS], float *csi[SRSLTE_MAX_CODEWORDS], int nof_rxant, int nof_ports, int nof_layers,
int codebook_idx, int nof_symbols, srslte_mimo_type_t type, float scaling,
float noise_estimate) {
@@ -1488,10 +1731,10 @@ int srslte_predecoding_type(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS]
if (nof_layers >= 2 && nof_layers <= 4) {
switch (mimo_decoder) {
case SRSLTE_MIMO_DECODER_ZF:
-return srslte_predecoding_ccd_zf(y, h, x, nof_rxant, nof_ports, nof_layers, nof_symbols, scaling);
+return srslte_predecoding_ccd_zf(y, h, x, csi, nof_rxant, nof_ports, nof_layers, nof_symbols, scaling);
break;
case SRSLTE_MIMO_DECODER_MMSE:
-return srslte_predecoding_ccd_mmse(y, h, x, nof_rxant, nof_ports, nof_layers, nof_symbols, scaling, noise_estimate);
+return srslte_predecoding_ccd_mmse(y, h, x, csi, nof_rxant, nof_ports, nof_layers, nof_symbols, scaling, noise_estimate);
break;
}
} else {
@@ -1510,7 +1753,7 @@ int srslte_predecoding_type(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS]
break;
case SRSLTE_MIMO_TYPE_TX_DIVERSITY:
if (nof_ports == nof_layers) {
-return srslte_predecoding_diversity_multi(y, h, x, nof_rxant, nof_ports, nof_symbols, scaling);
+return srslte_predecoding_diversity_multi(y, h, x, csi, nof_rxant, nof_ports, nof_symbols, scaling);
} else {
fprintf(stderr,
"Error number of layers must equal number of ports in transmit diversity\n");

@@ -221,7 +221,7 @@ int srslte_pcfich_decode_multi(srslte_pcfich_t *q, cf_t *sf_symbols[SRSLTE_MAX_P
/* no need for layer demapping */
srslte_predecoding_single_multi(q_symbols, q_ce[0], q->d, NULL, q->nof_rx_antennas, q->nof_symbols, 1.0f, noise_estimate);
} else {
-srslte_predecoding_diversity_multi(q_symbols, q_ce, x, q->nof_rx_antennas, q->cell.nof_ports, q->nof_symbols, 1.0f);
+srslte_predecoding_diversity_multi(q_symbols, q_ce, x, NULL, q->nof_rx_antennas, q->cell.nof_ports, q->nof_symbols, 1.0f);
srslte_layerdemap_diversity(x, q->d, q->cell.nof_ports, q->nof_symbols / q->cell.nof_ports);
}

@@ -492,7 +492,7 @@ int srslte_pdcch_extract_llr_multi(srslte_pdcch_t *q, cf_t *sf_symbols[SRSLTE_MA
/* no need for layer demapping */
srslte_predecoding_single_multi(q->symbols, q->ce[0], q->d, NULL, q->nof_rx_antennas, nof_symbols, 1.0f, noise_estimate/2);
} else {
-srslte_predecoding_diversity_multi(q->symbols, q->ce, x, q->nof_rx_antennas, q->cell.nof_ports, nof_symbols, 1.0f);
+srslte_predecoding_diversity_multi(q->symbols, q->ce, x, NULL, q->nof_rx_antennas, q->cell.nof_ports, nof_symbols, 1.0f);
srslte_layerdemap_diversity(x, q->d, q->cell.nof_ports, nof_symbols / q->cell.nof_ports);
}

@@ -402,7 +402,7 @@ int srslte_pdsch_enable_csi(srslte_pdsch_t *q, bool enable) {
if (enable) {
for (int i = 0; i < SRSLTE_MAX_CODEWORDS; i++) {
if (!q->csi[i]) {
-q->csi[i] = srslte_vec_malloc(sizeof(float) * q->max_re);
+q->csi[i] = srslte_vec_malloc(sizeof(float) * q->max_re * 2);
if (!q->csi[i]) {
return SRSLTE_ERROR;
}
@@ -757,7 +757,7 @@ int srslte_pdsch_decode(srslte_pdsch_t *q,
}
// Pre-decoder
-if (srslte_predecoding_type(q->symbols, q->ce, x, q->csi[0], q->nof_rx_antennas, q->cell.nof_ports, cfg->nof_layers,
+if (srslte_predecoding_type(q->symbols, q->ce, x, q->csi, q->nof_rx_antennas, q->cell.nof_ports, cfg->nof_layers,
cfg->codebook_idx, cfg->nbits[0].nof_re, cfg->mimo_type, pdsch_scaling, noise_estimate)<0) {
DEBUG("Error predecoding\n");
return SRSLTE_ERROR;

@@ -241,7 +241,7 @@ int srslte_phich_decode(srslte_phich_t *q, cf_t *sf_symbols[SRSLTE_MAX_PORTS],
/* no need for layer demapping */
srslte_predecoding_single_multi(q_sf_symbols, q_ce[0], q->d0, NULL, q->nof_rx_antennas, SRSLTE_PHICH_MAX_NSYMB, 1.0f, noise_estimate);
} else {
-srslte_predecoding_diversity_multi(q_sf_symbols, q_ce, x, q->nof_rx_antennas, q->cell.nof_ports, SRSLTE_PHICH_MAX_NSYMB, 1.0f);
+srslte_predecoding_diversity_multi(q_sf_symbols, q_ce, x, NULL, q->nof_rx_antennas, q->cell.nof_ports, SRSLTE_PHICH_MAX_NSYMB, 1.0f);
srslte_layerdemap_diversity(x, q->d0, q->cell.nof_ports, SRSLTE_PHICH_MAX_NSYMB / q->cell.nof_ports);
}
DEBUG("Recv!!: \n");

@@ -60,8 +60,8 @@
}
/* Generic implementation for Minimum Mean Squared Error (MMSE) solver */
-inline void srslte_mat_2x2_mmse_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h10, cf_t h11,
-cf_t *x0, cf_t *x1, float noise_estimate, float norm) {
+inline void srslte_mat_2x2_mmse_csi_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h10, cf_t h11,
+cf_t *x0, cf_t *x1, float *csi0, float *csi1, float noise_estimate, float norm) {
/* Create conjugated matrix */
cf_t _h00 = conjf(h00);
cf_t _h01 = conjf(h01);
@@ -73,14 +73,14 @@ inline void srslte_mat_2x2_mmse_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h
cf_t a01 = _h00 * h01 + _h10 * h11;
cf_t a10 = _h01 * h00 + _h11 * h10;
cf_t a11 = _h01 * h01 + _h11 * h11 + noise_estimate;
+cf_t a_det_rcp = srslte_mat_cf_recip_gen(srslte_mat_2x2_det_gen(a00, a01, a10, a11));
/* 2. B = inv(H' x H + No) = inv(A) */
-cf_t b00 = a11;
-cf_t b01 = -a01;
-cf_t b10 = -a10;
-cf_t b11 = a00;
-cf_t _norm = norm * srslte_mat_cf_recip_gen(srslte_mat_2x2_det_gen(a00, a01, a10, a11));
+cf_t _norm = norm * a_det_rcp;
+cf_t b00 = a11 * _norm;
+cf_t b01 = -a01 * _norm;
+cf_t b10 = -a10 * _norm;
+cf_t b11 = a00 * _norm;
/* 3. W = inv(H' x H + No) x H' = B x H' */
cf_t w00 = b00 * _h00 + b01 * _h01;
@@ -89,8 +89,19 @@ inline void srslte_mat_2x2_mmse_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h
cf_t w11 = b10 * _h10 + b11 * _h11;
/* 4. X = W x Y */
-*x0 = (y0 * w00 + y1 * w01) * _norm;
-*x1 = (y0 * w10 + y1 * w11) * _norm;
+*x0 = (y0 * w00 + y1 * w01);
+*x1 = (y0 * w10 + y1 * w11);
+/* 5. Set CSI */
+*csi0 = 1.0f / crealf(b00);
+*csi1 = 1.0f / crealf(b11);
+}
+/* Generic implementation for Minimum Mean Squared Error (MMSE) solver */
+void srslte_mat_2x2_mmse_gen(cf_t y0, cf_t y1, cf_t h00, cf_t h01, cf_t h10, cf_t h11,
+cf_t *x0, cf_t *x1, float noise_estimate, float norm) {
+float csi0, csi1;
+srslte_mat_2x2_mmse_csi_gen(y0, y1, h00, h01, h10, h11, x0, x1, &csi0, &csi1, noise_estimate, norm);
}
inline float srslte_mat_2x2_cn(cf_t h00, cf_t h01, cf_t h10, cf_t h11) {

@@ -32,8 +32,8 @@
#include <sys/time.h>
#include "srslte/phy/utils/mat.h"
+#include "srslte/phy/utils/simd.h"
#include "srslte/phy/utils/vector.h"
-#include "srslte/phy/utils/vector_simd.h"
bool zf_solver = false;
@@ -378,6 +378,98 @@ bool test_mmse_solver_avx(void) {
#endif /* LV_HAVE_AVX */
#if SRSLTE_SIMD_CF_SIZE != 0
bool test_zf_solver_simd(void) {
cf_t cf_error0, cf_error1;
float error = 0.0f;
cf_t x0_gold_1 = RANDOM_CF();
cf_t x1_gold_1 = RANDOM_CF();
cf_t h00_1 = RANDOM_CF();
cf_t h01_1 = RANDOM_CF();
cf_t h10_1 = RANDOM_CF();
cf_t h11_1 = (1 - h01_1 * h10_1) / h00_1;
cf_t y0_1 = x0_gold_1 * h00_1 + x1_gold_1 * h01_1;
cf_t y1_1 = x0_gold_1 * h10_1 + x1_gold_1 * h11_1;
simd_cf_t _y0 = srslte_simd_cf_set1(y0_1);
simd_cf_t _y1 = srslte_simd_cf_set1(y1_1);
simd_cf_t _h00 = srslte_simd_cf_set1(h00_1);
simd_cf_t _h01 = srslte_simd_cf_set1(h01_1);
simd_cf_t _h10 = srslte_simd_cf_set1(h10_1);
simd_cf_t _h11 = srslte_simd_cf_set1(h11_1);
simd_cf_t _x0, _x1;
srslte_mat_2x2_zf_simd(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 1.0f);
__attribute__((aligned(SRSLTE_SIMD_BIT_ALIGN))) cf_t x0[SRSLTE_SIMD_CF_SIZE];
__attribute__((aligned(SRSLTE_SIMD_BIT_ALIGN))) cf_t x1[SRSLTE_SIMD_CF_SIZE];
srslte_simd_cfi_store(x0, _x0);
srslte_simd_cfi_store(x1, _x1);
cf_error0 = x0[1] - x0_gold_1;
cf_error1 = x1[1] - x1_gold_1;
error += crealf(cf_error0) * crealf(cf_error0) + cimagf(cf_error0) * cimagf(cf_error0) +
crealf(cf_error1) * crealf(cf_error1) + cimagf(cf_error1) * cimagf(cf_error1);
return (error < 1e-3);
}
bool test_mmse_solver_simd(void) {
cf_t cf_error0, cf_error1;
float error = 0.0f;
cf_t x0_gold[SRSLTE_SIMD_CF_SIZE];
cf_t x1_gold[SRSLTE_SIMD_CF_SIZE];
cf_t h00[SRSLTE_SIMD_CF_SIZE];
cf_t h01[SRSLTE_SIMD_CF_SIZE];
cf_t h10[SRSLTE_SIMD_CF_SIZE];
cf_t h11[SRSLTE_SIMD_CF_SIZE];
cf_t y0[SRSLTE_SIMD_CF_SIZE];
cf_t y1[SRSLTE_SIMD_CF_SIZE];
for (int i = 0; i < SRSLTE_SIMD_CF_SIZE; i++) {
x0_gold[i] = RANDOM_CF();
x1_gold[i] = RANDOM_CF();
h00[i] = RANDOM_CF();
h01[i] = RANDOM_CF();
h10[i] = RANDOM_CF();
h11[i] = (1 - h01[i] * h10[i]) / h00[i];
y0[i] = x0_gold[i] * h00[i]+ x1_gold[i] * h01[i];
y1[i] = x0_gold[i] * h10[i] + x1_gold[i] * h11[i];
}
simd_cf_t _y0 = srslte_simd_cfi_loadu(y0);
simd_cf_t _y1 = srslte_simd_cfi_loadu(y1);
simd_cf_t _h00 = srslte_simd_cfi_loadu(h00);
simd_cf_t _h01 = srslte_simd_cfi_loadu(h01);
simd_cf_t _h10 = srslte_simd_cfi_loadu(h10);
simd_cf_t _h11 = srslte_simd_cfi_loadu(h11);
simd_cf_t _x0, _x1;
srslte_mat_2x2_mmse_simd(_y0, _y1, _h00, _h01, _h10, _h11, &_x0, &_x1, 0.0f, 1.0f);
__attribute__((aligned(SRSLTE_SIMD_BIT_ALIGN))) cf_t x0[SRSLTE_SIMD_CF_SIZE];
__attribute__((aligned(SRSLTE_SIMD_BIT_ALIGN))) cf_t x1[SRSLTE_SIMD_CF_SIZE];
srslte_simd_cfi_store(x0, _x0);
srslte_simd_cfi_store(x1, _x1);
cf_error0 = x0[1] - x0_gold[1];
cf_error1 = x1[1] - x1_gold[1];
error += crealf(cf_error0) * crealf(cf_error0) + cimagf(cf_error0) * cimagf(cf_error0) +
crealf(cf_error1) * crealf(cf_error1) + cimagf(cf_error1) * cimagf(cf_error1);
return (error < 1e-3);
}
#endif /* SRSLTE_SIMD_CF_SIZE != 0 */
bool test_vec_dot_prod_ccc(void) {
__attribute__((aligned(256))) cf_t a[14];
__attribute__((aligned(256))) cf_t b[14];
@@ -413,6 +505,10 @@ int main(int argc, char **argv) {
#ifdef LV_HAVE_AVX
RUN_TEST(test_zf_solver_avx);
#endif /* LV_HAVE_AVX */
#if SRSLTE_SIMD_CF_SIZE != 0
RUN_TEST(test_zf_solver_simd);
#endif /* SRSLTE_SIMD_CF_SIZE != 0*/
}
if (mmse_solver) {
@@ -426,6 +522,10 @@ int main(int argc, char **argv) {
#ifdef LV_HAVE_AVX
RUN_TEST(test_mmse_solver_avx);
#endif /* LV_HAVE_AVX */
#if SRSLTE_SIMD_CF_SIZE != 0
RUN_TEST(test_mmse_solver_simd);
#endif /* SRSLTE_SIMD_CF_SIZE != 0*/
}
RUN_TEST(test_vec_dot_prod_ccc);

@@ -427,8 +427,8 @@ void srslte_vec_quant_sus(const int16_t *in, uint16_t *out, const float gain, co
}
}
-void srs_vec_cf_cpy(const cf_t *dst, cf_t *src, int len) {
-srslte_vec_cp_simd(dst, src, len);
+void srs_vec_cf_cpy(const cf_t *src, cf_t *dst, int len) {
+srslte_vec_cp_simd(src, dst, len);
}
void srslte_vec_interleave(const cf_t *x, const cf_t *y, cf_t *z, const int len) {
