|
|
@ -1118,12 +1118,15 @@ int srslte_predecoding_ccd_mmse(cf_t *y[SRSLTE_MAX_PORTS],
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef LV_HAVE_AVX
|
|
|
|
static int srslte_predecoding_multiplex_2x2_zf_csi(cf_t *y[SRSLTE_MAX_PORTS],
|
|
|
|
|
|
|
|
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
// Generic implementation of ZF 2x2 Spatial Multiplexity equalizer
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS],
|
|
|
|
int srslte_predecoding_multiplex_2x2_zf_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
float *csi,
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS], int codebook_idx, int nof_symbols, float scaling) {
|
|
|
|
int codebook_idx,
|
|
|
|
float norm = 1.0;
|
|
|
|
int nof_symbols,
|
|
|
|
|
|
|
|
float scaling) {
|
|
|
|
|
|
|
|
float norm = 1.0f;
|
|
|
|
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
|
|
|
|
switch (codebook_idx) {
|
|
|
|
switch (codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
@ -1138,60 +1141,101 @@ int srslte_predecoding_multiplex_2x2_zf_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[S
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < nof_symbols - 3; i += 4) {
|
|
|
|
#if SRSLTE_SIMD_CF_SIZE != 0
|
|
|
|
__m256 _h00 = _mm256_load_ps((float*)&(h[0][0][i]));
|
|
|
|
for (; i < nof_symbols - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
|
|
|
|
__m256 _h01 = _mm256_load_ps((float*)&(h[0][1][i]));
|
|
|
|
simd_cf_t h00i = srslte_simd_cfi_load(&h[0][0][i]);
|
|
|
|
__m256 _h10 = _mm256_load_ps((float*)&(h[1][0][i]));
|
|
|
|
simd_cf_t h01i = srslte_simd_cfi_load(&h[0][1][i]);
|
|
|
|
__m256 _h11 = _mm256_load_ps((float*)&(h[1][1][i]));
|
|
|
|
simd_cf_t h10i = srslte_simd_cfi_load(&h[1][0][i]);
|
|
|
|
|
|
|
|
simd_cf_t h11i = srslte_simd_cfi_load(&h[1][1][i]);
|
|
|
|
|
|
|
|
|
|
|
|
__m256 h00, h01, h10, h11;
|
|
|
|
simd_cf_t h00, h01, h10, h11;
|
|
|
|
switch (codebook_idx) {
|
|
|
|
switch (codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
|
h00 = _h00;
|
|
|
|
h00 = h00i;
|
|
|
|
h01 = _h10;
|
|
|
|
h01 = h10i;
|
|
|
|
h10 = _h01;
|
|
|
|
h10 = h01i;
|
|
|
|
h11 = _h11;
|
|
|
|
h11 = h11i;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
case 1:
|
|
|
|
h00 = _mm256_add_ps(_h00, _h10);
|
|
|
|
h00 = srslte_simd_cf_add(h00i, h10i);
|
|
|
|
h01 = _mm256_sub_ps(_h00, _h10);
|
|
|
|
h01 = srslte_simd_cf_sub(h00i, h10i);
|
|
|
|
h10 = _mm256_add_ps(_h01, _h11);
|
|
|
|
h10 = srslte_simd_cf_add(h01i, h11i);
|
|
|
|
h11 = _mm256_sub_ps(_h01, _h11);
|
|
|
|
h11 = srslte_simd_cf_sub(h01i, h11i);
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
case 2:
|
|
|
|
h00 = _mm256_add_ps(_h00, _MM256_MULJ_PS(_h10));
|
|
|
|
h00 = srslte_simd_cf_add(h00i, srslte_simd_cf_mulj(h10i));
|
|
|
|
h01 = _mm256_sub_ps(_h00, _MM256_MULJ_PS(_h10));
|
|
|
|
h01 = srslte_simd_cf_sub(h00i, srslte_simd_cf_mulj(h10i));
|
|
|
|
h10 = _mm256_add_ps(_h01, _MM256_MULJ_PS(_h11));
|
|
|
|
h10 = srslte_simd_cf_add(h01i, srslte_simd_cf_mulj(h11i));
|
|
|
|
h11 = _mm256_sub_ps(_h01, _MM256_MULJ_PS(_h11));
|
|
|
|
h11 = srslte_simd_cf_sub(h01i, srslte_simd_cf_mulj(h11i));
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
DEBUG("Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
__m256 y0 = _mm256_load_ps((float *) &y[0][i]);
|
|
|
|
simd_cf_t y0 = srslte_simd_cfi_load(&y[0][i]);
|
|
|
|
__m256 y1 = _mm256_load_ps((float *) &y[1][i]);
|
|
|
|
simd_cf_t y1 = srslte_simd_cfi_load(&y[1][i]);
|
|
|
|
|
|
|
|
|
|
|
|
__m256 x0, x1;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
srslte_mat_2x2_zf_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, norm);
|
|
|
|
simd_cf_t x0, x1;
|
|
|
|
|
|
|
|
simd_f_t csi0, csi1;
|
|
|
|
|
|
|
|
srslte_mat_2x2_zf_csi_simd(y0, y1, h00, h01, h10, h11, &x0, &x1, &csi0, &csi1, norm);
|
|
|
|
|
|
|
|
|
|
|
|
_mm256_store_ps((float *) &x[0][i], x0);
|
|
|
|
srslte_simd_cfi_store(&x[0][i], x0);
|
|
|
|
_mm256_store_ps((float *) &x[1][i], x1);
|
|
|
|
srslte_simd_cfi_store(&x[1][i], x1);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
srslte_simd_f_store(&csi[i], csi0);
|
|
|
|
|
|
|
|
srslte_simd_f_store(&csi[i], csi1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* SRSLTE_SIMD_CF_SIZE */
|
|
|
|
|
|
|
|
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
for (; i < nof_symbols; i++) {
|
|
|
|
|
|
|
|
cf_t h00, h01, h10, h11;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
switch (codebook_idx) {
|
|
|
|
|
|
|
|
case 0:
|
|
|
|
|
|
|
|
h00 = h[0][0][i];
|
|
|
|
|
|
|
|
h01 = h[1][0][i];
|
|
|
|
|
|
|
|
h10 = h[0][1][i];
|
|
|
|
|
|
|
|
h11 = h[1][1][i];
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 1:
|
|
|
|
|
|
|
|
h00 = h[0][0][i] + h[1][0][i];
|
|
|
|
|
|
|
|
h01 = h[0][0][i] - h[1][0][i];
|
|
|
|
|
|
|
|
h10 = h[0][1][i] + h[1][1][i];
|
|
|
|
|
|
|
|
h11 = h[0][1][i] - h[1][1][i];
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 2:
|
|
|
|
|
|
|
|
h00 = h[0][0][i] + _Complex_I * h[1][0][i];
|
|
|
|
|
|
|
|
h01 = h[0][0][i] - _Complex_I * h[1][0][i];
|
|
|
|
|
|
|
|
h10 = h[0][1][i] + _Complex_I * h[1][1][i];
|
|
|
|
|
|
|
|
h11 = h[0][1][i] - _Complex_I * h[1][1][i];
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* LV_HAVE_AVX */
|
|
|
|
cf_t det = (h00 * h11 - h01 * h10);
|
|
|
|
|
|
|
|
det = conjf(det) * (norm / (crealf(det) * crealf(det) + cimagf(det) * cimagf(det)));
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef LV_HAVE_SSE
|
|
|
|
x[0][i] = (+h11 * y[0][i] - h01 * y[1][i]) * det;
|
|
|
|
|
|
|
|
x[1][i] = (-h10 * y[0][i] + h00 * y[1][i]) * det;
|
|
|
|
|
|
|
|
|
|
|
|
// SSE implementation of ZF 2x2 Spatial Multiplexity equalizer
|
|
|
|
csi[i] = 1.0f;
|
|
|
|
int srslte_predecoding_multiplex_2x2_zf_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
csi[i] = 1.0f;
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS], int codebook_idx, int nof_symbols, float scaling) {
|
|
|
|
}
|
|
|
|
float norm = 1.0;
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Generic implementation of ZF 2x2 Spatial Multiplexity equalizer
|
|
|
|
|
|
|
|
static int srslte_predecoding_multiplex_2x2_zf(cf_t *y[SRSLTE_MAX_PORTS],
|
|
|
|
|
|
|
|
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS],
|
|
|
|
|
|
|
|
int codebook_idx,
|
|
|
|
|
|
|
|
int nof_symbols,
|
|
|
|
|
|
|
|
float scaling) {
|
|
|
|
|
|
|
|
float norm = 1.0f;
|
|
|
|
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
|
|
|
|
switch (codebook_idx) {
|
|
|
|
switch (codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
@ -1206,75 +1250,53 @@ int srslte_predecoding_multiplex_2x2_zf_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[S
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < nof_symbols - 1; i += 2) {
|
|
|
|
#if SRSLTE_SIMD_CF_SIZE != 0
|
|
|
|
__m128 _h00 = _mm_load_ps((float*)&(h[0][0][i]));
|
|
|
|
for (; i < nof_symbols - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
|
|
|
|
__m128 _h01 = _mm_load_ps((float*)&(h[0][1][i]));
|
|
|
|
simd_cf_t h00i = srslte_simd_cfi_load(&h[0][0][i]);
|
|
|
|
__m128 _h10 = _mm_load_ps((float*)&(h[1][0][i]));
|
|
|
|
simd_cf_t h01i = srslte_simd_cfi_load(&h[0][1][i]);
|
|
|
|
__m128 _h11 = _mm_load_ps((float*)&(h[1][1][i]));
|
|
|
|
simd_cf_t h10i = srslte_simd_cfi_load(&h[1][0][i]);
|
|
|
|
|
|
|
|
simd_cf_t h11i = srslte_simd_cfi_load(&h[1][1][i]);
|
|
|
|
|
|
|
|
|
|
|
|
__m128 h00, h01, h10, h11;
|
|
|
|
simd_cf_t h00, h01, h10, h11;
|
|
|
|
switch (codebook_idx) {
|
|
|
|
switch (codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
|
h00 = _h00;
|
|
|
|
h00 = h00i;
|
|
|
|
h01 = _h10;
|
|
|
|
h01 = h10i;
|
|
|
|
h10 = _h01;
|
|
|
|
h10 = h01i;
|
|
|
|
h11 = _h11;
|
|
|
|
h11 = h11i;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
case 1:
|
|
|
|
h00 = _mm_add_ps(_h00, _h10);
|
|
|
|
h00 = srslte_simd_cf_add(h00i, h10i);
|
|
|
|
h01 = _mm_sub_ps(_h00, _h10);
|
|
|
|
h01 = srslte_simd_cf_sub(h00i, h10i);
|
|
|
|
h10 = _mm_add_ps(_h01, _h11);
|
|
|
|
h10 = srslte_simd_cf_add(h01i, h11i);
|
|
|
|
h11 = _mm_sub_ps(_h01, _h11);
|
|
|
|
h11 = srslte_simd_cf_sub(h01i, h11i);
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
case 2:
|
|
|
|
h00 = _mm_add_ps(_h00, _MM_MULJ_PS(_h10));
|
|
|
|
h00 = srslte_simd_cf_add(h00i, srslte_simd_cf_mulj(h10i));
|
|
|
|
h01 = _mm_sub_ps(_h00, _MM_MULJ_PS(_h10));
|
|
|
|
h01 = srslte_simd_cf_sub(h00i, srslte_simd_cf_mulj(h10i));
|
|
|
|
h10 = _mm_add_ps(_h01, _MM_MULJ_PS(_h11));
|
|
|
|
h10 = srslte_simd_cf_add(h01i, srslte_simd_cf_mulj(h11i));
|
|
|
|
h11 = _mm_sub_ps(_h01, _MM_MULJ_PS(_h11));
|
|
|
|
h11 = srslte_simd_cf_sub(h01i, srslte_simd_cf_mulj(h11i));
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
__m128 y0 = _mm_load_ps((float *) &y[0][i]);
|
|
|
|
simd_cf_t y0 = srslte_simd_cfi_load(&y[0][i]);
|
|
|
|
__m128 y1 = _mm_load_ps((float *) &y[1][i]);
|
|
|
|
simd_cf_t y1 = srslte_simd_cfi_load(&y[1][i]);
|
|
|
|
|
|
|
|
|
|
|
|
__m128 x0, x1;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
srslte_mat_2x2_zf_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, norm);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_mm_store_ps((float *) &x[0][i], x0);
|
|
|
|
|
|
|
|
_mm_store_ps((float *) &x[1][i], x1);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* LV_HAVE_SSE */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
simd_cf_t x0, x1;
|
|
|
|
|
|
|
|
simd_f_t csi0, csi1;
|
|
|
|
|
|
|
|
srslte_mat_2x2_zf_csi_simd(y0, y1, h00, h01, h10, h11, &x0, &x1, &csi0, &csi1, norm);
|
|
|
|
|
|
|
|
|
|
|
|
// Generic implementation of ZF 2x2 Spatial Multiplexity equalizer
|
|
|
|
srslte_simd_cfi_store(&x[0][i], x0);
|
|
|
|
int srslte_predecoding_multiplex_2x2_zf_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
srslte_simd_cfi_store(&x[1][i], x1);
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS], int codebook_idx, int nof_symbols, float scaling) {
|
|
|
|
|
|
|
|
float norm = 1.0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
switch(codebook_idx) {
|
|
|
|
|
|
|
|
case 0:
|
|
|
|
|
|
|
|
norm = (float) M_SQRT2 / scaling;
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 1:
|
|
|
|
|
|
|
|
case 2:
|
|
|
|
|
|
|
|
norm = 2.0f / scaling;
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* SRSLTE_SIMD_CF_SIZE */
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < nof_symbols; i++) {
|
|
|
|
for (; i < nof_symbols; i++) {
|
|
|
|
cf_t h00, h01, h10, h11, det;
|
|
|
|
cf_t h00, h01, h10, h11;
|
|
|
|
|
|
|
|
|
|
|
|
switch (codebook_idx) {
|
|
|
|
switch (codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
@ -1300,22 +1322,22 @@ int srslte_predecoding_multiplex_2x2_zf_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[S
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
det = (h00 * h11 - h01 * h10);
|
|
|
|
srslte_mat_2x2_zf_gen(y[0][i], y[1][i], h00, h01, h10, h11, &x[0][i], &x[1][i], norm);
|
|
|
|
det = conjf(det) * (norm / (crealf(det) * crealf(det) + cimagf(det) * cimagf(det)));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
x[0][i] = (+h11 * y[0][i] - h01 * y[1][i]) * det;
|
|
|
|
|
|
|
|
x[1][i] = (-h10 * y[0][i] + h00 * y[1][i]) * det;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef LV_HAVE_AVX
|
|
|
|
// Generic implementation of ZF 2x2 Spatial Multiplexity equalizer
|
|
|
|
|
|
|
|
static int srslte_predecoding_multiplex_2x2_mmse_csi(cf_t *y[SRSLTE_MAX_PORTS],
|
|
|
|
// AVX implementation of ZF 2x2 Spatial Multiplexity equalizer
|
|
|
|
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
int srslte_predecoding_multiplex_2x2_mmse_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS],
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS], int codebook_idx, int nof_symbols,
|
|
|
|
float *csi[SRSLTE_MAX_CODEWORDS],
|
|
|
|
float scaling, float noise_estimate) {
|
|
|
|
int codebook_idx,
|
|
|
|
float norm = 1.0;
|
|
|
|
int nof_symbols,
|
|
|
|
|
|
|
|
float scaling,
|
|
|
|
|
|
|
|
float noise_estimate) {
|
|
|
|
|
|
|
|
float norm = 1.0f;
|
|
|
|
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
|
|
|
|
switch (codebook_idx) {
|
|
|
|
switch (codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
@ -1326,147 +1348,168 @@ int srslte_predecoding_multiplex_2x2_mmse_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h
|
|
|
|
norm = 2.0f / scaling;
|
|
|
|
norm = 2.0f / scaling;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
ERROR("Wrong codebook_idx=%d", codebook_idx);
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < nof_symbols; i += 4) {
|
|
|
|
#if SRSLTE_SIMD_CF_SIZE != 0
|
|
|
|
__m256 _h00 = _mm256_load_ps((float*)&(h[0][0][i]));
|
|
|
|
for (; i < nof_symbols - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
|
|
|
|
__m256 _h01 = _mm256_load_ps((float*)&(h[0][1][i]));
|
|
|
|
simd_cf_t h00i = srslte_simd_cfi_load(&h[0][0][i]);
|
|
|
|
__m256 _h10 = _mm256_load_ps((float*)&(h[1][0][i]));
|
|
|
|
simd_cf_t h01i = srslte_simd_cfi_load(&h[0][1][i]);
|
|
|
|
__m256 _h11 = _mm256_load_ps((float*)&(h[1][1][i]));
|
|
|
|
simd_cf_t h10i = srslte_simd_cfi_load(&h[1][0][i]);
|
|
|
|
|
|
|
|
simd_cf_t h11i = srslte_simd_cfi_load(&h[1][1][i]);
|
|
|
|
|
|
|
|
|
|
|
|
__m256 h00, h01, h10, h11;
|
|
|
|
simd_cf_t h00, h01, h10, h11;
|
|
|
|
switch (codebook_idx) {
|
|
|
|
switch (codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
|
h00 = _h00;
|
|
|
|
h00 = h00i;
|
|
|
|
h01 = _h10;
|
|
|
|
h01 = h10i;
|
|
|
|
h10 = _h01;
|
|
|
|
h10 = h01i;
|
|
|
|
h11 = _h11;
|
|
|
|
h11 = h11i;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
case 1:
|
|
|
|
h00 = _mm256_add_ps(_h00, _h10);
|
|
|
|
h00 = srslte_simd_cf_add(h00i, h10i);
|
|
|
|
h01 = _mm256_sub_ps(_h00, _h10);
|
|
|
|
h01 = srslte_simd_cf_sub(h00i, h10i);
|
|
|
|
h10 = _mm256_add_ps(_h01, _h11);
|
|
|
|
h10 = srslte_simd_cf_add(h01i, h11i);
|
|
|
|
h11 = _mm256_sub_ps(_h01, _h11);
|
|
|
|
h11 = srslte_simd_cf_sub(h01i, h11i);
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
case 2:
|
|
|
|
h00 = _mm256_add_ps(_h00, _MM256_MULJ_PS(_h10));
|
|
|
|
h00 = srslte_simd_cf_add(h00i, srslte_simd_cf_mulj(h10i));
|
|
|
|
h01 = _mm256_sub_ps(_h00, _MM256_MULJ_PS(_h10));
|
|
|
|
h01 = srslte_simd_cf_sub(h00i, srslte_simd_cf_mulj(h10i));
|
|
|
|
h10 = _mm256_add_ps(_h01, _MM256_MULJ_PS(_h11));
|
|
|
|
h10 = srslte_simd_cf_add(h01i, srslte_simd_cf_mulj(h11i));
|
|
|
|
h11 = _mm256_sub_ps(_h01, _MM256_MULJ_PS(_h11));
|
|
|
|
h11 = srslte_simd_cf_sub(h01i, srslte_simd_cf_mulj(h11i));
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
__m256 y0 = _mm256_load_ps((float *) &y[0][i]);
|
|
|
|
simd_cf_t y0 = srslte_simd_cfi_load(&y[0][i]);
|
|
|
|
__m256 y1 = _mm256_load_ps((float *) &y[1][i]);
|
|
|
|
simd_cf_t y1 = srslte_simd_cfi_load(&y[1][i]);
|
|
|
|
|
|
|
|
|
|
|
|
__m256 x0, x1;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
srslte_mat_2x2_mmse_avx(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, norm);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_mm256_store_ps((float *) &x[0][i], x0);
|
|
|
|
simd_cf_t x0, x1;
|
|
|
|
_mm256_store_ps((float *) &x[1][i], x1);
|
|
|
|
simd_f_t csi0, csi1;
|
|
|
|
|
|
|
|
srslte_mat_2x2_mmse_csi_simd(y0, y1, h00, h01, h10, h11, &x0, &x1, &csi0, &csi1, noise_estimate, norm);
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
srslte_simd_cfi_store(&x[0][i], x0);
|
|
|
|
|
|
|
|
srslte_simd_cfi_store(&x[1][i], x1);
|
|
|
|
|
|
|
|
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
srslte_simd_f_store(&csi[0][i], csi0);
|
|
|
|
|
|
|
|
srslte_simd_f_store(&csi[1][i], csi1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* SRSLTE_SIMD_CF_SIZE */
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* LV_HAVE_AVX */
|
|
|
|
for (; i < nof_symbols; i++) {
|
|
|
|
|
|
|
|
cf_t h00, h01, h10, h11;
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef LV_HAVE_SSE
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// SSE implementation of ZF 2x2 Spatial Multiplexity equalizer
|
|
|
|
|
|
|
|
int srslte_predecoding_multiplex_2x2_mmse_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS], int codebook_idx, int nof_symbols,
|
|
|
|
|
|
|
|
float scaling, float noise_estimate) {
|
|
|
|
|
|
|
|
float norm;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
switch (codebook_idx) {
|
|
|
|
switch (codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
|
norm = (float) M_SQRT2 / scaling;
|
|
|
|
h00 = h[0][0][i];
|
|
|
|
|
|
|
|
h01 = h[1][0][i];
|
|
|
|
|
|
|
|
h10 = h[0][1][i];
|
|
|
|
|
|
|
|
h11 = h[1][1][i];
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
case 1:
|
|
|
|
|
|
|
|
h00 = h[0][0][i] + h[1][0][i];
|
|
|
|
|
|
|
|
h01 = h[0][0][i] - h[1][0][i];
|
|
|
|
|
|
|
|
h10 = h[0][1][i] + h[1][1][i];
|
|
|
|
|
|
|
|
h11 = h[0][1][i] - h[1][1][i];
|
|
|
|
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
case 2:
|
|
|
|
norm = 2.0f / scaling;
|
|
|
|
h00 = h[0][0][i] + _Complex_I * h[1][0][i];
|
|
|
|
|
|
|
|
h01 = h[0][0][i] - _Complex_I * h[1][0][i];
|
|
|
|
|
|
|
|
h10 = h[0][1][i] + _Complex_I * h[1][1][i];
|
|
|
|
|
|
|
|
h11 = h[0][1][i] - _Complex_I * h[1][1][i];
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < nof_symbols - 1; i += 2) {
|
|
|
|
srslte_mat_2x2_mmse_csi_gen(y[0][i],
|
|
|
|
__m128 _h00 = _mm_load_ps((float*)&(h[0][0][i]));
|
|
|
|
y[1][i],
|
|
|
|
__m128 _h01 = _mm_load_ps((float*)&(h[0][1][i]));
|
|
|
|
h00,
|
|
|
|
__m128 _h10 = _mm_load_ps((float*)&(h[1][0][i]));
|
|
|
|
h01,
|
|
|
|
__m128 _h11 = _mm_load_ps((float*)&(h[1][1][i]));
|
|
|
|
h10,
|
|
|
|
|
|
|
|
h11,
|
|
|
|
|
|
|
|
&x[0][i],
|
|
|
|
|
|
|
|
&x[1][i],
|
|
|
|
|
|
|
|
&csi[0][i],
|
|
|
|
|
|
|
|
&csi[1][i],
|
|
|
|
|
|
|
|
noise_estimate,
|
|
|
|
|
|
|
|
norm);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static int srslte_predecoding_multiplex_2x2_mmse(cf_t *y[SRSLTE_MAX_PORTS],
|
|
|
|
|
|
|
|
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS],
|
|
|
|
|
|
|
|
int codebook_idx,
|
|
|
|
|
|
|
|
int nof_symbols,
|
|
|
|
|
|
|
|
float scaling,
|
|
|
|
|
|
|
|
float noise_estimate) {
|
|
|
|
|
|
|
|
float norm = 1.0;
|
|
|
|
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
|
|
|
|
__m128 h00, h01, h10, h11;
|
|
|
|
|
|
|
|
switch(codebook_idx) {
|
|
|
|
switch(codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
|
h00 = _h00;
|
|
|
|
norm = (float) M_SQRT2 / scaling;
|
|
|
|
h01 = _h10;
|
|
|
|
|
|
|
|
h10 = _h01;
|
|
|
|
|
|
|
|
h11 = _h11;
|
|
|
|
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
case 1:
|
|
|
|
h00 = _mm_add_ps(_h00, _h10);
|
|
|
|
|
|
|
|
h01 = _mm_sub_ps(_h00, _h10);
|
|
|
|
|
|
|
|
h10 = _mm_add_ps(_h01, _h11);
|
|
|
|
|
|
|
|
h11 = _mm_sub_ps(_h01, _h11);
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 2:
|
|
|
|
case 2:
|
|
|
|
h00 = _mm_add_ps(_h00, _MM_MULJ_PS(_h10));
|
|
|
|
norm = 2.0f / scaling;
|
|
|
|
h01 = _mm_sub_ps(_h00, _MM_MULJ_PS(_h10));
|
|
|
|
|
|
|
|
h10 = _mm_add_ps(_h01, _MM_MULJ_PS(_h11));
|
|
|
|
|
|
|
|
h11 = _mm_sub_ps(_h01, _MM_MULJ_PS(_h11));
|
|
|
|
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
ERROR("Wrong codebook_idx=%d", codebook_idx);
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
__m128 y0 = _mm_load_ps((float *) &y[0][i]);
|
|
|
|
#if SRSLTE_SIMD_CF_SIZE != 0
|
|
|
|
__m128 y1 = _mm_load_ps((float *) &y[1][i]);
|
|
|
|
for (; i < nof_symbols - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
|
|
|
|
|
|
|
|
simd_cf_t h00i = srslte_simd_cfi_load(&h[0][0][i]);
|
|
|
|
__m128 x0, x1;
|
|
|
|
simd_cf_t h01i = srslte_simd_cfi_load(&h[0][1][i]);
|
|
|
|
|
|
|
|
simd_cf_t h10i = srslte_simd_cfi_load(&h[1][0][i]);
|
|
|
|
srslte_mat_2x2_mmse_sse(y0, y1, h00, h01, h10, h11, &x0, &x1, noise_estimate, norm);
|
|
|
|
simd_cf_t h11i = srslte_simd_cfi_load(&h[1][1][i]);
|
|
|
|
|
|
|
|
|
|
|
|
_mm_store_ps((float *) &x[0][i], x0);
|
|
|
|
|
|
|
|
_mm_store_ps((float *) &x[1][i], x1);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* LV_HAVE_SSE */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Generic implementation of ZF 2x2 Spatial Multiplexity equalizer
|
|
|
|
|
|
|
|
int srslte_predecoding_multiplex_2x2_mmse_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS], int codebook_idx, int nof_symbols,
|
|
|
|
|
|
|
|
float scaling, float noise_estimate) {
|
|
|
|
|
|
|
|
float norm = 1.0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
simd_cf_t h00, h01, h10, h11;
|
|
|
|
switch(codebook_idx) {
|
|
|
|
switch(codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
|
norm = (float) M_SQRT2 / scaling;
|
|
|
|
h00 = h00i;
|
|
|
|
|
|
|
|
h01 = h10i;
|
|
|
|
|
|
|
|
h10 = h01i;
|
|
|
|
|
|
|
|
h11 = h11i;
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
case 1:
|
|
|
|
|
|
|
|
h00 = srslte_simd_cf_add(h00i, h10i);
|
|
|
|
|
|
|
|
h01 = srslte_simd_cf_sub(h00i, h10i);
|
|
|
|
|
|
|
|
h10 = srslte_simd_cf_add(h01i, h11i);
|
|
|
|
|
|
|
|
h11 = srslte_simd_cf_sub(h01i, h11i);
|
|
|
|
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
case 2:
|
|
|
|
norm = 2.0f / scaling;
|
|
|
|
h00 = srslte_simd_cf_add(h00i, srslte_simd_cf_mulj(h10i));
|
|
|
|
|
|
|
|
h01 = srslte_simd_cf_sub(h00i, srslte_simd_cf_mulj(h10i));
|
|
|
|
|
|
|
|
h10 = srslte_simd_cf_add(h01i, srslte_simd_cf_mulj(h11i));
|
|
|
|
|
|
|
|
h11 = srslte_simd_cf_sub(h01i, srslte_simd_cf_mulj(h11i));
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
ERROR("Wrong codebook_idx=%d", codebook_idx);
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < nof_symbols; i++) {
|
|
|
|
simd_cf_t y0 = srslte_simd_cfi_load(&y[0][i]);
|
|
|
|
|
|
|
|
simd_cf_t y1 = srslte_simd_cfi_load(&y[1][i]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
simd_cf_t x0, x1;
|
|
|
|
|
|
|
|
simd_f_t csi0, csi1;
|
|
|
|
|
|
|
|
srslte_mat_2x2_mmse_csi_simd(y0, y1, h00, h01, h10, h11, &x0, &x1, &csi0, &csi1, noise_estimate, norm);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
srslte_simd_cfi_store(&x[0][i], x0);
|
|
|
|
|
|
|
|
srslte_simd_cfi_store(&x[1][i], x1);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* SRSLTE_SIMD_CF_SIZE */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (; i < nof_symbols; i++) {
|
|
|
|
cf_t h00, h01, h10, h11;
|
|
|
|
cf_t h00, h01, h10, h11;
|
|
|
|
|
|
|
|
|
|
|
|
switch(codebook_idx) {
|
|
|
|
switch(codebook_idx) {
|
|
|
@ -1498,132 +1541,145 @@ int srslte_predecoding_multiplex_2x2_mmse_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef LV_HAVE_AVX
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Generic implementation of MRC 2x1 (two antennas into one layer) Spatial Multiplexing equalizer
|
|
|
|
// Implementation of MRC 2x1 (two antennas into one layer) Spatial Multiplexing equalizer
|
|
|
|
int srslte_predecoding_multiplex_2x1_mrc_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
static int srslte_predecoding_multiplex_2x1_mrc(cf_t *y[SRSLTE_MAX_PORTS],
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS], int codebook_idx, int nof_symbols, float scaling) {
|
|
|
|
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS],
|
|
|
|
|
|
|
|
int codebook_idx,
|
|
|
|
|
|
|
|
int nof_symbols,
|
|
|
|
|
|
|
|
float scaling) {
|
|
|
|
|
|
|
|
float norm = (float) M_SQRT2 / scaling;
|
|
|
|
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < nof_symbols - 3; i += 4) {
|
|
|
|
#if SRSLTE_SIMD_CF_SIZE != 0
|
|
|
|
__m256 _h00 = _mm256_load_ps((float*)&(h[0][0][i]));
|
|
|
|
simd_f_t _norm = srslte_simd_f_set1(norm);
|
|
|
|
__m256 _h01 = _mm256_load_ps((float*)&(h[0][1][i]));
|
|
|
|
|
|
|
|
__m256 _h10 = _mm256_load_ps((float*)&(h[1][0][i]));
|
|
|
|
|
|
|
|
__m256 _h11 = _mm256_load_ps((float*)&(h[1][1][i]));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__m256 h0, h1;
|
|
|
|
for (; i < nof_symbols - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
|
|
|
|
|
|
|
|
simd_cf_t x0 = srslte_simd_cf_set1(0.0f);
|
|
|
|
|
|
|
|
simd_f_t hh = srslte_simd_f_set1(0.0f);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (int k = 0; k < 2; k++) {
|
|
|
|
|
|
|
|
simd_cf_t h0xi = srslte_simd_cfi_load(&h[0][k][i]);
|
|
|
|
|
|
|
|
simd_cf_t h1xi = srslte_simd_cfi_load(&h[1][k][i]);
|
|
|
|
|
|
|
|
simd_cf_t yx = srslte_simd_cfi_load(&y[k][i]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
simd_cf_t hx;
|
|
|
|
switch (codebook_idx) {
|
|
|
|
switch (codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
|
h0 = _mm256_add_ps(_h00, _h10);
|
|
|
|
hx = srslte_simd_cf_add(h0xi, h1xi);
|
|
|
|
h1 = _mm256_add_ps(_h01, _h11);
|
|
|
|
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
case 1:
|
|
|
|
h0 = _mm256_sub_ps(_h00, _h10);
|
|
|
|
hx = srslte_simd_cf_sub(h0xi, h1xi);
|
|
|
|
h1 = _mm256_sub_ps(_h01, _h11);
|
|
|
|
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
case 2:
|
|
|
|
h0 = _mm256_add_ps(_h00, _MM256_MULJ_PS(_h10));
|
|
|
|
hx = srslte_simd_cf_add(h0xi, srslte_simd_cf_mulj(h1xi));
|
|
|
|
h1 = _mm256_add_ps(_h01, _MM256_MULJ_PS(_h11));
|
|
|
|
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
case 3:
|
|
|
|
h0 = _mm256_sub_ps(_h00, _MM256_MULJ_PS(_h10));
|
|
|
|
hx = srslte_simd_cf_sub(h0xi, srslte_simd_cf_mulj(h1xi));
|
|
|
|
h1 = _mm256_sub_ps(_h01, _MM256_MULJ_PS(_h11));
|
|
|
|
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
__m256 h0_2 = _mm256_mul_ps(h0, h0);
|
|
|
|
hh = srslte_simd_f_add(srslte_simd_cf_re(srslte_simd_cf_conjprod(hx, hx)), hh);
|
|
|
|
__m256 h1_2 = _mm256_mul_ps(h1, h1);
|
|
|
|
x0 = srslte_simd_cf_add(srslte_simd_cf_conjprod(yx, hx), x0);
|
|
|
|
__m256 hh0 = _mm256_add_ps(_mm256_movehdup_ps(h0_2), _mm256_moveldup_ps(h0_2));
|
|
|
|
}
|
|
|
|
__m256 hh1 = _mm256_add_ps(_mm256_movehdup_ps(h1_2), _mm256_moveldup_ps(h1_2));
|
|
|
|
|
|
|
|
__m256 hh = _mm256_add_ps(hh0, hh1);
|
|
|
|
|
|
|
|
__m256 hhrec = _mm256_rcp_ps(hh);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hhrec = _mm256_mul_ps(hhrec, _mm256_set1_ps((float) M_SQRT2 / scaling));
|
|
|
|
|
|
|
|
__m256 y0 = _mm256_load_ps((float*)&y[0][i]);
|
|
|
|
|
|
|
|
__m256 y1 = _mm256_load_ps((float*)&y[1][i]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__m256 x0 = _mm256_add_ps(_MM256_PROD_PS(_MM256_CONJ_PS(h0), y0), _MM256_PROD_PS(_MM256_CONJ_PS(h1), y1));
|
|
|
|
hh = srslte_simd_f_mul(_norm, srslte_simd_f_rcp(hh));
|
|
|
|
x0 = _mm256_mul_ps(hhrec, x0);
|
|
|
|
srslte_simd_cfi_store(&x[0][i], srslte_simd_cf_mul(x0, hh));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* SRSLTE_SIMD_CF_SIZE */
|
|
|
|
|
|
|
|
|
|
|
|
_mm256_store_ps((float*)&x[0][i], x0);
|
|
|
|
for (; i < nof_symbols; i += 1) {
|
|
|
|
|
|
|
|
cf_t h0, h1;
|
|
|
|
|
|
|
|
float hh;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
switch (codebook_idx) {
|
|
|
|
|
|
|
|
case 0:
|
|
|
|
|
|
|
|
h0 = h[0][0][i] + h[1][0][i];
|
|
|
|
|
|
|
|
h1 = h[0][1][i] + h[1][1][i];
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 1:
|
|
|
|
|
|
|
|
h0 = h[0][0][i] - h[1][0][i];
|
|
|
|
|
|
|
|
h1 = h[0][1][i] - h[1][1][i];
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 2:
|
|
|
|
|
|
|
|
h0 = h[0][0][i] + _Complex_I * h[1][0][i];
|
|
|
|
|
|
|
|
h1 = h[0][1][i] + _Complex_I * h[1][1][i];
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 3:
|
|
|
|
|
|
|
|
h0 = h[0][0][i] - _Complex_I * h[1][0][i];
|
|
|
|
|
|
|
|
h1 = h[0][1][i] - _Complex_I * h[1][1][i];
|
|
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hh = norm / (crealf(h0) * crealf(h0) + cimagf(h0) * cimagf(h0) + crealf(h1) * crealf(h1) + cimagf(h1) * cimagf(h1));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
x[0][i] = (conjf(h0) * y[0][i] + conjf(h1) * y[1][i]) * hh;
|
|
|
|
|
|
|
|
}
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* LV_HAVE_AVX */
|
|
|
|
// Generic implementation of MRC 2x1 (two antennas into one layer) Spatial Multiplexing equalizer
|
|
|
|
|
|
|
|
static int srslte_predecoding_multiplex_2x1_mrc_csi(cf_t *y[SRSLTE_MAX_PORTS],
|
|
|
|
|
|
|
|
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS],
|
|
|
|
|
|
|
|
float *csi,
|
|
|
|
|
|
|
|
int codebook_idx,
|
|
|
|
|
|
|
|
int nof_symbols,
|
|
|
|
|
|
|
|
float scaling) {
|
|
|
|
|
|
|
|
float norm = (float) M_SQRT2 / scaling;
|
|
|
|
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
|
|
|
|
// SSE implementation of MRC 2x1 (two antennas into one layer) Spatial Multiplexing equalizer
|
|
|
|
#if SRSLTE_SIMD_CF_SIZE != 0
|
|
|
|
#ifdef LV_HAVE_SSE
|
|
|
|
simd_f_t _norm = srslte_simd_f_set1(norm);
|
|
|
|
|
|
|
|
|
|
|
|
int srslte_predecoding_multiplex_2x1_mrc_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
for (; i < nof_symbols - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS], int codebook_idx, int nof_symbols, float scaling) {
|
|
|
|
simd_cf_t x0 = srslte_simd_cf_set1(0.0f);
|
|
|
|
|
|
|
|
simd_f_t hh = srslte_simd_f_set1(0.0f);
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < nof_symbols - 1; i += 2) {
|
|
|
|
for (int k = 0; k < 2; k++) {
|
|
|
|
__m128 _h00 = _mm_load_ps((float*)&(h[0][0][i]));
|
|
|
|
simd_cf_t h0xi = srslte_simd_cfi_load(&h[0][k][i]);
|
|
|
|
__m128 _h01 = _mm_load_ps((float*)&(h[0][1][i]));
|
|
|
|
simd_cf_t h1xi = srslte_simd_cfi_load(&h[1][k][i]);
|
|
|
|
__m128 _h10 = _mm_load_ps((float*)&(h[1][0][i]));
|
|
|
|
simd_cf_t yx = srslte_simd_cfi_load(&y[k][i]);
|
|
|
|
__m128 _h11 = _mm_load_ps((float*)&(h[1][1][i]));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__m128 h0, h1;
|
|
|
|
simd_cf_t hx;
|
|
|
|
switch (codebook_idx) {
|
|
|
|
switch (codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
|
h0 = _mm_add_ps(_h00, _h10);
|
|
|
|
hx = srslte_simd_cf_add(h0xi, h1xi);
|
|
|
|
h1 = _mm_add_ps(_h01, _h11);
|
|
|
|
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
case 1:
|
|
|
|
h0 = _mm_sub_ps(_h00, _h10);
|
|
|
|
hx = srslte_simd_cf_sub(h0xi, h1xi);
|
|
|
|
h1 = _mm_sub_ps(_h01, _h11);
|
|
|
|
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
case 2:
|
|
|
|
h0 = _mm_add_ps(_h00, _MM_MULJ_PS(_h10));
|
|
|
|
hx = srslte_simd_cf_add(h0xi, srslte_simd_cf_mulj(h1xi));
|
|
|
|
h1 = _mm_add_ps(_h01, _MM_MULJ_PS(_h11));
|
|
|
|
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
case 3:
|
|
|
|
h0 = _mm_sub_ps(_h00, _MM_MULJ_PS(_h10));
|
|
|
|
hx = srslte_simd_cf_sub(h0xi, srslte_simd_cf_mulj(h1xi));
|
|
|
|
h1 = _mm_sub_ps(_h01, _MM_MULJ_PS(_h11));
|
|
|
|
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
fprintf(stderr, "Wrong codebook_idx=%d\n", codebook_idx);
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
__m128 h0_2 = _mm_mul_ps(h0, h0);
|
|
|
|
hh = srslte_simd_f_add(srslte_simd_cf_re(srslte_simd_cf_conjprod(hx, hx)), hh);
|
|
|
|
__m128 h1_2 = _mm_mul_ps(h1, h1);
|
|
|
|
x0 = srslte_simd_cf_add(srslte_simd_cf_conjprod(yx, hx), x0);
|
|
|
|
__m128 hh0 = _mm_add_ps(_mm_movehdup_ps(h0_2), _mm_moveldup_ps(h0_2));
|
|
|
|
|
|
|
|
__m128 hh1 = _mm_add_ps(_mm_movehdup_ps(h1_2), _mm_moveldup_ps(h1_2));
|
|
|
|
|
|
|
|
__m128 hh = _mm_add_ps(hh0, hh1);
|
|
|
|
|
|
|
|
__m128 hhrec = _mm_rcp_ps(hh);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
hhrec = _mm_mul_ps(hhrec, _mm_set1_ps((float) M_SQRT2 / scaling));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__m128 y0 = _mm_load_ps((float*)&y[0][i]);
|
|
|
|
|
|
|
|
__m128 y1 = _mm_load_ps((float*)&y[1][i]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__m128 x0 = _mm_add_ps(_MM_PROD_PS(_MM_CONJ_PS(h0), y0), _MM_PROD_PS(_MM_CONJ_PS(h1), y1));
|
|
|
|
|
|
|
|
x0 = _mm_mul_ps(hhrec, x0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_mm_store_ps((float*)&x[0][i], x0);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
hh = srslte_simd_f_mul(_norm, srslte_simd_f_rcp(hh));
|
|
|
|
|
|
|
|
srslte_simd_cfi_store(&x[0][i], srslte_simd_cf_mul(x0, hh));
|
|
|
|
|
|
|
|
srslte_simd_f_store(&csi[i], srslte_simd_f_mul(srslte_simd_f_rcp(hh), srslte_simd_f_set1((float) M_SQRT1_2)));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* SRSLTE_SIMD_CF_SIZE */
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* LV_HAVE_SSE */
|
|
|
|
for (; i < nof_symbols; i += 1) {
|
|
|
|
|
|
|
|
|
|
|
|
// Generic implementation of MRC 2x1 (two antennas into one layer) Spatial Multiplexing equalizer
|
|
|
|
|
|
|
|
int srslte_predecoding_multiplex_2x1_mrc_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS], int codebook_idx, int nof_symbols, float scaling) {
|
|
|
|
|
|
|
|
float norm = (float) M_SQRT2 / scaling;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < nof_symbols; i += 1) {
|
|
|
|
|
|
|
|
cf_t h0, h1;
|
|
|
|
cf_t h0, h1;
|
|
|
|
float hh;
|
|
|
|
float hh, _csi;
|
|
|
|
|
|
|
|
|
|
|
|
switch (codebook_idx) {
|
|
|
|
switch (codebook_idx) {
|
|
|
|
case 0:
|
|
|
|
case 0:
|
|
|
@ -1647,53 +1703,57 @@ int srslte_predecoding_multiplex_2x1_mrc_gen(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
hh = norm / (crealf(h0) * crealf(h0) + cimagf(h0) * cimagf(h0) + crealf(h1) * crealf(h1) + cimagf(h1) * cimagf(h1));
|
|
|
|
_csi = crealf(h0) * crealf(h0) + cimagf(h0) * cimagf(h0) + crealf(h1) * crealf(h1) + cimagf(h1) * cimagf(h1);
|
|
|
|
|
|
|
|
hh = norm / _csi;
|
|
|
|
|
|
|
|
|
|
|
|
x[0][i] = (conjf(h0) * y[0][i] + conjf(h1) * y[1][i]) * hh;
|
|
|
|
x[0][i] = (conjf(h0) * y[0][i] + conjf(h1) * y[1][i]) * hh;
|
|
|
|
|
|
|
|
csi[i] = _csi / norm * (float) M_SQRT1_2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
return SRSLTE_SUCCESS;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int srslte_predecoding_multiplex(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS],
|
|
|
|
static int srslte_predecoding_multiplex(cf_t *y[SRSLTE_MAX_PORTS],
|
|
|
|
int nof_rxant, int nof_ports, int nof_layers, int codebook_idx, int nof_symbols,
|
|
|
|
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS],
|
|
|
|
float scaling, float noise_estimate)
|
|
|
|
cf_t *x[SRSLTE_MAX_LAYERS],
|
|
|
|
{
|
|
|
|
float *csi[SRSLTE_MAX_CODEWORDS],
|
|
|
|
|
|
|
|
int nof_rxant,
|
|
|
|
|
|
|
|
int nof_ports,
|
|
|
|
|
|
|
|
int nof_layers,
|
|
|
|
|
|
|
|
int codebook_idx,
|
|
|
|
|
|
|
|
int nof_symbols,
|
|
|
|
|
|
|
|
float scaling,
|
|
|
|
|
|
|
|
float noise_estimate) {
|
|
|
|
if (nof_ports == 2 && nof_rxant <= 2) {
|
|
|
|
if (nof_ports == 2 && nof_rxant <= 2) {
|
|
|
|
if (nof_layers == 2) {
|
|
|
|
if (nof_layers == 2) {
|
|
|
|
switch (mimo_decoder) {
|
|
|
|
switch (mimo_decoder) {
|
|
|
|
case SRSLTE_MIMO_DECODER_ZF:
|
|
|
|
case SRSLTE_MIMO_DECODER_ZF:
|
|
|
|
#ifdef LV_HAVE_AVX
|
|
|
|
if (csi && csi[0]) {
|
|
|
|
return srslte_predecoding_multiplex_2x2_zf_avx(y, h, x, codebook_idx, nof_symbols, scaling);
|
|
|
|
return srslte_predecoding_multiplex_2x2_zf_csi(y, h, x, csi[0], codebook_idx, nof_symbols, scaling);
|
|
|
|
#else
|
|
|
|
} else {
|
|
|
|
#ifdef LV_HAVE_SSE
|
|
|
|
return srslte_predecoding_multiplex_2x2_zf(y, h, x, codebook_idx, nof_symbols, scaling);
|
|
|
|
return srslte_predecoding_multiplex_2x2_zf_sse(y, h, x, codebook_idx, nof_symbols, scaling);
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
|
|
|
|
return srslte_predecoding_multiplex_2x2_zf_gen(y, h, x, codebook_idx, nof_symbols, scaling);
|
|
|
|
|
|
|
|
#endif /* LV_HAVE_SSE */
|
|
|
|
|
|
|
|
#endif /* LV_HAVE_AVX */
|
|
|
|
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case SRSLTE_MIMO_DECODER_MMSE:
|
|
|
|
case SRSLTE_MIMO_DECODER_MMSE:
|
|
|
|
#ifdef LV_HAVE_AVX
|
|
|
|
if (csi && csi[0]) {
|
|
|
|
return srslte_predecoding_multiplex_2x2_mmse_avx(y, h, x, codebook_idx, nof_symbols, scaling, noise_estimate);
|
|
|
|
return srslte_predecoding_multiplex_2x2_mmse_csi(y,
|
|
|
|
#else
|
|
|
|
h,
|
|
|
|
#ifdef LV_HAVE_SSE
|
|
|
|
x,
|
|
|
|
return srslte_predecoding_multiplex_2x2_mmse_sse(y, h, x, codebook_idx, nof_symbols, scaling, noise_estimate);
|
|
|
|
csi,
|
|
|
|
#else
|
|
|
|
codebook_idx,
|
|
|
|
return srslte_predecoding_multiplex_2x2_mmse_gen(y, h, x, codebook_idx, nof_symbols, scaling, noise_estimate);
|
|
|
|
nof_symbols,
|
|
|
|
#endif /* LV_HAVE_SSE */
|
|
|
|
scaling,
|
|
|
|
#endif /* LV_HAVE_AVX */
|
|
|
|
noise_estimate);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
return srslte_predecoding_multiplex_2x2_mmse(y, h, x, codebook_idx, nof_symbols, scaling, noise_estimate);
|
|
|
|
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
#ifdef LV_HAVE_AVX
|
|
|
|
if (csi && csi[0]) {
|
|
|
|
return srslte_predecoding_multiplex_2x1_mrc_avx(y, h, x, codebook_idx, nof_symbols, scaling);
|
|
|
|
return srslte_predecoding_multiplex_2x1_mrc_csi(y, h, x, csi[0], codebook_idx, nof_symbols, scaling);
|
|
|
|
#else
|
|
|
|
} else {
|
|
|
|
#ifdef LV_HAVE_SSE
|
|
|
|
return srslte_predecoding_multiplex_2x1_mrc(y, h, x, codebook_idx, nof_symbols, scaling);
|
|
|
|
return srslte_predecoding_multiplex_2x1_mrc_sse(y, h, x, codebook_idx, nof_symbols, scaling);
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
|
|
|
|
return srslte_predecoding_multiplex_2x1_mrc_gen(y, h, x, codebook_idx, nof_symbols, scaling);
|
|
|
|
|
|
|
|
#endif /* LV_HAVE_SSE */
|
|
|
|
|
|
|
|
#endif /* LV_HAVE_AVX */
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (nof_ports == 4) {
|
|
|
|
} else if (nof_ports == 4) {
|
|
|
|
DEBUG("Error predecoding multiplex: not implemented for %d Tx ports", nof_ports);
|
|
|
|
DEBUG("Error predecoding multiplex: not implemented for %d Tx ports", nof_ports);
|
|
|
@ -1759,7 +1819,7 @@ int srslte_predecoding_type(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
break;
|
|
|
|
case SRSLTE_MIMO_TYPE_SPATIAL_MULTIPLEX:
|
|
|
|
case SRSLTE_MIMO_TYPE_SPATIAL_MULTIPLEX:
|
|
|
|
return srslte_predecoding_multiplex(y, h, x, nof_rxant, nof_ports, nof_layers, codebook_idx, nof_symbols,
|
|
|
|
return srslte_predecoding_multiplex(y, h, x, csi, nof_rxant, nof_ports, nof_layers, codebook_idx, nof_symbols,
|
|
|
|
scaling, noise_estimate);
|
|
|
|
scaling, noise_estimate);
|
|
|
|
default:
|
|
|
|
default:
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|
return SRSLTE_ERROR;
|
|
|
|