/** * * \section COPYRIGHT * * Copyright 2013-2015 Software Radio Systems Limited * * \section LICENSE * * This file is part of the srsLTE library. * * srsLTE is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of * the License, or (at your option) any later version. * * srsLTE is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * A copy of the GNU Affero General Public License can be found in * the LICENSE file in the top-level directory of this distribution * and at http://www.gnu.org/licenses/. * */ #include #include #include #include #include #include "srslte/common/phy_common.h" #include "srslte/mimo/precoding.h" #include "srslte/utils/vector.h" #ifdef LV_HAVE_SSE #include #include int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_RXANT], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate); int srslte_predecoding_diversity2_sse(cf_t *y, cf_t *h[SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS], int nof_symbols); #endif #ifdef LV_HAVE_AVX #include int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_RXANT], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate); #endif /************************************************ * * RECEIVER SIDE FUNCTIONS * **************************************************/ #ifdef LV_HAVE_SSE #define PROD(a,b) _mm_addsub_ps(_mm_mul_ps(a,_mm_moveldup_ps(b)),_mm_mul_ps(_mm_shuffle_ps(a,a,0xB1),_mm_movehdup_ps(b))) int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_RXANT], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate) { float *xPtr = (float*) x; const float *hPtr1 = (const float*) h[0]; const float *yPtr1 = (const float*) y[0]; const float *hPtr2 = (const float*) h[1]; const float *yPtr2 = (const float*) y[1]; __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); __m128 noise = _mm_set1_ps(noise_estimate); __m128 h1Val1, h2Val1, y1Val1, y2Val1; __m128 h1Val2, h2Val2, y1Val2, y2Val2; __m128 hsquare, h1square, h2square, h1conj1, h2conj1, x1Val1, x2Val1; __m128 hsquare2, h1conj2, h2conj2, x1Val2, x2Val2; for (int i=0;i 0) { hsquare = _mm_add_ps(hsquare, noise); } h1square = _mm_shuffle_ps(hsquare, hsquare, _MM_SHUFFLE(1, 1, 0, 0)); h2square = _mm_shuffle_ps(hsquare, hsquare, _MM_SHUFFLE(3, 3, 2, 2)); /* Conjugate channel */ h1conj1 = _mm_xor_ps(h1Val1, conjugator); h2conj1 = _mm_xor_ps(h2Val1, conjugator); if (nof_rxant == 2) { h1conj2 = _mm_xor_ps(h1Val2, conjugator); h2conj2 = _mm_xor_ps(h2Val2, conjugator); } /* Complex product */ x1Val1 = PROD(y1Val1, h1conj1); x2Val1 = PROD(y2Val1, h2conj1); if (nof_rxant == 2) { x1Val2 = PROD(y1Val2, h1conj2); x2Val2 = PROD(y2Val2, h2conj2); x1Val1 = _mm_add_ps(x1Val1, x1Val2); x2Val1 = _mm_add_ps(x2Val1, x2Val2); } x1Val1 = _mm_div_ps(x1Val1, h1square); x2Val1 = _mm_div_ps(x2Val1, h2square); _mm_store_ps(xPtr, x1Val1); xPtr+=4; _mm_store_ps(xPtr, x2Val1); xPtr+=4; } for (int i=8*(nof_symbols/8);i 0) { h12square = _mm256_add_ps(h12square, noise); } h1_p = _mm256_permute_ps(h12square, _MM_SHUFFLE(1, 1, 0, 0)); h2_p = _mm256_permute_ps(h12square, _MM_SHUFFLE(3, 3, 2, 2)); h1square = _mm256_permute2f128_ps(h1_p, h2_p, 2<<4); h2square = _mm256_permute2f128_ps(h1_p, h2_p, 3<<4 | 1); /* Conjugate channel */ h1conj = _mm256_xor_ps(h1Val, conjugator); h2conj = _mm256_xor_ps(h2Val, conjugator); /* Complex product */ x1Val = PROD_AVX(y1Val, h1conj); x2Val = PROD_AVX(y2Val, h2conj); x1Val = _mm256_div_ps(x1Val, h1square); x2Val = _mm256_div_ps(x2Val, h2square); _mm256_store_ps(xPtr, x1Val); xPtr+=8; _mm256_store_ps(xPtr, x2Val); xPtr+=8; } for (int i=16*(nof_symbols/16);i 32) { return srslte_predecoding_single_avx(y, h, x, nof_rxant, nof_symbols, noise_estimate); } else { return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate); } #else #ifdef LV_HAVE_SSE if (nof_symbols > 32) { return srslte_predecoding_single_sse(y, h, x, nof_rxant, nof_symbols, noise_estimate); } else { return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate); } #else return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate); #endif #endif } /* ZF/MMSE SISO equalizer x=y(h'h+no)^(-1)h' (ZF if n0=0.0)*/ int srslte_predecoding_single_multi(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_RXANT], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate) { #ifdef LV_HAVE_AVX if (nof_symbols > 32) { return srslte_predecoding_single_avx(y, h, x, nof_rxant, nof_symbols, noise_estimate); } else { return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate); } #else #ifdef LV_HAVE_SSE if (nof_symbols > 32) { return srslte_predecoding_single_sse(y, h, x, nof_rxant, nof_symbols, noise_estimate); } else { return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate); } #else return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate); #endif #endif } /* C implementatino of the SFBC equalizer */ int srslte_predecoding_diversity_gen_(cf_t *y, cf_t *h[SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS], int nof_ports, int nof_symbols, int symbol_start) { int i; if (nof_ports == 2) { cf_t h00, h01, h10, h11, r0, r1; float hh; for (i = symbol_start/2; i < nof_symbols / 2; i++) { h00 = h[0][2 * i]; h01 = h[0][2 * i+1]; h10 = h[1][2 * i]; h11 = h[1][2 * i+1]; hh = crealf(h00) * crealf(h00) + cimagf(h00) * cimagf(h00) + crealf(h11) * crealf(h11) + cimagf(h11) * cimagf(h11); r0 = y[2 * i]; r1 = y[2 * i + 1]; if (hh == 0) { hh = 1e-4; } x[0][i] = (conjf(h00) * r0 + h11 * conjf(r1)) / hh * sqrt(2); x[1][i] = (-h10 * conj(r0) + conj(h01) * r1) / hh * sqrt(2); } return i; } else if (nof_ports == 4) { cf_t h0, h1, h2, h3, r0, r1, r2, r3; float hh02, hh13; int m_ap = (nof_symbols % 4) ? ((nof_symbols - 2) / 4) : nof_symbols / 4; for (i = symbol_start; i < m_ap; i++) { h0 = h[0][4 * i]; h1 = h[1][4 * i + 2]; h2 = h[2][4 * i]; h3 = h[3][4 * i + 2]; hh02 = crealf(h0) * crealf(h0) + cimagf(h0) * cimagf(h0) + crealf(h2) * crealf(h2) + cimagf(h2) * cimagf(h2); hh13 = crealf(h1) * crealf(h1) + cimagf(h1) * cimagf(h1) + crealf(h3) * crealf(h3) + cimagf(h3) * cimagf(h3); r0 = y[4 * i]; r1 = y[4 * i + 1]; r2 = y[4 * i + 2]; r3 = y[4 * i + 3]; x[0][i] = (conjf(h0) * r0 + h2 * conjf(r1)) / hh02 * sqrt(2); x[1][i] = (-h2 * conjf(r0) + conjf(h0) * r1) / hh02 * sqrt(2); x[2][i] = (conjf(h1) * r2 + h3 * conjf(r3)) / hh13 * sqrt(2); x[3][i] = (-h3 * conjf(r2) + conjf(h1) * r3) / hh13 * sqrt(2); } return i; } else { fprintf(stderr, "Number of ports must be 2 or 4 for transmit diversity (nof_ports=%d)\n", nof_ports); return -1; } } int srslte_predecoding_diversity_gen(cf_t *y, cf_t *h[SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS], int nof_ports, int nof_symbols) { return srslte_predecoding_diversity_gen_(y, h, x, nof_ports, nof_symbols, 0); } /* SSE implementation of the 2-port SFBC equalizer */ #ifdef LV_HAVE_SSE int srslte_predecoding_diversity2_sse(cf_t *y, cf_t *h[SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS], int nof_symbols) { float *x0Ptr = (float*) x[0]; float *x1Ptr = (float*) x[1]; const float *h0Ptr = (const float*) h[0]; const float *h1Ptr = (const float*) h[1]; const float *yPtr = (const float*) y; __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); __m128 sqrt2 = _mm_setr_ps(sqrt(2), sqrt(2), sqrt(2), sqrt(2)); __m128 h0Val_0, h0Val_1, h1Val_0, h1Val_1, h00, h00conj, h01, h01conj, h10, h11, hh, hhshuf, hhsum, hhadd; __m128 r0Val, r1Val, r0, r1, r0conj, r1conj; __m128 x0, x1; for (int i=0;i 32 && nof_ports == 2) { return srslte_predecoding_diversity2_sse(y, h, x, nof_symbols); } else { return srslte_predecoding_diversity_gen(y, h, x, nof_ports, nof_symbols); } #else return srslte_predecoding_diversity_gen(y, h, x, nof_ports, nof_symbols); #endif } /* 36.211 v10.3.0 Section 6.3.4 */ int srslte_predecoding_type(cf_t *y, cf_t *h[SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS], int nof_ports, int nof_layers, int nof_symbols, srslte_mimo_type_t type, float noise_estimate) { if (nof_ports > SRSLTE_MAX_PORTS) { fprintf(stderr, "Maximum number of ports is %d (nof_ports=%d)\n", SRSLTE_MAX_PORTS, nof_ports); return -1; } if (nof_layers > SRSLTE_MAX_LAYERS) { fprintf(stderr, "Maximum number of layers is %d (nof_layers=%d)\n", SRSLTE_MAX_LAYERS, nof_layers); return -1; } switch (type) { case SRSLTE_MIMO_TYPE_SINGLE_ANTENNA: if (nof_ports == 1 && nof_layers == 1) { return srslte_predecoding_single(y, h[0], x[0], nof_symbols, noise_estimate); } else { fprintf(stderr, "Number of ports and layers must be 1 for transmission on single antenna ports\n"); return -1; } break; case SRSLTE_MIMO_TYPE_TX_DIVERSITY: if (nof_ports == nof_layers) { return srslte_predecoding_diversity(y, h, x, nof_ports, nof_symbols); } else { fprintf(stderr, "Error number of layers must equal number of ports in transmit diversity\n"); return -1; } break; case SRSLTE_MIMO_TYPE_SPATIAL_MULTIPLEX: fprintf(stderr, "Spatial multiplexing not supported\n"); return -1; } return 0; } /************************************************ * * TRANSMITTER SIDE FUNCTIONS * **************************************************/ int srslte_precoding_single(cf_t *x, cf_t *y, int nof_symbols) { memcpy(y, x, nof_symbols * sizeof(cf_t)); return nof_symbols; } int srslte_precoding_diversity(cf_t *x[SRSLTE_MAX_LAYERS], cf_t *y[SRSLTE_MAX_PORTS], int nof_ports, int nof_symbols) { int i; if (nof_ports == 2) { for (i = 0; i < nof_symbols; i++) { y[0][2 * i] = x[0][i]; y[1][2 * i] = -conjf(x[1][i]); y[0][2 * i + 1] = x[1][i]; y[1][2 * i + 1] = conjf(x[0][i]); } // normalize srslte_vec_sc_prod_cfc(y[0], 1.0/sqrtf(2), y[0], 2*nof_symbols); srslte_vec_sc_prod_cfc(y[1], 1.0/sqrtf(2), y[1], 2*nof_symbols); return 2 * i; } else if (nof_ports == 4) { //int m_ap = (nof_symbols%4)?(nof_symbols*4-2):nof_symbols*4; int m_ap = 4 * nof_symbols; for (i = 0; i < m_ap / 4; i++) { y[0][4 * i] = x[0][i] / sqrtf(2); y[1][4 * i] = 0; y[2][4 * i] = -conjf(x[1][i]) / sqrtf(2); y[3][4 * i] = 0; y[0][4 * i + 1] = x[1][i] / sqrtf(2); y[1][4 * i + 1] = 0; y[2][4 * i + 1] = conjf(x[0][i]) / sqrtf(2); y[3][4 * i + 1] = 0; y[0][4 * i + 2] = 0; y[1][4 * i + 2] = x[2][i] / sqrtf(2); y[2][4 * i + 2] = 0; y[3][4 * i + 2] = -conjf(x[3][i]) / sqrtf(2); y[0][4 * i + 3] = 0; y[1][4 * i + 3] = x[3][i] / sqrtf(2); y[2][4 * i + 3] = 0; y[3][4 * i + 3] = conjf(x[2][i]) / sqrtf(2); } return 4 * i; } else { fprintf(stderr, "Number of ports must be 2 or 4 for transmit diversity (nof_ports=%d)\n", nof_ports); return -1; } } /* 36.211 v10.3.0 Section 6.3.4 */ int srslte_precoding_type(cf_t *x[SRSLTE_MAX_LAYERS], cf_t *y[SRSLTE_MAX_PORTS], int nof_layers, int nof_ports, int nof_symbols, srslte_mimo_type_t type) { if (nof_ports > SRSLTE_MAX_PORTS) { fprintf(stderr, "Maximum number of ports is %d (nof_ports=%d)\n", SRSLTE_MAX_PORTS, nof_ports); return -1; } if (nof_layers > SRSLTE_MAX_LAYERS) { fprintf(stderr, "Maximum number of layers is %d (nof_layers=%d)\n", SRSLTE_MAX_LAYERS, nof_layers); return -1; } switch (type) { case SRSLTE_MIMO_TYPE_SINGLE_ANTENNA: if (nof_ports == 1 && nof_layers == 1) { return srslte_precoding_single(x[0], y[0], nof_symbols); } else { fprintf(stderr, "Number of ports and layers must be 1 for transmission on single antenna ports\n"); return -1; } break; case SRSLTE_MIMO_TYPE_TX_DIVERSITY: if (nof_ports == nof_layers) { return srslte_precoding_diversity(x, y, nof_ports, nof_symbols); } else { fprintf(stderr, "Error number of layers must equal number of ports in transmit diversity\n"); return -1; } case SRSLTE_MIMO_TYPE_SPATIAL_MULTIPLEX: fprintf(stderr, "Spatial multiplexing not supported\n"); return -1; } return 0; }