DL Wiener: Fixed minor header issues, format and optimization

master
Xavier Arteaga 5 years ago committed by Xavier Arteaga
parent 6248ab43e3
commit 5199aaa94e

@ -19,11 +19,12 @@
* *
*/ */
#ifndef SRSLTE_SRSLTE_WIENER_DL_H_ #ifndef SRSLTE_WIENER_DL_H_
#define SRSLTE_SRSLTE_WIENER_DL_H_ #define SRSLTE_WIENER_DL_H_
#include <srslte/config.h>
#include <srslte/phy/dft/dft.h>
#include <srslte/phy/utils/random.h> #include <srslte/phy/utils/random.h>
#include <srslte/srslte.h>
// Constant static parameters // Constant static parameters
#define SRSLTE_WIENER_DL_HLS_FIFO_SIZE (8U) #define SRSLTE_WIENER_DL_HLS_FIFO_SIZE (8U)
@ -107,7 +108,7 @@ typedef struct {
SRSLTE_API int SRSLTE_API int
srslte_wiener_dl_init(srslte_wiener_dl_t* q, uint32_t max_prb, uint32_t max_tx_ports, uint32_t max_rx_ant); srslte_wiener_dl_init(srslte_wiener_dl_t* q, uint32_t max_prb, uint32_t max_tx_ports, uint32_t max_rx_ant);
SRSLTE_API int srslte_wiener_dl_set_cell(srslte_wiener_dl_t* q, const srslte_cell_t* cell); SRSLTE_API int srslte_wiener_dl_set_cell(srslte_wiener_dl_t* q, srslte_cell_t cell);
SRSLTE_API void srslte_wiener_dl_reset(srslte_wiener_dl_t* q); SRSLTE_API void srslte_wiener_dl_reset(srslte_wiener_dl_t* q);
@ -122,4 +123,4 @@ SRSLTE_API int srslte_wiener_dl_run(srslte_wiener_dl_t* q,
SRSLTE_API void srslte_wiener_dl_free(srslte_wiener_dl_t* q); SRSLTE_API void srslte_wiener_dl_free(srslte_wiener_dl_t* q);
#endif // SRSLTE_SRSLTE_WIENER_DL_H_ #endif // SRSLTE_WIENER_DL_H_

@ -20,6 +20,7 @@
*/ */
#include <assert.h> #include <assert.h>
#include <srslte/phy/ch_estimation/wiener_dl.h>
#include <srslte/phy/utils/mat.h> #include <srslte/phy/utils/mat.h>
#include <srslte/srslte.h> #include <srslte/srslte.h>
@ -27,7 +28,10 @@
#define NSAMPLES2NBYTES(N) (sizeof(cf_t) * (N)) #define NSAMPLES2NBYTES(N) (sizeof(cf_t) * (N))
#define M_1_3 0.33333333333333333333f /* 1 / 3 */ #define M_1_3 0.33333333333333333333f /* 1 / 3 */
#define M_1_4 0.25f /* 1 / 4 */ #define M_1_4 0.25f /* 1 / 4 */
#define M_4_7 0.571428571f /* 4 / 7*/ #define M_4_7 0.571428571f /* 4 / 7 */
#define M_4_3 1.33333333333333333333f /* 4 / 3 */
#define M_5_3 1.66666666666666666666f /* 5 / 3 */
#define SRSLTE_WIENER_LOCAL
// Constants // Constants
const float hlsv_sum_norm[SRSLTE_WIENER_DL_MIN_RE] = { const float hlsv_sum_norm[SRSLTE_WIENER_DL_MIN_RE] = {
@ -37,15 +41,15 @@ const float hlsv_sum_norm[SRSLTE_WIENER_DL_MIN_RE] = {
4.0f, 3.66f, 3.33f, 3.0f, 2.66f, 2.33f, 2.0f, 1.66f, 1.33f, 1.0f, 0.66f, 0.33f}; 4.0f, 3.66f, 3.33f, 3.0f, 2.66f, 2.33f, 2.0f, 1.66f, 1.33f, 1.0f, 0.66f, 0.33f};
// Local state function prototypes // Local state function prototypes
static srslte_wiener_dl_state_t* srslte_wiener_dl_state_malloc(srslte_wiener_dl_t* q); SRSLTE_WIENER_LOCAL srslte_wiener_dl_state_t* srslte_wiener_dl_state_malloc(srslte_wiener_dl_t* q);
static void srslte_wiener_dl_state_free(srslte_wiener_dl_state_t* q); SRSLTE_WIENER_LOCAL void srslte_wiener_dl_state_free(srslte_wiener_dl_state_t* q);
static void srslte_wiener_dl_state_reset(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t* state); SRSLTE_WIENER_LOCAL void srslte_wiener_dl_state_reset(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t* state);
// Local run function prototypes // Local run function prototypes
static void SRSLTE_WIENER_LOCAL void
srslte_wiener_dl_run_symbol_1_8(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t* state, cf_t* pilots, float snr_lin); srslte_wiener_dl_run_symbol_1_8(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t* state, cf_t* pilots, float snr_lin);
static void srslte_wiener_dl_run_symbol_2_9(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t* state); SRSLTE_WIENER_LOCAL void srslte_wiener_dl_run_symbol_2_9(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t* state);
static void srslte_wiener_dl_run_symbol_5_12(srslte_wiener_dl_t* q, SRSLTE_WIENER_LOCAL void srslte_wiener_dl_run_symbol_5_12(srslte_wiener_dl_t* q,
srslte_wiener_dl_state_t* state, srslte_wiener_dl_state_t* state,
cf_t* pilots, cf_t* pilots,
uint32_t tx, uint32_t tx,
@ -54,7 +58,7 @@ static void srslte_wiener_dl_run_symbol_5_12(srslte_wiener_dl_t* q,
float snr_lin); float snr_lin);
// Local state related functions // Local state related functions
static srslte_wiener_dl_state_t* srslte_wiener_dl_state_malloc(srslte_wiener_dl_t* q) SRSLTE_WIENER_LOCAL srslte_wiener_dl_state_t* srslte_wiener_dl_state_malloc(srslte_wiener_dl_t* q)
{ {
// Allocate Channel state // Allocate Channel state
srslte_wiener_dl_state_t* state = calloc(sizeof(srslte_wiener_dl_state_t), 1); srslte_wiener_dl_state_t* state = calloc(sizeof(srslte_wiener_dl_state_t), 1);
@ -107,7 +111,7 @@ static srslte_wiener_dl_state_t* srslte_wiener_dl_state_malloc(srslte_wiener_dl_
} }
for (uint32_t i = 0; i < SRSLTE_WIENER_DL_CXFIFO_SIZE && !ret; i++) { for (uint32_t i = 0; i < SRSLTE_WIENER_DL_CXFIFO_SIZE && !ret; i++) {
state->cxfifo[i] = srslte_vec_malloc(NSAMPLES2NBYTES(SRSLTE_WIENER_DL_TFIFO_SIZE)); state->cxfifo[i] = srslte_vec_malloc(NSAMPLES2NBYTES(SRSLTE_WIENER_DL_MIN_RE));
if (!state->cxfifo[i]) { if (!state->cxfifo[i]) {
perror("malloc"); perror("malloc");
ret = SRSLTE_ERROR; ret = SRSLTE_ERROR;
@ -134,7 +138,7 @@ static srslte_wiener_dl_state_t* srslte_wiener_dl_state_malloc(srslte_wiener_dl_
return state; return state;
} }
static void srslte_wiener_dl_state_reset(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t* state) SRSLTE_WIENER_LOCAL void srslte_wiener_dl_state_reset(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t* state)
{ {
if (q && state) { if (q && state) {
// Initialise memory // Initialise memory
@ -146,7 +150,7 @@ static void srslte_wiener_dl_state_reset(srslte_wiener_dl_t* q, srslte_wiener_dl
bzero(state->tfifo[i], NSAMPLES2NBYTES(q->nof_re)); bzero(state->tfifo[i], NSAMPLES2NBYTES(q->nof_re));
} }
for (uint32_t i = 0; i < SRSLTE_WIENER_DL_XFIFO_SIZE; i++) { for (uint32_t i = 0; i < SRSLTE_WIENER_DL_XFIFO_SIZE; i++) {
bzero(state->xfifo, NSAMPLES2NBYTES(SRSLTE_WIENER_DL_MIN_RE)); bzero(state->xfifo[i], NSAMPLES2NBYTES(SRSLTE_WIENER_DL_MIN_RE));
} }
bzero(state->cV, NSAMPLES2NBYTES(SRSLTE_WIENER_DL_MIN_RE)); bzero(state->cV, NSAMPLES2NBYTES(SRSLTE_WIENER_DL_MIN_RE));
bzero(state->timefifo, NSAMPLES2NBYTES(SRSLTE_WIENER_DL_TIMEFIFO_SIZE)); bzero(state->timefifo, NSAMPLES2NBYTES(SRSLTE_WIENER_DL_TIMEFIFO_SIZE));
@ -165,7 +169,7 @@ static void srslte_wiener_dl_state_reset(srslte_wiener_dl_t* q, srslte_wiener_dl
} }
} }
static void srslte_wiener_dl_state_free(srslte_wiener_dl_state_t* q) SRSLTE_WIENER_LOCAL void srslte_wiener_dl_state_free(srslte_wiener_dl_state_t* q)
{ {
if (q) { if (q) {
@ -205,7 +209,7 @@ int srslte_wiener_dl_init(srslte_wiener_dl_t* q, uint32_t max_prb, uint32_t max_
{ {
int ret = SRSLTE_SUCCESS; int ret = SRSLTE_SUCCESS;
if (q && max_prb > SRSLTE_MAX_PRB && max_tx_ports > SRSLTE_MAX_PORTS && max_rx_ant > SRSLTE_MAX_PORTS) { if (q && max_prb <= SRSLTE_MAX_PRB && max_tx_ports <= SRSLTE_MAX_PORTS && max_rx_ant <= SRSLTE_MAX_PORTS) {
// Bzero structure // Bzero structure
bzero(q, sizeof(srslte_wiener_dl_t)); bzero(q, sizeof(srslte_wiener_dl_t));
@ -281,19 +285,19 @@ int srslte_wiener_dl_init(srslte_wiener_dl_t* q, uint32_t max_prb, uint32_t max_
return ret; return ret;
} }
int srslte_wiener_dl_set_cell(srslte_wiener_dl_t* q, const srslte_cell_t* cell) int srslte_wiener_dl_set_cell(srslte_wiener_dl_t* q, srslte_cell_t cell)
{ {
int ret = SRSLTE_ERROR_INVALID_INPUTS; int ret = SRSLTE_ERROR_INVALID_INPUTS;
if (q && cell) { if (q) {
// No invalid inputs // No invalid inputs
ret = SRSLTE_SUCCESS; ret = SRSLTE_SUCCESS;
// Set new values // Set new values
q->nof_prb = cell->nof_prb; q->nof_prb = cell.nof_prb;
q->nof_ref = cell->nof_prb * 2; q->nof_ref = cell.nof_prb * 2;
q->nof_re = cell->nof_prb * SRSLTE_NRE; q->nof_re = cell.nof_prb * SRSLTE_NRE;
q->nof_tx_ports = cell->nof_ports; q->nof_tx_ports = cell.nof_ports;
// Reset states // Reset states
srslte_wiener_dl_reset(q); srslte_wiener_dl_reset(q);
@ -320,7 +324,7 @@ void srslte_wiener_dl_reset(srslte_wiener_dl_t* q)
} }
} }
static void circshift_dim1(cf_t** matrix, uint32_t ndim1, int32_t k) SRSLTE_WIENER_LOCAL void circshift_dim1(cf_t** matrix, uint32_t ndim1, int32_t k)
{ {
// Check valid inputs // Check valid inputs
if (matrix != NULL && ndim1 != 0 && k != 0) { if (matrix != NULL && ndim1 != 0 && k != 0) {
@ -340,10 +344,12 @@ static void circshift_dim1(cf_t** matrix, uint32_t ndim1, int32_t k)
// Save last pointer // Save last pointer
matrix[ndim1 - 1] = tmp_ptr; matrix[ndim1 - 1] = tmp_ptr;
} }
} else {
ERROR("unattended circshift_dim1!");
} }
} }
static void circshift_dim2(cf_t** matrix, uint32_t ndim1, uint32_t ndim2, int32_t k) SRSLTE_WIENER_LOCAL void circshift_dim2(cf_t** matrix, uint32_t ndim1, uint32_t ndim2, int32_t k)
{ {
// Wrap k // Wrap k
k = (k + ndim1) % ndim1; k = (k + ndim1) % ndim1;
@ -365,10 +371,23 @@ static void circshift_dim2(cf_t** matrix, uint32_t ndim1, uint32_t ndim2, int32_
} }
} }
static void matrix_acc_dim1_cc(cf_t** matrix, cf_t* res, uint32_t ndim1, uint32_t ndim2) SRSLTE_WIENER_LOCAL void matrix_acc_dim1_cc(cf_t** matrix, cf_t* res, uint32_t ndim1, uint32_t ndim2)
{ {
int dim2 = 0;
#if SRSLTE_SIMD_CF_SIZE
for (; dim2 < ndim2 - SRSLTE_SIMD_CF_SIZE / 2 + 1; dim2 += SRSLTE_SIMD_CF_SIZE / 2) {
simd_f_t acc = srslte_simd_f_zero();
for (uint32_t dim1 = 0; dim1 < ndim1; dim1++) {
simd_f_t reg = srslte_simd_f_loadu((float*)&matrix[dim1][dim2]);
acc = srslte_simd_f_add(acc, reg);
}
srslte_simd_f_storeu((float*)&res[dim2], acc);
}
#endif
// Accumulate each column // Accumulate each column
for (uint32_t dim2 = 0; dim2 < ndim2; dim2++) { for (; dim2 < ndim2; dim2++) {
cf_t acc = 0.0f; cf_t acc = 0.0f;
for (uint32_t dim1 = 0; dim1 < ndim1; dim1++) { for (uint32_t dim1 = 0; dim1 < ndim1; dim1++) {
acc += matrix[dim1][dim2]; acc += matrix[dim1][dim2];
@ -385,7 +404,7 @@ static void matrix_acc_dim1_cc(cf_t** matrix, cf_t* res, uint32_t ndim1, uint32_
} }
}*/ }*/
static uint32_t vec_find_first_smaller_than_cf(cf_t* x, float y, uint32_t n, uint32_t pos) SRSLTE_WIENER_LOCAL uint32_t vec_find_first_smaller_than_cf(cf_t* x, float y, uint32_t n, uint32_t pos)
{ {
uint32_t ret = n; uint32_t ret = n;
@ -398,6 +417,42 @@ static uint32_t vec_find_first_smaller_than_cf(cf_t* x, float y, uint32_t n, uin
return ret; return ret;
} }
/**
 * Complex dot product without conjugation: returns the sum of x[i] * y[i] for i in [0, len).
 *
 * When SRSLTE_SIMD_CF_SIZE is non-zero and len is at least SRSLTE_SIMD_CF_SIZE, the bulk of the
 * vector is processed SRSLTE_SIMD_CF_SIZE samples at a time; the remaining samples (or the whole
 * vector when SIMD is unavailable or len is too short) are accumulated by the scalar loop.
 *
 * @param x   First input vector, at least len samples. No alignment is required.
 * @param y   Second input vector, at least len samples. No alignment is required.
 * @param len Number of samples to process; a value <= 0 yields 0.
 * @return    Accumulated complex product.
 */
SRSLTE_WIENER_LOCAL cf_t _srslte_vec_dot_prod_ccc_simd(const cf_t* x, const cf_t* y, const int len)
{
  int  i      = 0;
  cf_t result = 0;

#if SRSLTE_SIMD_CF_SIZE
  if (len >= SRSLTE_SIMD_CF_SIZE) {
    simd_cf_t avx_result = srslte_simd_cf_zero();

    for (; i < len - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
      // Unaligned loads: this helper places no alignment contract on x/y, and it is meant to be
      // called on pointers into the middle of a buffer (e.g. &ref[offset]), so an aligned load
      // could fault on targets that enforce vector alignment.
      simd_cf_t xVal = srslte_simd_cfi_loadu(&x[i]);
      simd_cf_t yVal = srslte_simd_cfi_loadu(&y[i]);

      avx_result = srslte_simd_cf_add(srslte_simd_cf_prod(xVal, yVal), avx_result);
    }

    // Scratch buffer for the reduced accumulator. It is written by a float-vector store below, so
    // it is sized in floats per vector (SRSLTE_SIMD_F_SIZE); it is explicitly aligned because the
    // store used is the aligned variant.
    __attribute__((aligned(64))) float simd_dotProdVector[SRSLTE_SIMD_F_SIZE];

    // Reduce the real and imaginary accumulators with repeated horizontal adds.
    // NOTE(review): this relies on srslte_simd_f_hadd leaving the total real sum in lane 0 and the
    // total imaginary sum in lane 1 after the loop - confirm against the SIMD abstraction header.
    simd_f_t acc_re = srslte_simd_cf_re(avx_result);
    simd_f_t acc_im = srslte_simd_cf_im(avx_result);
    simd_f_t acc    = srslte_simd_f_hadd(acc_re, acc_im);
    for (int j = 2; j < SRSLTE_SIMD_F_SIZE; j *= 2) {
      acc = srslte_simd_f_hadd(acc, acc);
    }
    srslte_simd_f_store(simd_dotProdVector, acc);

    __real__ result = simd_dotProdVector[0];
    __imag__ result = simd_dotProdVector[1];
  }
#endif

  // Scalar tail (or full scalar path)
  for (; i < len; i++) {
    result += (x[i] * y[i]);
  }

  return result;
}
static void estimate_wiener(srslte_wiener_dl_t* q, static void estimate_wiener(srslte_wiener_dl_t* q,
const cf_t wm[SRSLTE_WIENER_DL_MIN_RE][SRSLTE_WIENER_DL_MIN_REF], const cf_t wm[SRSLTE_WIENER_DL_MIN_RE][SRSLTE_WIENER_DL_MIN_REF],
cf_t* ref, cf_t* ref,
@ -408,14 +463,14 @@ static void estimate_wiener(srslte_wiener_dl_t* q,
// Estimate lower band // Estimate lower band
for (uint32_t i = 0; i < SRSLTE_WIENER_DL_MIN_RE; i++) { for (uint32_t i = 0; i < SRSLTE_WIENER_DL_MIN_RE; i++) {
h[r_offset + i] = srslte_vec_dot_prod_ccc(&ref[p_offset], wm[i], SRSLTE_WIENER_DL_MIN_REF); h[r_offset + i] = _srslte_vec_dot_prod_ccc_simd(&ref[p_offset], wm[i], SRSLTE_WIENER_DL_MIN_REF);
} }
// Estimate Upper band (it might overlap in 6PRB cells with the lower band) // Estimate Upper band (it might overlap in 6PRB cells with the lower band)
r_offset = q->nof_re - SRSLTE_WIENER_DL_MIN_RE; r_offset = q->nof_re - SRSLTE_WIENER_DL_MIN_RE;
p_offset = q->nof_ref - SRSLTE_WIENER_DL_MIN_REF; p_offset = q->nof_ref - SRSLTE_WIENER_DL_MIN_REF;
for (uint32_t i = 0; i < SRSLTE_WIENER_DL_MIN_RE; i++) { for (uint32_t i = 0; i < SRSLTE_WIENER_DL_MIN_RE; i++) {
h[r_offset + i] = srslte_vec_dot_prod_ccc(&ref[p_offset], wm[i], SRSLTE_WIENER_DL_MIN_REF); h[r_offset + i] = _srslte_vec_dot_prod_ccc_simd(&ref[p_offset], wm[i], SRSLTE_WIENER_DL_MIN_REF);
} }
// Estimate center Resource elements // Estimate center Resource elements
@ -425,13 +480,13 @@ static void estimate_wiener(srslte_wiener_dl_t* q,
uint32_t ref_idx = prb * 2 - SRSLTE_WIENER_DL_MIN_REF / 2; uint32_t ref_idx = prb * 2 - SRSLTE_WIENER_DL_MIN_REF / 2;
uint32_t re_idx = prb * SRSLTE_NRE; uint32_t re_idx = prb * SRSLTE_NRE;
for (uint32_t i = SRSLTE_WIENER_DL_MIN_RE / 4; i < (3 * SRSLTE_WIENER_DL_MIN_RE) / 4; i++) { for (uint32_t i = SRSLTE_WIENER_DL_MIN_RE / 4; i < (3 * SRSLTE_WIENER_DL_MIN_RE) / 4; i++) {
h[re_idx + i] = srslte_vec_dot_prod_ccc(&ref[ref_idx], wm[i], SRSLTE_WIENER_DL_MIN_REF); h[re_idx + i] = _srslte_vec_dot_prod_ccc_simd(&ref[ref_idx], wm[i], SRSLTE_WIENER_DL_MIN_REF);
} }
} }
} }
} }
static void SRSLTE_WIENER_LOCAL void
srslte_wiener_dl_run_symbol_1_8(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t* state, cf_t* pilots, float snr_lin) srslte_wiener_dl_run_symbol_1_8(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t* state, cf_t* pilots, float snr_lin)
{ {
@ -460,7 +515,7 @@ srslte_wiener_dl_run_symbol_1_8(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t*
state->invtpilotoff = M_1_3; state->invtpilotoff = M_1_3;
} }
static void srslte_wiener_dl_run_symbol_2_9(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t* state) SRSLTE_WIENER_LOCAL void srslte_wiener_dl_run_symbol_2_9(srslte_wiener_dl_t* q, srslte_wiener_dl_state_t* state)
{ {
// here we only shift and feed TD interpolation fifo // here we only shift and feed TD interpolation fifo
@ -478,7 +533,7 @@ static void srslte_wiener_dl_run_symbol_2_9(srslte_wiener_dl_t* q, srslte_wiener
state->invtpilotoff = M_1_3; state->invtpilotoff = M_1_3;
} }
static void srslte_wiener_dl_run_symbol_5_12(srslte_wiener_dl_t* q, SRSLTE_WIENER_LOCAL void srslte_wiener_dl_run_symbol_5_12(srslte_wiener_dl_t* q,
srslte_wiener_dl_state_t* state, srslte_wiener_dl_state_t* state,
cf_t* pilots, cf_t* pilots,
uint32_t tx, uint32_t tx,
@ -556,10 +611,10 @@ static void srslte_wiener_dl_run_symbol_5_12(srslte_wiener_dl_t* q,
// Interpolate last edge // Interpolate last edge
state->cV[SRSLTE_WIENER_DL_MIN_RE - 2] = state->cV[SRSLTE_WIENER_DL_MIN_RE - 2] =
state->cV[SRSLTE_WIENER_DL_MIN_RE - 6] + state->cV[SRSLTE_WIENER_DL_MIN_RE - 6] +
(state->cV[SRSLTE_WIENER_DL_MIN_RE - 3] - state->cV[SRSLTE_WIENER_DL_MIN_RE - 6]) * 1.33333333f; (state->cV[SRSLTE_WIENER_DL_MIN_RE - 3] - state->cV[SRSLTE_WIENER_DL_MIN_RE - 6]) * M_4_3;
state->cV[SRSLTE_WIENER_DL_MIN_RE - 1] = state->cV[SRSLTE_WIENER_DL_MIN_RE - 1] =
state->cV[SRSLTE_WIENER_DL_MIN_RE - 6] + state->cV[SRSLTE_WIENER_DL_MIN_RE - 6] +
(state->cV[SRSLTE_WIENER_DL_MIN_RE - 3] - state->cV[SRSLTE_WIENER_DL_MIN_RE - 6]) * 1.66666666f; (state->cV[SRSLTE_WIENER_DL_MIN_RE - 3] - state->cV[SRSLTE_WIENER_DL_MIN_RE - 6]) * M_5_3;
if (tx == q->nof_tx_ports - 1 && rx == q->nof_rx_ant - 1) { if (tx == q->nof_tx_ports - 1 && rx == q->nof_rx_ant - 1) {
// Average correlation vectors // Average correlation vectors
@ -587,7 +642,7 @@ static void srslte_wiener_dl_run_symbol_5_12(srslte_wiener_dl_t* q,
} }
// Add noise contribution to the square wiener // Add noise contribution to the square wiener
float N = (__real__ q->acV[0] / SRSLTE_MIN(15, state->sumlen)); float N = (__real__ q->acV[0] / SRSLTE_MIN(15, snr_lin * state->sumlen));
for (uint32_t i = 0; i < SRSLTE_WIENER_DL_MIN_REF; i++) { for (uint32_t i = 0; i < SRSLTE_WIENER_DL_MIN_REF; i++) {
q->RH.m[i][i] += N; q->RH.m[i][i] += N;
} }
@ -659,7 +714,7 @@ int srslte_wiener_dl_run(srslte_wiener_dl_t* q,
srslte_wiener_dl_run_symbol_5_12(q, state, pilots, tx, rx, shift, snr_lin); srslte_wiener_dl_run_symbol_5_12(q, state, pilots, tx, rx, shift, snr_lin);
break; break;
default: default:
perror("unhandled switch-case"); /* Do nothing */;
} }
// Estimate // Estimate

Loading…
Cancel
Save