2rx equalizer working in matlab in gen/sse

master
Ismael Gomez 8 years ago
parent c8bc53e749
commit 5fcb95d54d

@ -1,8 +1,9 @@
clear
addpath('../../build/srslte/lib/mimo/test')
addpath('../../debug/srslte/lib/mimo/test')
enb = lteRMCDL('R.10');
%enb = lteRMCDL('R.10'); % 2 ports
enb = lteRMCDL('R.0'); % 1 port
cec = struct('FreqWindow',9,'TimeWindow',9,'InterpType','cubic');
cec.PilotAverage = 'UserDefined';
@ -10,7 +11,7 @@ cec.InterpWinSize = 1;
cec.InterpWindow = 'Causal';
cfg.Seed = 1; % Random channel seed
cfg.NRxAnts = 2; % 2 receive antennas
cfg.NRxAnts = 1; % 1 receive antenna
cfg.DelayProfile = 'ETU'; % ETU delay spread
cfg.DopplerFreq = 100; % 100 Hz Doppler frequency
cfg.MIMOCorrelation = 'Low'; % Low (no) MIMO correlation
@ -22,10 +23,9 @@ cfg.NormalizePathGains = 'On'; % Normalize delay profile power
cfg.NormalizeTxAnts = 'On'; % Normalize for transmit antennas
[txWaveform, ~, info] = lteRMCDLTool(enb,[1;0;0;1]);
n = length(txWaveform);
cfg.SamplingRate = info.SamplingRate;
txWaveform = txWaveform+complex(randn(n,2),randn(n,2))*1e-3;
txWaveform = txWaveform+complex(randn(size(txWaveform)),randn(size(txWaveform)))*1e-3;
rxWaveform = lteFadingChannel(cfg,txWaveform);
@ -35,16 +35,36 @@ rxGrid = lteOFDMDemodulate(enb,rxWaveform);
s=size(h);
p=s(1);
Nt=s(4);
n=s(2);
if (length(s)>2)
Nr=s(3);
else
Nr=1;
end
if (length(s)>3)
Nt=s(4);
else
Nt=1;
end
rx=reshape(rxGrid(:,1,:),p,Nr);
hp=reshape(h(:,1,:,:),p,Nr,Nt);
if (Nr > 1)
rx=reshape(rxGrid,p,n,Nr);
hp=reshape(h,p,n,Nr,Nt);
else
rx=rxGrid;
hp=h;
end
if (Nt > 1)
output_mat = lteTransmitDiversityDecode(rx, hp);
output_srs = srslte_diversitydecode(rx, hp);
else
output_mat = lteEqualizeMMSE(rx, hp, n0);
end
output_srs = srslte_diversitydecode(rx, hp, n0);
plot(abs(output_mat-output_srs))
mean(abs(output_mat-output_srs).^2)
plot(abs(output_mat(:)-output_srs(:)))
mean(abs(output_mat(:)-output_srs(:)).^2)
t=1:10;
plot(t,real(output_mat(t)),t,real(output_srs(t)))

@ -71,8 +71,8 @@ int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_
__m128 noise = _mm_set1_ps(noise_estimate);
__m128 h1Val1, h2Val1, y1Val1, y2Val1;
__m128 h1Val2, h2Val2, y1Val2, y2Val2;
__m128 h12square1, h1square1, h2square1, h1conj1, h2conj1, x1Val1, x2Val1;
__m128 h12square2, h1square2, h2square2, h1conj2, h2conj2, x1Val2, x2Val2;
__m128 hsquare, h1square, h2square, h1conj1, h2conj1, x1Val1, x2Val1;
__m128 hsquare2, h1conj2, h2conj2, x1Val2, x2Val2;
for (int i=0;i<nof_symbols/4;i++) {
y1Val1 = _mm_load_ps(yPtr1); yPtr1+=4;
@ -87,25 +87,17 @@ int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_
h2Val2 = _mm_load_ps(hPtr2); hPtr2+=4;
}
h12square1 = _mm_hadd_ps(_mm_mul_ps(h1Val1, h1Val1), _mm_mul_ps(h2Val1, h2Val1));
hsquare = _mm_hadd_ps(_mm_mul_ps(h1Val1, h1Val1), _mm_mul_ps(h2Val1, h2Val1));
if (nof_rxant == 2) {
h12square2 = _mm_hadd_ps(_mm_mul_ps(h1Val2, h1Val2), _mm_mul_ps(h2Val2, h2Val2));
h12square1 = _mm_add_ps(h12square1, h12square2);
hsquare2 = _mm_hadd_ps(_mm_mul_ps(h1Val2, h1Val2), _mm_mul_ps(h2Val2, h2Val2));
hsquare = _mm_add_ps(hsquare, hsquare2);
}
if (noise_estimate > 0) {
h12square1 = _mm_add_ps(h12square1, noise);
hsquare = _mm_add_ps(hsquare, noise);
}
h1square1 = _mm_shuffle_ps(h12square1, h12square1, _MM_SHUFFLE(1, 1, 0, 0));
h2square1 = _mm_shuffle_ps(h12square1, h12square1, _MM_SHUFFLE(3, 3, 2, 2));
if (nof_rxant == 2) {
h1square2 = _mm_shuffle_ps(h12square2, h12square2, _MM_SHUFFLE(1, 1, 0, 0));
h2square2 = _mm_shuffle_ps(h12square2, h12square2, _MM_SHUFFLE(3, 3, 2, 2));
h1square1 = _mm_add_ps(h1square1, h1square2);
h2square1 = _mm_add_ps(h2square1, h2square2);
}
h1square = _mm_shuffle_ps(hsquare, hsquare, _MM_SHUFFLE(1, 1, 0, 0));
h2square = _mm_shuffle_ps(hsquare, hsquare, _MM_SHUFFLE(3, 3, 2, 2));
/* Conjugate channel */
h1conj1 = _mm_xor_ps(h1Val1, conjugator);
@ -123,16 +115,25 @@ int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_
if (nof_rxant == 2) {
x1Val2 = PROD(y1Val2, h1conj2);
x2Val2 = PROD(y2Val2, h2conj2);
x1Val1 = _mm_add_ps(x1Val1, x1Val2);
x2Val1 = _mm_add_ps(x2Val1, x2Val2);
}
x1Val1 = _mm_div_ps(x1Val1, h1square1);
x2Val1 = _mm_div_ps(x2Val1, h2square1);
x1Val1 = _mm_div_ps(x1Val1, h1square);
x2Val1 = _mm_div_ps(x2Val1, h2square);
_mm_store_ps(xPtr, x1Val1); xPtr+=4;
_mm_store_ps(xPtr, x2Val1); xPtr+=4;
_mm_store_ps(xPtr, x1Val); xPtr+=4;
_mm_store_ps(xPtr, x2Val); xPtr+=4;
}
for (int i=8*(nof_symbols/8);i<nof_symbols;i++) {
x[i] = y[i]*conj(h[i])/(conj(h[i])*h[i]+noise_estimate);
cf_t r = 0;
cf_t hh = 0;
for (int p=0;p<nof_rxant;p++) {
r += y[p][i]*conj(h[p][i]);
hh += conj(h[p][i])*h[p][i];
}
x[i] = r/(hh+noise_estimate);
}
return nof_symbols;
}
@ -156,6 +157,8 @@ int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_
__m256 noise = _mm256_set1_ps(noise_estimate);
__m256 h1Val, h2Val, y1Val, y2Val, h12square, h1square, h2square, h1_p, h2_p, h1conj, h2conj, x1Val, x2Val;
printf("using avx\n");
for (int i=0;i<nof_symbols/8;i++) {
y1Val = _mm256_load_ps(yPtr); yPtr+=8;
y2Val = _mm256_load_ps(yPtr); yPtr+=8;
@ -188,7 +191,13 @@ int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_
_mm256_store_ps(xPtr, x2Val); xPtr+=8;
}
for (int i=16*(nof_symbols/16);i<nof_symbols;i++) {
x[i] = y[i]*conj(h[i])/(conj(h[i])*h[i]+noise_estimate);
cf_t r = 0;
cf_t hh = 0;
for (int p=0;p<nof_rxant;p++) {
r += y[p][i]*conj(h[p][i]);
hh += conj(h[p][i])*h[p][i];
}
x[i] = r/(hh+noise_estimate);
}
return nof_symbols;
}
@ -219,16 +228,16 @@ int srslte_predecoding_single(cf_t *y_, cf_t *h_, cf_t *x, int nof_symbols, floa
#ifdef LV_HAVE_AVX
if (nof_symbols > 32) {
return srslte_predecoding_single_avx(y, h, x, nof_symbols, noise_estimate);
return srslte_predecoding_single_avx(y, h, x, nof_rxant, nof_symbols, noise_estimate);
} else {
return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);
return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);
}
#else
#ifdef LV_HAVE_SSE
if (nof_symbols > 32) {
return srslte_predecoding_single_sse(y, h, x, nof_symbols, noise_estimate);
return srslte_predecoding_single_sse(y, h, x, nof_rxant, nof_symbols, noise_estimate);
} else {
return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);
return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);
}
#else
return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);
@ -240,16 +249,16 @@ int srslte_predecoding_single(cf_t *y_, cf_t *h_, cf_t *x, int nof_symbols, floa
int srslte_predecoding_single_multi(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_RXANT], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate) {
#ifdef LV_HAVE_AVX
if (nof_symbols > 32) {
return srslte_predecoding_single_avx(y, h, x, nof_symbols, noise_estimate);
return srslte_predecoding_single_avx(y, h, x, nof_rxant, nof_symbols, noise_estimate);
} else {
return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);
return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);
}
#else
#ifdef LV_HAVE_SSE
if (nof_symbols > 32) {
return srslte_predecoding_single_sse(y, h, x, nof_symbols, noise_estimate);
return srslte_predecoding_single_sse(y, h, x, nof_rxant, nof_symbols, noise_estimate);
} else {
return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);
return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);
}
#else
return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);

@ -33,6 +33,7 @@
#define INPUT prhs[0]
#define HEST prhs[1]
#define NEST prhs[2]
#define NOF_INPUTS 2
@ -56,43 +57,69 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
}
// Read input symbols
nof_symbols = mexutils_read_cf(INPUT, &input);
if (nof_symbols < 0) {
if (mexutils_read_cf(INPUT, &input) < 0) {
mexErrMsgTxt("Error reading input\n");
return;
}
uint32_t nof_tx_ports = 1;
uint32_t nof_rx_ants = 1;
const mwSize *dims = mxGetDimensions(INPUT);
mwSize ndims = mxGetNumberOfDimensions(INPUT);
nof_symbols = dims[0]*dims[1];
if (ndims >= 3) {
nof_rx_ants = dims[2];
}
if (ndims >= 4) {
nof_tx_ports = dims[3];
}
// Read channel estimates
uint32_t nof_symbols2 = mexutils_read_cf(HEST, &hest);
if (nof_symbols < 0) {
if (mexutils_read_cf(HEST, &hest) < 0) {
mexErrMsgTxt("Error reading hest\n");
return;
}
if ((nof_symbols2 % nof_symbols) != 0) {
mexErrMsgTxt("Hest size must be multiple of input size\n");
return;
// Read noise estimate
float noise_estimate = 0;
if (nrhs >= NOF_INPUTS) {
noise_estimate = mxGetScalar(NEST);
}
// Calculate number of ports
uint32_t nof_ports = nof_symbols2/nof_symbols;
cf_t *x[8];
cf_t *h[4];
cf_t *x[SRSLTE_MAX_LAYERS];
cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_RXANT];
cf_t *y[SRSLTE_MAX_RXANT];
for (int i=0;i<SRSLTE_MAX_LAYERS;i++) {
x[i] = NULL;
}
for (int i=0;i<SRSLTE_MAX_PORTS;i++) {
for (int j=0;j<SRSLTE_MAX_RXANT;j++) {
h[i][j] = NULL;
}
}
/* Allocate memory */
output = srslte_vec_malloc(sizeof(cf_t)*nof_symbols);
int i;
for (i = 0; i < nof_ports; i++) {
for (int i = 0; i < nof_tx_ports; i++) {
x[i] = srslte_vec_malloc(sizeof(cf_t)*nof_symbols);
h[i] = &hest[i*nof_symbols];
for (int j=0;j<nof_rx_ants;j++) {
h[i][j] = &hest[i*nof_symbols*nof_rx_ants + j*nof_symbols];
}
for (;i<8;i++) {
x[i] = NULL;
}
for (i=nof_ports;i<4;i++) {
h[i] = NULL;
for (int j=0;j<nof_rx_ants;j++) {
y[j] = &input[j*nof_symbols];
}
srslte_predecoding_diversity(input, h, x, nof_ports, nof_symbols);
srslte_layerdemap_diversity(x, output, nof_ports, nof_symbols / nof_ports);
mexPrintf("nof_tx_ports=%d, nof_rx_ants=%d, nof_symbols=%d\n", nof_tx_ports, nof_rx_ants, nof_symbols);
if (nof_tx_ports > 1) {
//srslte_predecoding_diversity(input, h, x, nof_tx_ports, nof_symbols);
//srslte_layerdemap_diversity(x, output, nof_tx_ports, nof_symbols / nof_tx_ports);
} else {
srslte_predecoding_single_multi(y, h[0], output, nof_rx_ants, nof_symbols, noise_estimate);
}
if (nlhs >= 1) {
@ -105,7 +132,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
if (output) {
free(output);
}
for (i=0;i<8;i++) {
for (int i=0;i<SRSLTE_MAX_LAYERS;i++) {
if (x[i]) {
free(x[i]);
}

Loading…
Cancel
Save