2rx equalizer working in matlab in gen/sse

8 years ago · 5fcb95d54d
parent c8bc53e749
commit 5fcb95d54d
3 changed files with 124 additions and 68 deletions
--- a/matlab/tests/diversity_decode_test.m
+++ b/matlab/tests/diversity_decode_test.m
@ -1,8 +1,9 @@
 clear

-addpath('../../build/srslte/lib/mimo/test')
+addpath('../../debug/srslte/lib/mimo/test')

-enb = lteRMCDL('R.10');
+%enb = lteRMCDL('R.10'); % 2 ports
+enb = lteRMCDL('R.0'); % 1 port

 cec = struct('FreqWindow',9,'TimeWindow',9,'InterpType','cubic');
 cec.PilotAverage = 'UserDefined';
@ -10,7 +11,7 @@ cec.InterpWinSize = 1;
 cec.InterpWindow = 'Causal';

 cfg.Seed = 1;                  % Random channel seed
-cfg.NRxAnts = 2;               % 1 receive antenna
+cfg.NRxAnts = 1;               % 1 receive antenna
 cfg.DelayProfile = 'ETU';      % ETU delay spread
 cfg.DopplerFreq = 100;           % 100Hz Doppler frequency
 cfg.MIMOCorrelation = 'Low';   % Low (no) MIMO correlation
@ -22,10 +23,9 @@ cfg.NormalizePathGains = 'On'; % Normalize delay profile power
 cfg.NormalizeTxAnts = 'On';    % Normalize for transmit antennas

 [txWaveform, ~, info] = lteRMCDLTool(enb,[1;0;0;1]);
-n = length(txWaveform);
 cfg.SamplingRate = info.SamplingRate;

-txWaveform = txWaveform+complex(randn(n,2),randn(n,2))*1e-3;
+txWaveform = txWaveform+complex(randn(size(txWaveform)),randn(size(txWaveform)))*1e-3;

 rxWaveform = lteFadingChannel(cfg,txWaveform);

@ -35,16 +35,36 @@ rxGrid = lteOFDMDemodulate(enb,rxWaveform);

 s=size(h);
 p=s(1);
-Nt=s(4);
+n=s(2);
+if (length(s)>2)
    Nr=s(3);
+else
+    Nr=1;
+end
+if (length(s)>3)
+    Nt=s(4);
+else
+    Nt=1;
+end

-rx=reshape(rxGrid(:,1,:),p,Nr);
-hp=reshape(h(:,1,:,:),p,Nr,Nt);
+if (Nr > 1)
+    rx=reshape(rxGrid,p,n,Nr);
+    hp=reshape(h,p,n,Nr,Nt);    
+else
+    rx=rxGrid;
+    hp=h;
+end

+if (Nt > 1) 
    output_mat = lteTransmitDiversityDecode(rx, hp); 
-output_srs = srslte_diversitydecode(rx, hp);
+else
+    output_mat = lteEqualizeMMSE(rx, hp, n0); 
+end
+output_srs = srslte_diversitydecode(rx, hp, n0);

-plot(abs(output_mat-output_srs))
-mean(abs(output_mat-output_srs).^2)
+plot(abs(output_mat(:)-output_srs(:)))
+mean(abs(output_mat(:)-output_srs(:)).^2)

+t=1:10;
+plot(t,real(output_mat(t)),t,real(output_srs(t)))

--- a/srslte/lib/mimo/precoding.c
+++ b/srslte/lib/mimo/precoding.c
@ -71,8 +71,8 @@ int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_
  __m128 noise = _mm_set1_ps(noise_estimate);
  __m128 h1Val1, h2Val1, y1Val1, y2Val1;
  __m128 h1Val2, h2Val2, y1Val2, y2Val2;
-  __m128 h12square1, h1square1, h2square1, h1conj1, h2conj1, x1Val1, x2Val1;
-  __m128 h12square2, h1square2, h2square2, h1conj2, h2conj2, x1Val2, x2Val2;
+  __m128 hsquare, h1square, h2square, h1conj1, h2conj1, x1Val1, x2Val1;
+  __m128 hsquare2, h1conj2, h2conj2, x1Val2, x2Val2;

  for (int i=0;i<nof_symbols/4;i++) {
    y1Val1 = _mm_load_ps(yPtr1); yPtr1+=4;
@ -87,25 +87,17 @@ int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_
      h2Val2 = _mm_load_ps(hPtr2); hPtr2+=4;      
    }
    
-    h12square1 = _mm_hadd_ps(_mm_mul_ps(h1Val1, h1Val1), _mm_mul_ps(h2Val1, h2Val1)); 
+    hsquare = _mm_hadd_ps(_mm_mul_ps(h1Val1, h1Val1), _mm_mul_ps(h2Val1, h2Val1)); 
    if (nof_rxant == 2) {
-      h12square2 = _mm_hadd_ps(_mm_mul_ps(h1Val2, h1Val2), _mm_mul_ps(h2Val2, h2Val2)); 
-      h12square1 = _mm_add_ps(h12square1, h12square2);
+      hsquare2 = _mm_hadd_ps(_mm_mul_ps(h1Val2, h1Val2), _mm_mul_ps(h2Val2, h2Val2)); 
+      hsquare = _mm_add_ps(hsquare, hsquare2);
    }
    if (noise_estimate > 0) {
-      h12square1  = _mm_add_ps(h12square1, noise);
+      hsquare  = _mm_add_ps(hsquare, noise);
    }
    
-    h1square1  = _mm_shuffle_ps(h12square1, h12square1, _MM_SHUFFLE(1, 1, 0, 0));
-    h2square1  = _mm_shuffle_ps(h12square1, h12square1, _MM_SHUFFLE(3, 3, 2, 2));
-    
-    if (nof_rxant == 2) {
-      h1square2  = _mm_shuffle_ps(h12square2, h12square2, _MM_SHUFFLE(1, 1, 0, 0));
-      h2square2  = _mm_shuffle_ps(h12square2, h12square2, _MM_SHUFFLE(3, 3, 2, 2));
-      
-      h1square1  = _mm_add_ps(h1square1, h1square2);
-      h2square1  = _mm_add_ps(h2square1, h2square2);
-    }
+    h1square  = _mm_shuffle_ps(hsquare, hsquare, _MM_SHUFFLE(1, 1, 0, 0));
+    h2square  = _mm_shuffle_ps(hsquare, hsquare, _MM_SHUFFLE(3, 3, 2, 2));
    
    /* Conjugate channel */
    h1conj1 = _mm_xor_ps(h1Val1, conjugator); 
@ -123,16 +115,25 @@ int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_
    if (nof_rxant == 2) {
      x1Val2 = PROD(y1Val2, h1conj2);
      x2Val2 = PROD(y2Val2, h2conj2);
+      x1Val1 = _mm_add_ps(x1Val1, x1Val2);
+      x2Val1 = _mm_add_ps(x2Val1, x2Val2);
    }
    
-    x1Val1 = _mm_div_ps(x1Val1, h1square1);
-    x2Val1 = _mm_div_ps(x2Val1, h2square1);
+    x1Val1 = _mm_div_ps(x1Val1, h1square);
+    x2Val1 = _mm_div_ps(x2Val1, h2square);
+    
+    _mm_store_ps(xPtr, x1Val1); xPtr+=4;
+    _mm_store_ps(xPtr, x2Val1); xPtr+=4;
    
-    _mm_store_ps(xPtr, x1Val); xPtr+=4;
-    _mm_store_ps(xPtr, x2Val); xPtr+=4;
  }
  for (int i=8*(nof_symbols/8);i<nof_symbols;i++) {
-    x[i] = y[i]*conj(h[i])/(conj(h[i])*h[i]+noise_estimate);
+    cf_t r  = 0; 
+    cf_t hh = 0; 
+    for (int p=0;p<nof_rxant;p++) {
+      r  += y[p][i]*conj(h[p][i]);
+      hh += conj(h[p][i])*h[p][i];
+    }
+    x[i] = r/(hh+noise_estimate);
  }
  return nof_symbols;
 }
@ -156,6 +157,8 @@ int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_
  __m256 noise = _mm256_set1_ps(noise_estimate);
  __m256 h1Val, h2Val, y1Val, y2Val, h12square, h1square, h2square, h1_p, h2_p, h1conj, h2conj, x1Val, x2Val;

+  printf("using avx\n");
+    
  for (int i=0;i<nof_symbols/8;i++) {
    y1Val = _mm256_load_ps(yPtr); yPtr+=8;
    y2Val = _mm256_load_ps(yPtr); yPtr+=8;
@ -188,7 +191,13 @@ int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_
    _mm256_store_ps(xPtr, x2Val); xPtr+=8;
  }
  for (int i=16*(nof_symbols/16);i<nof_symbols;i++) {
-    x[i] = y[i]*conj(h[i])/(conj(h[i])*h[i]+noise_estimate);
+    cf_t r  = 0; 
+    cf_t hh = 0; 
+    for (int p=0;p<nof_rxant;p++) {
+      r  += y[p][i]*conj(h[p][i]);
+      hh += conj(h[p][i])*h[p][i];
+    }
+    x[i] = r/(hh+noise_estimate);
  }
  return nof_symbols;
 }
@ -219,16 +228,16 @@ int srslte_predecoding_single(cf_t *y_, cf_t *h_, cf_t *x, int nof_symbols, floa
  
 #ifdef LV_HAVE_AVX
  if (nof_symbols > 32) {
-    return srslte_predecoding_single_avx(y, h, x, nof_symbols, noise_estimate);
+    return srslte_predecoding_single_avx(y, h, x, nof_rxant, nof_symbols, noise_estimate);
  } else {
-    return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);
+    return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);
  }
 #else
  #ifdef LV_HAVE_SSE
    if (nof_symbols > 32) {
-      return srslte_predecoding_single_sse(y, h, x, nof_symbols, noise_estimate);
+      return srslte_predecoding_single_sse(y, h, x, nof_rxant, nof_symbols, noise_estimate);
    } else {
-      return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);      
+      return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);      
    }
  #else
    return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);
@ -240,16 +249,16 @@ int srslte_predecoding_single(cf_t *y_, cf_t *h_, cf_t *x, int nof_symbols, floa
 int srslte_predecoding_single_multi(cf_t *y[SRSLTE_MAX_RXANT], cf_t *h[SRSLTE_MAX_RXANT], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate) {
 #ifdef LV_HAVE_AVX
  if (nof_symbols > 32) {
-    return srslte_predecoding_single_avx(y, h, x, nof_symbols, noise_estimate);
+    return srslte_predecoding_single_avx(y, h, x, nof_rxant, nof_symbols, noise_estimate);
  } else {
-    return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);
+    return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);
  }
 #else
  #ifdef LV_HAVE_SSE
    if (nof_symbols > 32) {
-      return srslte_predecoding_single_sse(y, h, x, nof_symbols, noise_estimate);
+      return srslte_predecoding_single_sse(y, h, x, nof_rxant, nof_symbols, noise_estimate);
    } else {
-      return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);      
+      return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);      
    }
  #else
    return srslte_predecoding_single_gen(y, h, x, nof_rxant, nof_symbols, noise_estimate);
--- a/srslte/lib/mimo/test/diversitydecode_mex.c
+++ b/srslte/lib/mimo/test/diversitydecode_mex.c
@ -33,6 +33,7 @@

 #define INPUT prhs[0]
 #define HEST  prhs[1]
+#define NEST  prhs[2]
 #define NOF_INPUTS 2


@ -56,43 +57,69 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
  }
  
  // Read input symbols
-  nof_symbols = mexutils_read_cf(INPUT, &input);
-  if (nof_symbols < 0) {
+  if (mexutils_read_cf(INPUT, &input) < 0) {
    mexErrMsgTxt("Error reading input\n");
    return; 
  }
+  uint32_t nof_tx_ports = 1; 
+  uint32_t nof_rx_ants  = 1; 
+  const mwSize *dims = mxGetDimensions(INPUT);
+  mwSize ndims = mxGetNumberOfDimensions(INPUT);
+  nof_symbols = dims[0]*dims[1];
+  
+  if (ndims >= 3) {
+    nof_rx_ants = dims[2];
+  }
+  if (ndims >= 4) {
+    nof_tx_ports = dims[3];
+  }
+  
  // Read channel estimates
-  uint32_t nof_symbols2 = mexutils_read_cf(HEST, &hest);
-  if (nof_symbols < 0) {
+  if (mexutils_read_cf(HEST, &hest) < 0) {
    mexErrMsgTxt("Error reading hest\n");
    return; 
  }
-  if ((nof_symbols2 % nof_symbols) != 0) {
-    mexErrMsgTxt("Hest size must be multiple of input size\n");
-    return; 
+  
+  // Read noise estimate
+  float noise_estimate = 0; 
+  if (nrhs >= NOF_INPUTS) {
+    noise_estimate = mxGetScalar(NEST);
  }
-  // Calculate number of ports
-  uint32_t nof_ports = nof_symbols2/nof_symbols; 
  
-  cf_t *x[8]; 
-  cf_t *h[4];
+  cf_t *x[SRSLTE_MAX_LAYERS]; 
+  cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_RXANT];
+  cf_t *y[SRSLTE_MAX_RXANT];
+
+  for (int i=0;i<SRSLTE_MAX_LAYERS;i++) {
+    x[i] = NULL; 
+  }
+  for (int i=0;i<SRSLTE_MAX_PORTS;i++) {
+    for (int j=0;j<SRSLTE_MAX_RXANT;j++) {
+      h[i][j] = NULL; 
+    }
+  }
  
  /* Allocate memory */
  output = srslte_vec_malloc(sizeof(cf_t)*nof_symbols);
-  int i;
-  for (i = 0; i < nof_ports; i++) {
+  for (int i = 0; i < nof_tx_ports; i++) {
    x[i] = srslte_vec_malloc(sizeof(cf_t)*nof_symbols);
-    h[i] = &hest[i*nof_symbols];
+    for (int j=0;j<nof_rx_ants;j++) {
+      h[i][j] = &hest[i*nof_symbols*nof_rx_ants + j*nof_symbols];
    }
-  for (;i<8;i++) {
-    x[i] = NULL; 
  }
-  for (i=nof_ports;i<4;i++) {
-    h[i] = NULL; 
+  
+  for (int j=0;j<nof_rx_ants;j++) {
+    y[j] = &input[j*nof_symbols];
  }
  
-  srslte_predecoding_diversity(input, h, x, nof_ports, nof_symbols); 
-  srslte_layerdemap_diversity(x, output, nof_ports, nof_symbols / nof_ports);
+  mexPrintf("nof_tx_ports=%d, nof_rx_ants=%d, nof_symbols=%d\n", nof_tx_ports, nof_rx_ants, nof_symbols);
+  
+  if (nof_tx_ports > 1) {
+    //srslte_predecoding_diversity(input, h, x, nof_tx_ports, nof_symbols); 
+    //srslte_layerdemap_diversity(x, output, nof_tx_ports, nof_symbols / nof_tx_ports);
+  } else {
+    srslte_predecoding_single_multi(y, h[0], output, nof_rx_ants, nof_symbols, noise_estimate);
+  }
  

  if (nlhs >= 1) { 
@ -105,7 +132,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
  if (output) {
    free(output);
  }
-  for (i=0;i<8;i++) {
+  for (int i=0;i<SRSLTE_MAX_LAYERS;i++) {
    if (x[i]) {
      free(x[i]);      
    }