Added SIMD turbo rate recovery

master
ismagom 9 years ago
parent 88cd40420a
commit 9e685eca32

@ -0,0 +1,14 @@
function [ out ] = write_int16( filename, x)
%READ_COMPLEX Summary of this function goes here
% Detailed explanation goes here
[tidin msg]=fopen(filename,'w');
if (tidin==-1)
fprintf('error opening %s: %s\n',filename, msg);
out=[];
return
end
fwrite(tidin,x,'int16');
end

@ -0,0 +1,14 @@
function [ out ] = write_complex( filename, x)
%READ_COMPLEX Summary of this function goes here
% Detailed explanation goes here
[tidin msg]=fopen(filename,'w');
if (tidin==-1)
fprintf('error opening %s: %s\n',filename, msg);
out=[];
return
end
fwrite(tidin,x,'single');
end

@ -0,0 +1,14 @@
function [ out ] = write_uchar( filename, x)
%READ_COMPLEX Summary of this function goes here
% Detailed explanation goes here
[tidin msg]=fopen(filename,'w');
if (tidin==-1)
fprintf('error opening %s: %s\n',filename, msg);
out=[];
return
end
fwrite(tidin,x,'uint8');
end

@ -37,7 +37,7 @@
#include "srslte/utils/vector.h" #include "srslte/utils/vector.h"
#include "srslte/fec/cbsegm.h" #include "srslte/fec/cbsegm.h"
//#define HAVE_SIMD #define HAVE_SIMD
#ifdef HAVE_SIMD #ifdef HAVE_SIMD
#include <xmmintrin.h> #include <xmmintrin.h>
@ -321,45 +321,60 @@ int srslte_rm_turbo_rx_lut_simd(int16_t *input, int16_t *output, uint32_t in_len
const __m128i* xPtr = (const __m128i*) input; const __m128i* xPtr = (const __m128i*) input;
const __m128i* lutPtr = (const __m128i*) deinter; const __m128i* lutPtr = (const __m128i*) deinter;
printf("\nin_len=%d, out_len=%d\n", in_len, out_len);
srslte_vec_fprint_s(stdout, input, in_len);
__m128i xVal, lutVal; __m128i xVal, lutVal;
int intCnt = 8;
int nwrapps = 0; /* Simplify load if we do not need to wrap (ie high rates) */
for (int i=0;i<in_len/8-1-nwrapps/2;i++) { if (in_len <= out_len) {
xVal = _mm_loadu_si128(xPtr); for (int i=0;i<in_len/8;i++) {
lutVal = _mm_load_si128(lutPtr); xVal = _mm_load_si128(xPtr);
lutVal = _mm_load_si128(lutPtr);
for (int j=0;j<8;j++) {
int16_t x = (int16_t) _mm_extract_epi16(xVal, j); for (int j=0;j<8;j++) {
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j); int16_t x = (int16_t) _mm_extract_epi16(xVal, j);
output[l] += x; uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j);
output[l] += x;
}
xPtr ++;
lutPtr ++;
} }
printf("x: "); print128_num(xVal); for (int i=8*(in_len/8);i<in_len;i++) {
printf("l: "); print128_num(lutVal); output[deinter[i%out_len]] += input[i];
xPtr ++; }
lutPtr ++; } else {
intCnt += 8; int intCnt = 8;
if (intCnt >= out_len) { int inputCnt = 0;
/* Copy last elements */ int nwrapps = 0;
for (int j=nwrapps*out_len+intCnt-8;j<(nwrapps+1)*out_len;j++) { while(inputCnt < in_len - 8) {
printf("coping element %d (in=%d)\n", j, input[j]); xVal = _mm_loadu_si128(xPtr);
output[deinter[j]] += input[j]; lutVal = _mm_load_si128(lutPtr);
for (int j=0;j<8;j++) {
int16_t x = (int16_t) _mm_extract_epi16(xVal, j);
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j);
output[l] += x;
}
xPtr++;
lutPtr++;
intCnt += 8;
inputCnt += 8;
if (intCnt >= out_len && inputCnt < in_len - 8) {
/* Copy last elements */
for (int j=(nwrapps+1)*out_len-4;j<(nwrapps+1)*out_len;j++) {
output[deinter[j%out_len]] += input[j];
inputCnt++;
}
/* And wrap pointers */
nwrapps++;
intCnt = 8;
xPtr = (const __m128i*) &input[nwrapps*out_len];
lutPtr = (const __m128i*) deinter;
} }
/* And wrap pointers */
nwrapps++;
printf("--- Wrapping: intCnt=%d, nwrap=%d\n",intCnt, nwrapps);
intCnt = 8;
xPtr = (const __m128i*) &input[nwrapps*out_len];
lutPtr = (const __m128i*) deinter;
} }
for (int i=inputCnt;i<in_len;i++) {
output[deinter[i%out_len]] += input[i];
}
} }
for (int i=8*(in_len/8-1)+(((8*(in_len/8))%out_len)%8);i<in_len;i++) {
printf("copying i=%d, val=%d, t=%d\n",i, input[i],deinter[i%out_len]);
output[deinter[i%out_len]] += input[i];
}
return 0; return 0;
} else { } else {

@ -176,7 +176,7 @@ int main(int argc, char **argv) {
printf("OK TX..."); printf("OK TX...");
for (int i=0;i<nof_e_bits;i++) { for (int i=0;i<nof_e_bits;i++) {
rm_bits_f[i] = i;//rand()%10-5; rm_bits_f[i] = rand()%10-5;
rm_bits_s[i] = (short) rm_bits_f[i]; rm_bits_s[i] = (short) rm_bits_f[i];
} }

Loading…
Cancel
Save