Added more SIMD vector functions for the UE

master
Ismael Gomez 8 years ago
parent ab0d946a68
commit 9acb1002e9

@ -53,11 +53,15 @@ SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len)
// Prototypes of the *_simd vector routines exported through SRSLTE_API.
SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, uint32_t len);
SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len);
SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len);
// NOTE(review): srslte_vec_sum_fff_simd is declared twice in a row; the repeat is redundant.
SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len);
// NOTE(review): the second operand is named `h` here but `y` in the definition.
SRSLTE_API void srslte_vec_sub_fff_simd(float *x, float *h, float *z, uint32_t len);
SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len);
@ -65,6 +69,10 @@ SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t
// Further *_simd prototypes: scaling by a complex or real scalar, plus routines
// returning a single value (presumably an accumulation and a complex-by-real
// dot product, going by their names -- their definitions are not shown here).
SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len);
SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len);
SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len);
SRSLTE_API float srslte_vec_acc_ff_simd(float *x, uint32_t len);
SRSLTE_API cf_t srslte_vec_dot_prod_cfc_simd(cf_t *x, float *y, uint32_t len);

@ -44,7 +44,7 @@ int srslte_vec_acc_ii(int *x, uint32_t len) {
return z;
}
// Used in PRACH detector
// Used in PRACH detector, AGC and chest_dl for noise averaging
float srslte_vec_acc_ff(float *x, uint32_t len) {
int i;
float z=0;
@ -79,10 +79,14 @@ void srslte_vec_square_dist(cf_t symbol, cf_t *points, float *distance, uint32_t
}
/* Element-wise difference of two float vectors: z[k] = x[k] - y[k] for k in [0, len).
 *
 * x, y : input vectors, at least len elements each
 * z    : output vector, at least len elements
 * len  : number of elements to process
 */
void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
  /* SSE build: hand the whole vector to the SIMD kernel. */
  srslte_vec_sub_fff_simd(x, y, z, len);
#else
  /* Portable build: plain scalar loop. */
  int k = 0;
  while (k < len) {
    z[k] = x[k] - y[k];
    k++;
  }
#endif
}
void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
@ -96,6 +100,7 @@ void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
#endif
}
// Noise estimation in chest_dl, interpolation
/* Complex element-wise difference z = x - y over len samples.
 *
 * Implemented by viewing each cf_t buffer as 2*len floats and calling the real
 * subtraction (assumes cf_t is laid out as a {real, imag} float pair -- confirm
 * against the cf_t typedef).
 */
void srslte_vec_sub_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len) {
  /* No `return` here: a void function must not return an expression in ISO C. */
  srslte_vec_sub_fff((float*) x, (float*) y, (float*) z, 2*len);
}
@ -161,12 +166,16 @@ void srslte_vec_sc_add_sss(int16_t *x, int16_t h, int16_t *z, uint32_t len) {
z[i] += h;
}
}
// PSS, PBCH, DEMOD, FFTW, etc.
/* Scale a float vector by a scalar: z[k] = x[k] * h for k in [0, len).
 *
 * x   : input vector, at least len elements
 * h   : scalar multiplier
 * z   : output vector, at least len elements
 * len : number of elements to process
 */
void srslte_vec_sc_prod_fff(float *x, float h, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
  /* SSE build: use the SIMD kernel. */
  srslte_vec_sc_prod_fff_simd(x, h, z, len);
#else
  /* Portable build: scalar loop. */
  int k = 0;
  while (k < len) {
    z[k] = x[k] * h;
    k++;
  }
#endif
}
void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len) {
@ -490,8 +499,9 @@ void srslte_vec_prod_conj_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
#endif
}
#define DIV_USE_VEC
//#define DIV_USE_VEC
// Used in SSS
/* Complex division is conjugate multiplication + real division */
void srslte_vec_div_ccc(cf_t *x, cf_t *y, float *y_mod, cf_t *z, float *z_real, float *z_imag, uint32_t len) {
#ifdef DIV_USE_VEC
@ -528,16 +538,21 @@ void srslte_vec_div_fff(float *x, float *y, float *z, uint32_t len) {
}
}
// PSS. convolution
/* Complex dot product without conjugation: returns the sum of x[k]*y[k] for k in [0, len).
 *
 * x, y : input vectors, at least len elements each
 * len  : number of elements to process
 */
cf_t srslte_vec_dot_prod_ccc(cf_t *x, cf_t *y, uint32_t len) {
#ifdef LV_HAVE_SSE
  /* SSE build: use the SIMD kernel. */
  return srslte_vec_dot_prod_ccc_simd(x, y, len);
#else
  /* Portable build: accumulate in index order. */
  cf_t acc = 0;
  uint32_t k = 0;
  while (k < len) {
    acc += x[k]*y[k];
    k++;
  }
  return acc;
#endif
}
// Convolution filter
// Convolution filter and in SSS search
cf_t srslte_vec_dot_prod_cfc(cf_t *x, float *y, uint32_t len) {
uint32_t i;
cf_t res = 0;
@ -547,13 +562,19 @@ cf_t srslte_vec_dot_prod_cfc(cf_t *x, float *y, uint32_t len) {
return res;
}
// SYNC
/* Conjugated complex dot product: returns the sum of x[i]*conj(y[i]) for i in [0, len).
 *
 * x, y : input vectors, at least len elements each
 * len  : number of elements to process
 *
 * The scalar path accumulates only the conjugated product, matching the SIMD
 * path, which multiplies through _mm_complexmulconj_ps.
 */
cf_t srslte_vec_dot_prod_conj_ccc(cf_t *x, cf_t *y, uint32_t len) {
#ifndef LV_HAVE_SSE
  uint32_t i;
  cf_t res = 0;
  for (i=0;i<len;i++) {
    /* One accumulation per sample; no extra non-conjugated x[i]*y[i] term. */
    res += x[i]*conjf(y[i]);
  }
  return res;
#else
  return srslte_vec_dot_prod_conj_ccc_simd(x, y, len);
#endif
}
// PHICH
@ -583,7 +604,7 @@ float srslte_vec_avg_power_cf(cf_t *x, uint32_t len) {
return crealf(srslte_vec_dot_prod_conj_ccc(x,x,len)) / len;
}
// PSS
// PSS (disabled and using abs_square )
void srslte_vec_abs_cf(cf_t *x, float *abs, uint32_t len) {
int i;
for (i=0;i<len;i++) {

@ -314,6 +314,38 @@ void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len) {
#endif
}
/* Element-wise difference of two float vectors: z[k] = x[k] - y[k] for k in [0, len).
 *
 * x, y : input vectors, at least len elements each (unaligned access is used)
 * z    : output vector, at least len elements
 * len  : number of elements to process
 *
 * With LV_HAVE_SSE the bulk is processed four floats at a time and the scalar
 * loop handles the remaining len % 4 elements. Without LV_HAVE_SSE the scalar
 * loop processes the whole vector, so the function never silently leaves z
 * unwritten.
 */
void srslte_vec_sub_fff_simd(float *x, float *y, float *z, uint32_t len) {
  unsigned int number = 0;
#ifdef LV_HAVE_SSE
  const unsigned int points = len / 4;

  const float* xPtr = (const float*) x;
  const float* yPtr = (const float*) y;
  float* zPtr = (float*) z;

  __m128 xVal, yVal, zVal;
  for(;number < points; number++){
    xVal = _mm_loadu_ps(xPtr);
    yVal = _mm_loadu_ps(yPtr);

    zVal = _mm_sub_ps(xVal, yVal);

    _mm_storeu_ps(zPtr, zVal);

    xPtr += 4;
    yPtr += 4;
    zPtr += 4;
  }

  /* Resume at the first element not covered by the vector loop. */
  number = points * 4;
#endif
  /* Scalar tail (or whole vector without SSE): must subtract, like the vector loop. */
  for(;number < len; number++){
    z[number] = x[number] - y[number];
  }
}
#ifdef LV_HAVE_SSE
static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) {
__m128 yl, yh, tmp1, tmp2;
@ -326,6 +358,97 @@ static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) {
}
#endif
#ifdef LV_HAVE_SSE
/* Returns _mm_complexmul_ps(x, y') where y' is y with the sign of lanes 1 and 3
 * flipped (presumably the imaginary parts of two packed complex values, which
 * makes this a multiply by the conjugate of y).
 */
static inline __m128 _mm_complexmulconj_ps(__m128 x, __m128 y) {
  /* -0.f carries only the sign bit, so XOR-ing with it negates the lane. */
  const __m128 signMask = _mm_setr_ps(0, -0.f, 0, -0.f);
  return _mm_complexmul_ps(x, _mm_xor_ps(y, signMask));
}
#endif
/* Complex dot product without conjugation, SSE version: sum of x[k]*y[k] for k in [0, len).
 *
 * Two complex samples (four floats) are multiplied per iteration and summed in
 * a two-lane accumulator; the lanes are then folded into the result and any odd
 * trailing sample is added with scalar arithmetic.
 * Returns 0 when built without LV_HAVE_SSE.
 */
cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len)
{
  cf_t total = 0;
#ifdef LV_HAVE_SSE
  const unsigned int pairs = len / 2;
  const float *px = (const float*) x;
  const float *py = (const float*) y;
  __m128 acc = _mm_setzero_ps();

  /* Vector part: one pair of complex samples per step. */
  for (unsigned int k = 0; k < pairs; k++, px += 4, py += 4) {
    __m128 prod = _mm_complexmul_ps(_mm_loadu_ps(px), _mm_loadu_ps(py));
    acc = _mm_add_ps(acc, prod);
  }

  /* Fold the two partial sums held in the accumulator. */
  cf_t lanes[2];
  _mm_storeu_ps((float*) lanes, acc);
  total += lanes[0];
  total += lanes[1];

  /* Scalar tail for an odd len. */
  for (unsigned int k = pairs * 2; k < len; k++) {
    total += (x[k] * y[k]);
  }
#endif
  return total;
}
/* Conjugated complex dot product, SSE version: sum of x[k]*conj(y[k]) for k in [0, len).
 *
 * x, y : input vectors, at least len elements each (unaligned loads are used)
 * len  : number of complex samples
 *
 * Two complex samples are processed per vector iteration; an odd trailing
 * sample is handled by the scalar tail, which applies the same conjugation as
 * the vector loop. Returns 0 when built without LV_HAVE_SSE.
 */
cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len)
{
  cf_t result = 0;
#ifdef LV_HAVE_SSE
  unsigned int number = 0;
  const unsigned int points = len / 2;

  const float* xPtr = (const float*) x;
  const float* yPtr = (const float*) y;

  __m128 dotProdVal = _mm_setzero_ps();
  __m128 xVal, yVal, zVal;
  for(;number < points; number++){
    xVal = _mm_loadu_ps(xPtr);
    yVal = _mm_loadu_ps(yPtr);

    zVal = _mm_complexmulconj_ps(xVal, yVal);
    dotProdVal = _mm_add_ps(dotProdVal, zVal);

    xPtr += 4;
    yPtr += 4;
  }

  /* Fold the two partial complex sums held in the accumulator. */
  cf_t dotProdVector[2];
  _mm_storeu_ps((float*) dotProdVector, dotProdVal);
  for (int i=0;i<2;i++) {
    result += dotProdVector[i];
  }

  /* Scalar tail for an odd len: conjugate y here too, like the vector loop. */
  number = points * 2;
  for(;number < len; number++){
    result += (x[number] * conjf(y[number]));
  }
#endif
  return result;
}
void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
@ -355,13 +478,6 @@ void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len)
#endif
}
#ifdef LV_HAVE_SSE
// Returns _mm_complexmul_ps(x, y') where y' is y with the sign of lanes 1 and 3 flipped.
static inline __m128 _mm_complexmulconj_ps(__m128 x, __m128 y) {
// -0.f carries only the sign bit, so the XOR below negates lanes 1 and 3
// (presumably the imaginary parts of two packed complex values) and leaves
// lanes 0 and 2 untouched.
const __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
y = _mm_xor_ps(y, conjugator);
return _mm_complexmul_ps(x, y);
}
#endif
void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
#ifdef LV_HAVE_SSE
@ -454,6 +570,40 @@ void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len) {
#endif
}
/* Scale a float vector by a scalar: z[k] = x[k] * h for k in [0, len).
 *
 * x   : input vector, at least len elements (unaligned loads are used)
 * h   : scalar multiplier
 * z   : output vector, at least len elements
 * len : number of elements to process
 *
 * With LV_HAVE_SSE the bulk is processed four floats at a time and the scalar
 * loop handles the remaining len % 4 elements. Without LV_HAVE_SSE the scalar
 * loop processes the whole vector, so z is always written.
 */
void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, uint32_t len) {
  unsigned int number = 0;
#ifdef LV_HAVE_SSE
  const unsigned int quarterPoints = len / 4;
  __m128 xVal, hVal, zVal;
  float* zPtr = (float*) z;
  const float* xPtr = (const float*) x;

  // Set up constant scalar vector
  hVal = _mm_set_ps1(h);
  for(;number < quarterPoints; number++){
    xVal = _mm_loadu_ps(xPtr);

    zVal = _mm_mul_ps(xVal,hVal);

    _mm_storeu_ps(zPtr,zVal);

    xPtr += 4;
    zPtr += 4;
  }

  // Resume at the first element not covered by the vector loop
  number = quarterPoints * 4;
#endif
  // Scalar tail (or the whole vector when SSE is unavailable)
  for(;number < len; number++){
    z[number] = x[number] * h;
  }
}
void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;

Loading…
Cancel
Save