fixed conflicts tests not passing

master
Ismael Gomez 8 years ago
commit 2f510fbe83

@ -51,6 +51,8 @@ SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, ui
// Complex product of two vectors; the scalar tail of the definition computes z[i] = x[i] * y[i].
SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len);
// Scales complex samples by a real factor: z[i] = x[i] * h.
SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len);
// Squared magnitude per sample; the scalar tail computes re(x[i])^2 + im(x[i])^2.
SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len);
// NOTE(review): presumably the complex dot product of x and y over len samples;
// the definition is not visible in this view -- confirm (including any conjugation).
SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len);

@ -201,19 +201,18 @@ void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len) {
// Used throughout
// Scales each complex sample of x by the real factor h: z[i] = x[i]*h for i in [0, len).
// NOTE(review): this listing appears to interleave the removed and added sides of a
// diff hunk (duplicated #ifndef; both the hh/ccc_simd call and the cfc_simd call are
// present) -- confirm against the committed file before relying on the exact sequence.
void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) {
#ifndef LV_HAVE_SSE
#ifndef LV_HAVE_SSE
// Plain scalar loop, compiled when LV_HAVE_SSE is not defined
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*h;
}
#else
// Build a complex scalar hh = h + 0j so the complex-by-complex kernel can be used
cf_t hh;
__real__ hh = h;
__imag__ hh = 0;
srslte_vec_sc_prod_ccc_simd(x,hh,z,len);
// Complex-by-real kernel: takes h directly, no complex promotion needed
srslte_vec_sc_prod_cfc_simd(x, h, z, len);
#endif
}
// Chest UL
void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
#ifndef LV_HAVE_SSE

@ -57,8 +57,8 @@ int srslte_vec_dot_prod_sss_simd(short *x, short *y, uint32_t len)
__m128i xVal, yVal, zVal;
for(;number < points; number++){
xVal = _mm_load_si128(xPtr);
yVal = _mm_load_si128(yPtr);
xVal = _mm_loadu_si128(xPtr);
yVal = _mm_loadu_si128(yPtr);
zVal = _mm_mullo_epi16(xVal, yVal);
@ -69,7 +69,7 @@ int srslte_vec_dot_prod_sss_simd(short *x, short *y, uint32_t len)
}
short dotProdVector[8];
_mm_store_si128((__m128i*) dotProdVector, dotProdVal);
_mm_storeu_si128((__m128i*) dotProdVector, dotProdVal);
for (int i=0;i<8;i++) {
result += dotProdVector[i];
}
@ -96,12 +96,12 @@ void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len)
__m128i xVal, yVal, zVal;
for(;number < points; number++){
xVal = _mm_load_si128(xPtr);
yVal = _mm_load_si128(yPtr);
xVal = _mm_loadu_si128(xPtr);
yVal = _mm_loadu_si128(yPtr);
zVal = _mm_add_epi16(xVal, yVal);
_mm_store_si128(zPtr, zVal);
_mm_storeu_si128(zPtr, zVal);
xPtr ++;
yPtr ++;
@ -129,12 +129,12 @@ void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len)
__m128i xVal, yVal, zVal;
for(;number < points; number++){
xVal = _mm_load_si128(xPtr);
yVal = _mm_load_si128(yPtr);
xVal = _mm_loadu_si128(xPtr);
yVal = _mm_loadu_si128(yPtr);
zVal = _mm_sub_epi16(xVal, yVal);
_mm_store_si128(zPtr, zVal);
_mm_storeu_si128(zPtr, zVal);
xPtr ++;
yPtr ++;
@ -161,12 +161,12 @@ void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len)
__m128i xVal, yVal, zVal;
for(;number < points; number++){
xVal = _mm_load_si128(xPtr);
yVal = _mm_load_si128(yPtr);
xVal = _mm_loadu_si128(xPtr);
yVal = _mm_loadu_si128(yPtr);
zVal = _mm_mullo_epi16(xVal, yVal);
_mm_store_si128(zPtr, zVal);
_mm_storeu_si128(zPtr, zVal);
xPtr ++;
yPtr ++;
@ -192,11 +192,11 @@ void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
__m128i xVal, zVal;
for(;number < points; number++){
xVal = _mm_load_si128(xPtr);
xVal = _mm_loadu_si128(xPtr);
zVal = _mm_srai_epi16(xVal, k);
_mm_store_si128(zPtr, zVal);
_mm_storeu_si128(zPtr, zVal);
xPtr ++;
zPtr ++;
@ -223,8 +223,8 @@ void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t l
__m128i xVal, lutVal;
for(;number < points; number++){
xVal = _mm_load_si128(xPtr);
lutVal = _mm_load_si128(lutPtr);
xVal = _mm_loadu_si128(xPtr);
lutVal = _mm_loadu_si128(lutPtr);
for (int i=0;i<8;i++) {
_mm_shuffle_epi8(xVal,_mm_set1_epi8(i));
@ -350,8 +350,9 @@ void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len)
zPtr += 4;
}
if((len % 2) != 0){
*zPtr = (*xPtr) * (*yPtr);
number = halfPoints * 2;
for(;number < len; number++){
z[number] = x[number] * y[number];
}
#endif
}
@ -375,18 +376,19 @@ void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
const float* yPtr = (const float*) y;
for(; number < halfPoints; number++){
xVal = _mm_load_ps(xPtr);
yVal = _mm_load_ps(yPtr);
xVal = _mm_loadu_ps(xPtr);
yVal = _mm_loadu_ps(yPtr);
zVal = _mm_complexmulconj_ps(xVal, yVal);
_mm_store_ps(zPtr, zVal);
_mm_storeu_ps(zPtr, zVal);
xPtr += 4;
yPtr += 4;
zPtr += 4;
}
if((len % 2) != 0){
*zPtr = (*xPtr) * (*yPtr);
number = halfPoints * 2;
for(;number < len; number++){
z[number] = x[number] * conjf(y[number]);
}
#endif
}
@ -406,7 +408,7 @@ void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
for(;number < halfPoints; number++){
xVal = _mm_load_ps(xPtr);
xVal = _mm_loadu_ps(xPtr);
tmp1 = _mm_mul_ps(xVal,yl);
xVal = _mm_shuffle_ps(xVal,xVal,0xB1);
tmp2 = _mm_mul_ps(xVal,yh);
@ -417,12 +419,43 @@ void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
zPtr += 4;
}
if((len % 2) != 0) {
*zPtr = (*xPtr) * h;
number = halfPoints * 2;
for(;number < len; number++){
z[number] = x[number] * h;
}
#endif
}
/**
 * Multiplies every complex sample of x by the real scalar h and stores the
 * result in z: z[i] = x[i] * h for 0 <= i < len.
 *
 * With LV_HAVE_SSE defined, samples are processed two at a time (one 128-bit
 * register holds two interleaved re/im float pairs) using unaligned loads and
 * stores; a trailing odd sample is handled by the scalar loop. Without
 * LV_HAVE_SSE the scalar loop processes the whole buffer, so the function
 * never returns with z left unwritten.
 *
 * @param x    input samples (len elements)
 * @param h    real scale factor
 * @param z    output samples (len elements)
 * @param len  number of complex samples
 */
void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len) {
  uint32_t number = 0;

#ifdef LV_HAVE_SSE
  const uint32_t halfPoints = len / 2;
  const float *xPtr = (const float*) x;
  float *zPtr = (float*) z;

  // Broadcast h to all four lanes: a real factor scales the real and
  // imaginary parts alike, so a plain lane-wise multiply is sufficient.
  const __m128 hVal = _mm_set_ps1(h);

  for (; number < halfPoints; number++) {
    const __m128 xVal = _mm_loadu_ps(xPtr);
    _mm_storeu_ps(zPtr, _mm_mul_ps(xVal, hVal));
    xPtr += 4;
    zPtr += 4;
  }

  // Resume the scalar loop at the first sample the vector loop did not cover
  number = halfPoints * 2;
#endif

  // Scalar tail (or the full buffer when SSE is not compiled in)
  for (; number < len; number++) {
    z[number] = x[number] * h;
  }
}
void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
@ -445,10 +478,8 @@ void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, uint32_t len) {
}
number = quarterPoints * 4;
for(; number < len; number++){
float val1Real = *xPtr++;
float val1Imag = *xPtr++;
*zPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
for(;number < len; number++){
z[number] = creal(x[number]) * creal(x[number]) + cimag(x[number])*cimag(x[number]);
}
#endif
}

Loading…
Cancel
Save