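Fixed srslte_vec_dot_prod_conj_ccc (the scalar path now conjugates y). Replaced unaligned SSE loads/stores with aligned ones in the sub, prod and sc_div2 short-vector kernels.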

master
Ismael Gomez 8 years ago
parent fd2a6f13e2
commit 2bba9d187d

@ -608,7 +608,7 @@ cf_t srslte_vec_dot_prod_conj_ccc(cf_t *x, cf_t *y, uint32_t len) {
uint32_t i; uint32_t i;
cf_t res = 0; cf_t res = 0;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
res += x[i]*y[i]; res += x[i]*conjf(y[i]);
} }
return res; return res;
#else #else

@ -206,12 +206,12 @@ void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len)
__m128i xVal, yVal, zVal; __m128i xVal, yVal, zVal;
for(;number < points; number++){ for(;number < points; number++){
xVal = _mm_loadu_si128(xPtr); xVal = _mm_load_si128(xPtr);
yVal = _mm_loadu_si128(yPtr); yVal = _mm_load_si128(yPtr);
zVal = _mm_sub_epi16(xVal, yVal); zVal = _mm_sub_epi16(xVal, yVal);
_mm_storeu_si128(zPtr, zVal); _mm_store_si128(zPtr, zVal);
xPtr ++; xPtr ++;
yPtr ++; yPtr ++;
@ -273,12 +273,12 @@ void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len)
__m128i xVal, yVal, zVal; __m128i xVal, yVal, zVal;
for(;number < points; number++){ for(;number < points; number++){
xVal = _mm_loadu_si128(xPtr); xVal = _mm_load_si128(xPtr);
yVal = _mm_loadu_si128(yPtr); yVal = _mm_load_si128(yPtr);
zVal = _mm_mullo_epi16(xVal, yVal); zVal = _mm_mullo_epi16(xVal, yVal);
_mm_storeu_si128(zPtr, zVal); _mm_store_si128(zPtr, zVal);
xPtr ++; xPtr ++;
yPtr ++; yPtr ++;
@ -341,11 +341,11 @@ void srslte_vec_sc_div2_sss_sse(short *x, int k, short *z, uint32_t len)
__m128i xVal, zVal; __m128i xVal, zVal;
for(;number < points; number++){ for(;number < points; number++){
xVal = _mm_loadu_si128(xPtr); xVal = _mm_load_si128(xPtr);
zVal = _mm_srai_epi16(xVal, k); zVal = _mm_srai_epi16(xVal, k);
_mm_storeu_si128(zPtr, zVal); _mm_store_si128(zPtr, zVal);
xPtr ++; xPtr ++;
zPtr ++; zPtr ++;

Loading…
Cancel
Save