|
|
@ -206,12 +206,12 @@ void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len)
|
|
|
|
__m128i xVal, yVal, zVal;
|
|
|
|
__m128i xVal, yVal, zVal;
|
|
|
|
for(;number < points; number++){
|
|
|
|
for(;number < points; number++){
|
|
|
|
|
|
|
|
|
|
|
|
xVal = _mm_loadu_si128(xPtr);
|
|
|
|
xVal = _mm_load_si128(xPtr);
|
|
|
|
yVal = _mm_loadu_si128(yPtr);
|
|
|
|
yVal = _mm_load_si128(yPtr);
|
|
|
|
|
|
|
|
|
|
|
|
zVal = _mm_sub_epi16(xVal, yVal);
|
|
|
|
zVal = _mm_sub_epi16(xVal, yVal);
|
|
|
|
|
|
|
|
|
|
|
|
_mm_storeu_si128(zPtr, zVal);
|
|
|
|
_mm_store_si128(zPtr, zVal);
|
|
|
|
|
|
|
|
|
|
|
|
xPtr ++;
|
|
|
|
xPtr ++;
|
|
|
|
yPtr ++;
|
|
|
|
yPtr ++;
|
|
|
@ -273,12 +273,12 @@ void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len)
|
|
|
|
__m128i xVal, yVal, zVal;
|
|
|
|
__m128i xVal, yVal, zVal;
|
|
|
|
for(;number < points; number++){
|
|
|
|
for(;number < points; number++){
|
|
|
|
|
|
|
|
|
|
|
|
xVal = _mm_loadu_si128(xPtr);
|
|
|
|
xVal = _mm_load_si128(xPtr);
|
|
|
|
yVal = _mm_loadu_si128(yPtr);
|
|
|
|
yVal = _mm_load_si128(yPtr);
|
|
|
|
|
|
|
|
|
|
|
|
zVal = _mm_mullo_epi16(xVal, yVal);
|
|
|
|
zVal = _mm_mullo_epi16(xVal, yVal);
|
|
|
|
|
|
|
|
|
|
|
|
_mm_storeu_si128(zPtr, zVal);
|
|
|
|
_mm_store_si128(zPtr, zVal);
|
|
|
|
|
|
|
|
|
|
|
|
xPtr ++;
|
|
|
|
xPtr ++;
|
|
|
|
yPtr ++;
|
|
|
|
yPtr ++;
|
|
|
@ -341,11 +341,11 @@ void srslte_vec_sc_div2_sss_sse(short *x, int k, short *z, uint32_t len)
|
|
|
|
__m128i xVal, zVal;
|
|
|
|
__m128i xVal, zVal;
|
|
|
|
for(;number < points; number++){
|
|
|
|
for(;number < points; number++){
|
|
|
|
|
|
|
|
|
|
|
|
xVal = _mm_loadu_si128(xPtr);
|
|
|
|
xVal = _mm_load_si128(xPtr);
|
|
|
|
|
|
|
|
|
|
|
|
zVal = _mm_srai_epi16(xVal, k);
|
|
|
|
zVal = _mm_srai_epi16(xVal, k);
|
|
|
|
|
|
|
|
|
|
|
|
_mm_storeu_si128(zPtr, zVal);
|
|
|
|
_mm_store_si128(zPtr, zVal);
|
|
|
|
|
|
|
|
|
|
|
|
xPtr ++;
|
|
|
|
xPtr ++;
|
|
|
|
zPtr ++;
|
|
|
|
zPtr ++;
|
|
|
|