updating avx vectors and viterbi

master
yagoda 8 years ago committed by Ismael Gomez
parent 0fe981e608
commit e0fb5d5cf6

@ -35,46 +35,46 @@ extern "C" {
#include <stdint.h> #include <stdint.h>
#include "srslte/config.h" #include "srslte/config.h"
SRSLTE_API int srslte_vec_dot_prod_sss_simd(short *x, short *y, uint32_t len); SRSLTE_API int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len);
SRSLTE_API int srslte_vec_dot_prod_sss_simd_avx(short *x, short *y, uint32_t len); SRSLTE_API int srslte_vec_dot_prod_sss_avx(short *x, short *y, uint32_t len);
SRSLTE_API void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len); SRSLTE_API void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_sss_simd_avx(short *x, short *y, short *z, uint32_t len); SRSLTE_API void srslte_vec_sum_sss_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len); SRSLTE_API void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_sss_simd_avx(short *x, short *y, short *z, uint32_t len); SRSLTE_API void srslte_vec_sub_sss_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len); SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_simd_avx(short *x, short *y, short *z, uint32_t len); SRSLTE_API void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_simd(short *x, int n_rightshift, short *z, uint32_t len); SRSLTE_API void srslte_vec_sc_div2_sss_sse(short *x, int n_rightshift, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_simd_avx(short *x, int k, short *z, uint32_t len); SRSLTE_API void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len);
SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len); SRSLTE_API void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len);
SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len); SRSLTE_API void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len);
SRSLTE_API void srslte_32fc_s32f_multiply_32fc_avx( cf_t *z,const cf_t *x,const float h,const uint32_t len); SRSLTE_API void srslte_vec_mult_scalar_cf_f_avx( cf_t *z,const cf_t *x,const float h,const uint32_t len);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

@ -213,7 +213,7 @@ int main(int argc, char **argv) {
gettimeofday(&t[1], NULL); gettimeofday(&t[1], NULL);
int M = 1; int M = 1;
srslte_vec_fprint_b(stdout, data_tx, frame_length); //srslte_vec_fprint_b(stdout, data_tx, frame_length);
for (int i=0;i<M;i++) { for (int i=0;i<M;i++) {
srslte_viterbi_decode_uc(&dec, llr_c, data_rx, frame_length); srslte_viterbi_decode_uc(&dec, llr_c, data_rx, frame_length);

@ -43,13 +43,6 @@
#define DEFAULT_GAIN 100 #define DEFAULT_GAIN 100
#define AVX_ON
#ifdef LV_HAVE_AVX
#ifdef AVX_ON
#define USE_AVX
#endif
#endif
//#undef LV_HAVE_SSE //#undef LV_HAVE_SSE
int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) { int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) {
@ -391,7 +384,7 @@ int srslte_viterbi_init(srslte_viterbi_t *q, srslte_viterbi_type_t type, int pol
switch (type) { switch (type) {
case SRSLTE_VITERBI_37: case SRSLTE_VITERBI_37:
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
#ifdef USE_AVX #ifdef LV_HAVE_AVX
return init37_avx2(q, poly, max_frame_length, tail_bitting); return init37_avx2(q, poly, max_frame_length, tail_bitting);
#else #else
return init37_sse(q, poly, max_frame_length, tail_bitting); return init37_sse(q, poly, max_frame_length, tail_bitting);

@ -103,13 +103,17 @@ void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) {
} }
void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) { void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
#ifndef LV_HAVE_SSE #ifdef LV_HAVE_AVX
int i; srslte_vec_sub_sss_avx(x, y, z, len);
#else
#ifdef LV_HAVE_SSE
srslte_vec_sub_sss_sse(x, y, z, len);
#else
int i;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
z[i] = x[i]-y[i]; z[i] = x[i]-y[i];
} }
#else #endif
srslte_vec_sub_sss_simd_avx(x, y, z, len);
#endif #endif
} }
@ -129,13 +133,17 @@ void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len) {
} }
void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len) { void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len) {
#ifndef LV_HAVE_SSE #ifdef LV_HAVE_AVX
srslte_vec_sum_sss_avx(x, y, z, len);
#else
#ifdef LV_HAVE_SSE
srslte_vec_sum_sss_sse(x, y, z, len);
#else
int i; int i;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
z[i] = x[i]+y[i]; z[i] = x[i]+y[i];
} }
#else #endif
srslte_vec_sum_sss_simd_avx(x, y, z, len);
#endif #endif
} }
@ -197,14 +205,18 @@ void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len) {
} }
void srslte_vec_sc_div2_sss(short *x, int n_rightshift, short *z, uint32_t len) { void srslte_vec_sc_div2_sss(short *x, int n_rightshift, short *z, uint32_t len) {
#ifndef LV_HAVE_SSE #ifdef LV_HAVE_AVX
srslte_vec_sc_div2_sss_avx(x, n_rightshift, z, len);
#else
#ifdef LV_HAVE_SSE
srslte_vec_sc_div2_sss_sse(x, n_rightshift, z, len);
#else
int i; int i;
int pow2_div = 1<<n_rightshift; int pow2_div = 1<<n_rightshift;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
z[i] = x[i]/pow2_div; z[i] = x[i]/pow2_div;
} }
#else #endif
srslte_vec_sc_div2_sss_simd_avx(x, n_rightshift, z, len);
#endif #endif
} }
@ -220,13 +232,14 @@ void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len) {
} }
void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) { void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT_FUNCTION #ifdef LV_HAVE_AVX
srslte_vec_mult_scalar_cf_f_avx(z,x, h, len);
#else
int i; int i;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
z[i] = x[i]*h; z[i] = x[i]*h;
} }
#else
srslte_32fc_s32f_multiply_32fc_avx(z,x, h, len);
#endif #endif
} }
@ -271,7 +284,7 @@ void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) {
z[i] = (int16_t) (x[i]*scale); z[i] = (int16_t) (x[i]*scale);
} }
#else #else
srslte_vec_convert_fi_simd(x, z, scale, len); srslte_vec_convert_fi_sse(x, z, scale, len);
#endif #endif
} }
@ -284,14 +297,13 @@ void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len) {
void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) { void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) {
#ifdef DEBUG_MODE #ifdef DEBUG_MODE
#warning FIXME: Disabling SSE/AVX in srslte_vec_lut_sss #warning FIXME: Disabling SSE/AVX in srslte_vec_lut_sss
srslte_vec_lut_sss_simd(x, lut, y, len);
#else #else
#ifndef LV_HAVE_SSE #ifdef LV_HAVE_SSE
for (int i=0;i<len;i++) { for (int i=0;i<len;i++) {
y[lut[i]] = x[i]; y[lut[i]] = x[i];
} }
#else #else
srslte_vec_lut_sss_simd(x, lut, y, len); srslte_vec_lut_sss_sse(x, lut, y, len);
#endif #endif
#endif #endif
} }
@ -505,13 +517,19 @@ void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len) {
} }
void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) { void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
#ifndef LV_HAVE_SSE
#ifdef LV_HAVE_AVX
srslte_vec_prod_sss_avx(x,y,z,len);
#else
#ifdef LV_HAVE_SSE
srslte_vec_prod_sss_sse(x,y,z,len);
#else
int i; int i;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
z[i] = x[i]*y[i]; z[i] = x[i]*y[i];
} }
#else #endif
srslte_vec_prod_sss_simd_avx(x,y,z,len);
#endif #endif
} }
@ -642,15 +660,19 @@ float srslte_vec_dot_prod_fff(float *x, float *y, uint32_t len) {
} }
int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len) { int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len) {
#ifndef LV_HAVE_SSE #ifdef LV_HAVE_AVX
return srslte_vec_dot_prod_sss_avx(x, y, len);
#else
#ifdef LV_HAVE_SSE
return srslte_vec_dot_prod_sss_sse(x, y, len);
#else
uint32_t i; uint32_t i;
int32_t res = 0; int32_t res = 0;
for (i=0;i<len;i++) { for (i=0;i<len;i++) {
res += x[i]*y[i]; res += x[i]*y[i];
} }
return res; return res;
#else #endif
return srslte_vec_dot_prod_sss_simd_avx(x, y, len);
#endif #endif
} }

@ -45,7 +45,7 @@
#endif #endif
int srslte_vec_dot_prod_sss_simd(short *x, short *y, uint32_t len) int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len)
{ {
int result = 0; int result = 0;
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
@ -87,7 +87,7 @@ int srslte_vec_dot_prod_sss_simd(short *x, short *y, uint32_t len)
} }
int srslte_vec_dot_prod_sss_simd_avx(short *x, short *y, uint32_t len) int srslte_vec_dot_prod_sss_avx(short *x, short *y, uint32_t len)
{ {
int result = 0; int result = 0;
#ifdef LV_HAVE_AVX #ifdef LV_HAVE_AVX
@ -127,7 +127,7 @@ int srslte_vec_dot_prod_sss_simd_avx(short *x, short *y, uint32_t len)
void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len) void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len)
{ {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
unsigned int number = 0; unsigned int number = 0;
@ -160,7 +160,7 @@ void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len)
} }
void srslte_vec_sum_sss_simd_avx(short *x, short *y, short *z, uint32_t len) void srslte_vec_sum_sss_avx(short *x, short *y, short *z, uint32_t len)
{ {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
unsigned int number = 0; unsigned int number = 0;
@ -193,7 +193,7 @@ void srslte_vec_sum_sss_simd_avx(short *x, short *y, short *z, uint32_t len)
} }
void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len) void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len)
{ {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
unsigned int number = 0; unsigned int number = 0;
@ -225,7 +225,7 @@ void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len)
#endif #endif
} }
void srslte_vec_sub_sss_simd_avx(short *x, short *y, short *z, uint32_t len) void srslte_vec_sub_sss_avx(short *x, short *y, short *z, uint32_t len)
{ {
#ifdef LV_HAVE_AVX #ifdef LV_HAVE_AVX
unsigned int number = 0; unsigned int number = 0;
@ -260,7 +260,7 @@ void srslte_vec_sub_sss_simd_avx(short *x, short *y, short *z, uint32_t len)
void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len) void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len)
{ {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
unsigned int number = 0; unsigned int number = 0;
@ -292,7 +292,7 @@ void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len)
#endif #endif
} }
void srslte_vec_prod_sss_simd_avx(short *x, short *y, short *z, uint32_t len) void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len)
{ {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
unsigned int number = 0; unsigned int number = 0;
@ -324,7 +324,12 @@ void srslte_vec_prod_sss_simd_avx(short *x, short *y, short *z, uint32_t len)
#endif #endif
} }
void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
void srslte_vec_sc_div2_sss_sse(short *x, int k, short *z, uint32_t len)
{ {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
unsigned int number = 0; unsigned int number = 0;
@ -354,7 +359,7 @@ void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
#endif #endif
} }
void srslte_vec_sc_div2_sss_simd_avx(short *x, int k, short *z, uint32_t len) void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len)
{ {
#ifdef LV_HAVE_AVX #ifdef LV_HAVE_AVX
unsigned int number = 0; unsigned int number = 0;
@ -384,8 +389,10 @@ void srslte_vec_sc_div2_sss_simd_avx(short *x, int k, short *z, uint32_t len)
#endif #endif
} }
/* No improvement with AVX */ /* No improvement with AVX */
void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len) void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len)
{ {
#ifndef DEBUG_MODE #ifndef DEBUG_MODE
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
@ -419,7 +426,7 @@ void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t l
} }
/* Modified from volk_32f_s32f_convert_16i_a_simd2. Removed clipping */ /* Modified from volk_32f_s32f_convert_16i_a_simd2. Removed clipping */
void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len) void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len)
{ {
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
unsigned int number = 0; unsigned int number = 0;
@ -457,8 +464,8 @@ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
#endif #endif
} }
//srslte_32fc_s32f_multiply_32fc_avx
void srslte_32fc_s32f_multiply_32fc_avx( cf_t *z,const cf_t *x,const float h,const uint32_t len) void srslte_vec_mult_scalar_cf_f_avx( cf_t *z,const cf_t *x,const float h,const uint32_t len)
{ {
#ifdef LV_HAVE_AVX #ifdef LV_HAVE_AVX

Loading…
Cancel
Save