diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 5cea166b3..7e4197e4b 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -37,19 +37,19 @@ extern "C" { SRSLTE_API int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len); -SRSLTE_API int srslte_vec_dot_prod_sss_avx(short *x, short *y, uint32_t len); +SRSLTE_API int srslte_vec_dot_prod_sss_avx2(short *x, short *y, uint32_t len); SRSLTE_API void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len); -SRSLTE_API void srslte_vec_sum_sss_avx(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sum_sss_avx2(short *x, short *y, short *z, uint32_t len); SRSLTE_API void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len); -SRSLTE_API void srslte_vec_sub_sss_avx(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t len); @@ -57,12 +57,12 @@ SRSLTE_API void srslte_vec_sub_sss_avx(short *x, short *y, short *z, uint32_t le SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len); -SRSLTE_API void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_prod_sss_avx2(short *x, short *y, short *z, uint32_t len); SRSLTE_API void srslte_vec_sc_div2_sss_sse(short *x, int n_rightshift, short *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len); diff --git a/lib/src/phy/fec/viterbi.c b/lib/src/phy/fec/viterbi.c index cdff3216f..a010dfebb 100644 --- a/lib/src/phy/fec/viterbi.c +++ b/lib/src/phy/fec/viterbi.c @@ -122,7 +122,7 @@ void free37_sse(void *o) { -#ifdef LV_HAVE_AVX +#ifdef LV_HAVE_AVX2 int decode37_avx2(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) { srslte_viterbi_t *q 
= o; @@ -333,7 +333,7 @@ int init37_neon(srslte_viterbi_t *q, int poly[3], uint32_t framebits, bool tail_ #endif -#ifdef LV_HAVE_AVX +#ifdef LV_HAVE_AVX2 int init37_avx2(srslte_viterbi_t *q, int poly[3], uint32_t framebits, bool tail_biting) { q->K = 7; q->R = 3; @@ -383,7 +383,7 @@ int srslte_viterbi_init(srslte_viterbi_t *q, srslte_viterbi_type_t type, int pol switch (type) { case SRSLTE_VITERBI_37: #ifdef LV_HAVE_SSE - #ifdef LV_HAVE_AVX + #ifdef LV_HAVE_AVX2 return init37_avx2(q, poly, max_frame_length, tail_bitting); #else return init37_sse(q, poly, max_frame_length, tail_bitting); @@ -408,7 +408,7 @@ int srslte_viterbi_init_sse(srslte_viterbi_t *q, srslte_viterbi_type_t type, int } #endif -#ifdef LV_HAVE_AVX +#ifdef LV_HAVE_AVX2 int srslte_viterbi_init_avx2(srslte_viterbi_t *q, srslte_viterbi_type_t type, int poly[3], uint32_t max_frame_length, bool tail_bitting) { return init37_avx2(q, poly, max_frame_length, tail_bitting); diff --git a/lib/src/phy/fec/viterbi37_avx2.c b/lib/src/phy/fec/viterbi37_avx2.c index bb8e90d10..a00bb494b 100644 --- a/lib/src/phy/fec/viterbi37_avx2.c +++ b/lib/src/phy/fec/viterbi37_avx2.c @@ -14,7 +14,7 @@ //#define DEBUG -#ifdef LV_HAVE_SSE +#ifdef LV_HAVE_AVX2 #include #include diff --git a/lib/src/phy/phch/pdsch.c b/lib/src/phy/phch/pdsch.c index 1b61cd5f4..4da881531 100644 --- a/lib/src/phy/phch/pdsch.c +++ b/lib/src/phy/phch/pdsch.c @@ -450,11 +450,11 @@ int srslte_pdsch_decode_multi(srslte_pdsch_t *q, if (SRSLTE_VERBOSE_ISDEBUG()) { DEBUG("SAVED FILE subframe.dat: received subframe symbols\n",0); - srslte_vec_save_file("subframe.dat", sf_symbols, SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t)); + srslte_vec_save_file("subframe.dat", sf_symbols[0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t)); DEBUG("SAVED FILE hest0.dat and hest1.dat: channel estimates for port 0 and port 1\n",0); - srslte_vec_save_file("hest0.dat", ce[0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t)); + 
srslte_vec_save_file("hest0.dat", ce[0][0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t)); if (q->cell.nof_ports > 1) { - srslte_vec_save_file("hest1.dat", ce[1], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t)); + srslte_vec_save_file("hest1.dat", ce[1][0], SRSLTE_SF_LEN_RE(q->cell.nof_prb, q->cell.cp)*sizeof(cf_t)); } DEBUG("SAVED FILE pdsch_symbols.dat: symbols after equalization\n",0); srslte_vec_save_file("pdsch_symbols.dat", q->d, cfg->nbits.nof_re*sizeof(cf_t)); diff --git a/lib/src/phy/phch/test/pusch_test.c b/lib/src/phy/phch/test/pusch_test.c index 29980a83f..7cf80a7d1 100644 --- a/lib/src/phy/phch/test/pusch_test.c +++ b/lib/src/phy/phch/test/pusch_test.c @@ -202,10 +202,12 @@ int main(int argc, char **argv) { fprintf(stderr, "Error initiating soft buffer\n"); goto quit; } + srslte_softbuffer_tx_reset(&softbuffer_tx); if (srslte_softbuffer_rx_init(&softbuffer_rx, 100)) { fprintf(stderr, "Error initiating soft buffer\n"); goto quit; } + srslte_softbuffer_rx_reset(&softbuffer_rx); uint32_t ntrials = 100; diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index 725fdfd98..7ce954c78 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -110,8 +110,8 @@ void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) { } void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) { -#ifdef LV_HAVE_AVX - srslte_vec_sub_sss_avx(x, y, z, len); +#ifdef LV_HAVE_AVX2 + srslte_vec_sub_sss_avx2(x, y, z, len); #else #ifdef LV_HAVE_SSE srslte_vec_sub_sss_sse(x, y, z, len); @@ -140,8 +140,8 @@ void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len) { } void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len) { -#ifdef LV_HAVE_AVX - srslte_vec_sum_sss_avx(x, y, z, len); +#ifdef LV_HAVE_AVX2 + srslte_vec_sum_sss_avx2(x, y, z, len); #else #ifdef LV_HAVE_SSE srslte_vec_sum_sss_sse(x, y, z, len); @@ -212,8 +212,8 @@ void srslte_vec_sc_prod_sfs(short *x, float h, short *z, 
uint32_t len) { } void srslte_vec_sc_div2_sss(short *x, int n_rightshift, short *z, uint32_t len) { -#ifdef LV_HAVE_AVX - srslte_vec_sc_div2_sss_avx(x, n_rightshift, z, len); +#ifdef LV_HAVE_AVX2 + srslte_vec_sc_div2_sss_avx2(x, n_rightshift, z, len); #else #ifdef LV_HAVE_SSE srslte_vec_sc_div2_sss_sse(x, n_rightshift, z, len); @@ -345,14 +345,14 @@ void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len) { #endif } -/* Note: We align memory to 32 bytes (for AVX compatibility) +/* Note: We align memory to 256 bytes (a multiple of the 32 bytes AVX2 requires) * because in some cases volk can incorrectly detect the architecture. * This could be inefficient for SSE or non-SIMD platforms but shouldn't * be a huge problem. */ void *srslte_vec_malloc(uint32_t size) { void *ptr; - if (posix_memalign(&ptr,32,size)) { + if (posix_memalign(&ptr,256,size)) { return NULL; } else { return ptr; @@ -364,7 +364,7 @@ void *srslte_vec_realloc(void *ptr, uint32_t old_size, uint32_t new_size) { return realloc(ptr, new_size); #else void *new_ptr; - if (posix_memalign(&new_ptr,volk_get_alignment(),new_size)) { + if (posix_memalign(&new_ptr,256,new_size)) { return NULL; } else { memcpy(new_ptr, ptr, old_size); @@ -520,8 +520,8 @@ void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len) { } void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) { -#ifdef LV_HAVE_AVX - srslte_vec_prod_sss_avx(x,y,z,len); +#ifdef LV_HAVE_AVX2 + srslte_vec_prod_sss_avx2(x,y,z,len); #else #ifdef LV_HAVE_SSE srslte_vec_prod_sss_sse(x,y,z,len); @@ -661,8 +661,8 @@ float srslte_vec_dot_prod_fff(float *x, float *y, uint32_t len) { } int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len) { -#ifdef LV_HAVE_AVX - return srslte_vec_dot_prod_sss_avx(x, y, len); +#ifdef LV_HAVE_AVX2 + return srslte_vec_dot_prod_sss_avx2(x, y, len); #else #ifdef LV_HAVE_SSE return srslte_vec_dot_prod_sss_sse(x, y, len); diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c 
index 38f20e6e0..01dd6d0ac 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -87,10 +87,10 @@ int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len) } -int srslte_vec_dot_prod_sss_avx(short *x, short *y, uint32_t len) +int srslte_vec_dot_prod_sss_avx2(short *x, short *y, uint32_t len) { int result = 0; -#ifdef LV_HAVE_AVX +#ifdef LV_HAVE_AVX2 unsigned int number = 0; const unsigned int points = len / 16; @@ -160,9 +160,9 @@ void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len) } -void srslte_vec_sum_sss_avx(short *x, short *y, short *z, uint32_t len) +void srslte_vec_sum_sss_avx2(short *x, short *y, short *z, uint32_t len) { -#ifdef LV_HAVE_SSE +#ifdef LV_HAVE_AVX2 unsigned int number = 0; const unsigned int points = len / 16; @@ -225,9 +225,9 @@ void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len) #endif } -void srslte_vec_sub_sss_avx(short *x, short *y, short *z, uint32_t len) +void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t len) { -#ifdef LV_HAVE_AVX +#ifdef LV_HAVE_AVX2 unsigned int number = 0; const unsigned int points = len / 16; @@ -292,9 +292,9 @@ void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len) #endif } -void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len) +void srslte_vec_prod_sss_avx2(short *x, short *y, short *z, uint32_t len) { -#ifdef LV_HAVE_SSE +#ifdef LV_HAVE_AVX2 unsigned int number = 0; const unsigned int points = len / 16; @@ -359,9 +359,9 @@ void srslte_vec_sc_div2_sss_sse(short *x, int k, short *z, uint32_t len) #endif } -void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len) +void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len) { -#ifdef LV_HAVE_AVX +#ifdef LV_HAVE_AVX2 unsigned int number = 0; const unsigned int points = len / 16; @@ -394,7 +394,11 @@ void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len) /* No improvement with AVX */ void 
srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len) { -#ifndef DEBUG_MODE +#ifdef DEBUG_MODE + for (int i=0;i