removed volk dependency. Checked and working

8 years ago · 86750b2db7
parent fe867af319
commit 86750b2db7
6 changed files with 222 additions and 305 deletions
--- a/cmake/modules/FindVolk.cmake
+++ b/cmake/modules/FindVolk.cmake
@ -1,144 +0,0 @@
-INCLUDE(FindPkgConfig)
-PKG_CHECK_MODULES(PC_VOLK volk QUIET)
-
-FIND_PATH(
-    VOLK_INCLUDE_DIRS
-    NAMES volk/volk.h
-    HINTS $ENV{VOLK_DIR}/include
-          ${CMAKE_INSTALL_PREFIX}/include
-          ${PC_VOLK_INCLUDE_DIR}
-    PATHS /usr/local/include
-          /usr/include
-)
-
-FIND_LIBRARY(
-    VOLK_LIBRARIES
-    NAMES volk
-    HINTS $ENV{VOLK_DIR}/lib
-          ${CMAKE_INSTALL_PREFIX}/lib
-          ${CMAKE_INSTALL_PREFIX}/lib64
-          ${PC_VOLK_LIBDIR}
-    PATHS /usr/local/lib
-          /usr/local/lib64
-          /usr/lib
-          /usr/lib64
-)
-
-INCLUDE(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
-MARK_AS_ADVANCED(VOLK_LIBRARIES VOLK_INCLUDE_DIRS VOLK_DEFINITIONS)
-
-IF(VOLK_FOUND)
-  SET(CMAKE_REQUIRED_LIBRARIES ${VOLK_LIBRARIES} m)
-  CHECK_FUNCTION_EXISTS_MATH(volk_16i_s32f_convert_32f HAVE_VOLK_CONVERT_IF_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32f_index_max_16u HAVE_VOLK_MAX_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_max_32f HAVE_VOLK_MAX_VEC_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32f_accumulator_s32f HAVE_VOLK_ACC_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32fc_multiply_32fc HAVE_VOLK_MULT_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_conjugate_32fc HAVE_VOLK_CONJ_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_32fc HAVE_VOLK_MULT2_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_conjugate_32fc HAVE_VOLK_MULT2_CONJ_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_multiply_32fc HAVE_VOLK_MULT_REAL_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_multiply_32f HAVE_VOLK_MULT_FLOAT_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_32f HAVE_VOLK_MAG_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_squared_32f HAVE_VOLK_MAG_SQUARE_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_divide_32f HAVE_VOLK_DIVIDE_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_dot_prod_32fc HAVE_VOLK_DOTPROD_FC_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_dot_prod_32fc HAVE_VOLK_DOTPROD_CFC_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_conjugate_dot_prod_32fc HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_dot_prod_32f HAVE_VOLK_DOTPROD_F_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32f_atan2_32f HAVE_VOLK_ATAN_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_convert_16i HAVE_VOLK_CONVERT_FI_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_32f_x2 HAVE_VOLK_DEINTERLEAVE_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_interleave_32fc HAVE_VOLK_INTERLEAVE_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_subtract_32f HAVE_VOLK_SUB_FLOAT_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_add_32f HAVE_VOLK_ADD_FLOAT_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_square_dist_32f HAVE_VOLK_SQUARE_DIST_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_real_32f HAVE_VOLK_DEINTERLEAVE_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32fc_index_max_16u HAVE_VOLK_MAX_ABS_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_multiply_32f HAVE_VOLK_MULT_REAL2_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_16i_max_star_16i HAVE_VOLK_MAX_STAR_S_FUNCTION)
-  CHECK_FUNCTION_EXISTS_MATH(volk_8i_convert_16i HAVE_VOLK_CONVERT_CI_FUNCTION)
-
-
-
-  SET(VOLK_DEFINITIONS "HAVE_VOLK")
-  IF(${HAVE_VOLK_CONVERT_CI_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_CI_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_MAX_STAR_S_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_STAR_S_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_MAX_ABS_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_ABS_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_MAX_VEC_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_VEC_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_MAG_SQUARE_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_SQUARE_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_SQUARE_DIST_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SQUARE_DIST_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_DEINTERLEAVE_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_INTERLEAVE_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_INTERLEAVE_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_SUB_FLOAT_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SUB_FLOAT_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_ADD_FLOAT_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ADD_FLOAT_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_MULT2_CONJ_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_CONJ_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_DEINTERLEAVE_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_CONVERT_FI_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_FI_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_MAX_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_ACC_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ACC_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_MULT_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_CONJ_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONJ_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_MULT2_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_MULT_FLOAT_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FLOAT_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_MULT_REAL_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_REAL_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_MAG_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_DIVIDE_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DIVIDE_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_DOTPROD_FC_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_FC_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_DOTPROD_F_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_F_FUNCTION")
-  ENDIF()
-  IF(${HAVE_VOLK_ATAN_FUNCTION})
-    SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ATAN_FUNCTION")
-  ENDIF()
-ENDIF(VOLK_FOUND)
--- a/srslte/CMakeLists.txt
+++ b/srslte/CMakeLists.txt
@ -74,25 +74,6 @@ else(BLADERF_FOUND OR UHD_FOUND)
  add_definitions(-DDISABLE_RF)
 endif(BLADERF_FOUND OR UHD_FOUND)

-include(CheckFunctionExistsMath)
-if(${DISABLE_VOLK})
-  if(${DISABLE_VOLK} EQUAL 0)
-    find_package(Volk)
-  else(${DISABLE_VOLK} EQUAL 0)
-    message(STATUS "VOLK library disabled (DISABLE_VOLK=1)")
-  endif(${DISABLE_VOLK} EQUAL 0)
-else(${DISABLE_VOLK})
-  find_package(Volk)
-endif(${DISABLE_VOLK})
-
-if(VOLK_FOUND)
-  include_directories(${VOLK_INCLUDE_DIRS})
-  link_directories(${VOLK_LIBRARY_DIRS})
-  message(STATUS "   Compiling with VOLK SIMD library.")
-else(VOLK_FOUND)
-  message(STATUS "   VOLK SIMD library NOT found. Using generic implementation.")
-endif(VOLK_FOUND)
-
 ########################################################################
 # Add subdirectories
 ########################################################################
--- a/srslte/include/srslte/utils/vector_simd.h
+++ b/srslte/include/srslte/utils/vector_simd.h
@ -49,6 +49,26 @@ SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y,

 SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len); 

+SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len);
+
+SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len);
+
+SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len); 
+
+SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len); 
+
+SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len); 
+
+SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len); 
+
+SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len); 
+
+SRSLTE_API float srslte_vec_acc_ff_simd(float *x, uint32_t len); 
+
+SRSLTE_API cf_t srslte_vec_dot_prod_cfc_simd(cf_t *x, float *y, uint32_t len); 
+
+SRSLTE_API void srslte_vec_convert_if_simd(int16_t *x, float *z, float scale, uint32_t len); 
+
 #ifdef __cplusplus
 }
 #endif
--- a/srslte/lib/CMakeLists.txt
+++ b/srslte/lib/CMakeLists.txt
@ -90,13 +90,6 @@ if(RF_FOUND)
  endif(BLADERF_FOUND)
 endif(RF_FOUND)

-if(VOLK_FOUND)
-  target_link_libraries(srslte ${VOLK_LIBRARIES})
-  if(NOT DisableMEX)
-    target_link_libraries(srslte_static ${VOLK_LIBRARIES})
-  endif(NOT DisableMEX)
-endif(VOLK_FOUND)
-
 INSTALL(TARGETS srslte DESTINATION ${LIBRARY_DIR})
 SRSLTE_SET_PIC(srslte)

--- a/srslte/lib/utils/vector.c
+++ b/srslte/lib/utils/vector.c
@ -35,10 +35,6 @@
 #include "srslte/utils/vector_simd.h"
 #include "srslte/utils/bit.h"

-#ifdef HAVE_VOLK
-#include "volk/volk.h"
-#endif
-
 int srslte_vec_acc_ii(int *x, uint32_t len) {
  int i;
  int z=0;
@ -48,19 +44,14 @@ int srslte_vec_acc_ii(int *x, uint32_t len) {
  return z;
 }

+// Used in PRACH detector
 float srslte_vec_acc_ff(float *x, uint32_t len) {
-#ifdef HAVE_VOLK_ACC_FUNCTION
-  float result;
-  volk_32f_accumulator_s32f(&result,x,len);
-  return result;
-#else
  int i;
  float z=0;
  for (i=0;i<len;i++) {
    z+=x[i];
  }
  return z;
-#endif
 }

 void srslte_vec_ema_filter(cf_t *new_data, cf_t *average, cf_t *output, float coeff, uint32_t len) {
@ -79,27 +70,19 @@ cf_t srslte_vec_acc_cc(cf_t *x, uint32_t len) {
 }

 void srslte_vec_square_dist(cf_t symbol, cf_t *points, float *distance, uint32_t npoints) {
-#ifndef HAVE_VOLK_SQUARE_DIST_FUNCTION
  uint32_t i;
  cf_t diff; 
  for (i=0;i<npoints;i++) {
    diff = symbol - points[i];
    distance[i] = crealf(diff) * crealf(diff) + cimagf(diff) * cimagf(diff);
  }
-#else
-  volk_32fc_x2_square_dist_32f(distance,&symbol,points,npoints);
-#endif 
 }

 void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) {
-#ifndef HAVE_VOLK_SUB_FLOAT_FUNCTION
  int i;
  for (i=0;i<len;i++) {
    z[i] = x[i]-y[i];
  }
-#else
-  volk_32f_x2_subtract_32f(z,x,y,len);
-#endif 
 }

 void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
@ -117,14 +100,15 @@ void srslte_vec_sub_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len) {
  return srslte_vec_sub_fff((float*) x,(float*) y,(float*) z, 2*len);
 }

+// Used in PSS/SSS and sum_ccc
 void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len) {
-#ifndef HAVE_VOLK_ADD_FLOAT_FUNCTION
+#ifndef LV_HAVE_SSE
  int i;
  for (i=0;i<len;i++) {
    z[i] = x[i]+y[i];
  }
 #else
-  volk_32f_x2_add_32f(z,x,y,len);
+  srslte_vec_sum_fff_simd(x, y, z, len);
 #endif
 }

@ -179,14 +163,10 @@ void srslte_vec_sc_add_sss(int16_t *x, int16_t h, int16_t *z, uint32_t len) {
 }

 void srslte_vec_sc_prod_fff(float *x, float h, float *z, uint32_t len) {
-#ifndef HAVE_VOLK_MULT_FLOAT_FUNCTION
  int i;
  for (i=0;i<len;i++) {
    z[i] = x[i]*h;
  }
-#else
-  volk_32f_s32f_multiply_32f(z,x,h,len);
-#endif
 }

 void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len) {
@ -219,8 +199,9 @@ void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len) {
  srslte_vec_sc_prod_cfc(x, amplitude/max, y, len);
 }

+// Used throughout 
 void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) {
-#ifndef HAVE_VOLK_MULT_FUNCTION
+#ifndef LV_HAVE_SSE
  int i;
  for (i=0;i<len;i++) {
    z[i] = x[i]*h;
@ -229,42 +210,36 @@ void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) {
  cf_t hh;
  __real__ hh = h;
  __imag__ hh = 0;
-  volk_32fc_s32fc_multiply_32fc(z,x,hh,len);
+  srslte_vec_sc_prod_ccc_simd(x,hh,z,len);
 #endif
 }

+// Chest UL 
 void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
-#ifndef HAVE_VOLK_MULT_FUNCTION
+#ifndef LV_HAVE_SSE
  int i;
  for (i=0;i<len;i++) {
    z[i] = x[i]*h;
  }
 #else
-  volk_32fc_s32fc_multiply_32fc(z,x,h,len);
+  srslte_vec_sc_prod_ccc_simd(x,h,z,len);
 #endif
 }

+// Used in turbo decoder 
 void srslte_vec_convert_if(int16_t *x, float *z, float scale, uint32_t len) {
-#ifndef HAVE_VOLK_CONVERT_IF_FUNCTION
  int i;
  for (i=0;i<len;i++) {
    z[i] = ((float) x[i])/scale;
  }
-#else
-  volk_16i_s32f_convert_32f(z,x,scale,len);
-#endif  
 }


 void srslte_vec_convert_ci(int8_t *x, int16_t *z, uint32_t len) {
-#ifndef HAVE_VOLK_CONVERT_CI_FUNCTION
  int i;
  for (i=0;i<len;i++) {
    z[i] = ((int16_t) x[i]);
  }
-#else
-  volk_8i_convert_16i(z,x,len);
-#endif  
 }

 void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) {
@ -295,37 +270,25 @@ void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) {
 }

 void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len) {
- #ifdef HAVE_VOLK_INTERLEAVE_FUNCTION
-  volk_32f_x2_interleave_32fc(x, real, imag, len);
-#else 
  int i;
  for (i=0;i<len;i++) {
    x[i] = real[i] + _Complex_I*imag[i];
  }
-#endif 
 }

 void srslte_vec_deinterleave_cf(cf_t *x, float *real, float *imag, uint32_t len) {
- #ifdef HAVE_VOLK_DEINTERLEAVE_FUNCTION
-  volk_32fc_deinterleave_32f_x2(real, imag, x, len);
-#else 
  int i;
  for (i=0;i<len;i++) {
    real[i] = __real__ x[i];
    imag[i] = __imag__ x[i];
  }
-#endif 
 }

 void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len) {
-#ifdef HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION
-  volk_32fc_deinterleave_real_32f(real, x, len);
-#else 
  int i;
  for (i=0;i<len;i++) {
    real[i] = __real__ x[i];
  }
-#endif  
 }

 /* Note: We align memory to 32 bytes (for AVX compatibility) 
@ -335,7 +298,7 @@ void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len) {
 */
 void *srslte_vec_malloc(uint32_t size) {
  void *ptr;
-  if (posix_memalign(&ptr,32,size)) {
+  if (posix_memalign(&ptr,64,size)) {
    return NULL;
  } else {
    return ptr;
@ -343,11 +306,11 @@ void *srslte_vec_malloc(uint32_t size) {
 }

 void *srslte_vec_realloc(void *ptr, uint32_t old_size, uint32_t new_size) {
-#ifndef HAVE_VOLK
+#ifndef LV_HAVE_SSE
  return realloc(ptr, new_size);
 #else
  void *new_ptr;
-  if (posix_memalign(&new_ptr,volk_get_alignment(),new_size)) {
+  if (posix_memalign(&new_ptr,64,new_size)) {
    return NULL;
  } else {
    memcpy(new_ptr, ptr, old_size);
@ -468,40 +431,31 @@ void srslte_vec_load_file(char *filename, void *buffer, uint32_t len) {
  }  
 }

-
+// Used in PSS
 void srslte_vec_conj_cc(cf_t *x, cf_t *y, uint32_t len) {
-#ifndef HAVE_VOLK_CONJ_FUNCTION
  int i;
  for (i=0;i<len;i++) {
    y[i] = conjf(x[i]);
  }
-#else
-  volk_32fc_conjugate_32fc(y,x,len);
-#endif
 }

+// Used in scrambling complex 
 void srslte_vec_prod_cfc(cf_t *x, float *y, cf_t *z, uint32_t len) {
-#ifndef HAVE_VOLK_MULT_REAL_FUNCTION
  int i;
  for (i=0;i<len;i++) {
    z[i] = x[i]*y[i];
  }
-#else
-  volk_32fc_32f_multiply_32fc(z,x,y,len);
-#endif
 }

+// Used in scrambling float
 void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len) {
-#ifndef HAVE_VOLK_MULT_REAL2_FUNCTION
  int i;
  for (i=0;i<len;i++) {
    z[i] = x[i]*y[i];
  }
-#else
-  volk_32f_x2_multiply_32f(z,x,y,len);
-#endif
 }

+// Scrambling Short
 void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
 #ifndef LV_HAVE_SSE
  int i;
@ -513,26 +467,27 @@ void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
 #endif
 }

+// CFO and OFDM processing
 void srslte_vec_prod_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
-#ifndef HAVE_VOLK_MULT2_FUNCTION
+#ifndef LV_HAVE_SSE
  int i;
  for (i=0;i<len;i++) {
    z[i] = x[i]*y[i];
  }
 #else
-  volk_32fc_x2_multiply_32fc(z,x,y,len);
+  srslte_vec_prod_ccc_simd(x,y,z,len);
 #endif
 }

-
+// PRACH, CHEST UL, etc. 
 void srslte_vec_prod_conj_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
-#ifndef HAVE_VOLK_MULT2_CONJ_FUNCTION
+#ifndef LV_HAVE_SSE
  int i;
  for (i=0;i<len;i++) {
    z[i] = x[i]*conjf(y[i]);
  }
 #else
-  volk_32fc_x2_multiply_conjugate_32fc(z,x,y,len);
+  srslte_vec_prod_conj_ccc_simd(x,y,z,len);
 #endif
 }

@ -568,75 +523,48 @@ void srslte_vec_div_cfc(cf_t *x, float *y, cf_t *z, float *z_real, float *z_imag
 }

 void srslte_vec_div_fff(float *x, float *y, float *z, uint32_t len) {
-#ifdef HAVE_VOLK_DIVIDE_FUNCTION
-  volk_32f_x2_divide_32f(z, x, y, len);
-#else
  int i;
  for (i=0;i<len;i++) {
    z[i] = x[i] / y[i];
  }
-#endif
 }

 cf_t srslte_vec_dot_prod_ccc(cf_t *x, cf_t *y, uint32_t len) {
-#ifdef HAVE_VOLK_DOTPROD_FC_FUNCTION
-  cf_t res;
-  volk_32fc_x2_dot_prod_32fc(&res, x, y, len);
-  return res; 
-#else 
  uint32_t i;
  cf_t res = 0;
  for (i=0;i<len;i++) {
    res += x[i]*y[i];
  }
  return res;
-#endif
 }

+// Convolution filter 
 cf_t srslte_vec_dot_prod_cfc(cf_t *x, float *y, uint32_t len) {
-#ifdef HAVE_VOLK_DOTPROD_CFC_FUNCTION
-  cf_t res;
-  volk_32fc_32f_dot_prod_32fc(&res, x, y, len);
-  return res; 
-#else  
  uint32_t i;
  cf_t res = 0;
  for (i=0;i<len;i++) {
    res += x[i]*y[i];
  }
  return res;
-#endif
 }

 cf_t srslte_vec_dot_prod_conj_ccc(cf_t *x, cf_t *y, uint32_t len) {
-#ifdef HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION
-  cf_t res;
-  volk_32fc_x2_conjugate_dot_prod_32fc(&res, x, y, len);
-  return res; 
-#else 
  uint32_t i;
  cf_t res = 0;
  for (i=0;i<len;i++) {
    res += x[i]*conjf(y[i]);
  }
  return res;
-#endif
 }

-
+// PHICH 
 float srslte_vec_dot_prod_fff(float *x, float *y, uint32_t len) {
-#ifdef HAVE_VOLK_DOTPROD_F_FUNCTION
-  float res;
-  volk_32f_x2_dot_prod_32f(&res, x, y, len);
-  return res; 
-#else 
  uint32_t i;
  float res = 0;
  for (i=0;i<len;i++) {
    res += x[i]*y[i];
  }
  return res;
-#endif  
 }

 int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len) {
@ -656,48 +584,35 @@ float srslte_vec_avg_power_cf(cf_t *x, uint32_t len) {
  return crealf(srslte_vec_dot_prod_conj_ccc(x,x,len)) / len;
 }

+// PSS
 void srslte_vec_abs_cf(cf_t *x, float *abs, uint32_t len) {
-#ifndef HAVE_VOLK_MAG_FUNCTION
  int i;
  for (i=0;i<len;i++) {
    abs[i] = cabsf(x[i]);
  }
-#else
-  volk_32fc_magnitude_32f(abs,x,len);
-#endif
 }
+
+// PRACH 
 void srslte_vec_abs_square_cf(cf_t *x, float *abs_square, uint32_t len) {
-#ifndef HAVE_VOLK_MAG_SQUARE_FUNCTION
+#ifndef LV_HAVE_SSE
  int i;
  for (i=0;i<len;i++) {
    abs_square[i] = crealf(x[i])*crealf(x[i])+cimagf(x[i])*cimagf(x[i]);
  }
 #else
-  volk_32fc_magnitude_squared_32f(abs_square,x,len);
+  srslte_vec_abs_square_cf_simd(x,abs_square,len);
 #endif
 }


 void srslte_vec_arg_cf(cf_t *x, float *arg, uint32_t len) {
-#ifndef HAVE_VOLK_ATAN_FUNCTION
  int i;
  for (i=0;i<len;i++) {
    arg[i] = cargf(x[i]);
  }
-#else
-  volk_32fc_s32f_atan2_32f(arg,x,1,len);
-
-#endif
-
 }

 uint32_t srslte_vec_max_fi(float *x, uint32_t len) {
-#ifdef HAVE_VOLK_MAX_FUNCTION
-  uint32_t target=0;
-  volk_32f_index_max_16u(&target,x,len);
-  return target;
-
-#else
  uint32_t i;
  float m=-FLT_MAX;
  uint32_t p=0;
@ -708,16 +623,9 @@ uint32_t srslte_vec_max_fi(float *x, uint32_t len) {
    }
  }
  return p;
-#endif
 }

 int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len) {
-#ifdef HAVE_VOLK_MAX_STAR_S_FUNCTION
-  int16_t target=0;
-  volk_16i_max_star_16i(&target,x,len);
-  return target;
-
-#else
  uint32_t i;
  int16_t m=-INT16_MIN;
  for (i=0;i<len;i++) {
@ -726,7 +634,6 @@ int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len) {
    }
  }
  return m;
-#endif
 }

 int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len) {
@ -741,9 +648,6 @@ int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len) {
 }

 void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) {
-#ifdef HAVE_VOLK_MAX_VEC_FUNCTION
-  volk_32f_x2_max_32f(z,x,y,len);
-#else
  uint32_t i; 
  for (i=0;i<len;i++) {
    if (x[i] > y[i]) {
@ -752,17 +656,11 @@ void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) {
      z[i] = y[i]; 
    }
  }
-#endif  
 }


+// CP autocorr
 uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) {
-#ifdef HAVE_VOLK_MAX_ABS_FUNCTION
-  uint32_t target=0;
-  volk_32fc_index_max_16u(&target,x,len);
-  return target;
-
-#else
  uint32_t i;
  float m=-FLT_MAX;
  uint32_t p=0;
@ -775,7 +673,6 @@ uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) {
    }
  }
  return p;
-#endif
 }

 void srslte_vec_quant_fuc(float *in, uint8_t *out, float gain, float offset, float clip, uint32_t len) {
--- a/srslte/lib/utils/vector_simd.c
+++ b/srslte/lib/utils/vector_simd.c
@ -280,3 +280,173 @@ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
  }
 #endif
 }
+
+
+// for enb no-volk
+/*
+ * Element-wise sum of two float vectors: z[i] = x[i] + y[i] for i in [0, len).
+ *
+ * x, y: input vectors holding at least len floats each.
+ * z:    output vector with room for at least len floats.
+ * len:  number of elements to process.
+ *
+ * When LV_HAVE_SSE is defined, groups of four floats are added with SSE and
+ * the remaining 0..3 elements are handled by the scalar loop. When it is not
+ * defined, the scalar loop processes every element. Unaligned loads/stores
+ * are used so that the pointers are not required to be 16-byte aligned.
+ */
+void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len) {
+  uint32_t i = 0;
+
+#ifdef LV_HAVE_SSE
+  /* i <= len always holds, so len - i cannot wrap around */
+  for (; len - i >= 4; i += 4) {
+    const __m128 xVal = _mm_loadu_ps(&x[i]);
+    const __m128 yVal = _mm_loadu_ps(&y[i]);
+    _mm_storeu_ps(&z[i], _mm_add_ps(xVal, yVal));
+  }
+#endif
+
+  /* Scalar tail (or the whole vector when SSE is not available) */
+  for (; i < len; i++) {
+    z[i] = x[i] + y[i];
+  }
+}
+
+/*
+ * Multiplies two pairs of packed complex floats.
+ *
+ * Each argument is read as two interleaved (re, im) values:
+ *   x = [ar, ai, br, bi],  y = [cr, ci, dr, di]
+ * and the result is [a*c, b*d] in the same interleaved layout.
+ */
+static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) {
+  const __m128 y_re      = _mm_moveldup_ps(y);           // cr, cr, dr, dr
+  const __m128 y_im      = _mm_movehdup_ps(y);           // ci, ci, di, di
+  const __m128 x_swapped = _mm_shuffle_ps(x, x, 0xB1);   // ai, ar, bi, br
+  const __m128 direct    = _mm_mul_ps(x, y_re);          // ar*cr, ai*cr, br*dr, bi*dr
+  const __m128 cross     = _mm_mul_ps(x_swapped, y_im);  // ai*ci, ar*ci, bi*di, br*di
+  // even lanes: direct - cross (real part), odd lanes: direct + cross (imaginary part)
+  return _mm_addsub_ps(direct, cross);
+}
+
+/*
+ * Element-wise complex product: z[i] = x[i] * y[i] for i in [0, len).
+ *
+ * x, y: input vectors holding at least len complex samples each.
+ * z:    output vector with room for at least len complex samples.
+ * len:  number of complex samples to process.
+ *
+ * When LV_HAVE_SSE is defined, samples are processed two at a time (one
+ * __m128 holds two interleaved re/im pairs); a trailing odd sample is handled
+ * by the scalar loop. When it is not defined, the scalar loop processes
+ * every sample. Unaligned loads/stores are used so that the pointers are not
+ * required to be 16-byte aligned.
+ */
+void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len)
+{
+  uint32_t i = 0;
+
+#ifdef LV_HAVE_SSE
+  /* i <= len always holds, so len - i cannot wrap around */
+  for (; len - i >= 2; i += 2) {
+    const __m128 xVal = _mm_loadu_ps((const float*) &x[i]);
+    const __m128 yVal = _mm_loadu_ps((const float*) &y[i]);
+    _mm_storeu_ps((float*) &z[i], _mm_complexmul_ps(xVal, yVal));
+  }
+#endif
+
+  /* Scalar tail. This must operate on cf_t values: multiplying through
+   * float pointers would only combine the real parts and would leave the
+   * imaginary part of the last output sample unwritten. */
+  for (; i < len; i++) {
+    z[i] = x[i] * y[i];
+  }
+}
+
+/*
+ * Multiplies two packed complex floats in x by the complex conjugates of the
+ * two packed complex floats in y (interleaved re/im layout).
+ */
+static inline __m128 _mm_complexmulconj_ps(__m128 x, __m128 y) {
+  // -0.f has only the sign bit set, so XOR flips the sign of the odd
+  // (imaginary) lanes and leaves the even (real) lanes untouched.
+  const __m128 imag_sign_mask = _mm_setr_ps(0, -0.f, 0, -0.f);
+  return _mm_complexmul_ps(x, _mm_xor_ps(y, imag_sign_mask));
+}
+
+/*
+ * Element-wise complex product with conjugation of the second operand:
+ * z[i] = x[i] * conj(y[i]) for i in [0, len).
+ *
+ * x, y: input vectors holding at least len complex samples each.
+ * z:    output vector with room for at least len complex samples.
+ * len:  number of complex samples to process.
+ *
+ * When LV_HAVE_SSE is defined, samples are processed two at a time; a
+ * trailing odd sample is handled by the scalar loop. When it is not defined,
+ * the scalar loop processes every sample. Unaligned loads/stores are used so
+ * that the pointers are not required to be 16-byte aligned.
+ */
+void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
+  uint32_t i = 0;
+
+#ifdef LV_HAVE_SSE
+  /* i <= len always holds, so len - i cannot wrap around */
+  for (; len - i >= 2; i += 2) {
+    const __m128 xVal = _mm_loadu_ps((const float*) &x[i]);
+    const __m128 yVal = _mm_loadu_ps((const float*) &y[i]);
+    _mm_storeu_ps((float*) &z[i], _mm_complexmulconj_ps(xVal, yVal));
+  }
+#endif
+
+  /* Scalar tail. It has to work on cf_t values and apply the conjugate,
+   * otherwise the last sample of an odd-length vector would be computed
+   * from the real parts only and without conjugation. */
+  for (; i < len; i++) {
+    z[i] = x[i] * conjf(y[i]);
+  }
+}
+
+/*
+ * Multiplies a complex vector by a complex scalar: z[i] = x[i] * h for
+ * i in [0, len).
+ *
+ * x:   input vector holding at least len complex samples.
+ * h:   complex scalar applied to every sample.
+ * z:   output vector with room for at least len complex samples.
+ * len: number of complex samples to process.
+ *
+ * When LV_HAVE_SSE is defined, samples are processed two at a time; a
+ * trailing odd sample is handled by the scalar loop. When it is not defined,
+ * the scalar loop processes every sample. Unaligned loads/stores are used so
+ * that the pointers are not required to be 16-byte aligned.
+ */
+void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
+  uint32_t i = 0;
+
+#ifdef LV_HAVE_SSE
+  /* Broadcast the real and imaginary parts of the scalar to all lanes */
+  const __m128 hRe = _mm_set_ps1(crealf(h));
+  const __m128 hIm = _mm_set_ps1(cimagf(h));
+
+  /* i <= len always holds, so len - i cannot wrap around */
+  for (; len - i >= 2; i += 2) {
+    const __m128 xVal  = _mm_loadu_ps((const float*) &x[i]);   // ar, ai, br, bi
+    const __m128 xSwap = _mm_shuffle_ps(xVal, xVal, 0xB1);     // ai, ar, bi, br
+    const __m128 tmp1  = _mm_mul_ps(xVal, hRe);                // ar*hr, ai*hr, ...
+    const __m128 tmp2  = _mm_mul_ps(xSwap, hIm);               // ai*hi, ar*hi, ...
+    /* even lanes: tmp1 - tmp2 (real), odd lanes: tmp1 + tmp2 (imaginary) */
+    _mm_storeu_ps((float*) &z[i], _mm_addsub_ps(tmp1, tmp2));
+  }
+#endif
+
+  /* Scalar tail. It has to work on cf_t values: going through float
+   * pointers would drop the imaginary part of both the input sample and
+   * the product. */
+  for (; i < len; i++) {
+    z[i] = x[i] * h;
+  }
+}
+
+/*
+ * Squared magnitude of each complex sample:
+ * z[i] = re(x[i])^2 + im(x[i])^2 for i in [0, len).
+ *
+ * x:   input vector holding at least len complex samples.
+ * z:   output vector with room for at least len floats.
+ * len: number of complex samples to process.
+ *
+ * When LV_HAVE_SSE is defined, samples are processed four at a time and the
+ * remaining 0..3 samples are handled by the scalar loop. When it is not
+ * defined, the scalar loop processes every sample. Unaligned loads/stores
+ * are used so that the pointers are not required to be 16-byte aligned.
+ */
+void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, uint32_t len) {
+  /* View the complex input as interleaved re/im floats */
+  const float* xPtr = (const float*) x;
+  float* zPtr = z;
+  uint32_t remaining = len;
+
+#ifdef LV_HAVE_SSE
+  for (; remaining >= 4; remaining -= 4) {
+    __m128 xVal1 = _mm_loadu_ps(xPtr);       // re0, im0, re1, im1
+    __m128 xVal2 = _mm_loadu_ps(xPtr + 4);   // re2, im2, re3, im3
+    xVal1 = _mm_mul_ps(xVal1, xVal1);
+    xVal2 = _mm_mul_ps(xVal2, xVal2);
+    /* Horizontal add pairs re^2 + im^2 for each of the four samples */
+    _mm_storeu_ps(zPtr, _mm_hadd_ps(xVal1, xVal2));
+    xPtr += 8;
+    zPtr += 4;
+  }
+#endif
+
+  /* Scalar tail (or the whole vector when SSE is not available) */
+  for (; remaining > 0; remaining--) {
+    const float valReal = *xPtr++;
+    const float valImag = *xPtr++;
+    *zPtr++ = (valReal * valReal) + (valImag * valImag);
+  }
+}
+
+
+
+