diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index e7820c307..0c378591e 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -310,10 +310,10 @@ static inline simd_f_t srslte_simd_f_addsub(simd_f_t a, simd_f_t b) { return _mm_addsub_ps(a, b); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON // CURRENTLY USES GENERIC IMPLEMENTATION FOR NEON - float* a_ptr = &a; - float* b_ptr = &b; + float* a_ptr = (float*) &a; + float* b_ptr = (float*) &b; simd_f_t ret; - float* c_ptr = &ret; + float* c_ptr = (float*) &ret; for(int i = 0; i<4;i++){ if(i%2==0){ c_ptr[i] = a_ptr[i] - b_ptr[i]; @@ -1028,12 +1028,12 @@ static inline simd_i_t srslte_simd_i_select(simd_i_t a, simd_i_t b, simd_sel_t s #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON // CURRENTLY USES GENERIC IMPLEMENTATION FOR NEON - int* a_ptr = &a; - int* b_ptr = &b; + int* a_ptr = (int*) &a; + int* b_ptr = (int*) &b; simd_i_t ret; - int* sel = &selector; + int* sel = (int*) &selector; - int* c_ptr = &ret; + int* c_ptr = (int*) &ret; for(int i = 0;i<4;i++) { if(sel[i] == -1){ @@ -1097,7 +1097,7 @@ static inline simd_s_t srslte_simd_s_loadu(int16_t *ptr) { return _mm256_loadu_si256((__m256i*) ptr); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE - return _mm_loadu_si128((__m128i*) ptr) + return _mm_loadu_si128((__m128i*) ptr); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON return vld1q_s16(ptr); diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c index 8d5b9f2d6..4ebed9862 100644 --- a/lib/src/phy/utils/test/vector_test.c +++ b/lib/src/phy/utils/test/vector_test.c @@ -63,12 +63,12 @@ bool verbose = false; strncpy(func_name, #X, 32);\ CODE;\ passed = (mse < MAX_MSE);\ - printf("%32s (%5d) ... %7.1f MSamp/s ... %3s Passed\n", func_name, block_size, \ - (double) block_size*NOF_REPETITIONS/ *timing, passed?"":"Not");\ + printf("%32s (%5d) ... %7.1f MSamp/s ... %3s Passed (%.6f)\n", func_name, block_size, \ + (double) block_size*NOF_REPETITIONS/ *timing, passed?"":"Not", mse);\ return passed;\ } -#define MALLOC(TYPE, NAME) TYPE *NAME = malloc(sizeof(TYPE)*block_size) +#define MALLOC(TYPE, NAME) TYPE *NAME = srslte_vec_malloc(sizeof(TYPE)*block_size) static double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { @@ -507,7 +507,7 @@ TEST(srslte_vec_abs_cf, for (int i = 0; i < block_size; i++) { gold = sqrtf(crealf(x[i]) * crealf(x[i]) + cimagf(x[i])*cimagf(x[i])); - mse += cabsf(gold - z[i]); + mse += cabsf(gold - z[i])/block_size; } free(x); @@ -771,12 +771,27 @@ int main(int argc, char **argv) { size_count++; } + char fname[68]; + FILE *f = NULL; + void * p = popen("(date +%g%m%d && hostname) | tr '\\r\\n' '__'", "r"); + if (p) { + fgets(fname, 64, p); + strncpy(fname + strnlen(fname, 64) - 1, ".tsv", 4); + f = fopen(fname, "w"); + if (f) printf("Saving benchmark results in '%s'\n", fname); + } + pclose(p); + + printf("\n"); printf("%32s |", "Subroutine/MSps"); + if (f) fprintf(f, "Subroutine/MSps Vs Vector size\t"); for (int i = 0; i < size_count; i++) { printf(" %7d", sizes[i]); + if (f) fprintf(f, "%d\t", sizes[i]); } printf(" |\n"); + if (f) fprintf(f, "\n"); for (int j = 0; j < 32; j++) { printf("-"); @@ -789,12 +804,19 @@ int main(int argc, char **argv) { for (int i = 0; i < func_count; i++) { printf("%32s | ", func_names[i]); + if (f) fprintf(f, "%s\t", func_names[i]); + for (int j = 0; j < size_count; j++) { printf(" %s%7.1f\x1b[0m", (passed[i][j])?"":"\x1B[31m", (double) NOF_REPETITIONS*(double)sizes[j]/timmings[i][j]); + if (f) fprintf(f, "%.1f\t", (double) NOF_REPETITIONS*(double)sizes[j]/timmings[i][j]); + all_passed &= passed[i][j]; } printf(" |\n"); + if (f) fprintf(f, "\n"); } + if (f) fclose(f); + return (all_passed)?SRSLTE_SUCCESS:SRSLTE_ERROR; }