Added vector CFO

master
Xavier Arteaga 7 years ago
parent 63df8d4c19
commit 681b98ae50

@ -284,6 +284,10 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
endif(HAVE_AVX) endif(HAVE_AVX)
endif (HAVE_AVX2) endif (HAVE_AVX2)
if (HAVE_FMA)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfma -DLV_HAVE_FMA")
endif (HAVE_FMA)
if (HAVE_AVX512) if (HAVE_AVX512)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512")

@ -789,10 +789,15 @@ static inline simd_cf_t srslte_simd_cf_prod (simd_cf_t a, simd_cf_t b) {
_mm512_mul_ps(a.im, b.re)); _mm512_mul_ps(a.im, b.re));
#else /* LV_HAVE_AVX512 */ #else /* LV_HAVE_AVX512 */
#ifdef LV_HAVE_AVX2 #ifdef LV_HAVE_AVX2
#ifdef LV_HAVE_FMA
ret.re = _mm256_fmsub_ps(a.re, b.re, _mm256_mul_ps(a.im, b.im));
ret.im = _mm256_fmadd_ps(a.re, b.im, _mm256_mul_ps(a.im, b.re));
#else /* LV_HAVE_FMA */
ret.re = _mm256_sub_ps(_mm256_mul_ps(a.re, b.re), ret.re = _mm256_sub_ps(_mm256_mul_ps(a.re, b.re),
_mm256_mul_ps(a.im, b.im)); _mm256_mul_ps(a.im, b.im));
ret.im = _mm256_add_ps(_mm256_mul_ps(a.re, b.im), ret.im = _mm256_add_ps(_mm256_mul_ps(a.re, b.im),
_mm256_mul_ps(a.im, b.re)); _mm256_mul_ps(a.im, b.re));
#endif /* LV_HAVE_FMA */
#else #else
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
ret.re = _mm_sub_ps(_mm_mul_ps(a.re, b.re), ret.re = _mm_sub_ps(_mm_mul_ps(a.re, b.re),

@ -156,6 +156,9 @@ SRSLTE_API void srslte_vec_interleave(const cf_t *x, const cf_t *y, cf_t *z, con
SRSLTE_API void srslte_vec_interleave_add(const cf_t *x, const cf_t *y, cf_t *z, const int len); SRSLTE_API void srslte_vec_interleave_add(const cf_t *x, const cf_t *y, cf_t *z, const int len);
SRSLTE_API void srslte_vec_apply_cfo(const cf_t *x, float cfo, cf_t *z, int len);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

@ -130,6 +130,9 @@ SRSLTE_API void srslte_vec_interleave_simd(const cf_t *x, const cf_t *y, cf_t *z
SRSLTE_API void srslte_vec_interleave_add_simd(const cf_t *x, const cf_t *y, cf_t *z, const int len); SRSLTE_API void srslte_vec_interleave_add_simd(const cf_t *x, const cf_t *y, cf_t *z, const int len);
SRSLTE_API void srslte_vec_apply_cfo_simd(const cf_t *x, float cfo, cf_t *z, int len);
/* SIMD Find Max functions */ /* SIMD Find Max functions */
SRSLTE_API uint32_t srslte_vec_max_fi_simd(const float *x, const int len); SRSLTE_API uint32_t srslte_vec_max_fi_simd(const float *x, const int len);

@ -34,7 +34,11 @@
#include "srslte/phy/utils/vector.h" #include "srslte/phy/utils/vector.h"
#include "srslte/phy/utils/debug.h" #include "srslte/phy/utils/debug.h"
/* Set next macro to 1 for using table generated CFO compensation */
#define SRSLTE_CFO_USE_EXP_TABLE 0
int srslte_cfo_init(srslte_cfo_t *h, uint32_t nsamples) { int srslte_cfo_init(srslte_cfo_t *h, uint32_t nsamples) {
#if SRSLTE_CFO_USE_EXP_TABLE
int ret = SRSLTE_ERROR; int ret = SRSLTE_ERROR;
bzero(h, sizeof(srslte_cfo_t)); bzero(h, sizeof(srslte_cfo_t));
@ -57,13 +61,19 @@ clean:
srslte_cfo_free(h); srslte_cfo_free(h);
} }
return ret; return ret;
#else /* SRSLTE_CFO_USE_EXP_TABLE */
h->nsamples = nsamples;
return SRSLTE_SUCCESS;
#endif /* SRSLTE_CFO_USE_EXP_TABLE */
} }
void srslte_cfo_free(srslte_cfo_t *h) { void srslte_cfo_free(srslte_cfo_t *h) {
#if SRSLTE_CFO_USE_EXP_TABLE
srslte_cexptab_free(&h->tab); srslte_cexptab_free(&h->tab);
if (h->cur_cexp) { if (h->cur_cexp) {
free(h->cur_cexp); free(h->cur_cexp);
} }
#endif /* SRSLTE_CFO_USE_EXP_TABLE */
bzero(h, sizeof(srslte_cfo_t)); bzero(h, sizeof(srslte_cfo_t));
} }
@ -72,6 +82,7 @@ void srslte_cfo_set_tol(srslte_cfo_t *h, float tol) {
} }
int srslte_cfo_resize(srslte_cfo_t *h, uint32_t samples) { int srslte_cfo_resize(srslte_cfo_t *h, uint32_t samples) {
#if SRSLTE_CFO_USE_EXP_TABLE
if (samples <= h->max_samples) { if (samples <= h->max_samples) {
srslte_cexptab_gen(&h->tab, h->cur_cexp, h->last_freq, samples); srslte_cexptab_gen(&h->tab, h->cur_cexp, h->last_freq, samples);
h->nsamples = samples; h->nsamples = samples;
@ -79,15 +90,19 @@ int srslte_cfo_resize(srslte_cfo_t *h, uint32_t samples) {
fprintf(stderr, "Error in cfo_resize(): nof_samples must be lower than initialized\n"); fprintf(stderr, "Error in cfo_resize(): nof_samples must be lower than initialized\n");
return SRSLTE_ERROR; return SRSLTE_ERROR;
} }
#endif /* SRSLTE_CFO_USE_EXP_TABLE */
return SRSLTE_SUCCESS; return SRSLTE_SUCCESS;
} }
void srslte_cfo_correct(srslte_cfo_t *h, const cf_t *input, cf_t *output, float freq) { void srslte_cfo_correct(srslte_cfo_t *h, const cf_t *input, cf_t *output, float freq) {
#if SRSLTE_CFO_USE_EXP_TABLE
if (fabs(h->last_freq - freq) > h->tol) { if (fabs(h->last_freq - freq) > h->tol) {
h->last_freq = freq; h->last_freq = freq;
srslte_cexptab_gen(&h->tab, h->cur_cexp, h->last_freq, h->nsamples); srslte_cexptab_gen(&h->tab, h->cur_cexp, h->last_freq, h->nsamples);
DEBUG("CFO generating new table for frequency %.4fe-6\n", freq*1e6); DEBUG("CFO generating new table for frequency %.4fe-6\n", freq*1e6);
} }
srslte_vec_prod_ccc(h->cur_cexp, input, output, h->nsamples); srslte_vec_prod_ccc(h->cur_cexp, input, output, h->nsamples);
#else /* SRSLTE_CFO_USE_EXP_TABLE */
srslte_vec_apply_cfo(input, freq, output, h->nsamples);
#endif /* SRSLTE_CFO_USE_EXP_TABLE */
} }

@ -32,6 +32,7 @@
#include <sys/time.h> #include <sys/time.h>
#include <memory.h> #include <memory.h>
#include <math.h> #include <math.h>
#include <srslte/srslte.h>
#include "srslte/phy/utils/mat.h" #include "srslte/phy/utils/mat.h"
#include "srslte/phy/utils/simd.h" #include "srslte/phy/utils/simd.h"
@ -729,6 +730,80 @@ TEST(srslte_vec_max_abs_ci,
free(x); free(x);
) )
TEST(srslte_vec_apply_cfo,
MALLOC(cf_t, x);
MALLOC(cf_t, z);
const float cfo = 0.1f;
cf_t gold;
for (int i = 0; i < block_size; i++) {
x[i] = RANDOM_CF();
}
TEST_CALL(srslte_vec_apply_cfo(x, cfo, z, block_size))
for (int i = 0; i < block_size; i++) {
gold = x[i] * cexpf(_Complex_I * 2.0f * (float) M_PI * i * cfo);
mse += cabsf(gold - z[i]) / cabsf(gold);
}
mse /= block_size;
free(x);
free(z);
)
TEST(srslte_cfo_correct,
srslte_cfo_t srslte_cfo = {0};
MALLOC(cf_t, x);
MALLOC(cf_t, z);
const float cfo = 0.1f;
cf_t gold;
for (int i = 0; i < block_size; i++) {
x[i] = RANDOM_CF();
}
srslte_cfo_init(&srslte_cfo, block_size);
TEST_CALL(srslte_cfo_correct(&srslte_cfo, x, z, cfo))
for (int i = 0; i < block_size; i++) {
gold = x[i] * cexpf(_Complex_I * 2.0f * (float) M_PI * i * cfo);
mse += cabsf(gold - z[i]) / cabsf(gold);
}
mse /= block_size;
free(x);
free(z);
srslte_cfo_free(&srslte_cfo);
)
TEST(srslte_cfo_correct_change,
srslte_cfo_t srslte_cfo = {0};
MALLOC(cf_t, x);
MALLOC(cf_t, z);
float cfo = 0.1f;
cf_t gold;
for (int i = 0; i < block_size; i++) {
x[i] = RANDOM_CF();
}
srslte_cfo_init(&srslte_cfo, block_size);
TEST_CALL(cfo = (i%2)?0.1:-0.1; srslte_cfo_correct(&srslte_cfo, x, z, cfo))
for (int i = 0; i < block_size; i++) {
gold = x[i] * cexpf(_Complex_I * 2.0f * (float) M_PI * i * cfo);
mse += cabsf(gold - z[i]) / cabsf(gold);
}
mse /= block_size;
free(x);
free(z);
srslte_cfo_free(&srslte_cfo);
)
int main(int argc, char **argv) { int main(int argc, char **argv) {
char func_names[MAX_FUNCTIONS][32]; char func_names[MAX_FUNCTIONS][32];
double timmings[MAX_FUNCTIONS][MAX_BLOCKS]; double timmings[MAX_FUNCTIONS][MAX_BLOCKS];
@ -738,7 +813,7 @@ int main(int argc, char **argv) {
bool passed[MAX_FUNCTIONS][MAX_BLOCKS]; bool passed[MAX_FUNCTIONS][MAX_BLOCKS];
bool all_passed = true; bool all_passed = true;
for (uint32_t block_size = 1; block_size <= 1024*8; block_size *= 2) { for (uint32_t block_size = 1; block_size <= 1024*32; block_size *= 2) {
func_count = 0; func_count = 0;
@ -827,6 +902,15 @@ int main(int argc, char **argv) {
passed[func_count][size_count] = test_srslte_vec_max_abs_ci(func_names[func_count], &timmings[func_count][size_count], block_size); passed[func_count][size_count] = test_srslte_vec_max_abs_ci(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++; func_count++;
passed[func_count][size_count] = test_srslte_vec_apply_cfo(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
passed[func_count][size_count] = test_srslte_cfo_correct(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
passed[func_count][size_count] = test_srslte_cfo_correct_change(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
sizes[size_count] = block_size; sizes[size_count] = block_size;
size_count++; size_count++;
} }

@ -438,3 +438,7 @@ void srslte_vec_interleave(const cf_t *x, const cf_t *y, cf_t *z, const int len)
void srslte_vec_interleave_add(const cf_t *x, const cf_t *y, cf_t *z, const int len) { void srslte_vec_interleave_add(const cf_t *x, const cf_t *y, cf_t *z, const int len) {
srslte_vec_interleave_add_simd(x, y, z, len); srslte_vec_interleave_add_simd(x, y, z, len);
} }
void srslte_vec_apply_cfo(const cf_t *x, float cfo, cf_t *z, int len) {
srslte_vec_apply_cfo_simd(x, cfo, z, len);
}

@ -1251,3 +1251,54 @@ void srslte_vec_interleave_add_simd(const cf_t *x, const cf_t *y, cf_t *z, const
z[k++] += y[i]; z[k++] += y[i];
} }
} }
void srslte_vec_apply_cfo_simd(const cf_t *x, float cfo, cf_t *z, int len) {
const float TWOPI = 2.0f * (float) M_PI;
int i = 0;
#if SRSLTE_SIMD_CF_SIZE
__attribute__ ((aligned (SRSLTE_SIMD_BIT_ALIGN/8))) cf_t _osc[SRSLTE_SIMD_CF_SIZE];
__attribute__ ((aligned (SRSLTE_SIMD_BIT_ALIGN/8))) cf_t _phase[SRSLTE_SIMD_CF_SIZE];
if (i < len - SRSLTE_SIMD_CF_SIZE + 1) {
for (int k = 0; k < SRSLTE_SIMD_CF_SIZE; k++) {
_osc[k] = cexpf(_Complex_I * TWOPI * cfo * SRSLTE_SIMD_CF_SIZE);
_phase[k] = cexpf(_Complex_I * TWOPI * cfo * k);
}
}
simd_cf_t _simd_osc = srslte_simd_cfi_load(_osc);
simd_cf_t _simd_phase = srslte_simd_cfi_load(_phase);
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(z)) {
for (; i < len - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
simd_cf_t a = srslte_simd_cfi_load(&x[i]);
simd_cf_t r = srslte_simd_cf_prod(a, _simd_phase);
srslte_simd_cfi_store(&z[i], r);
_simd_phase = srslte_simd_cf_prod(_simd_phase, _simd_osc);
}
} else {
for (; i < len - SRSLTE_SIMD_F_SIZE + 1; i += SRSLTE_SIMD_F_SIZE) {
for (; i < len - SRSLTE_SIMD_CF_SIZE + 1; i += SRSLTE_SIMD_CF_SIZE) {
simd_cf_t a = srslte_simd_cfi_loadu(&x[i]);
simd_cf_t r = srslte_simd_cf_prod(a, _simd_phase);
_simd_phase = srslte_simd_cf_prod(_simd_phase, _simd_osc);
srslte_simd_cfi_storeu(&z[i], r);
}
}
}
#endif
cf_t osc = cexpf(_Complex_I * TWOPI * cfo);
cf_t phase = cexpf(_Complex_I * TWOPI * cfo * i);
for (; i < len; i++) {
z[i] = x[i] * phase;
phase *= osc;
}
}

Loading…
Cancel
Save