Added srs_vec_cf_cpy for aligned copy, which slightly improves performance for aligned data

master
Xavier Arteaga 7 years ago
parent a9d9c92205
commit 48d508aeba
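
For context (not part of the original commit): the speedup comes from the aligned AVX forms _mm256_load_ps/_mm256_store_ps, which require 32-byte (256-bit) aligned pointers and fault otherwise, but avoid the penalty the unaligned _mm256_loadu_ps/_mm256_storeu_ps forms can pay. A minimal standalone illustration, assuming a compiler with AVX enabled (e.g. gcc -mavx):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  /* 32-byte aligned storage, as the aligned AVX intrinsics require */
  float src[8] __attribute__((aligned(32))) = {0, 1, 2, 3, 4, 5, 6, 7};
  float dst[8] __attribute__((aligned(32))) = {0};

  /* One 256-bit transfer; faults at runtime if src or dst is misaligned */
  _mm256_store_ps(dst, _mm256_load_ps(src));

  printf("dst[7] = %.1f\n", dst[7]); /* prints dst[7] = 7.0 */
  return 0;
}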

@@ -172,6 +172,9 @@ SRSLTE_API void srslte_vec_abs_square_cf(cf_t *x, float *abs_square, uint32_t len);
 /* argument of each vector element */
 SRSLTE_API void srslte_vec_arg_cf(cf_t *x, float *arg, uint32_t len);
 
+/* Copy a 256-bit aligned vector of complex floats */
+SRSLTE_API void srs_vec_cf_cpy(cf_t *dst, cf_t *src, int len);
+
 #ifdef __cplusplus
 }
 #endif

@@ -28,6 +28,7 @@
 #include <stdio.h>
 #include <assert.h>
 #include <string.h>
+#include <srslte/phy/utils/vector.h>
 #include "srslte/phy/common/phy_common.h"
 #include "srslte/phy/mimo/layermap.h"
@@ -51,7 +52,12 @@ int srslte_layermap_diversity(cf_t *d, cf_t *x[SRSLTE_MAX_LAYERS], int nof_layers,
 int srslte_layermap_multiplex(cf_t *d[SRSLTE_MAX_CODEWORDS], cf_t *x[SRSLTE_MAX_LAYERS], int nof_cw, int nof_layers,
                               int nof_symbols[SRSLTE_MAX_CODEWORDS]) {
-  if (nof_cw == 1) {
+  if (nof_cw == nof_layers) {
+    for (int i = 0; i < nof_cw; i++) {
+      srs_vec_cf_cpy(x[i], d[i], nof_symbols[0]);
+    }
+    return nof_symbols[0];
+  } else if (nof_cw == 1) {
     return srslte_layermap_diversity(d[0], x, nof_layers, nof_symbols[0]);
   } else {
     int n[2];

@@ -96,19 +96,19 @@ int main(int argc, char **argv) {
   }
 
   for (i=0;i<nof_cw;i++) {
-    d[i] = malloc(sizeof(cf_t) * nof_symb_cw[i]);
+    d[i] = srslte_vec_malloc(sizeof(cf_t) * nof_symb_cw[i]);
     if (!d[i]) {
       perror("malloc");
       exit(-1);
     }
-    dp[i] = malloc(sizeof(cf_t) * nof_symb_cw[i]);
+    dp[i] = srslte_vec_malloc(sizeof(cf_t) * nof_symb_cw[i]);
     if (!dp[i]) {
       perror("malloc");
       exit(-1);
     }
   }
   for (i=0;i<nof_layers;i++) {
-    x[i] = malloc(sizeof(cf_t) * nof_symbols);
+    x[i] = srslte_vec_malloc(sizeof(cf_t) * nof_symbols);
     if (!x[i]) {
       perror("malloc");
       exit(-1);
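
The switch from malloc to srslte_vec_malloc in the test is required for correctness, not just speed: srs_vec_cf_cpy uses aligned SIMD stores, and plain malloc typically guarantees only 16-byte alignment on 64-bit platforms. A minimal sketch of what a SIMD-aligned allocator like srslte_vec_malloc can look like (the name aligned_vec_malloc and the use of posix_memalign are illustrative assumptions, not necessarily srsLTE's actual implementation):

#include <stdlib.h>

/* Hypothetical sketch: allocate size bytes on a 32-byte (256-bit) boundary */
static void *aligned_vec_malloc(size_t size) {
  void *ptr = NULL;
  if (posix_memalign(&ptr, 32, size) != 0) {
    return NULL; /* out of memory or invalid alignment */
  }
  return ptr;
}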

@@ -843,3 +843,24 @@ void srslte_vec_quant_suc(int16_t *in, uint8_t *out, float gain, int16_t offset,
   }
 }
+
+void srs_vec_cf_cpy(cf_t *dst, cf_t *src, int len) {
+  int i = 0;
+#ifdef LV_HAVE_AVX
+  for (; i < len - 3; i += 4) {
+    _mm256_store_ps((float *) &dst[i], _mm256_load_ps((float *) &src[i]));
+  }
+#endif /* LV_HAVE_AVX */
+#ifdef LV_HAVE_SSE
+  for (; i < len - 1; i += 2) {
+    _mm_store_ps((float *) &dst[i], _mm_load_ps((float *) &src[i]));
+  }
+  for (; i < len; i++) {
+    ((__m64 *) dst)[i] = ((__m64 *) src)[i];
+  }
+#else
+  for (; i < len; i++) {
+    dst[i] = src[i];
+  }
+#endif /* LV_HAVE_SSE */
+}
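
The implementation copies four complex floats per AVX iteration, then pairs with SSE, then a final odd element through an 8-byte __m64 move (or a plain scalar loop when no SIMD extension is available), so len does not have to be a multiple of four; the only contract is that both buffers are 256-bit aligned. A hedged usage sketch (srs_vec_cf_cpy and srslte_vec_malloc are from this commit; the surrounding program is illustrative and assumes cf_t is float complex, as in srsLTE):

#include <complex.h>
#include <stdio.h>
#include <stdlib.h>
#include "srslte/phy/utils/vector.h"

int main(void) {
  int len = 1023; /* deliberately not a multiple of 4 to exercise the tail loops */

  /* Both buffers must come from the aligned allocator; aligned stores fault otherwise */
  cf_t *src = srslte_vec_malloc(sizeof(cf_t) * len);
  cf_t *dst = srslte_vec_malloc(sizeof(cf_t) * len);
  if (!src || !dst) {
    perror("srslte_vec_malloc");
    exit(-1);
  }

  for (int i = 0; i < len; i++) {
    src[i] = i - i * _Complex_I;
  }

  srs_vec_cf_cpy(dst, src, len); /* note: destination first, as in memcpy */

  printf("dst[%d] = %.1f%+.1fi\n", len - 1, crealf(dst[len - 1]), cimagf(dst[len - 1]));

  free(src);
  free(dst);
  return 0;
}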
