@@ -28,6 +28,56 @@
#include "srslte/phy/utils/debug.h"
#include "srslte/phy/utils/vector.h"

#ifdef HAVE_NEONv8
#include <arm_neon.h>

inline static uint8x16_t v_load_s8(int i15, int i14, int i13, int i12, int i11,
                                   int i10, int i9, int i8, int i7, int i6,
                                   int i5, int i4, int i3, int i2, int i1,
                                   int i0) {
  uint8_t __attribute__((aligned(16))) data[16] = {
      i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15};
  return vld1q_u8(data);
}
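
/* Editorial note (not in the original patch): the vshuff_* helpers below
 * emulate immediate-controlled shuffles in the style of the SSE
 * _mm_shuffle_epi32 intrinsic used by the SSE paths of this file. Each
 * 2-bit (s32) or 4-bit (s16) field of `imm` selects a source lane of `a`,
 * which is copied into the corresponding lane of *res. The *_odd variants
 * fill result lanes 0, 2, ...; the *_even variants fill lanes 1, 3, ... */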

#define int8x16_to_8x8x2(v) ((int8x8x2_t){{vget_low_s8(v), vget_high_s8(v)}})

inline static void vshuff_s32_even(int32x4_t a, int imm, int32x4_t *res) {
  *res = vsetq_lane_s32(vgetq_lane_s32((a), ((imm) >> 2) & 0x3), *res, 1);
  *res = vsetq_lane_s32(vgetq_lane_s32((a), ((imm) >> 6) & 0x3), *res, 3);
}

inline static void vshuff_s32_odd(int32x4_t a, int imm, int32x4_t *res) {
  *res = vsetq_lane_s32(vgetq_lane_s32((a), (imm) & 0x3), *res, 0);
  *res = vsetq_lane_s32(vgetq_lane_s32((a), ((imm) >> 4) & 0x3), *res, 2);
}

inline static void vshuff_s32_idx(int32x4_t a, int imm, int32x4_t *res, int idx) {
  *res = vsetq_lane_s32(vgetq_lane_s32((a), ((imm) >> (idx * 2)) & 0x3), *res, idx);
}

inline static void vshuff_s16_idx(int16x8_t a, int imm, int16x8_t *res, int idx) {
  *res = vsetq_lane_s16(vgetq_lane_s16((a), ((imm) >> (idx * 4)) & 0xF), *res, idx);
}

inline static void vshuff_s16_even(int16x8_t a, int imm, int16x8_t *res) {
  *res = vsetq_lane_s16(vgetq_lane_s16((a), ((imm) >> 4) & 0xF), *res, 1);
  *res = vsetq_lane_s16(vgetq_lane_s16((a), ((imm) >> 12) & 0xF), *res, 3);
  *res = vsetq_lane_s16(vgetq_lane_s16((a), ((imm) >> 20) & 0xF), *res, 5);
  *res = vsetq_lane_s16(vgetq_lane_s16((a), ((imm) >> 28) & 0xF), *res, 7);
}

inline static void vshuff_s16_odd(int16x8_t a, int imm, int16x8_t *res) {
  *res = vsetq_lane_s16(vgetq_lane_s16((a), (imm) & 0xF), *res, 0);
  *res = vsetq_lane_s16(vgetq_lane_s16((a), ((imm) >> 8) & 0xF), *res, 2);
  *res = vsetq_lane_s16(vgetq_lane_s16((a), ((imm) >> 16) & 0xF), *res, 4);
  *res = vsetq_lane_s16(vgetq_lane_s16((a), ((imm) >> 24) & 0xF), *res, 6);
}

#endif

#ifdef LV_HAVE_SSE
#include <smmintrin.h>

void demod_16qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols);

@@ -85,6 +135,118 @@ void demod_16qam_lte(const cf_t *symbols, float *llr, int nsymbols) {
  }
}

#ifdef HAVE_NEONv8
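
/* Editorial comment: 16-QAM soft demodulation with 16-bit LLR output.
 * Per symbol y, the four LLRs are b0 = -re(y), b1 = -im(y),
 * b2 = |re(y)| - 2/sqrt(10), b3 = |im(y)| - 2/sqrt(10), all scaled by
 * SCALE_SHORT_CONV_QAM16, as in the scalar tail loop below. The vector
 * path handles four symbols per iteration. */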
void demod_16qam_lte_s_neon(const cf_t *symbols, short *llr, int nsymbols) {
  float *symbolsPtr = (float *)symbols;
  int16x8_t *resultPtr = (int16x8_t *)llr;
  float32x4_t symbol1, symbol2;
  int32x4_t symbol_i1, symbol_i2;
  int16x8_t symbol_i, symbol_abs;
  int8x16_t result11, result21;
  result11 = vdupq_n_s8(0);
  result21 = vdupq_n_s8(0);
  int16x8_t offset = vdupq_n_s16(2 * SCALE_SHORT_CONV_QAM16 / sqrt(10));
  float32x4_t scale_v = vdupq_n_f32(-SCALE_SHORT_CONV_QAM16);

  for (int i = 0; i < nsymbols / 4; i++) {
    symbol1 = vld1q_f32(symbolsPtr);
    symbolsPtr += 4;
    symbol2 = vld1q_f32(symbolsPtr);
    symbolsPtr += 4;

    symbol_i1 = vcvtnq_s32_f32(vmulq_f32(symbol1, scale_v));
    symbol_i2 = vcvtnq_s32_f32(vmulq_f32(symbol2, scale_v));
    symbol_i = vcombine_s16(vqmovn_s32(symbol_i1), vqmovn_s32(symbol_i2));

    symbol_abs = vqabsq_s16(symbol_i);
    symbol_abs = vsubq_s16(symbol_abs, offset);
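
    /* Editorial comment: symbol_i holds the (-re, -im) pairs and
     * symbol_abs the (|re|, |im|) - offset pairs; the shuffles below
     * interleave them so each output register carries the four LLRs of
     * two symbols in order. */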
    vshuff_s32_odd((int32x4_t)symbol_i, 16, (int32x4_t *)&result11);
    vshuff_s32_even((int32x4_t)symbol_abs, 64, (int32x4_t *)&result11);

    vshuff_s32_odd((int32x4_t)symbol_i, 50, (int32x4_t *)&result21);
    vshuff_s32_even((int32x4_t)symbol_abs, 200, (int32x4_t *)&result21);

    vst1q_s8((int8_t *)resultPtr, result11);
    resultPtr++;
    vst1q_s8((int8_t *)resultPtr, result21);
    resultPtr++;
  }
  // Demodulate last symbols
  for (int i = 4 * (nsymbols / 4); i < nsymbols; i++) {
    short yre = (short)(SCALE_SHORT_CONV_QAM16 * crealf(symbols[i]));
    short yim = (short)(SCALE_SHORT_CONV_QAM16 * cimagf(symbols[i]));

    llr[4 * i + 0] = -yre;
    llr[4 * i + 1] = -yim;
    llr[4 * i + 2] = abs(yre) - 2 * SCALE_SHORT_CONV_QAM16 / sqrt(10);
    llr[4 * i + 3] = abs(yim) - 2 * SCALE_SHORT_CONV_QAM16 / sqrt(10);
  }
}
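
/* Editorial comment: 8-bit LLR variant of the same algorithm; eight
 * symbols per iteration, narrowing f32 -> s32 -> s16 -> s8 with
 * saturation before subtracting the offset. */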
void demod_16qam_lte_b_neon(const cf_t *symbols, int8_t *llr, int nsymbols) {
  float *symbolsPtr = (float *)symbols;
  int8x16_t *resultPtr = (int8x16_t *)llr;
  float32x4_t symbol1, symbol2, symbol3, symbol4;
  int8x16_t symbol_i, symbol_abs;
  int16x8_t symbol_12, symbol_34;
  int32x4_t symbol_i1, symbol_i2, symbol_i3, symbol_i4;
  int8x16_t offset = vdupq_n_s8(2 * SCALE_BYTE_CONV_QAM16 / sqrt(10));
  int8x16_t result1n, result2n;
  float32x4_t scale_v = vdupq_n_f32(-SCALE_BYTE_CONV_QAM16);

  result1n = vdupq_n_s8(0);
  result2n = vdupq_n_s8(0);
  for (int i = 0; i < nsymbols / 8; i++) {
    symbol1 = vld1q_f32(symbolsPtr);
    symbolsPtr += 4;
    symbol2 = vld1q_f32(symbolsPtr);
    symbolsPtr += 4;
    symbol3 = vld1q_f32(symbolsPtr);
    symbolsPtr += 4;
    symbol4 = vld1q_f32(symbolsPtr);
    symbolsPtr += 4;
    symbol_i1 = vcvtnq_s32_f32(vmulq_f32(symbol1, scale_v));
    symbol_i2 = vcvtnq_s32_f32(vmulq_f32(symbol2, scale_v));
    symbol_i3 = vcvtnq_s32_f32(vmulq_f32(symbol3, scale_v));
    symbol_i4 = vcvtnq_s32_f32(vmulq_f32(symbol4, scale_v));

    symbol_12 = (int16x8_t)vcombine_s16(vqmovn_s32(symbol_i1), vqmovn_s32(symbol_i2));
    symbol_34 = (int16x8_t)vcombine_s16(vqmovn_s32(symbol_i3), vqmovn_s32(symbol_i4));
    symbol_i = (int8x16_t)vcombine_s8(vqmovn_s16(symbol_12), vqmovn_s16(symbol_34));
    symbol_abs = vqabsq_s8(symbol_i);
    symbol_abs = vsubq_s8(symbol_abs, offset);
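
    /* Editorial comment: viewed as int16x8, each lane of symbol_i and
     * symbol_abs is one symbol's (re, im) byte pair; the nibble-indexed
     * shuffles below interleave them into two registers of 4-LLR groups
     * (symbols 0-3, then 4-7). */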
    vshuff_s16_odd((int16x8_t)symbol_i, 0x3020100, (int16x8_t *)&result1n);
    vshuff_s16_even((int16x8_t)symbol_abs, 0x30201000, (int16x8_t *)&result1n);

    vshuff_s16_odd((int16x8_t)symbol_i, 0x07060504, (int16x8_t *)&result2n);
    vshuff_s16_even((int16x8_t)symbol_abs, 0x70605040, (int16x8_t *)&result2n);

    vst1q_s8((int8_t *)resultPtr, result1n);
    resultPtr++;
    vst1q_s8((int8_t *)resultPtr, result2n);
    resultPtr++;
  }
  // Demodulate last symbols
  for (int i = 8 * (nsymbols / 8); i < nsymbols; i++) {
    short yre = (int8_t)(SCALE_BYTE_CONV_QAM16 * crealf(symbols[i]));
    short yim = (int8_t)(SCALE_BYTE_CONV_QAM16 * cimagf(symbols[i]));

    llr[4 * i + 0] = -yre;
    llr[4 * i + 1] = -yim;
    llr[4 * i + 2] = abs(yre) - 2 * SCALE_BYTE_CONV_QAM16 / sqrt(10);
    llr[4 * i + 3] = abs(yim) - 2 * SCALE_BYTE_CONV_QAM16 / sqrt(10);
  }
}

#endif

#ifdef LV_HAVE_SSE

void demod_16qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols) {
@@ -190,6 +352,9 @@ void demod_16qam_lte_b_sse(const cf_t *symbols, int8_t *llr, int nsymbols) {

void demod_16qam_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
#ifdef LV_HAVE_SSE
  demod_16qam_lte_s_sse(symbols, llr, nsymbols);
#else
#ifdef HAVE_NEONv8
  demod_16qam_lte_s_neon(symbols, llr, nsymbols);
#else
  for (int i=0;i<nsymbols;i++) {
    short yre = (short) (SCALE_SHORT_CONV_QAM16*crealf(symbols[i]));
@@ -201,11 +366,15 @@ void demod_16qam_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
    llr[4*i+3] = abs(yim)-2*SCALE_SHORT_CONV_QAM16/sqrt(10);
  }
#endif
#endif
}

void demod_16qam_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols) {
#ifdef LV_HAVE_SSE
  demod_16qam_lte_b_sse(symbols, llr, nsymbols);
#else
#ifdef HAVE_NEONv8
  demod_16qam_lte_b_neon(symbols, llr, nsymbols);
#else
  for (int i=0;i<nsymbols;i++) {
    int8_t yre = (int8_t) (SCALE_BYTE_CONV_QAM16*crealf(symbols[i]));
@@ -217,6 +386,7 @@ void demod_16qam_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols) {
    llr[4*i+3] = abs(yim)-2*SCALE_BYTE_CONV_QAM16/sqrt(10);
  }
#endif
#endif
}

void demod_64qam_lte(const cf_t *symbols, float *llr, int nsymbols)
@@ -234,6 +404,171 @@ void demod_64qam_lte(const cf_t *symbols, float *llr, int nsymbols)
  }
}

#ifdef HAVE_NEONv8
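
/* Editorial comment: 64-QAM produces six LLRs per symbol:
 * b0/b1 = -re/-im, b2/b3 = |re|/|im| - 4/sqrt(42), and
 * b4/b5 = |b2|/|b3| - 2/sqrt(42), scaled as in the scalar tail loop. */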
void demod_64qam_lte_s_neon(const cf_t *symbols, short *llr, int nsymbols) {
  float *symbolsPtr = (float *)symbols;
  uint16x8_t *resultPtr = (uint16x8_t *)llr;
  float32x4_t symbol1, symbol2;
  int16x8_t symbol_i, symbol_abs, symbol_abs2;
  int32x4_t symbol_i1, symbol_i2;
  int16x8_t offset1 = vdupq_n_s16(4 * SCALE_SHORT_CONV_QAM64 / sqrt(42));
  int16x8_t offset2 = vdupq_n_s16(2 * SCALE_SHORT_CONV_QAM64 / sqrt(42));
  float32x4_t scale_v = vdupq_n_f32(-SCALE_SHORT_CONV_QAM64);

  int16x8_t result11 = vdupq_n_s16(0);
  int16x8_t result21 = vdupq_n_s16(0);
  int16x8_t result31 = vdupq_n_s16(0);

  for (int i = 0; i < nsymbols / 4; i++) {
    symbol1 = vld1q_f32(symbolsPtr);
    symbolsPtr += 4;
    symbol2 = vld1q_f32(symbolsPtr);
    symbolsPtr += 4;
    symbol_i1 = vcvtnq_s32_f32(vmulq_f32(symbol1, scale_v));
    symbol_i2 = vcvtnq_s32_f32(vmulq_f32(symbol2, scale_v));
    symbol_i = vcombine_s16(vqmovn_s32(symbol_i1), vqmovn_s32(symbol_i2));
    symbol_abs = vqabsq_s16(symbol_i);
    symbol_abs = vsubq_s16(symbol_abs, offset1);
    symbol_abs2 = vsubq_s16(vqabsq_s16(symbol_abs), offset2);
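
    /* Editorial comment: each int32 lane pairs one symbol's (re, im)
     * LLRs; the indexed shuffles lay out -y, |y| - offset1 and
     * ||y| - offset1| - offset2 so the three stores below emit
     * 6 LLRs x 4 symbols in order. */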
    vshuff_s32_idx((int32x4_t)symbol_i, 64, (int32x4_t *)&result11, 0);
    vshuff_s32_idx((int32x4_t)symbol_abs, 64, (int32x4_t *)&result11, 1);
    vshuff_s32_idx((int32x4_t)symbol_abs2, 64, (int32x4_t *)&result11, 2);
    vshuff_s32_idx((int32x4_t)symbol_i, 64, (int32x4_t *)&result11, 3);

    vshuff_s32_idx((int32x4_t)symbol_abs, 165, (int32x4_t *)&result21, 0);
    vshuff_s32_idx((int32x4_t)symbol_abs2, 165, (int32x4_t *)&result21, 1);
    vshuff_s32_idx((int32x4_t)symbol_i, 165, (int32x4_t *)&result21, 2);
    vshuff_s32_idx((int32x4_t)symbol_abs, 165, (int32x4_t *)&result21, 3);

    vshuff_s32_idx((int32x4_t)symbol_abs2, 254, (int32x4_t *)&result31, 0);
    vshuff_s32_idx((int32x4_t)symbol_i, 254, (int32x4_t *)&result31, 1);
    vshuff_s32_idx((int32x4_t)symbol_abs, 254, (int32x4_t *)&result31, 2);
    vshuff_s32_idx((int32x4_t)symbol_abs2, 254, (int32x4_t *)&result31, 3);

    vst1q_s16((int16_t *)resultPtr, result11);
    resultPtr++;
    vst1q_s16((int16_t *)resultPtr, result21);
    resultPtr++;
    vst1q_s16((int16_t *)resultPtr, result31);
    resultPtr++;
  }
  for (int i = 4 * (nsymbols / 4); i < nsymbols; i++) {
    float yre = (short)(SCALE_SHORT_CONV_QAM64 * crealf(symbols[i]));
    float yim = (short)(SCALE_SHORT_CONV_QAM64 * cimagf(symbols[i]));

    llr[6 * i + 0] = -yre;
    llr[6 * i + 1] = -yim;
    llr[6 * i + 2] = abs(yre) - 4 * SCALE_SHORT_CONV_QAM64 / sqrt(42);
    llr[6 * i + 3] = abs(yim) - 4 * SCALE_SHORT_CONV_QAM64 / sqrt(42);
    llr[6 * i + 4] = abs(llr[6 * i + 2]) - 2 * SCALE_SHORT_CONV_QAM64 / sqrt(42);
    llr[6 * i + 5] = abs(llr[6 * i + 3]) - 2 * SCALE_SHORT_CONV_QAM64 / sqrt(42);
  }
}
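
/* Editorial comment: 8-bit LLR variant of the 64-QAM demodulator; eight
 * symbols per iteration, so 8 x 6 = 48 LLR bytes fill exactly three
 * 16-byte stores. */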
void demod_64qam_lte_b_neon(const cf_t *symbols, int8_t *llr, int nsymbols) {
  float *symbolsPtr = (float *)symbols;
  uint8x16_t *resultPtr = (uint8x16_t *)llr;
  float32x4_t symbol1, symbol2, symbol3, symbol4;
  int8x16_t symbol_i, symbol_abs, symbol_abs2;
  int16x8_t symbol_12, symbol_34;
  int32x4_t symbol_i1, symbol_i2, symbol_i3, symbol_i4;
  int8x16_t offset1 = vdupq_n_s8(4 * SCALE_BYTE_CONV_QAM64 / sqrt(42));
  int8x16_t offset2 = vdupq_n_s8(2 * SCALE_BYTE_CONV_QAM64 / sqrt(42));
  float32x4_t scale_v = vdupq_n_f32(-SCALE_BYTE_CONV_QAM64);
  int8x16_t result11 = vdupq_n_s8(0);
  int8x16_t result21 = vdupq_n_s8(0);
  int8x16_t result31 = vdupq_n_s8(0);

  for (int i = 0; i < nsymbols / 8; i++) {
    symbol1 = vld1q_f32(symbolsPtr);
    symbolsPtr += 4;
    symbol2 = vld1q_f32(symbolsPtr);
    symbolsPtr += 4;
    symbol3 = vld1q_f32(symbolsPtr);
    symbolsPtr += 4;
    symbol4 = vld1q_f32(symbolsPtr);
    symbolsPtr += 4;
    symbol_i1 = vcvtnq_s32_f32(vmulq_f32(symbol1, scale_v));
    symbol_i2 = vcvtnq_s32_f32(vmulq_f32(symbol2, scale_v));
    symbol_i3 = vcvtnq_s32_f32(vmulq_f32(symbol3, scale_v));
    symbol_i4 = vcvtnq_s32_f32(vmulq_f32(symbol4, scale_v));
    symbol_12 = vcombine_s16(vqmovn_s32(symbol_i1), vqmovn_s32(symbol_i2));
    symbol_34 = vcombine_s16(vqmovn_s32(symbol_i3), vqmovn_s32(symbol_i4));
    symbol_i = vcombine_s8(vqmovn_s16(symbol_12), vqmovn_s16(symbol_34));
    symbol_abs = vqabsq_s8(symbol_i);
    symbol_abs = vsubq_s8(symbol_abs, offset1);
    symbol_abs2 = vsubq_s8(vqabsq_s8(symbol_abs), offset2);

    vshuff_s16_idx((int16x8_t)symbol_i, 0x22111000, (int16x8_t *)&result11, 0);
    vshuff_s16_idx((int16x8_t)symbol_abs, 0x22111000, (int16x8_t *)&result11, 1);
    vshuff_s16_idx((int16x8_t)symbol_abs2, 0x22111000, (int16x8_t *)&result11, 2);
    vshuff_s16_idx((int16x8_t)symbol_i, 0x22111000, (int16x8_t *)&result11, 3);
    vshuff_s16_idx((int16x8_t)symbol_abs, 0x22111000, (int16x8_t *)&result11, 4);
    vshuff_s16_idx((int16x8_t)symbol_abs2, 0x22111000, (int16x8_t *)&result11, 5);
    vshuff_s16_idx((int16x8_t)symbol_i, 0x22111000, (int16x8_t *)&result11, 6);
    vshuff_s16_idx((int16x8_t)symbol_abs, 0x22111000, (int16x8_t *)&result11, 7);

    vshuff_s16_idx((int16x8_t)symbol_abs2, 0x54443332, (int16x8_t *)&result21, 0);
    vshuff_s16_idx((int16x8_t)symbol_i, 0x54443332, (int16x8_t *)&result21, 1);
    vshuff_s16_idx((int16x8_t)symbol_abs, 0x54443332, (int16x8_t *)&result21, 2);
    vshuff_s16_idx((int16x8_t)symbol_abs2, 0x54443332, (int16x8_t *)&result21, 3);
    vshuff_s16_idx((int16x8_t)symbol_i, 0x54443332, (int16x8_t *)&result21, 4);
    vshuff_s16_idx((int16x8_t)symbol_abs, 0x54443332, (int16x8_t *)&result21, 5);
    vshuff_s16_idx((int16x8_t)symbol_abs2, 0x54443332, (int16x8_t *)&result21, 6);
    vshuff_s16_idx((int16x8_t)symbol_i, 0x54443332, (int16x8_t *)&result21, 7);

    vshuff_s16_idx((int16x8_t)symbol_abs, 0x77766655, (int16x8_t *)&result31, 0);
    vshuff_s16_idx((int16x8_t)symbol_abs2, 0x77766655, (int16x8_t *)&result31, 1);
    vshuff_s16_idx((int16x8_t)symbol_i, 0x77766655, (int16x8_t *)&result31, 2);
    vshuff_s16_idx((int16x8_t)symbol_abs, 0x77766655, (int16x8_t *)&result31, 3);
    vshuff_s16_idx((int16x8_t)symbol_abs2, 0x77766655, (int16x8_t *)&result31, 4);
    vshuff_s16_idx((int16x8_t)symbol_i, 0x77766655, (int16x8_t *)&result31, 5);
    vshuff_s16_idx((int16x8_t)symbol_abs, 0x77766655, (int16x8_t *)&result31, 6);
    vshuff_s16_idx((int16x8_t)symbol_abs2, 0x77766655, (int16x8_t *)&result31, 7);

    vst1q_s8((int8_t *)resultPtr, result11);
    resultPtr++;
    vst1q_s8((int8_t *)resultPtr, result21);
    resultPtr++;
    vst1q_s8((int8_t *)resultPtr, result31);
    resultPtr++;
  }
  for (int i = 8 * (nsymbols / 8); i < nsymbols; i++) {
    float yre = (int8_t)(SCALE_BYTE_CONV_QAM64 * crealf(symbols[i]));
    float yim = (int8_t)(SCALE_BYTE_CONV_QAM64 * cimagf(symbols[i]));

    llr[6 * i + 0] = -yre;
    llr[6 * i + 1] = -yim;
    llr[6 * i + 2] = abs(yre) - 4 * SCALE_BYTE_CONV_QAM64 / sqrt(42);
    llr[6 * i + 3] = abs(yim) - 4 * SCALE_BYTE_CONV_QAM64 / sqrt(42);
    llr[6 * i + 4] = abs(llr[6 * i + 2]) - 2 * SCALE_BYTE_CONV_QAM64 / sqrt(42);
    llr[6 * i + 5] = abs(llr[6 * i + 3]) - 2 * SCALE_BYTE_CONV_QAM64 / sqrt(42);
  }
}

#endif

#ifdef LV_HAVE_SSE

@@ -355,7 +690,6 @@ void demod_64qam_lte_b_sse(const cf_t *symbols, int8_t *llr, int nsymbols)
    _mm_store_si128(resultPtr, _mm_or_si128(_mm_or_si128(result11, result12),result13)); resultPtr++;
    _mm_store_si128(resultPtr, _mm_or_si128(_mm_or_si128(result21, result22),result23)); resultPtr++;
    _mm_store_si128(resultPtr, _mm_or_si128(_mm_or_si128(result31, result32),result33)); resultPtr++;
  }
  for (int i=8*(nsymbols/8);i<nsymbols;i++) {
    float yre = (int8_t) (SCALE_BYTE_CONV_QAM64*crealf(symbols[i]));
@@ -376,6 +710,9 @@ void demod_64qam_lte_s(const cf_t *symbols, short *llr, int nsymbols)
{
#ifdef LV_HAVE_SSE
  demod_64qam_lte_s_sse(symbols, llr, nsymbols);
#else
#ifdef HAVE_NEONv8
  demod_64qam_lte_s_neon(symbols, llr, nsymbols);
#else
  for (int i=0;i<nsymbols;i++) {
    float yre = (short) (SCALE_SHORT_CONV_QAM64*crealf(symbols[i]));
@@ -389,12 +726,16 @@ void demod_64qam_lte_s(const cf_t *symbols, short *llr, int nsymbols)
    llr[6*i+5] = abs(llr[6*i+3])-2*SCALE_SHORT_CONV_QAM64/sqrt(42);
  }
#endif
#endif
}

void demod_64qam_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols)
{
#ifdef LV_HAVE_SSE
  demod_64qam_lte_b_sse(symbols, llr, nsymbols);
#else
#ifdef HAVE_NEONv8
  demod_64qam_lte_b_neon(symbols, llr, nsymbols);
#else
  for (int i=0;i<nsymbols;i++) {
    float yre = (int8_t) (SCALE_BYTE_CONV_QAM64*crealf(symbols[i]));
@@ -408,6 +749,7 @@ void demod_64qam_lte_b(const cf_t *symbols, int8_t *llr, int nsymbols)
    llr[6*i+5] = abs(llr[6*i+3])-2*SCALE_BYTE_CONV_QAM64/sqrt(42);
  }
#endif
#endif
}

void demod_256qam_lte(const cf_t* symbols, float* llr, int nsymbols)