mirror of https://github.com/pvnis/srsRAN_4G.git
You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
750 lines
20 KiB
C
750 lines
20 KiB
C
6 years ago
|
/**
|
||
|
*
|
||
|
* \section COPYRIGHT
|
||
|
*
|
||
|
* Copyright 2013-2015 Software Radio Systems Limited
|
||
|
*
|
||
|
* \section LICENSE
|
||
|
*
|
||
|
* This file is part of the srsLTE library.
|
||
|
*
|
||
|
* srsLTE is free software: you can redistribute it and/or modify
|
||
|
* it under the terms of the GNU Affero General Public License as
|
||
|
* published by the Free Software Foundation, either version 3 of
|
||
|
* the License, or (at your option) any later version.
|
||
|
*
|
||
|
* srsLTE is distributed in the hope that it will be useful,
|
||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
* GNU Affero General Public License for more details.
|
||
|
*
|
||
|
* A copy of the GNU Affero General Public License can be found in
|
||
|
* the LICENSE file in the top-level directory of this distribution
|
||
|
* and at http://www.gnu.org/licenses/.
|
||
|
*
|
||
|
*/
|
||
|
|
||
|
#include "srslte/config.h"
|
||
|
|
||
|
/* Symbol-name generators: MAKE_FUNC pastes tdec_win, the WINIMP tag, an
 * underscore and the given suffix; MAKE_TYPE pastes tdec_win_, the WINIMP tag
 * and _t. WINIMP itself is defined further down by the selected flavour. */
#define MAKE_FUNC(a) CONCAT2(CONCAT2(tdec_win,WINIMP),CONCAT2(_,a))
#define MAKE_TYPE CONCAT2(CONCAT2(tdec_win_,WINIMP),_t)

/*
 * Exactly one of WINIMP_IS_SSE16 / WINIMP_IS_AVX16 / WINIMP_IS_SSE8 /
 * WINIMP_IS_AVX8 must be defined by the including file. Each flavour provides:
 *   nof_blocks        number of llr_t lanes in one SIMD register
 *   llr_t             scalar metric type
 *   simd_*            intrinsic aliases used by the generic code below
 *   move_right        byte-shuffle mask: lane i takes the value of lane i+1
 *                     (top lane keeps its own value)
 *   move_left         byte-shuffle mask: lane i takes the value of lane i-1
 *                     (lane 0 keeps its own value)
 *   normalize_period  normalize() acts when k is a non-zero multiple of this
 *   win_overlap_len   number of steps of the state-estimation pass
 *   INF               magnitude used (negated) for "unknown" state metrics
 * Optional: normalize_max, use_saturated_add, divide_output, simd_rb_shift.
 */
#if defined(WINIMP_IS_SSE16)

  #if !defined(LV_HAVE_SSE)
    #error "Selected SSE window decoder but instruction set not supported"
  #endif

  #include <nmmintrin.h>

  #define WINIMP            sse16
  #define nof_blocks        8

  #define llr_t             int16_t

  #define simd_type_t       __m128i
  #define simd_load         _mm_load_si128
  #define simd_store        _mm_store_si128
  #define simd_add          _mm_adds_epi16
  #define simd_sub          _mm_subs_epi16
  #define simd_max          _mm_max_epi16
  #define simd_set1         _mm_set1_epi16
  #define simd_insert       _mm_insert_epi16
  #define simd_shuffle      _mm_shuffle_epi8
  #define move_right        _mm_set_epi8(15,14,15,14,13,12,11,10,9,8,7,6,5,4,3,2)
  #define move_left         _mm_set_epi8(13,12,11,10,9,8,7,6,5,4,3,2,1,0,1,0)
  #define simd_rb_shift     _mm_srai_epi16

  #define normalize_period  2
  #define win_overlap_len   40

  /* NOTE(review): the 16-bit SSE flavour shifts its output while the 16-bit
   * AVX flavour below does not define divide_output - confirm this asymmetry
   * is intended. */
  #define divide_output     1

  #define INF               10000

#elif defined(WINIMP_IS_AVX16)

  /* NOTE(review): the 256-bit integer intrinsics aliased below belong to AVX2
   * in Intel's documentation - confirm LV_HAVE_AVX implies AVX2 in this build. */
  #if !defined(LV_HAVE_AVX)
    #error "Selected AVX window decoder but instruction set not supported"
  #endif

  #include <immintrin.h>

  #define WINIMP            avx16
  #define nof_blocks        16

  #define llr_t             int16_t

  #define simd_type_t       __m256i
  #define simd_load         _mm256_load_si256
  #define simd_store        _mm256_store_si256
  #define simd_add          _mm256_adds_epi16
  #define simd_sub          _mm256_subs_epi16
  #define simd_max          _mm256_max_epi16
  #define simd_set1         _mm256_set1_epi16
  #define simd_insert       _mm256_insert_epi16
  #define simd_shuffle      _mm256_shuffle_epi8
  #define move_right        _mm256_set_epi8(31,30,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)
  #define move_left         _mm256_set_epi8(29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,1,0)

  #define normalize_period  2
  #define win_overlap_len   40

  #define INF               10000

#elif defined(WINIMP_IS_SSE8)

  #if !defined(LV_HAVE_SSE)
    #error "Selected SSE window decoder but instruction set not supported"
  #endif

  #include <nmmintrin.h>

  #define WINIMP            sse8
  #define nof_blocks        16

  #define llr_t             int8_t

  #define simd_type_t       __m128i
  #define simd_load         _mm_load_si128
  #define simd_store        _mm_store_si128
  #define simd_add          _mm_adds_epi8
  #define simd_sub          _mm_subs_epi8
  #define simd_max          _mm_max_epi8
  #define simd_set1         _mm_set1_epi8
  #define simd_insert       _mm_insert_epi8
  #define simd_shuffle      _mm_shuffle_epi8
  #define move_right        _mm_set_epi8(15,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1)
  #define move_left         _mm_set_epi8(14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0)
  #define simd_rb_shift     simd_rb_shift_128

  #define normalize_max
  #define normalize_period  1
  #define win_overlap_len   40
  #define use_saturated_add
  #define divide_output     1

  #define INF               0

/* Arithmetic right shift by l of every signed byte of v. Built from two
 * 16-bit shifts: one on the low byte moved into the high position, one on the
 * word itself; the blend keeps the low byte of the former and the high byte
 * of the latter. */
inline static simd_type_t simd_rb_shift_128(simd_type_t v, const int l) {
  const __m128i low_byte_sel = _mm_set1_epi32(0x00FF00FF);
  const __m128i even_bytes   = _mm_srai_epi16(_mm_slli_epi16(v,8), l+8);
  const __m128i odd_bytes    = _mm_srai_epi16(v,l);
  return _mm_blendv_epi8(odd_bytes, even_bytes, low_byte_sel);
}

#elif defined(WINIMP_IS_AVX8)

  /* NOTE(review): the 256-bit integer intrinsics aliased below belong to AVX2
   * in Intel's documentation - confirm LV_HAVE_AVX implies AVX2 in this build. */
  #if !defined(LV_HAVE_AVX)
    #error "Selected AVX window decoder but instruction set not supported"
  #endif

  #include <immintrin.h>

  #define WINIMP            avx8
  #define nof_blocks        32

  #define llr_t             int8_t

  #define simd_type_t       __m256i
  #define simd_load         _mm256_load_si256
  #define simd_store        _mm256_store_si256
  #define simd_add          _mm256_adds_epi8
  #define simd_sub          _mm256_subs_epi8
  #define simd_max          _mm256_max_epi8
  #define simd_set1         _mm256_set1_epi8
  #define simd_insert       _mm256_insert_epi8
  #define simd_shuffle      _mm256_shuffle_epi8
  #define move_right        _mm256_set_epi8(31,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1)
  #define move_left         _mm256_set_epi8(30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0)
  #define simd_rb_shift     simd_rb_shift_256

  #define INF               0

  #define normalize_max
  #define normalize_period  1
  #define win_overlap_len   40
  #define use_saturated_add
  #define divide_output     1

/* 256-bit counterpart of the per-byte arithmetic right shift: the low byte of
 * each 16-bit word comes from the shifted-up copy, the high byte from the
 * directly shifted word. */
inline static simd_type_t simd_rb_shift_256(simd_type_t v, const int l) {
  const __m256i low_byte_sel = _mm256_set1_epi32(0x00FF00FF);
  const __m256i even_bytes   = _mm256_srai_epi16(_mm256_slli_epi16(v,8), l+8);
  const __m256i odd_bytes    = _mm256_srai_epi16(v,l);
  return _mm256_blendv_epi8(odd_bytes, even_bytes, low_byte_sel);
}

#else
  #error "Unknown WINIMP value"
#endif
|
||
|
|
||
|
typedef struct SRSLTE_API {
|
||
|
uint32_t max_long_cb;
|
||
|
llr_t *beta;
|
||
|
} MAKE_TYPE;
|
||
|
|
||
|
|
||
|
/* Number of trellis steps per sub-block. Expands in terms of a variable named
 * long_cb, which must be in scope wherever this macro is used. */
#define long_sb (long_cb/nof_blocks)

/* Set to 1 to compile in the printf tracing helpers below. */
#define debug_enabled_win 0

#if debug_enabled_win

/* The tracing macros read locals of the calling function by name
 * (k, x, y, out, old, beta_save, loop_len, long_cb); d selects the SIMD lane
 * to print. */

/* Trace one step of the final alpha pass for lane d. */
#define debug_state(d) do { \
    printf("k=%5d, in=%5d, pa=%3d, out=%5d, alpha=[", (d)*long_sb+k+1, MAKE_FUNC(get_simd)(x,(d)), MAKE_FUNC(get_simd)(y,(d)), MAKE_FUNC(get_simd)(out,(d))); \
    for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(old[j],(d))); \
    printf("], beta=["); \
    for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(beta_save[j], (d))); \
    printf("\n"); \
  } while (0)

/* Trace one step of the alpha state-estimation pass for lane d. */
#define debug_state_pre(d) do { \
    printf("pre-window k=%5d, in=%5d, pa=%3d, alpha=[", ((d)+1)*long_sb-loop_len+k+1, MAKE_FUNC(get_simd)(x,(d)), MAKE_FUNC(get_simd)(y,(d))); \
    for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(old[j],(d))); \
    printf("]\n"); \
  } while (0)

/* Trace one step of the beta recursion for lane d. */
#define debug_state_beta(d) do { \
    printf("k=%5d, in=%5d, pa=%3d, beta=[", (d)*long_sb+k, MAKE_FUNC(get_simd)(x,(d)), MAKE_FUNC(get_simd)(y,(d))); \
    for (int j=0;j<8;j++) printf("%5d, ", MAKE_FUNC(get_simd)(old[j],(d))); \
    printf("\n"); \
  } while (0)

/* Return lane number pos of the SIMD register x, viewed as an array of llr_t. */
static llr_t MAKE_FUNC(get_simd)(simd_type_t x, uint32_t pos) {
  const llr_t *lanes = (const llr_t*) &x;
  return lanes[pos];
}

#else

/* Tracing disabled: the helpers expand to nothing. */
#define debug_state(a)
#define debug_state_pre(a)
#define debug_state_beta(a)

#endif
|
||
|
/*
|
||
|
static void MAKE_FUNC(print_simd)(simd_type_t x) {
|
||
|
llr_t *s = (llr_t*) &x;
|
||
|
printf("[");
|
||
|
for (int i=0;i<nof_blocks;i++) {
|
||
|
printf("%4d, ", s[i]);
|
||
|
}
|
||
|
printf("]\n");
|
||
|
}*/
|
||
|
|
||
|
/*
 * Scalar addition of two metrics, used by the scalar tail-trellis code.
 *
 * Without use_saturated_add this is a plain x+y converted back to llr_t.
 * With use_saturated_add (the 8-bit flavours) the sum is formed in 16 bits and
 * clamped to [-128, 127] on both sides, matching the saturating SIMD add used
 * by the vector code. Clamping only the upper side would let a sum below -128
 * wrap around to a positive value when narrowed to int8_t.
 */
inline static llr_t MAKE_FUNC(sadd)(llr_t x, llr_t y) {
#ifndef use_saturated_add
  return x+y;
#else
  int16_t z = (int16_t) x+y;
  if (z > 127) {
    return 127;
  }
  if (z < -128) {
    return -128;
  }
  return (int8_t) z;
#endif
}
|
||
|
|
||
|
/*
 * Re-centre the eight state metrics in old[].
 *
 * Nothing happens for k == 0 or when k is not a multiple of normalize_period.
 * With normalize_max the lane-wise maximum over all eight states is
 * subtracted from every state; otherwise state 0 is subtracted from states
 * 1..7 and state 0 is then set to zero.
 */
inline static void MAKE_FUNC(normalize)(uint32_t k, simd_type_t old[8]) {
  if (k == 0 || (k % normalize_period) != 0) {
    return;
  }

#ifdef normalize_max
  simd_type_t peak = simd_max(old[0], old[1]);
  for (int st = 2; st < 8; st++) {
    peak = simd_max(peak, old[st]);
  }
  for (int st = 0; st < 8; st++) {
    old[st] = simd_sub(old[st], peak);
  }
#else
  const simd_type_t reference = old[0];
  for (int st = 1; st < 8; st++) {
    old[st] = simd_sub(old[st], reference);
  }
  old[0] = simd_set1(0);
#endif
}
|
||
|
|
||
|
/*
 * Scalar backward recursion over the three tail positions
 * input[long_cb .. long_cb+2] / parity[long_cb .. long_cb+2], walked from the
 * highest index down. On return old[0..7] holds the resulting state metrics.
 *
 * Every state s has two candidate metrics per step:
 *   first  = old[4 + s/2] + gamma[first_gamma[s]]
 *   second = old[s/2]     + gamma[3 - first_gamma[s]]
 * with gamma = {0, x, y, x+y}; the larger candidate becomes the new old[s].
 */
static void MAKE_FUNC(beta_trellis)(llr_t *input, llr_t *parity, uint32_t long_cb, llr_t old[8])
{
  static const uint8_t first_gamma[8] = {3, 0, 2, 1, 1, 2, 0, 3};

  /* Start from state 0 with all other states marked as unknown. */
  old[0] = 0;
  for (int st = 1; st < 8; st++) {
    old[st] = -INF;
  }

  for (int k=long_cb+2;k >= long_cb; k--) {
    const llr_t x = input[k];
    const llr_t y = parity[k];

    llr_t gamma[4];
    gamma[0] = 0;
    gamma[1] = x;
    gamma[2] = y;
    gamma[3] = MAKE_FUNC(sadd)(x, y);

    /* Compute all survivors before touching old[]: each one reads several
     * entries of the previous step. */
    llr_t survivor[8];
    for (int st = 0; st < 8; st++) {
      const llr_t first  = MAKE_FUNC(sadd)(old[4 + st/2], gamma[first_gamma[st]]);
      const llr_t second = MAKE_FUNC(sadd)(old[st/2], gamma[3 - first_gamma[st]]);
      survivor[st] = (first > second) ? first : second;
    }

#if debug_enabled_win
    printf("trellis: k=%d, in=%d, pa=%d, beta: ", k, x, y); for (int i=0;i<8;i++) {printf("%d,", old[i]);} printf("\n");
#endif

    for (int st = 0; st < 8; st++) {
      old[st] = survivor[st];
    }
  }
}
|
||
|
|
||
|
/* Computes beta values */
|
||
|
static void MAKE_FUNC(beta)(MAKE_TYPE * s, llr_t *input, llr_t *app, llr_t *parity, uint32_t long_cb)
|
||
|
{
|
||
|
simd_type_t m_b[8], new[8], old[8];
|
||
|
simd_type_t x, y, xy, ap;
|
||
|
|
||
|
simd_type_t *inputPtr;
|
||
|
simd_type_t *appPtr;
|
||
|
simd_type_t *parityPtr;
|
||
|
simd_type_t *betaPtr = (simd_type_t*) s->beta;
|
||
|
|
||
|
uint32_t loop_len;
|
||
|
for (int j=0;j<2;j++) {
|
||
|
|
||
|
// First run L states to find initial state for all sub-blocks after first
|
||
|
if (j==0) {
|
||
|
loop_len = win_overlap_len;
|
||
|
} else {
|
||
|
loop_len = long_sb;
|
||
|
}
|
||
|
|
||
|
// When passing through all window pick estimated initial states (known state for sb=0)
|
||
|
if (loop_len == long_sb) {
|
||
|
|
||
|
// shuffle across 128-bit boundary manually
|
||
|
#ifdef WINIMP_IS_AVX16
|
||
|
llr_t tmp[8];
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
tmp[i] = _mm256_extract_epi16(old[i], 8);
|
||
|
}
|
||
|
#endif
|
||
|
#ifdef WINIMP_IS_AVX8
|
||
|
llr_t tmp[8];
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
tmp[i] = _mm256_extract_epi8(old[i], 16);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
old[i] = simd_shuffle(old[i], move_right);
|
||
|
}
|
||
|
// last sub-block state is calculated from the trellis
|
||
|
llr_t trellis_old[8];
|
||
|
MAKE_FUNC(beta_trellis)(input, parity, long_cb, trellis_old);
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
old[i] = simd_insert(old[i], trellis_old[i], nof_blocks-1);
|
||
|
}
|
||
|
|
||
|
#ifdef WINIMP_IS_AVX16
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
old[i] = _mm256_insert_epi16(old[i], tmp[i], 7);
|
||
|
}
|
||
|
#endif
|
||
|
#ifdef WINIMP_IS_AVX8
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
old[i] = _mm256_insert_epi8(old[i], tmp[i], 15);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
inputPtr = (simd_type_t*) &input[long_cb-nof_blocks];
|
||
|
appPtr = (simd_type_t*) &app[long_cb-nof_blocks];
|
||
|
parityPtr = (simd_type_t*) &parity[long_cb-nof_blocks];
|
||
|
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
simd_store(&betaPtr[8*long_sb + i], old[i]);
|
||
|
}
|
||
|
|
||
|
} else {
|
||
|
// when estimating states, just set all to unknown
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
old[i] = simd_set1(-INF);
|
||
|
}
|
||
|
inputPtr = (simd_type_t*) &input[nof_blocks*(loop_len-1)];
|
||
|
appPtr = (simd_type_t*) &app[nof_blocks*(loop_len-1)];
|
||
|
parityPtr = (simd_type_t*) &parity[nof_blocks*(loop_len-1)];
|
||
|
}
|
||
|
|
||
|
for (int k = loop_len - 1; k >= 0; k--) {
|
||
|
x = simd_load(inputPtr--);
|
||
|
y = simd_load(parityPtr--);
|
||
|
|
||
|
if (app) {
|
||
|
ap = simd_load(appPtr--);
|
||
|
x = simd_add(ap, x);
|
||
|
}
|
||
|
|
||
|
xy = simd_add(x, y);
|
||
|
|
||
|
m_b[0] = simd_add(old[4], xy);
|
||
|
m_b[1] = old[4];
|
||
|
m_b[2] = simd_add(old[5], y);
|
||
|
m_b[3] = simd_add(old[5], x);
|
||
|
m_b[4] = simd_add(old[6], x);
|
||
|
m_b[5] = simd_add(old[6], y);
|
||
|
m_b[6] = old[7];
|
||
|
m_b[7] = simd_add(old[7], xy);
|
||
|
|
||
|
new[0] = old[0];
|
||
|
new[1] = simd_add(old[0], xy);
|
||
|
new[2] = simd_add(old[1], x);
|
||
|
new[3] = simd_add(old[1], y);
|
||
|
new[4] = simd_add(old[2], y);
|
||
|
new[5] = simd_add(old[2], x);
|
||
|
new[6] = simd_add(old[3], xy);
|
||
|
new[7] = old[3];
|
||
|
|
||
|
// Calculate maximum metric
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
old[i] = simd_max(m_b[i], new[i]);
|
||
|
}
|
||
|
// Store metric only when doing the final pass
|
||
|
if (loop_len == long_sb) {
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
simd_store(&betaPtr[8*k + i], old[i]);
|
||
|
}
|
||
|
}
|
||
|
if (loop_len!=long_sb) {
|
||
|
debug_state_beta(0);
|
||
|
} else {
|
||
|
debug_state_beta(0);
|
||
|
}
|
||
|
|
||
|
// normalize
|
||
|
MAKE_FUNC(normalize)(k, old);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Computes alpha metrics */
|
||
|
static void MAKE_FUNC(alpha)(MAKE_TYPE * s, llr_t *input, llr_t *app, llr_t *parity, llr_t * output, uint32_t long_cb)
|
||
|
{
|
||
|
simd_type_t m_b[8], new[8], old[8], max1[8], max0[8];
|
||
|
simd_type_t x, y, xy, ap;
|
||
|
simd_type_t m1, m0;
|
||
|
|
||
|
simd_type_t *inputPtr;
|
||
|
simd_type_t *appPtr;
|
||
|
simd_type_t *parityPtr;
|
||
|
simd_type_t *betaPtr = (simd_type_t*) s->beta;
|
||
|
simd_type_t *outputPtr = (simd_type_t*) output;
|
||
|
|
||
|
#if debug_enabled_win
|
||
|
simd_type_t beta_save[8];
|
||
|
#endif
|
||
|
|
||
|
// Skip state 0
|
||
|
betaPtr+=8;
|
||
|
|
||
|
uint32_t loop_len;
|
||
|
|
||
|
for (int j=0;j<2;j++) {
|
||
|
|
||
|
// First run L states to find initial state for all sub-blocks after first
|
||
|
if (j==0) {
|
||
|
loop_len = win_overlap_len;
|
||
|
} else {
|
||
|
loop_len = long_sb;
|
||
|
}
|
||
|
|
||
|
// When passing through all window pick estimated initial states (known state for sb=0)
|
||
|
if (loop_len == long_sb) {
|
||
|
|
||
|
#ifdef WINIMP_IS_AVX16
|
||
|
llr_t tmp[8];
|
||
|
for (int i=0;i<8;i++) {
|
||
|
tmp[i] = _mm256_extract_epi16(old[i], 7);
|
||
|
}
|
||
|
#endif
|
||
|
#ifdef WINIMP_IS_AVX8
|
||
|
llr_t tmp[8];
|
||
|
for (int i=0;i<8;i++) {
|
||
|
tmp[i] = _mm256_extract_epi8(old[i], 15);
|
||
|
}
|
||
|
#endif
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
old[i] = simd_shuffle(old[i], move_left);
|
||
|
}
|
||
|
#ifdef WINIMP_IS_AVX16
|
||
|
for (int i=0;i<8;i++) {
|
||
|
old[i] = _mm256_insert_epi16(old[i], tmp[i], 8);
|
||
|
}
|
||
|
#endif
|
||
|
#ifdef WINIMP_IS_AVX8
|
||
|
for (int i=0;i<8;i++) {
|
||
|
old[i] = _mm256_insert_epi8(old[i], tmp[i], 16);
|
||
|
}
|
||
|
#endif
|
||
|
// 1st sub-block state is known
|
||
|
old[0] = simd_insert(old[0], 0, 0);
|
||
|
for (int i = 1; i < 8; i++) {
|
||
|
old[i] = simd_insert(old[i], -INF, 0);
|
||
|
}
|
||
|
} else {
|
||
|
// when estimating states, just set all to unknown
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
old[i] = simd_set1(-INF);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
inputPtr = (simd_type_t*) &input[nof_blocks*(long_sb-loop_len)];
|
||
|
appPtr = (simd_type_t*) &app[nof_blocks*(long_sb-loop_len)];
|
||
|
parityPtr = (simd_type_t*) &parity[nof_blocks*(long_sb-loop_len)];
|
||
|
|
||
|
for (int k = 0; k < loop_len; k++) {
|
||
|
x = simd_load(inputPtr++);
|
||
|
y = simd_load(parityPtr++);
|
||
|
|
||
|
if (app) {
|
||
|
ap = simd_load(appPtr++);
|
||
|
x = simd_add(ap, x);
|
||
|
}
|
||
|
|
||
|
xy = simd_add(x,y);
|
||
|
|
||
|
m_b[0] = old[0];
|
||
|
m_b[1] = simd_add(old[3], y);
|
||
|
m_b[2] = simd_add(old[4], y);
|
||
|
m_b[3] = old[7];
|
||
|
m_b[4] = old[1];
|
||
|
m_b[5] = simd_add(old[2], y);
|
||
|
m_b[6] = simd_add(old[5], y);
|
||
|
m_b[7] = old[6];
|
||
|
|
||
|
new[0] = simd_add(old[1], xy);
|
||
|
new[1] = simd_add(old[2], x);
|
||
|
new[2] = simd_add(old[5], x);
|
||
|
new[3] = simd_add(old[6], xy);
|
||
|
new[4] = simd_add(old[0], xy);
|
||
|
new[5] = simd_add(old[3], x);
|
||
|
new[6] = simd_add(old[4], x);
|
||
|
new[7] = simd_add(old[7], xy);
|
||
|
|
||
|
// Load beta and compute output only when passing through all window
|
||
|
if (loop_len == long_sb) {
|
||
|
simd_type_t beta;
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
beta = simd_load(betaPtr++);
|
||
|
max0[i] = simd_add(beta, m_b[i]);
|
||
|
max1[i] = simd_add(beta, new[i]);
|
||
|
|
||
|
#if debug_enabled_win
|
||
|
beta_save[i] = beta;
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
m1 = simd_max(max1[0], max1[1]);
|
||
|
m0 = simd_max(max0[0], max0[1]);
|
||
|
|
||
|
for (int i = 2; i < 8; i++) {
|
||
|
m1 = simd_max(m1, max1[i]);
|
||
|
m0 = simd_max(m0, max0[i]);
|
||
|
}
|
||
|
|
||
|
simd_type_t out = simd_sub(m1, m0);
|
||
|
|
||
|
// Divide output when using 8-bit arithmetic
|
||
|
#ifdef divide_output
|
||
|
out = simd_rb_shift(out, divide_output);
|
||
|
#endif
|
||
|
|
||
|
simd_store(outputPtr++, out);
|
||
|
|
||
|
debug_state(0);
|
||
|
}
|
||
|
|
||
|
for (int i = 0; i < 8; i++) {
|
||
|
old[i] = simd_max(m_b[i], new[i]);
|
||
|
}
|
||
|
|
||
|
// normalize
|
||
|
MAKE_FUNC(normalize)(k, old);
|
||
|
|
||
|
if (loop_len != long_sb) {
|
||
|
debug_state_pre(0);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int MAKE_FUNC(init)(void **hh, uint32_t max_long_cb)
|
||
|
{
|
||
|
*hh = calloc(1, sizeof(MAKE_TYPE));
|
||
|
|
||
|
MAKE_TYPE *h = (MAKE_TYPE*) *hh;
|
||
|
|
||
|
h->beta = srslte_vec_malloc(sizeof(llr_t) * 8 * max_long_cb * nof_blocks);
|
||
|
if (!h->beta) {
|
||
|
perror("srslte_vec_malloc");
|
||
|
return -1;
|
||
|
}
|
||
|
h->max_long_cb = max_long_cb;
|
||
|
return nof_blocks;
|
||
|
}
|
||
|
|
||
|
void MAKE_FUNC(free)(void *hh)
|
||
|
{
|
||
|
MAKE_TYPE *h = (MAKE_TYPE*) hh;
|
||
|
if (h->beta) {
|
||
|
free(h->beta);
|
||
|
}
|
||
|
bzero(h, sizeof(MAKE_TYPE));
|
||
|
}
|
||
|
|
||
|
/*
 * Runs one decoding pass: the backward (beta) recursion first, which fills
 * the handle's beta buffer, then the forward (alpha) recursion, which writes
 * output. hh is the handle obtained from init().
 */
void MAKE_FUNC(dec)(void *hh, llr_t *input, llr_t *app, llr_t *parity, llr_t *output, uint32_t long_cb)
{
  MAKE_TYPE *state = (MAKE_TYPE*) hh;

  MAKE_FUNC(beta)(state, input, app, parity, long_cb);
  MAKE_FUNC(alpha)(state, input, app, parity, output, long_cb);

#if debug_enabled_win
  printf("running win decoder: %s\n", STRING(WINIMP));
#endif
}
|
||
|
|
||
|
/*
 * Fills lanes st .. st+7 of the SIMD register reg. Lane n receives
 * input[3*(i + n*long_sb) + off]. Relies on input, i and long_cb being in
 * scope at the point of use; st must be a compile-time constant because the
 * lane index of the insert intrinsic is an immediate.
 */
#define INSERT8_INPUT(reg, st, off) do { \
    (reg) = simd_insert((reg), input[3*(i+((st)+0)*long_sb)+(off)], (st)+0); \
    (reg) = simd_insert((reg), input[3*(i+((st)+1)*long_sb)+(off)], (st)+1); \
    (reg) = simd_insert((reg), input[3*(i+((st)+2)*long_sb)+(off)], (st)+2); \
    (reg) = simd_insert((reg), input[3*(i+((st)+3)*long_sb)+(off)], (st)+3); \
    (reg) = simd_insert((reg), input[3*(i+((st)+4)*long_sb)+(off)], (st)+4); \
    (reg) = simd_insert((reg), input[3*(i+((st)+5)*long_sb)+(off)], (st)+5); \
    (reg) = simd_insert((reg), input[3*(i+((st)+6)*long_sb)+(off)], (st)+6); \
    (reg) = simd_insert((reg), input[3*(i+((st)+7)*long_sb)+(off)], (st)+7); \
  } while (0)
|
||
|
|
||
|
|
||
|
/*
 * Splits the interleaved decoder input into per-stream buffers laid out for
 * the windowed SIMD decoder.
 *
 * input       3 values per position (offset 0: systematic, 1: parity 0,
 *             2: parity 1) for long_cb positions, followed by 12 tail values
 * systematic  receives long_sb vectors plus 3 tail values at [long_cb..long_cb+2]
 * app2        only its 3 tail entries [long_cb..long_cb+2] are written here
 * parity_0    same layout as systematic
 * parity_1    same layout as systematic
 * long_cb     code block length
 *
 * For step i, lane n of each output vector holds the value of position
 * i + n*long_sb, so each lane walks one sub-block. Vectors are written with
 * aligned stores.
 */
void MAKE_FUNC(extract_input)(llr_t *input, llr_t *systematic, llr_t *app2, llr_t *parity_0, llr_t *parity_1, uint32_t long_cb)
{
  simd_type_t *systPtr    = (simd_type_t*) systematic;
  simd_type_t *parity0Ptr = (simd_type_t*) parity_0;
  simd_type_t *parity1Ptr = (simd_type_t*) parity_1;

  /* Start from defined contents: the insert intrinsic reads the register it
   * updates, so the first insert must not see an uninitialised value. */
  simd_type_t syst    = simd_set1(0);
  simd_type_t parity0 = simd_set1(0);
  simd_type_t parity1 = simd_set1(0);

  for (int i=0;i<long_sb;i++) {
    INSERT8_INPUT(syst,    0, 0);
    INSERT8_INPUT(parity0, 0, 1);
    INSERT8_INPUT(parity1, 0, 2);

#if nof_blocks >= 16
    INSERT8_INPUT(syst,    8, 0);
    INSERT8_INPUT(parity0, 8, 1);
    INSERT8_INPUT(parity1, 8, 2);
#endif

#if nof_blocks >= 32
    INSERT8_INPUT(syst,    16, 0);
    INSERT8_INPUT(parity0, 16, 1);
    INSERT8_INPUT(parity1, 16, 2);
    INSERT8_INPUT(syst,    24, 0);
    INSERT8_INPUT(parity0, 24, 1);
    INSERT8_INPUT(parity1, 24, 2);
#endif

    simd_store(systPtr++, syst);
    simd_store(parity0Ptr++, parity0);
    simd_store(parity1Ptr++, parity1);
  }

  /* Tail: the first 6 values after the payload are (systematic, parity_0)
   * pairs, the next 6 are (app2, parity_1) pairs. */
  for (int i = long_cb; i < long_cb + 3; i++) {
    systematic[i] = input[3*long_cb + 2*(i - long_cb)];
    parity_0[i]   = input[3*long_cb + 2*(i - long_cb) + 1];

    app2[i]       = input[3*long_cb + 6 + 2*(i - long_cb)];
    parity_1[i]   = input[3*long_cb + 6 + 2*(i - long_cb) + 1];
  }
}
|
||
|
|
||
|
/* Maps index x between the linear and the win-way interleaved ordering.
 * Relies on long_cb being in scope. */
#define deinter(x,win) (((x)%(long_cb/(win)))*(win)+(x)/(long_cb/(win)))

/* After the last of every b bits, advance the read index k by b steps
 * (b*nof_blocks entries); once k reaches long_cb, wrap it back by long_cb-1.
 * Relies on k and long_cb being in scope. */
#define reset_cnt(a,b) do { \
    if (!(((a)+1)%(b))) { \
      k+=(b)*nof_blocks; \
      if (k >= long_cb) { \
        k -= (long_cb-1); \
      } \
    } \
  } while (0)

/* Places app1[k+(a%b)*nof_blocks] into 16-bit lane 7-a of ap, then updates k
 * through reset_cnt. Relies on ap, app1 and k being in scope. */
#define insert_bit(a,b) do { \
    ap = _mm_insert_epi16(ap, app1[k+((a)%(b))*nof_blocks], 7-(a)); \
    reset_cnt(a,b); \
  } while (0)

/* Produces long_cb/8 output bytes. For each byte, eight values are gathered
 * into ap; after packing to bytes and comparing against zero, the byte-wise
 * sign mask is stored, so each bit is 1 where the gathered value is greater
 * than zero. The value gathered first ends up in the most significant bit.
 * Relies on output, ap, zeros, app1, k and long_cb being in scope. */
#define decide_for(b) do { \
    for (uint32_t i = 0; i < long_cb/8; i++) { \
      insert_bit(0,b); \
      insert_bit(1,b); \
      insert_bit(2,b); \
      insert_bit(3,b); \
      insert_bit(4,b); \
      insert_bit(5,b); \
      insert_bit(6,b); \
      insert_bit(7,b); \
      output[i] = (uint8_t) _mm_movemask_epi8(_mm_cmpgt_epi8(_mm_packs_epi16(ap,zeros),zeros)); \
    } \
  } while (0)
|
||
|
|
||
|
/* No improvement to use AVX here */
|
||
|
void MAKE_FUNC(decision_byte)(llr_t *app1, uint8_t *output, uint32_t long_cb)
|
||
|
{
|
||
|
uint32_t k=0;
|
||
|
__m128i zeros = _mm_setzero_si128();
|
||
|
__m128i ap;
|
||
|
|
||
|
if ((long_cb%(nof_blocks*8)) == 0) {
|
||
|
decide_for(8);
|
||
|
} else if ((long_cb%(nof_blocks*4)) == 0) {
|
||
|
decide_for(4);
|
||
|
} else if ((long_cb%(nof_blocks*2)) == 0) {
|
||
|
decide_for(2);
|
||
|
} else {
|
||
|
decide_for(1);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Drop the flavour-specific definitions so this file can be included again
 * with a different WINIMP_IS_* selection. */

/* flavour identity and scalar configuration */
#undef WINIMP
#undef nof_blocks
#undef llr_t
#undef normalize_period
#undef INF
#undef win_overlap_len

/* SIMD aliases */
#undef simd_type_t
#undef simd_load
#undef simd_store
#undef simd_add
#undef simd_sub
#undef simd_max
#undef simd_set1
#undef simd_insert
#undef simd_shuffle
#undef move_right
#undef move_left

#undef debug_enabled_win

/* optional settings: #undef of a name that is not defined is a no-op, so no
 * #ifdef guard is required */
#undef normalize_max
#undef use_saturated_add
#undef simd_rb_shift
#undef divide_output
|