Merge branch 'next_novolk' into next

master
Ismael Gomez 7 years ago
commit 142ee30029

@ -58,6 +58,8 @@ SRSLTE_API int srslte_rm_turbo_tx(uint8_t *w_buff,
SRSLTE_API void srslte_rm_turbo_gentables(); SRSLTE_API void srslte_rm_turbo_gentables();
SRSLTE_API void srslte_rm_turbo_free_tables();
SRSLTE_API int srslte_rm_turbo_tx_lut(uint8_t *w_buff, SRSLTE_API int srslte_rm_turbo_tx_lut(uint8_t *w_buff,
uint8_t *systematic, uint8_t *systematic,
uint8_t *parity, uint8_t *parity,

@ -40,6 +40,25 @@
#include "srslte/config.h" #include "srslte/config.h"
typedef struct {
uint32_t nof_bits;
uint16_t *interleaver;
uint16_t *byte_idx;
uint8_t *bit_mask;
uint8_t n_128;
} srslte_bit_interleaver_t;
SRSLTE_API void srslte_bit_interleaver_init(srslte_bit_interleaver_t *q,
uint16_t *interleaver,
uint32_t nof_bits);
SRSLTE_API void srslte_bit_interleaver_free(srslte_bit_interleaver_t *q);
SRSLTE_API void srslte_bit_interleaver_run(srslte_bit_interleaver_t *q,
uint8_t *input,
uint8_t *output,
uint16_t w_offset);
SRSLTE_API void srslte_bit_interleave(uint8_t *input, SRSLTE_API void srslte_bit_interleave(uint8_t *input,
uint8_t *output, uint8_t *output,
uint16_t *interleaver, uint16_t *interleaver,

@ -61,7 +61,9 @@ static uint8_t RM_PERM_TC[NCOLS] = { 0, 16, 8, 24, 4, 20, 12, 28, 2, 18, 10, 26,
/* Align tables to 16-byte boundary */ /* Align tables to 16-byte boundary */
static uint16_t interleaver_systematic_bits[192][6160]; // 4 tail bits static uint16_t interleaver_systematic_bits[192][6160]; // 4 tail bits
static srslte_bit_interleaver_t bit_interleavers_systematic_bits[192];
static uint16_t interleaver_parity_bits[192][2*6160]; static uint16_t interleaver_parity_bits[192][2*6160];
static srslte_bit_interleaver_t bit_interleavers_parity_bits[192];
static uint16_t deinterleaver[192][4][18448]; static uint16_t deinterleaver[192][4][18448];
static int k0_vec[SRSLTE_NOF_TC_CB_SIZES][4][2]; static int k0_vec[SRSLTE_NOF_TC_CB_SIZES][4][2];
static bool rm_turbo_tables_generated = false; static bool rm_turbo_tables_generated = false;
@ -235,7 +237,12 @@ void srslte_rm_turbo_gentables() {
k0_vec[cb_idx][i][1] = -1; k0_vec[cb_idx][i][1] = -1;
} }
srslte_rm_turbo_gentable_systematic(interleaver_systematic_bits[cb_idx], k0_vec[cb_idx], nrows, ndummy); srslte_rm_turbo_gentable_systematic(interleaver_systematic_bits[cb_idx], k0_vec[cb_idx], nrows, ndummy);
srslte_bit_interleaver_init(&bit_interleavers_systematic_bits[cb_idx], interleaver_systematic_bits[cb_idx],
(uint32_t) srslte_cbsegm_cbsize(cb_idx)+4);
srslte_rm_turbo_gentable_parity(interleaver_parity_bits[cb_idx], k0_vec[cb_idx], in_len/3, nrows, ndummy); srslte_rm_turbo_gentable_parity(interleaver_parity_bits[cb_idx], k0_vec[cb_idx], in_len/3, nrows, ndummy);
srslte_bit_interleaver_init(&bit_interleavers_parity_bits[cb_idx], interleaver_parity_bits[cb_idx],
(uint32_t) (srslte_cbsegm_cbsize(cb_idx)+4)*2);
for (int i=0;i<4;i++) { for (int i=0;i<4;i++) {
srslte_rm_turbo_gentable_receive(deinterleaver[cb_idx][i], in_len, i); srslte_rm_turbo_gentable_receive(deinterleaver[cb_idx][i], in_len, i);
@ -244,6 +251,12 @@ void srslte_rm_turbo_gentables() {
} }
} }
void srslte_rm_turbo_free_tables () {
for (int i = 0; i < SRSLTE_NOF_TC_CB_SIZES; i++) {
srslte_bit_interleaver_free(&bit_interleavers_systematic_bits[i]);
srslte_bit_interleaver_free(&bit_interleavers_parity_bits[i]);
}
}
/** /**
* Rate matching for LTE Turbo Coder * Rate matching for LTE Turbo Coder
@ -274,11 +287,13 @@ int srslte_rm_turbo_tx_lut(uint8_t *w_buff, uint8_t *systematic, uint8_t *parity
if (rv_idx == 0) { if (rv_idx == 0) {
// Systematic bits // Systematic bits
srslte_bit_interleave(systematic, w_buff, interleaver_systematic_bits[cb_idx], in_len/3); //srslte_bit_interleave(systematic, w_buff, interleaver_systematic_bits[cb_idx], in_len/3);
srslte_bit_interleaver_run(&bit_interleavers_systematic_bits[cb_idx], systematic, w_buff, 0);
// Parity bits // Parity bits
srslte_bit_interleave_w_offset(parity, &w_buff[in_len/24], interleaver_parity_bits[cb_idx], 2*in_len/3, 4); //srslte_bit_interleave_w_offset(parity, &w_buff[in_len/24], interleaver_parity_bits[cb_idx], 2*in_len/3, 4);
srslte_bit_interleaver_run(&bit_interleavers_parity_bits[cb_idx], parity, &w_buff[in_len/24], 4);
} }
/* Bit selection and transmission 5.1.4.1.2 */ /* Bit selection and transmission 5.1.4.1.2 */

@ -197,6 +197,7 @@ int main(int argc, char **argv) {
} }
} }
srslte_rm_turbo_free_tables();
free(rm_bits); free(rm_bits);
free(rm_bits2); free(rm_bits2);
free(rm_bits2_bytes); free(rm_bits2_bytes);

@ -43,6 +43,7 @@
uint8_t tcod_lut_next_state[188][8][256]; uint8_t tcod_lut_next_state[188][8][256];
uint8_t tcod_lut_output[188][8][256]; uint8_t tcod_lut_output[188][8][256];
uint16_t tcod_per_fw[188][6144]; uint16_t tcod_per_fw[188][6144];
static srslte_bit_interleaver_t tcod_interleavers[188];
static bool table_initiated = false; static bool table_initiated = false;
@ -63,6 +64,9 @@ void srslte_tcod_free(srslte_tcod_t *h) {
if (h->temp) { if (h->temp) {
free(h->temp); free(h->temp);
} }
for (int i = 0; i < 188; i++) {
srslte_bit_interleaver_free(&tcod_interleavers[i]);
}
} }
/* Expects bits (1 byte = 1 bit) and produces bits. The systematic and parity bits are interlaced in the output */ /* Expects bits (1 byte = 1 bit) and produces bits. The systematic and parity bits are interlaced in the output */
@ -198,8 +202,9 @@ int srslte_tcod_encode_lut(srslte_tcod_t *h, uint8_t *input, uint8_t *parity, ui
} }
parity[long_cb/8] = 0; // will put tail here later parity[long_cb/8] = 0; // will put tail here later
/* Interleave input */ /* Interleave input */
srslte_bit_interleave(input, h->temp, tcod_per_fw[cblen_idx], long_cb); srslte_bit_interleaver_run(&tcod_interleavers[cblen_idx], input, h->temp, 0);
//srslte_bit_interleave(input, h->temp, tcod_per_fw[cblen_idx], long_cb);
/* Parity bits for the 2nd constituent encoders */ /* Parity bits for the 2nd constituent encoders */
uint8_t state1 = 0; uint8_t state1 = 0;
@ -297,6 +302,7 @@ void srslte_tcod_gentable() {
for (uint32_t i=0;i<long_cb;i++) { for (uint32_t i=0;i<long_cb;i++) {
tcod_per_fw[len][i] = interl.forward[i]; tcod_per_fw[len][i] = interl.forward[i];
} }
srslte_bit_interleaver_init(&tcod_interleavers[len], tcod_per_fw[len], long_cb);
for (uint32_t i=long_cb;i<6144;i++) { for (uint32_t i=long_cb;i<6144;i++) {
tcod_per_fw[len][i] = 0; tcod_per_fw[len][i] = 0;
} }

@ -147,6 +147,8 @@ clean:
} }
void srslte_sch_free(srslte_sch_t *q) { void srslte_sch_free(srslte_sch_t *q) {
srslte_rm_turbo_free_tables();
if (q->cb_in) { if (q->cb_in) {
free(q->cb_in); free(q->cb_in);
} }

@ -30,6 +30,7 @@
#include <limits.h> #include <limits.h>
#include <string.h> #include <string.h>
#include <stddef.h> #include <stddef.h>
#include <stdlib.h>
#ifdef LV_HAVE_SSE #ifdef LV_HAVE_SSE
@ -38,6 +39,172 @@
#endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_SSE */
#include "srslte/phy/utils/bit.h" #include "srslte/phy/utils/bit.h"
#include "srslte/phy/utils/vector.h"
void srslte_bit_interleaver_init(srslte_bit_interleaver_t *q,
uint16_t *interleaver,
uint32_t nof_bits) {
static const uint8_t mask[] = { 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1 };
bzero(q, sizeof(srslte_bit_interleaver_t));
q->interleaver = srslte_vec_malloc(sizeof(uint16_t)*nof_bits);
q->byte_idx = srslte_vec_malloc(sizeof(uint16_t)*nof_bits);
q->bit_mask = srslte_vec_malloc(sizeof(uint8_t)*nof_bits);
q->nof_bits = nof_bits;
for (int i = 0; i < nof_bits; i++) {
uint16_t i_px = interleaver[i];
q->interleaver[i] = i_px;
q->byte_idx[i] = (uint16_t) (interleaver[i] / 8);
q->bit_mask[i] = (uint8_t) (mask[i_px%8]);
}
}
void srslte_bit_interleaver_free(srslte_bit_interleaver_t *q) {
if (q->interleaver) {
free(q->interleaver);
}
if (q->byte_idx) {
free(q->byte_idx);
}
if (q->bit_mask) {
free(q->bit_mask);
}
bzero(q, sizeof(srslte_bit_interleaver_t));
}
void srslte_bit_interleaver_run(srslte_bit_interleaver_t *q, uint8_t *input, uint8_t *output, uint16_t w_offset) {
static const uint8_t mask[] = { 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1 };
uint16_t *byte_idx = q->byte_idx;
uint8_t *bit_mask = q->bit_mask;
uint8_t *output_ptr = output;
uint32_t st=0, w_offset_p=0;
if (w_offset < 8 && w_offset > 0) {
st=1;
for (uint32_t j=0;j<8-w_offset;j++) {
uint16_t i_p = q->interleaver[j];
if (input[i_p/8] & mask[i_p%8]) {
output[0] |= mask[j+w_offset];
} else {
output[0] &= ~(mask[j+w_offset]);
}
}
w_offset_p=8-w_offset;
}
uint32_t i = st * 8;
byte_idx += i - w_offset_p;
bit_mask += i - w_offset_p;
output_ptr += st;
#ifdef LV_HAVE_SSE
for(; i < q->nof_bits - 15; i += 16) {
__m128i in128;
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0x7);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0x6);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0x5);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0x4);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0x3);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0x2);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0x1);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0x0);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0xF);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0xE);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0xD);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0xC);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0xB);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0xA);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0x9);
in128 = _mm_insert_epi8(in128, input[*(byte_idx++)], 0x8);
__m128i mask128 = _mm_loadu_si128((__m128i *) bit_mask);
mask128 = _mm_shuffle_epi8(mask128, _mm_set_epi8(0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF,
0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7));
__m128i cmp128 = _mm_cmpeq_epi8(_mm_and_si128(in128, mask128), mask128);
*((uint16_t *) (output_ptr)) = (uint16_t) _mm_movemask_epi8(cmp128);
bit_mask += 16;
output_ptr += 2;
}
#endif /* LV_HAVE_SSE */
for(; i < q->nof_bits; i += 8) {
uint8_t out0 = (input[*(byte_idx++)] & *(bit_mask++))?mask[0]:(uint8_t)0;
uint8_t out1 = (input[*(byte_idx++)] & *(bit_mask++))?mask[1]:(uint8_t)0;
uint8_t out2 = (input[*(byte_idx++)] & *(bit_mask++))?mask[2]:(uint8_t)0;
uint8_t out3 = (input[*(byte_idx++)] & *(bit_mask++))?mask[3]:(uint8_t)0;
uint8_t out4 = (input[*(byte_idx++)] & *(bit_mask++))?mask[4]:(uint8_t)0;
uint8_t out5 = (input[*(byte_idx++)] & *(bit_mask++))?mask[5]:(uint8_t)0;
uint8_t out6 = (input[*(byte_idx++)] & *(bit_mask++))?mask[6]:(uint8_t)0;
uint8_t out7 = (input[*(byte_idx++)] & *(bit_mask++))?mask[7]:(uint8_t)0;
*output_ptr = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
output_ptr++;
}
for (uint32_t j=0;j<q->nof_bits%8;j++) {
uint16_t i_p = q->interleaver[(q->nof_bits/8)*8+j-w_offset];
if (input[i_p/8] & mask[i_p%8]) {
output[q->nof_bits/8] |= mask[j];
} else {
output[q->nof_bits/8] &= ~(mask[j]);
}
}
for (uint32_t j=0;j<w_offset;j++) {
uint16_t i_p = q->interleaver[(q->nof_bits/8)*8+j-w_offset];
if (input[i_p/8] & (1<<(7-i_p%8))) {
output[q->nof_bits/8] |= mask[j];
} else {
output[q->nof_bits/8] &= ~(mask[j]);
}
}
#if 0
/* THIS PIECE OF CODE IS FOR CHECKING SIMD BEHAVIOUR. DO NOT ENABLE. */
uint8_t *output2 = malloc(q->nof_bits/8);
for (i=st;i<q->nof_bits/8;i++) {
uint16_t i_p0 = q->interleaver[i*8+0-w_offset_p];
uint16_t i_p1 = q->interleaver[i*8+1-w_offset_p];
uint16_t i_p2 = q->interleaver[i*8+2-w_offset_p];
uint16_t i_p3 = q->interleaver[i*8+3-w_offset_p];
uint16_t i_p4 = q->interleaver[i*8+4-w_offset_p];
uint16_t i_p5 = q->interleaver[i*8+5-w_offset_p];
uint16_t i_p6 = q->interleaver[i*8+6-w_offset_p];
uint16_t i_p7 = q->interleaver[i*8+7-w_offset_p];
uint8_t out0 = (input[i_p0/8] & mask[i_p0%8])?mask[0]:(uint8_t)0;
uint8_t out1 = (input[i_p1/8] & mask[i_p1%8])?mask[1]:(uint8_t)0;
uint8_t out2 = (input[i_p2/8] & mask[i_p2%8])?mask[2]:(uint8_t)0;
uint8_t out3 = (input[i_p3/8] & mask[i_p3%8])?mask[3]:(uint8_t)0;
uint8_t out4 = (input[i_p4/8] & mask[i_p4%8])?mask[4]:(uint8_t)0;
uint8_t out5 = (input[i_p5/8] & mask[i_p5%8])?mask[5]:(uint8_t)0;
uint8_t out6 = (input[i_p6/8] & mask[i_p6%8])?mask[6]:(uint8_t)0;
uint8_t out7 = (input[i_p7/8] & mask[i_p7%8])?mask[7]:(uint8_t)0;
output2[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
}
for(i = st; i < q->nof_bits/8; i++) {
if (true || output[i] != output2[i]) {
printf("%05d/%05d %02X %02X\n", i, q->nof_bits/8, output[i], output2[i]);
}
//output[i] = output2[i];
}
free(output2);
#endif
}
void srslte_bit_interleave(uint8_t *input, uint8_t *output, uint16_t *interleaver, uint32_t nof_bits) { void srslte_bit_interleave(uint8_t *input, uint8_t *output, uint16_t *interleaver, uint32_t nof_bits) {
srslte_bit_interleave_w_offset(input, output, interleaver, nof_bits, 0); srslte_bit_interleave_w_offset(input, output, interleaver, nof_bits, 0);
@ -90,7 +257,11 @@ void srslte_bit_interleave_w_offset(uint8_t *input, uint8_t *output, uint16_t *i
epx2.m128 = _mm_shuffle_epi8(ipx2.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, epx2.m128 = _mm_shuffle_epi8(ipx2.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E,
0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E));
epx.m64.reg_b = epx2.m64.reg_a; epx.m128 = _mm_blendv_epi8(epx.m128, epx2.m128, _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0,
(uint8_t) 0xFF, (uint8_t) 0xFF,
(uint8_t) 0xFF, (uint8_t) 0xFF,
(uint8_t) 0xFF, (uint8_t) 0xFF,
(uint8_t) 0xFF, (uint8_t) 0xFF));
b128.m128 = _mm_and_si128(epx.m128, _mm_set1_epi8(0x7)); b128.m128 = _mm_and_si128(epx.m128, _mm_set1_epi8(0x7));
b128.m128 = _mm_shuffle_epi8(m128mask, b128.m128); b128.m128 = _mm_shuffle_epi8(m128mask, b128.m128);

Loading…
Cancel
Save