diff --git a/cmake/modules/FindSSE.cmake b/cmake/modules/FindSSE.cmake
new file mode 100644
index 000000000..1af23d0be
--- /dev/null
+++ b/cmake/modules/FindSSE.cmake
@@ -0,0 +1,62 @@
+# Detect SIMD instruction-set support on the machine where the project is
+# compiled. The probes inspect the machine running CMake, so the results
+# describe the target only for native builds.
+#
+# Cache entries defined by this module:
+#   SSE4_1_FOUND - SSE4.1 is available. This is the minimum requirement to
+#                  enable the SSE turbo decoder; SSE4.1 includes all previous
+#                  SSE levels, so those are not probed separately.
+#   SSE4_2_FOUND - SSE4.2 is available.
+#   AVX_FOUND    - AVX is available (used by the equalizer).
+
+# findsse_check_feature(<cache_var> <label> <regex> <cpu_features>)
+#
+# Defines the BOOL cache entry <cache_var>: true when <regex> matches the text
+# <cpu_features>, false otherwise. <label> is the feature name used in the
+# entry's help string. A value that is already cached is left untouched.
+function(findsse_check_feature cache_var label regex cpu_features)
+  if(cpu_features MATCHES "${regex}")
+    set(${cache_var} true CACHE BOOL "${label} available on host")
+  else()
+    set(${cache_var} false CACHE BOOL "${label} available on host")
+  endif()
+endfunction()
+
+if(CMAKE_SYSTEM_NAME MATCHES "Linux")
+  execute_process(COMMAND cat /proc/cpuinfo
+                  OUTPUT_VARIABLE CPUINFO
+                  ERROR_QUIET)
+
+  findsse_check_feature(SSE4_1_FOUND "SSE4.1" "sse4_1" "${CPUINFO}")
+  findsse_check_feature(SSE4_2_FOUND "SSE4.2" "sse4_2" "${CPUINFO}")
+  findsse_check_feature(AVX_FOUND "AVX" "avx" "${CPUINFO}")
+elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin")
+  execute_process(COMMAND /usr/sbin/sysctl -n machdep.cpu.features
+                  OUTPUT_VARIABLE CPUINFO
+                  ERROR_QUIET)
+
+  # The dot is escaped so that it matches only a literal "." in the name.
+  findsse_check_feature(SSE4_1_FOUND "SSE4.1" "SSE4\\.1" "${CPUINFO}")
+  findsse_check_feature(SSE4_2_FOUND "SSE4.2" "SSE4\\.2" "${CPUINFO}")
+  findsse_check_feature(AVX_FOUND "AVX" "AVX" "${CPUINFO}")
+else()
+  # TODO: detection on Windows. Until then Windows, like every other
+  # unsupported platform, reports all features as unavailable.
+  set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
+  set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
+  set(AVX_FOUND false CACHE BOOL "AVX available on host")
+endif()
+
+if(NOT SSE4_1_FOUND)
+  message(STATUS "Could not find hardware support for SSE4.1 on this machine.")
+endif()
+
+if(NOT SSE4_2_FOUND)
+  message(STATUS "Could not find hardware support for SSE4.2 on this machine.")
+endif()
+
+if(NOT AVX_FOUND)
+  message(STATUS "Could not find hardware support for AVX on this machine.")
+endif()
+
+mark_as_advanced(SSE4_1_FOUND SSE4_2_FOUND AVX_FOUND)
diff --git a/srslte/include/srslte/fec/turbodecoder.h b/srslte/include/srslte/fec/turbodecoder.h
new file mode 100644
index 000000000..401db1042
--- /dev/null
+++ b/srslte/include/srslte/fec/turbodecoder.h
@@ -0,0 +1,93 @@
+/**
+ *
+ * \section COPYRIGHT
+ *
+ * Copyright 2013-2015 The srsLTE Developers. See the
+ * COPYRIGHT file at the top-level directory of this distribution.
+ *
+ * \section LICENSE
+ *
+ * This file is part of the srsLTE library.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/**********************************************************************************************
+ * File: turbodecoder.h
+ *
+ * Description: Turbo Decoder.
+ * Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
+ * encoders and one turbo code internal interleaver. The coding rate of turbo
+ * encoder is 1/3.
+ * MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
+ *
+ * Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
+ *********************************************************************************************/
+
+#ifndef TURBODECODER_
+#define TURBODECODER_
+
+#include <stdint.h>
+#include "srslte/config.h"
+#include "srslte/fec/tc_interl.h"
+#include "srslte/fec/cbsegm.h"
+
+#define SRSLTE_TCOD_RATE 3
+#define SRSLTE_TCOD_TOTALTAIL 12
+
+#define SRSLTE_TCOD_MAX_LEN_CB 6144
+#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
+
+#include "srslte/fec/turbodecoder_gen.h"
+
+#ifdef LV_HAVE_SSE
+#include "srslte/fec/turbodecoder_sse.h"
+#endif
+
+/* Decoder handle: the SSE decoder when LV_HAVE_SSE is defined, otherwise the
+ * generic decoder plus the float buffer that receives the converted input. */
+typedef struct SRSLTE_API {
+#ifdef LV_HAVE_SSE
+  srslte_tdec_sse_t tdec_sse;
+#else
+  float *input_conv;
+  srslte_tdec_gen_t tdec_gen;
+#endif
+} srslte_tdec_t;
+
+/* Initializes h for max_long_cb; returns the backend status, or -1 on allocation failure. */
+SRSLTE_API int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb);
+
+/* Releases what srslte_tdec_init() set up in h. */
+SRSLTE_API void srslte_tdec_free(srslte_tdec_t * h);
+
+/* Resets the selected backend for long_cb and returns its status. */
+SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb);
+
+/* Runs one backend iteration; the generic path reads 3*long_cb+12 values from input. */
+SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h, int16_t* input, uint32_t long_cb);
+
+/* Forwards output and long_cb to the decision routine of the selected backend. */
+SRSLTE_API void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb);
+
+/* Forwards output and long_cb to the decision_byte routine of the selected backend. */
+SRSLTE_API void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb);
+
+/* Forwards to the backend run_all with nof_iterations and returns its status. */
+SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output,
+                                   uint32_t nof_iterations, uint32_t long_cb);
+
+#endif
diff --git a/srslte/lib/fec/src/turbodecoder.c b/srslte/lib/fec/src/turbodecoder.c
new file mode 100644
index 000000000..e6158c9e6
--- /dev/null
+++ b/srslte/lib/fec/src/turbodecoder.c
@@ -0,0 +1,113 @@
+/**
+ *
+ * \section COPYRIGHT
+ *
+ * Copyright 2013-2015 The srsLTE Developers. See the
+ * COPYRIGHT file at the top-level directory of this distribution.
+ *
+ * \section LICENSE
+ *
+ * This file is part of the srsLTE library.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "srslte/fec/turbodecoder.h"
+#include "srslte/fec/turbodecoder_gen.h"
+
+#ifdef LV_HAVE_SSE
+#include "srslte/fec/turbodecoder_sse.h"
+#endif
+
+#include "srslte/utils/vector.h"
+
+/* Sets up the SSE decoder, or the generic decoder plus its float input buffer.
+ * Returns -1 if that buffer cannot be allocated, otherwise the backend's result. */
+int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb) {
+#ifdef LV_HAVE_SSE
+  return srslte_tdec_sse_init(&h->tdec_sse, max_long_cb);
+#else
+  h->input_conv = srslte_vec_malloc(sizeof(float) * (SRSLTE_TCOD_RATE*max_long_cb+SRSLTE_TCOD_TOTALTAIL));
+  if (!h->input_conv) {
+    perror("malloc");
+    return -1;
+  }
+  return srslte_tdec_gen_init(&h->tdec_gen, max_long_cb);
+#endif
+}
+
+/* Releases the decoder; input_conv is reset to NULL so a repeated call cannot free it twice. */
+void srslte_tdec_free(srslte_tdec_t * h) {
+#ifdef LV_HAVE_SSE
+  srslte_tdec_sse_free(&h->tdec_sse);
+#else
+  free(h->input_conv); /* free(NULL) is a no-op, so no guard is needed */
+  h->input_conv = NULL;
+  srslte_tdec_gen_free(&h->tdec_gen);
+#endif
+}
+
+/* Resets the selected backend for long_cb and returns its result. */
+int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb) {
+#ifdef LV_HAVE_SSE
+  return srslte_tdec_sse_reset(&h->tdec_sse, long_cb);
+#else
+  return srslte_tdec_gen_reset(&h->tdec_gen, long_cb);
+#endif
+}
+
+/* Runs one backend iteration; the generic backend gets the input converted to float first. */
+void srslte_tdec_iteration(srslte_tdec_t * h, int16_t* input, uint32_t long_cb) {
+#ifdef LV_HAVE_SSE
+  srslte_tdec_sse_iteration(&h->tdec_sse, input, long_cb);
+#else
+  srslte_vec_convert_if(input, h->input_conv, 0.01, SRSLTE_TCOD_RATE*long_cb+SRSLTE_TCOD_TOTALTAIL);
+  srslte_tdec_gen_iteration(&h->tdec_gen, h->input_conv, long_cb);
+#endif
+}
+
+/* Forwards to the decision routine of the selected backend (void: nothing is returned). */
+void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
+#ifdef LV_HAVE_SSE
+  srslte_tdec_sse_decision(&h->tdec_sse, output, long_cb);
+#else
+  srslte_tdec_gen_decision(&h->tdec_gen, output, long_cb);
+#endif
+}
+
+/* Forwards to the decision_byte routine of the selected backend (void: nothing is returned). */
+void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
+#ifdef LV_HAVE_SSE
+  srslte_tdec_sse_decision_byte(&h->tdec_sse, output, long_cb);
+#else
+  srslte_tdec_gen_decision_byte(&h->tdec_gen, output, long_cb);
+#endif
+}
+
+/* Forwards to the backend's run_all (after float conversion for the generic one); returns its result. */
+int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output, uint32_t nof_iterations, uint32_t long_cb) {
+#ifdef LV_HAVE_SSE
+  return srslte_tdec_sse_run_all(&h->tdec_sse, input, output, nof_iterations, long_cb);
+#else
+  srslte_vec_convert_if(input, h->input_conv, 0.01, SRSLTE_TCOD_RATE*long_cb+SRSLTE_TCOD_TOTALTAIL);
+  return srslte_tdec_gen_run_all(&h->tdec_gen, h->input_conv, output, nof_iterations, long_cb);
+#endif
+}