Conditional SSE compilation

master
ismagom 9 years ago
parent 6c194dc078
commit cd971bc15e

@ -0,0 +1,85 @@
# Check if SSE instructions are available on the machine where
# the project is compiled.
# Minimum requirement to enable SSE turbo decoder is SSE4.1
# Since SSE 4.1 includes all previous SSE, look only for this one.
# Check also AVX availability (for equalizer)
IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
EXEC_PROGRAM(cat ARGS "/proc/cpuinfo" OUTPUT_VARIABLE CPUINFO)
STRING(REGEX REPLACE "^.*(sse4_1).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "sse4_1" "${SSE_THERE}" SSE41_TRUE)
IF (SSE41_TRUE)
set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
ELSE (SSE41_TRUE)
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
ENDIF (SSE41_TRUE)
STRING(REGEX REPLACE "^.*(sse4_2).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "sse4_2" "${SSE_THERE}" SSE42_TRUE)
IF (SSE42_TRUE)
set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available on host")
ELSE (SSE42_TRUE)
set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
ENDIF (SSE42_TRUE)
STRING(REGEX REPLACE "^.*(avx).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "avx" "${SSE_THERE}" AVX_TRUE)
IF (AVX_TRUE)
set(AVX_FOUND true CACHE BOOL "AVX available on host")
ELSE (AVX_TRUE)
set(AVX_FOUND false CACHE BOOL "AVX available on host")
ENDIF (AVX_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin")
EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE
CPUINFO)
STRING(REGEX REPLACE "^.*(SSE4.1).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "SSE4.1" "${SSE_THERE}" SSE41_TRUE)
IF (SSE41_TRUE)
set(SSE4_1_FOUND true CACHE BOOL "SSE4.1 available on host")
ELSE (SSE41_TRUE)
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
ENDIF (SSE41_TRUE)
STRING(REGEX REPLACE "^.*(SSE4.2).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "SSE4.2" "${SSE_THERE}" SSE42_TRUE)
IF (SSE42_TRUE)
set(SSE4_2_FOUND true CACHE BOOL "SSE4.2 available on host")
ELSE (SSE42_TRUE)
set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
ENDIF (SSE42_TRUE)
STRING(REGEX REPLACE "^.*(AVX).*$" "\\1" SSE_THERE ${CPUINFO})
STRING(COMPARE EQUAL "AVX" "${SSE_THERE}" AVX_TRUE)
IF (AVX_TRUE)
set(AVX_FOUND true CACHE BOOL "AVX available on host")
ELSE (AVX_TRUE)
set(AVX_FOUND false CACHE BOOL "AVX available on host")
ENDIF (AVX_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows")
# TODO
set(SSE4_2_FOUND false CACHE BOOL "SSE4.2 available on host")
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
set(AVX_FOUND false CACHE BOOL "AVX available on host")
ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux")
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
set(AVX_FOUND false CACHE BOOL "AVX available on host")
ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux")
if(NOT SSE4_1_FOUND)
MESSAGE(STATUS "Could not find hardware support for SSE4.1 on this machine.")
endif(NOT SSE4_1_FOUND)
if(NOT SSE4_2_FOUND)
MESSAGE(STATUS "Could not find hardware support for SSE4.2 on this machine.")
endif(NOT SSE4_2_FOUND)
if(NOT AVX_FOUND)
MESSAGE(STATUS "Could not find hardware support for AVX on this machine.")
endif(NOT AVX_FOUND)
mark_as_advanced(SSE4_1_FOUND AVX_FOUND)

@ -0,0 +1,93 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 The srsLTE Developers. See the
* COPYRIGHT file at the top-level directory of this distribution.
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/**********************************************************************************************
* File: turbodecoder.h
*
* Description: Turbo Decoder.
* Parallel Concatenated Convolutional Code (PCCC) with two 8-state constituent
* encoders and one turbo code internal interleaver. The coding rate of turbo
* encoder is 1/3.
* MAP_GEN is the MAX-LOG-MAP generic implementation of the decoder.
*
* Reference: 3GPP TS 36.212 version 10.0.0 Release 10 Sec. 5.1.3.2
*********************************************************************************************/
#ifndef TURBODECODER_
#define TURBODECODER_
#include "srslte/config.h"
#include "srslte/fec/tc_interl.h"
#include "srslte/fec/cbsegm.h"
#define SRSLTE_TCOD_RATE 3
#define SRSLTE_TCOD_TOTALTAIL 12
#define SRSLTE_TCOD_MAX_LEN_CB 6144
#define SRSLTE_TCOD_MAX_LEN_CODED (SRSLTE_TCOD_RATE*SRSLTE_TCOD_MAX_LEN_CB+SRSLTE_TCOD_TOTALTAIL)
#include "srslte/fec/turbodecoder_gen.h"
#ifdef LV_HAVE_SSE
#include "srslte/fec/turbodecoder_sse.h"
#endif
typedef struct SRSLTE_API {
#ifdef LV_HAVE_SSE
srslte_tdec_sse_t tdec_sse;
#else
float *input_conv;
srslte_tdec_gen_t tdec_gen;
#endif
} srslte_tdec_t;
SRSLTE_API int srslte_tdec_init(srslte_tdec_t * h,
uint32_t max_long_cb);
SRSLTE_API void srslte_tdec_free(srslte_tdec_t * h);
SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb);
SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h,
int16_t* input,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision(srslte_tdec_t * h,
uint8_t *output,
uint32_t long_cb);
SRSLTE_API void srslte_tdec_decision_byte(srslte_tdec_t * h,
uint8_t *output,
uint32_t long_cb);
SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h,
int16_t * input,
uint8_t *output,
uint32_t nof_iterations,
uint32_t long_cb);
#endif

@ -0,0 +1,113 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2015 The srsLTE Developers. See the
* COPYRIGHT file at the top-level directory of this distribution.
*
* \section LICENSE
*
* This file is part of the srsLTE library.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include "srslte/fec/turbodecoder.h"
#include "srslte/fec/turbodecoder_gen.h"
#ifdef LV_HAVE_SSE
#include "srslte/fec/turbodecoder_sse.h"
#endif
#include "srslte/utils/vector.h"
int srslte_tdec_init(srslte_tdec_t * h, uint32_t max_long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_sse_init(&h->tdec_sse, max_long_cb);
#else
h->input_conv = srslte_vec_malloc(sizeof(float) * (3*max_long_cb+12));
if (!h->input_conv) {
perror("malloc");
return -1;
}
return srslte_tdec_gen_init(&h->tdec_gen, max_long_cb);
#endif
}
void srslte_tdec_free(srslte_tdec_t * h) {
#ifdef LV_HAVE_SSE
srslte_tdec_sse_free(&h->tdec_sse);
#else
if (h->input_conv) {
free(h->input_conv);
}
srslte_tdec_gen_free(&h->tdec_gen);
#endif
}
int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_sse_reset(&h->tdec_sse, long_cb);
#else
return srslte_tdec_gen_reset(&h->tdec_gen, long_cb);
#endif
}
void srslte_tdec_iteration(srslte_tdec_t * h, int16_t* input, uint32_t long_cb) {
#ifdef LV_HAVE_SSE
srslte_tdec_sse_iteration(&h->tdec_sse, input, long_cb);
#else
srslte_vec_convert_if(input, h->input_conv, 0.01, 3*long_cb+12);
srslte_tdec_gen_iteration(&h->tdec_gen, h->input_conv, long_cb);
#endif
}
void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_sse_decision(&h->tdec_sse, output, long_cb);
#else
return srslte_tdec_gen_decision(&h->tdec_gen, output, long_cb);
#endif
}
void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) {
#ifdef LV_HAVE_SSE
return srslte_tdec_sse_decision_byte(&h->tdec_sse, output, long_cb);
#else
return srslte_tdec_gen_decision_byte(&h->tdec_gen, output, long_cb);
#endif
}
int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output, uint32_t nof_iterations, uint32_t long_cb)
{
#ifdef LV_HAVE_SSE
return srslte_tdec_sse_run_all(&h->tdec_sse, input, output, nof_iterations, long_cb);
#else
srslte_vec_convert_if(input, h->input_conv, 0.01, 3*long_cb+12);
return srslte_tdec_gen_run_all(&h->tdec_gen, h->input_conv, output, nof_iterations, long_cb);
#endif
}
Loading…
Cancel
Save