Added Polar and LDPC forward error correction

master
Xavier Arteaga 4 years ago committed by Xavier Arteaga
parent a351b2534e
commit 2c4aa1e379

@ -0,0 +1,107 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file base_graph.h
* \brief Declaration of the two LDPC base graphs employed in the 5G NR
* standard.
* \author David Gregoratti (CTTC)
* \date 2020
*
* This file declares the dimensions of the base graphs and provides an interface
* for obtaining the set index and the permutation matrix corresponding to a
* given *lifting size*.
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef SRSLTE_BASEGRAPH_H
#define SRSLTE_BASEGRAPH_H
#include "srslte/config.h"
#include <stdint.h>
#define BG1Nfull 68 /*!< \brief Number of variable nodes in BG1. */
#define BG1N 66 /*!< \brief Number of variable nodes in BG1 after puncturing. */
#define BG1M 46 /*!< \brief Number of check nodes in BG1. */
#define BG1K 22 /*!< \brief Number of "uncoded bits" in BG1. */
#define BG2Nfull 52 /*!< \brief Number of variable nodes in BG2. */
#define BG2N 50 /*!< \brief Number of variable nodes in BG2 after puncturing. */
#define BG2M 42 /*!< \brief Number of check nodes in BG2. */
#define BG2K 10 /*!< \brief Number of "uncoded bits" in BG2. */
#define MAX_CNCT 20 /*!< \brief Maximum number (+1) of connected variables per check node. */
#define NOF_LIFTSIZE 8 /*!< \brief Number of possible lifting size indices. */
#define MAX_LIFTSIZE 384 /*!< \brief Maximum lifting size. */
#define VOID_LIFTSIZE 255 /*!< \brief Identifies an invalid lifting size in the lookup table. */
/*!
* \brief Identifies a missing connection between a check node and a variable node
* in the protograph. */
#define NO_CNCT 0xFFFF
/*! \brief Selects one of the two LDPC base graphs, BG1 or BG2. */
typedef enum SRSLTE_API {
  BG1 = 0, /*!< \brief Base Graph 1 */
  BG2 = 1  /*!< \brief Base Graph 2 */
} srslte_basegraph_t;
/*!
* Creates the parity-check matrix for the given base graph and lifting size
* in the compact form (a normalized permutation matrix). Also returns the
* indices of the variable nodes associated to each check node.
* \param[out] pcm The compact parity-check matrix: entry \f$(m,n)\f$ is an
* integer between 0 and LS-1 if check-node \f$m\f$ is
* connected to variable node \f$n\f$ in the protograph.
* This number specifies the order of the circular
* rotation applied to the identity matrix in the full
* graph (see also Section 3.4.1 of Deliverable 1). This
* pointer can be safely cast to 'uint16_t(*)[BGbgNfull]'
* (see also ::BG1Nfull and ::BG2Nfull) to get a BGbgM x
* BGbgNfull matrix.
* \param[out] positions For each check node, the corresponding row of this
* matrix contains the indices of the connected variable
* nodes (see also ::BG1_positions and ::BG2_positions).
* \param[in] bg The desired base graph (BG1 or BG2).
* \param[in] ls The desired lifting size.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int create_compact_pcm(uint16_t* pcm, int8_t (*positions)[MAX_CNCT], srslte_basegraph_t bg, uint16_t ls);
/*!
 * Looks up the set index associated with a lifting size.
 *
 * Lifting sizes above ::MAX_LIFTSIZE are rejected without touching the lookup
 * table; every other value is used directly as an index into it.
 * \param[in] ls A lifting size.
 * \return An integer between 0 and 7 (included), ::VOID_LIFTSIZE if ls is an
 *         invalid lifting size
 */
static inline uint8_t get_ls_index(uint16_t ls)
{
  /* Lookup table defined in another translation unit. */
  extern const uint8_t LSindex[];

  if (ls > MAX_LIFTSIZE) {
    return VOID_LIFTSIZE;
  }
  return LSindex[ls];
}
#endif // SRSLTE_BASEGRAPH_H

@ -0,0 +1,37 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_common.h
* \brief Declaration of elements common to both the LDPC encoder and the LDPC decoder.
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef SRSLTE_LDPCCOMMON_H
#define SRSLTE_LDPCCOMMON_H
/* NOTE(review): presumably written into the uint8_t bit buffers in place of a
 * 0/1 bit value, which is why it is outside {0, 1} — confirm against the
 * encoder/decoder implementation. */
#define FILLER_BIT 254 /*!< \brief Identifies a filler bit. */
#endif // SRSLTE_LDPCCOMMON_H

@ -0,0 +1,150 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_decoder.h
* \brief Declaration of the LDPC decoder.
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef SRSLTE_LDPCDECODER_H
#define SRSLTE_LDPCDECODER_H
#include "srslte/phy/fec/ldpc/base_graph.h"
/*!
 * \brief Selects the LDPC decoder implementation.
 */
typedef enum {
  SRSLTE_LDPC_DECODER_F            = 0, /*!< \brief %Decoder operating on real-valued LLRs. */
  SRSLTE_LDPC_DECODER_S            = 1, /*!< \brief %Decoder operating on 16-bit integer-valued LLRs. */
  SRSLTE_LDPC_DECODER_C            = 2, /*!< \brief %Decoder operating on 8-bit integer-valued LLRs. */
  SRSLTE_LDPC_DECODER_C_FLOOD      = 3, /*!< \brief %Decoder operating on 8-bit integer-valued LLRs, flooded
                                             scheduling. */
  SRSLTE_LDPC_DECODER_C_AVX2       = 4, /*!< \brief %Decoder operating on 8-bit integer-valued LLRs (AVX2 version). */
  SRSLTE_LDPC_DECODER_C_AVX2_FLOOD = 5  /*!< \brief %Decoder operating on 8-bit integer-valued LLRs, flooded
                                             scheduling (AVX2 version). */
} srslte_ldpc_decoder_type_t;
/*!
* \brief Describes an LDPC decoder.
*
* Holds the code dimensions for the selected base graph and lifting size,
* together with one decoding function pointer per LLR representation
* (float, 16-bit and 8-bit).
*
* NOTE(review): the unnamed arguments of the decode_* pointers presumably are
* (decoder state, llrs, message, cdwd_rm_length), mirroring
* srslte_ldpc_decoder_decode_f/_s/_c — confirm against the implementation.
*/
typedef struct SRSLTE_API {
void* ptr; /*!< \brief Registers used by the decoder. */
srslte_basegraph_t bg; /*!< \brief Current base graph. */
uint16_t ls; /*!< \brief Current lifting size. */
uint8_t bgN; /*!< \brief Number of variable nodes in the BG. */
uint16_t liftN; /*!< \brief Number of variable nodes in the lifted graph. */
uint8_t bgM; /*!< \brief Number of check nodes in the BG. */
uint16_t liftM; /*!< \brief Number of check nodes in the lifted graph. */
uint8_t bgK; /*!< \brief Number of "uncoded bits" in the BG. */
uint16_t liftK; /*!< \brief Number of uncoded bits in the lifted graph. */
uint16_t* pcm; /*!< \brief Pointer to the parity check matrix (compact form). */
int8_t (*var_indices)[MAX_CNCT]; /*!< \brief Pointer to lists of variable indices connected to a given check node. */
float scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum algorithm. */
void (*free)(void*); /*!< \brief Pointer to a "destructor". */
int (*decode_f)(void*,
const float*,
uint8_t*,
uint32_t); /*!< \brief Pointer to the decoding function (float version). */
int (*decode_s)(void*,
const int16_t*,
uint8_t*,
uint32_t); /*!< \brief Pointer to the decoding function (16-bit version). */
int (*decode_c)(void*,
const int8_t*,
uint8_t*,
uint32_t); /*!< \brief Pointer to the decoding function (8-bit version). */
} srslte_ldpc_decoder_t;
/*!
* Initializes all the LDPC decoder variables according to the given base graph
* and lifting size.
* \param[out] q A pointer to a srslte_ldpc_decoder_t structure.
* \param[in] type Type of LDPC decoder.
* \param[in] bg The desired base graph (BG1 or BG2).
* \param[in] ls The desired lifting size.
* \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_ldpc_decoder_init(srslte_ldpc_decoder_t* q,
srslte_ldpc_decoder_type_t type,
srslte_basegraph_t bg,
uint16_t ls,
float scaling_fctr);
/*!
* The LDPC decoder "destructor": it frees all the resources allocated to the decoder.
* \param[in] q A pointer to the dismantled decoder.
*/
SRSLTE_API void srslte_ldpc_decoder_free(srslte_ldpc_decoder_t* q);
/*!
* Carries out the actual decoding with real-valued LLRs.
* \param[in] q A pointer to the LDPC decoder (a srslte_ldpc_decoder_t structure
* instance) that carries out the decoding.
* \param[in] llrs The LLRs obtained from the channel samples that correspond to
* the codeword to be decoded.
* \param[out] message The message (uncoded bits) resulting from the decoding
* operation.
* \param[in] cdwd_rm_length The number of bits forming the codeword (after rate matching).
*/
SRSLTE_API int
srslte_ldpc_decoder_decode_f(srslte_ldpc_decoder_t* q, const float* llrs, uint8_t* message, uint32_t cdwd_rm_length);
/*!
* Carries out the actual decoding with 16-bit integer-valued LLRs. It is
* recommended to use a 15-bit representation for the LLRs, given that all
* values exceeding \f$ 2^{15}-1 \f$ (in magnitude) will be considered as infinity.
* \param[in] q A pointer to the LDPC decoder (a srslte_ldpc_decoder_t structure
* instance) that carries out the decoding.
* \param[in] llrs The LLRs obtained from the channel samples that correspond to
* the codeword to be decoded.
* \param[out] message The message (uncoded bits) resulting from the decoding
* operation.
* \param[in] cdwd_rm_length The number of bits forming the codeword (after rate matching).
*/
SRSLTE_API int
srslte_ldpc_decoder_decode_s(srslte_ldpc_decoder_t* q, const int16_t* llrs, uint8_t* message, uint32_t cdwd_rm_length);
/*!
* Carries out the actual decoding with 8-bit integer-valued LLRs. It is
* recommended to use a 7-bit representation for the LLRs, given that all
* values exceeding \f$ 2^{7}-1 \f$ (in magnitude) will be considered as infinity.
* \param[in] q A pointer to the LDPC decoder (a srslte_ldpc_decoder_t structure
* instance) that carries out the decoding.
* \param[in] llrs The LLRs obtained from the channel samples that correspond to
* the codeword to be decoded.
* \param[out] message The message (uncoded bits) resulting from the decoding
* operation.
* \param[in] cdwd_rm_length The number of bits forming the codeword (after rate matching).
*/
SRSLTE_API int
srslte_ldpc_decoder_decode_c(srslte_ldpc_decoder_t* q, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length);
#endif // SRSLTE_LDPCDECODER_H

@ -0,0 +1,104 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_encoder.h
* \brief Declaration of the LDPC encoder.
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef SRSLTE_LDPCENCODER_H
#define SRSLTE_LDPCENCODER_H
#include "srslte/phy/fec/ldpc/base_graph.h"
/*!
 * \brief Selects the LDPC encoder implementation.
 *
 * The SIMD entry only exists when the library is built with LV_HAVE_AVX2.
 */
typedef enum SRSLTE_API {
  SRSLTE_LDPC_ENCODER_C = 0, /*!< \brief Non-optimized encoder. */
#if LV_HAVE_AVX2
  SRSLTE_LDPC_ENCODER_AVX2 = 1, /*!< \brief SIMD-optimized encoder. */
#endif // LV_HAVE_AVX2
} srslte_ldpc_encoder_type_t;
/*!
* \brief Describes an LDPC encoder.
*
* Holds the code dimensions for the selected base graph and lifting size,
* together with the function pointers that carry out the encoding.
*
* NOTE(review): the unnamed arguments of the encode pointer presumably are
* (encoder state, input, output, input_length, cdwd_rm_length), mirroring
* srslte_ldpc_encoder_encode — confirm against the implementation.
*/
typedef struct SRSLTE_API {
void* ptr; /*!< \brief %Encoder auxiliary registers. */
srslte_basegraph_t bg; /*!< \brief Current base graph. */
uint16_t ls; /*!< \brief Current lifting size. */
uint8_t bgN; /*!< \brief Number of variable nodes in the BG. */
uint16_t liftN; /*!< \brief Number of variable nodes in the lifted graph. */
uint8_t bgM; /*!< \brief Number of check nodes in the BG. */
uint16_t liftM; /*!< \brief Number of check nodes in the lifted graph. */
uint8_t bgK; /*!< \brief Number of "uncoded bits" in the BG. */
uint16_t liftK; /*!< \brief Number of uncoded bits in the lifted graph. */
uint16_t* pcm; /*!< \brief Pointer to the parity check matrix (compact form). */
void (*free)(void*); /*!< \brief Pointer to a "destructor". */
/*! \brief Pointer to the encoder function. */
int (*encode)(void*, const uint8_t*, uint8_t*, uint32_t, uint32_t);
/*! \brief Pointer to the encoder for the high-rate region. */
void (*encode_high_rate)(void*, uint8_t*);
/*! \brief Pointer to the encoder for the high-rate region (SIMD-optimized version). */
void (*encode_high_rate_avx2)(void*);
} srslte_ldpc_encoder_t;
/*!
* Initializes all the LDPC encoder variables according to the given base graph
* and lifting size.
* \param[out] q A pointer to a srslte_ldpc_encoder_t structure.
* \param[in] type The encoder type.
* \param[in] bg The desired base graph (BG1 or BG2).
* \param[in] ls The desired lifting size.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int
srslte_ldpc_encoder_init(srslte_ldpc_encoder_t* q, srslte_ldpc_encoder_type_t type, srslte_basegraph_t bg, uint16_t ls);
/*!
* The LDPC encoder "destructor": it frees all the resources allocated to the encoder.
* \param[in] q A pointer to the dismantled encoder.
*/
SRSLTE_API void srslte_ldpc_encoder_free(srslte_ldpc_encoder_t* q);
/*!
* Encodes a message into a codeword with the specified encoder.
* \param[in] q A pointer to the desired encoder.
* \param[in] input The message to encode.
* \param[out] output The resulting codeword.
* \param[in] input_length The number of uncoded bits in the input message.
* \param[in] cdwd_rm_length The codeword length after rate matching.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_ldpc_encoder_encode(srslte_ldpc_encoder_t* q,
const uint8_t* input,
uint8_t* output,
uint32_t input_length,
uint32_t cdwd_rm_length);
#endif // SRSLTE_LDPCENCODER_H

@ -0,0 +1,224 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_rm.h
* \brief Declaration of the LDPC RateMatcher and RateDematcher.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef SRSLTE_LDPCRM_H
#define SRSLTE_LDPCRM_H
#include "srslte/phy/fec/ldpc/base_graph.h"
/*!
 * \brief Modulation schemes accepted by the rate matcher and rate dematcher.
 */
typedef enum SRSLTE_API {
  BPSK   = 0, /*!< \brief pi/2-BPSK. */
  QPSK   = 1, /*!< \brief QPSK. */
  QAM16  = 2, /*!< \brief QAM16. */
  QAM64  = 3, /*!< \brief QAM64. */
  QAM256 = 4  /*!< \brief QAM256. */
} mod_type_t;
/*!
* \brief Describes a rate matcher or rate dematcher (K, F are ignored at rate matcher)
*
* The same structure is used by the transmit-side rate matcher and by the
* float, 16-bit and 8-bit rate dematchers declared in this header.
*
* NOTE(review): ls is 16 bits wide here, while the srslte_ldpc_rm_tx and
* srslte_ldpc_rm_rx_* functions take the lifting size as uint32_t — confirm
* that callers never pass a value that does not fit in 16 bits.
*/
typedef struct SRSLTE_API {
void* ptr; /*!< \brief %Rate Matcher auxiliary registers. */
srslte_basegraph_t bg; /*!< \brief Current base graph. */
uint16_t ls; /*!< \brief Current lifting size. */
uint32_t N; /*!< \brief Codeword size. */
uint32_t E; /*!< \brief Rate-Matched codeword size. */
uint32_t K; /*!< \brief Codeblock size (including punctured and filler bits). */
uint32_t F; /*!< \brief Number of filler bits in the codeblock. */
uint32_t k0; /*!< \brief Starting position in the circular buffer. */
uint32_t mod_order; /*!< \brief Modulation order. */
uint32_t Ncb; /*!< \brief Limit to the number of bits in the circular buffer. */
} srslte_ldpc_rm_t;
/*!
* Initializes the Rate Matcher for the maximum rate-matched codeword length
* \param[out] q A pointer to a srslte_ldpc_rm_t structure.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_ldpc_rm_tx_init(srslte_ldpc_rm_t* q);
/*!
* Carries out the actual rate-matching.
* \param[in] q A pointer to the Rate-Matcher (a srslte_ldpc_rm_t structure
* instance) that carries out the rate matching.
* \param[in] input The codeword obtained from the ldpc encoder.
* \param[out] output The rate-matched codeword resulting from the rate-matching
* operation.
* \param[in] E Rate-matched codeword length.
* \param[in] bg Current base graph.
* \param[in] ls Current lifting size.
* \param[in] rv Redundancy version 0,1,2,3.
* \param[in] mod_type Modulation type.
* \param[in] Nref Size of limited buffer.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_ldpc_rm_tx(srslte_ldpc_rm_t* q,
const uint8_t* input,
uint8_t* output,
const uint32_t E,
const srslte_basegraph_t bg,
const uint32_t ls,
const uint8_t rv,
const mod_type_t mod_type,
const uint32_t Nref);
/*!
* Initializes all the Rate DeMatcher variables.
* \param[out] q A pointer to a srslte_ldpc_rm_t structure.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_ldpc_rm_rx_init_f(srslte_ldpc_rm_t* q);
/*!
* Carries out the actual rate-dematching.
* \param[in] q A pointer to the Rate-DeMatcher (a srslte_ldpc_rm_t structure
* instance) that carries out the rate matching.
* \param[in] input The LLRs obtained from the channel samples that correspond to
* the codeword to be first, rate-dematched and then decoded.
* \param[out] output The rate-dematched codeword resulting from the rate-dematching
* operation.
* \param[in] E Rate-matched codeword length.
* \param[in] F Number of filler bits.
* \param[in] bg Current base graph.
* \param[in] ls Current lifting size.
* \param[in] rv Redundancy version 0,1,2,3.
* \param[in] mod_type Modulation type.
* \param[in] Nref Size of limited buffer.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_ldpc_rm_rx_f(srslte_ldpc_rm_t* q,
const float* input,
float* output,
const uint32_t E,
const uint32_t F,
const srslte_basegraph_t bg,
const uint32_t ls,
const uint8_t rv,
const mod_type_t mod_type,
const uint32_t Nref);
/*!
* Initializes all the Rate DeMatcher variables (short inputs).
* \param[out] q A pointer to a srslte_ldpc_rm_t structure.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_ldpc_rm_rx_init_s(srslte_ldpc_rm_t* q);
/*!
* Carries out the actual rate-dematching (short symbols).
* \param[in] q A pointer to the Rate-DeMatcher (a srslte_ldpc_rm_t structure
* instance) that carries out the rate matching.
* \param[in] input The LLRs obtained from the channel samples that correspond to
* the codeword to be first, rate-dematched and then decoded.
* \param[in] E Rate-matched codeword length.
* \param[in] F Number of filler bits.
* \param[in] bg Current base graph.
* \param[in] ls Current lifting size.
* \param[in] rv Redundancy version 0,1,2,3.
* \param[in] mod_type Modulation type.
* \param[in] Nref Size of limited buffer.
* \param[out] output The rate-dematched codeword resulting from the rate-dematching
* operation.
*/
SRSLTE_API int srslte_ldpc_rm_rx_s(srslte_ldpc_rm_t* q,
const int16_t* input,
int16_t* output,
const uint32_t E,
const uint32_t F,
const srslte_basegraph_t bg,
const uint32_t ls,
const uint8_t rv,
const mod_type_t mod_type,
const uint32_t Nref);
/*!
* Initializes all the Rate DeMatcher variables (char inputs).
* \param[out] q A pointer to a srslte_ldpc_rm_t structure.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_ldpc_rm_rx_init_c(srslte_ldpc_rm_t* q);
/*!
* Carries out the actual rate-dematching (int8_t symbols).
* \param[in] q A pointer to the Rate-DeMatcher (a srslte_ldpc_rm_t structure
* instance) that carries out the rate matching.
* \param[in] input The LLRs obtained from the channel samples that correspond to
* the codeword to be first, rate-dematched and then decoded.
* \param[out] output The rate-dematched codeword resulting from the rate-dematching
* operation.
* \param[in] E Rate-matched codeword length.
* \param[in] F Number of filler bits.
* \param[in] bg Current base graph.
* \param[in] ls Current lifting size.
* \param[in] rv Redundancy version 0,1,2,3.
* \param[in] mod_type Modulation type.
* \param[in] Nref Size of limited buffer.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_ldpc_rm_rx_c(srslte_ldpc_rm_t* q,
const int8_t* input,
int8_t* output,
const uint32_t E,
const uint32_t F,
const srslte_basegraph_t bg,
const uint32_t ls,
const uint8_t rv,
const mod_type_t mod_type,
const uint32_t Nref);
/*!
* The Rate Matcher "destructor": it frees all the resources allocated to the rate-matcher.
* \param[in] q A pointer to the dismantled rate-matcher.
*/
SRSLTE_API void srslte_ldpc_rm_tx_free(srslte_ldpc_rm_t* q);
/*!
* The Rate Matcher "destructor": it frees all the resources allocated to the rate-dematcher.
* \param[in] q A pointer to the dismantled rate-dematcher.
*/
SRSLTE_API void srslte_ldpc_rm_rx_free_f(srslte_ldpc_rm_t* q);
/*!
* The Rate Matcher "destructor" for short symbols: it frees all the resources allocated to the rate-dematcher.
* \param[in] q A pointer to the dismantled rate-dematcher.
*/
SRSLTE_API void srslte_ldpc_rm_rx_free_s(srslte_ldpc_rm_t* q);
/*!
* The Rate Matcher "destructor" for int8_t symbols: it frees all the resources allocated to the rate-dematcher.
* \param[in] q A pointer to the dismantled rate-dematcher.
*/
SRSLTE_API void srslte_ldpc_rm_rx_free_c(srslte_ldpc_rm_t* q);
#endif // SRSLTE_LDPCRM_H

@ -0,0 +1,118 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder.h
* \brief Declaration of the polar decoder.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
* 5G uses a polar decoder with maximum sizes \f$2^n\f$ with \f$n = 5,...,10\f$.
*
*/
#ifndef SRSLTE_POLARDECODER_H
#define SRSLTE_POLARDECODER_H
#include "srslte/config.h"
#include <stdbool.h>
#include <stdint.h>
/*!
* \brief Lists the different types of polar decoder.
*
* All variants are Simplified Successive Cancellation (SSC) decoders; they
* differ in the LLR representation (float, 16-bit or 8-bit) and, for the last
* one, in the use of AVX2.
*/
typedef enum {
SRSLTE_POLAR_DECODER_SSC_F = 0, /*!< \brief Floating-point Simplified Successive Cancellation (SSC) decoder. */
SRSLTE_POLAR_DECODER_SSC_S = 1, /*!< \brief Fixed-point (16 bit) Simplified Successive Cancellation (SSC) decoder. */
SRSLTE_POLAR_DECODER_SSC_C = 2, /*!< \brief Fixed-point (8 bit) Simplified Successive Cancellation (SSC) decoder. */
SRSLTE_POLAR_DECODER_SSC_C_AVX2 =
3 /*!< \brief Fixed-point (8 bit, avx2) Simplified Successive Cancellation (SSC) decoder. */
} srslte_polar_decoder_type_t;
/*!
* \brief Describes a polar decoder.
*
* Pairs an opaque decoder object with one decoding function pointer per LLR
* representation and a destructor.
*
* NOTE(review): presumably only the decode_* pointer matching the type chosen
* at srslte_polar_decoder_init is set — confirm against the implementation.
*/
typedef struct SRSLTE_API {
void* ptr; /*!< \brief Pointer to the actual polar decoder structure. */
int (*decode_f)(void* ptr,
const float* symbols,
uint8_t* data_decoded); /*!< \brief Pointer to the decoder function (float version). */
int (*decode_s)(void* ptr,
const int16_t* symbols,
uint8_t* data_decoded); /*!< \brief Pointer to the decoder function (16-bit version). */
int (*decode_c)(void* ptr,
const int8_t* symbols,
uint8_t* data_decoded); /*!< \brief Pointer to the decoder function (8-bit version). */
void (*free)(void*); /*!< \brief Pointer to a "destructor". */
} srslte_polar_decoder_t;
/*!
* Initializes all the polar decoder variables according to the selected decoding
* algorithm and the given code size.
* \param[out] q A pointer to the initialized polar decoder.
* \param[in] polar_decoder_type Polar decoder type.
* \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the decoder input/output vector.
* \param[in] frozen_set A pointer to the frozenbit set (array of indices).
* \param[in] frozen_set_size Number of frozen bits.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_polar_decoder_init(srslte_polar_decoder_t* q,
srslte_polar_decoder_type_t polar_decoder_type,
uint16_t code_size_log,
uint16_t* frozen_set,
uint16_t frozen_set_size);
/*!
* The polar decoder "destructor": it frees all the resources.
* \param[in, out] q A pointer to the dismantled decoder.
*/
SRSLTE_API void srslte_polar_decoder_free(srslte_polar_decoder_t* q);
/*!
* Decodes the input (float) codeword with the specified polar decoder.
* \param[in] q A pointer to the desired polar decoder.
* \param[in] input_llr The decoder LLR input vector.
* \param[out] data_decoded The decoder output vector.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_polar_decoder_decode_f(srslte_polar_decoder_t* q, const float* input_llr, uint8_t* data_decoded);
/*!
* Decodes the input (int16_t) codeword with the specified polar decoder.
* \param[in] q A pointer to the desired polar decoder.
* \param[in] input_llr The decoder LLR input vector.
* \param[out] data_decoded The decoder output vector.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int
srslte_polar_decoder_decode_s(srslte_polar_decoder_t* q, const int16_t* input_llr, uint8_t* data_decoded);
/*!
* Decodes the input (int8_t) codeword with the specified polar decoder.
* \param[in] q A pointer to the desired polar decoder.
* \param[in] input_llr The decoder LLR input vector.
* \param[out] data_decoded The decoder output vector.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_polar_decoder_decode_c(srslte_polar_decoder_t* q, const int8_t* input_llr, uint8_t* data_decoded);
#endif // SRSLTE_POLARDECODER_H

@ -0,0 +1,90 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_encoder.h
* \brief Declaration of the polar encoder.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
* 5G uses a polar encoder with maximum sizes \f$2^n\f$ with \f$n = 5,...,10\f$.
*
*/
#ifndef SRSLTE_POLAR_ENCODER_H
#define SRSLTE_POLAR_ENCODER_H
#include "srslte/config.h"
#include <stdint.h>
/*!
* \brief Lists the different types of polar encoder.
*/
typedef enum SRSLTE_API {
SRSLTE_POLAR_ENCODER_PIPELINED = 0, /*!< \brief Non-optimized version of the pipelined polar encoder*/
SRSLTE_POLAR_ENCODER_AVX2 = 1, /*!< \brief SIMD implementation of the polar encoder */
} srslte_polar_encoder_type_t;
/*!
* \brief Describes a polar encoder.
*
* Pairs an opaque encoder object with the encoding function and a destructor.
*
* NOTE(review): the code size passed to srslte_polar_encoder_init is not stored
* in this struct; presumably it is kept in the object behind ptr — confirm.
*/
typedef struct srslte_polar_encoder_t {
void* ptr; /*!< \brief Pointer to the actual polar encoder structure. */
int (*encode)(void* ptr,
const uint8_t* input,
uint8_t* output,
const uint8_t code_size_log); /*!< \brief Pointer to the encoder function. */
void (*free)(void*); /*!< \brief Pointer to a "destructor". */
} srslte_polar_encoder_t;
/*!
* Initializes all the polar encoder variables according to the given code size.
* \param[out] q A pointer to the initialized polar encoder.
* \param[in] polar_encoder_type Polar encoder type.
* \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the encoder input/output vector.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int srslte_polar_encoder_init(srslte_polar_encoder_t* q,
srslte_polar_encoder_type_t polar_encoder_type,
uint8_t code_size_log);
/*!
* The polar encoder "destructor": it frees all the resources.
* \param[in, out] q A pointer to the dismantled encoder.
*/
SRSLTE_API void srslte_polar_encoder_free(srslte_polar_encoder_t* q);
/*!
* Encodes the input vector into a codeword with the specified polar encoder.
* \param[in] q A pointer to the desired polar encoder.
* \param[in] input The encoder input vector.
* \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the encoder input/output vector.
* It cannot be larger than the code_size_log that was passed to
* srslte_polar_encoder_init() when q was initialized.
* \param[out] output The encoder output vector.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
SRSLTE_API int
srslte_polar_encoder_encode(srslte_polar_encoder_t* q, const uint8_t* input, uint8_t* output, uint8_t code_size_log);
#endif // SRSLTE_POLAR_ENCODER_H

@ -0,0 +1,80 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_sets.h
* \brief Declaration of the auxiliary function that reads polar index sets from a file.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
* The message and parity check sets provided by these functions are needed by
* the subchannel allocation block.
* The frozen bit set provided by this function is used by the polar decoder.
*
*/
#ifndef SRSLTE_POLAR_SETS_H
#define SRSLTE_POLAR_SETS_H
#include "srslte/config.h"
#include <stdint.h>
/*!
* \brief Describes the index sets of a polar code.
*
* Four sets of encoder-input indices are stored (message, information, parity check
* and frozen), each one as a size field plus a pointer to the corresponding indices.
* The structure is initialized by srslte_polar_code_sets_read() and released by
* srslte_polar_code_sets_free().
*/
typedef struct {
uint16_t message_set_size; /*!< \brief Number of message bits (data and CRC). */
uint16_t info_set_size; /*!< \brief Number of message bits plus parity check bits. */
uint16_t parity_set_size; /*!< \brief Number of parity check bits. */
uint16_t frozen_set_size; /*!< \brief Number of frozen bits. */
uint16_t* message_set; /*!< \brief Pointer to the indices of the encoder input vector containing data and CRC bits. */
uint16_t* info_set; /*!< \brief Pointer to the indices of the encoder input vector containing data, CRC and
parity check bits. */
uint16_t* parity_set; /*!< \brief Pointer to the indices of the encoder input vector containing the parity check bits. */
uint16_t* frozen_set; /*!< \brief Pointer to the indices of the encoder input vector containing frozen bits. */
} srslte_polar_sets_t;
/*!
* Initializes the different index sets as needed by the subchannel allocation block and/or by the polar decoder.
* As stated in the file description, the index sets are read from a file.
* \param[out] c A pointer to the polar set structure to initialize.
* \param[in] message_size Number of data + CRC bits.
* \param[in] code_size_log The \f$\log_2\f$ of the number of bits of the decoder input/output vector.
* \param[in] rate_matching_size Number of bits of the codeword after rate matching.
* \param[in] parity_set_size Number of parity check bits.
* \param[in] nWmPC Number of parity check bits of minimum weight type.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int srslte_polar_code_sets_read(srslte_polar_sets_t* c,
uint16_t message_size,
uint8_t code_size_log,
uint16_t rate_matching_size,
uint8_t parity_set_size,
uint8_t nWmPC);
/*!
* The polar set "destructor": it frees all the resources held by the polar set.
* \param[in, out] c A pointer to the polar set to dismantle.
*/
void srslte_polar_code_sets_free(srslte_polar_sets_t* c);
#endif // SRSLTE_POLAR_SETS_H

@ -0,0 +1,86 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file subchannel_allocation.h
* \brief Declaration of the auxiliary subchannel allocation block.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
* These functions are neither fully functional nor tested for 3GPP 5G compliance.
* Please, use only for testing purposes.
*
*/
#ifndef SRSLTE_SUB_CHANNEL_ALLOC_H
#define SRSLTE_SUB_CHANNEL_ALLOC_H
#include "srslte/config.h"
#include "stdint.h"
/*!
* \brief Describes a subchannel allocation: the code size, the message size and the
* positions of the message bits inside the encoder input vector.
*/
typedef struct SRSLTE_API srslte_subchn_alloc_t {
uint16_t code_size; /*!< \brief Number of bits, \f$N\f$, of the encoder input/output vector. */
uint16_t message_size; /*!< \brief Number of bits, \f$K\f$, of data + CRC. */
uint16_t* message_set; /*!< \brief Pointer to the indices of the encoder input vector containing data and CRC bits. */
} srslte_subchn_alloc_t;
/*!
* Initializes a subchannel allocation instance.
* \param[out] c A pointer to the srslte_subchn_alloc_t structure
* containing the parameters needed by the subchannel allocation function.
* \param[in] code_size_log The \f$\log_2\f$ of the number of bits of the decoder input/output vector.
* \param[in] message_set_size Number of data + CRC bits.
* \param[in] message_set Pointer to the indices of the encoder input vector containing
* data and CRC bits.
*/
void srslte_subchannel_allocation_init(srslte_subchn_alloc_t* c,
uint8_t code_size_log,
uint16_t message_set_size,
uint16_t* message_set);
/*!
* Allocates message bits (data + CRC) to the encoder input bit vector at the
* positions specified in \a c->message_set and writes zeros to the remaining
* positions. This function is not fully 5G compliant, as the parity bit positions
* are also set to 0.
* \param[in] c A pointer to the srslte_subchn_alloc_t structure containing
* the parameters needed by the subchannel allocation function.
* \param[in] message A pointer to the vector with the message bits (data and CRC).
* \param[out] input_encoder A pointer to the encoder input bit vector.
*/
void srslte_subchannel_allocation(const srslte_subchn_alloc_t* c, const uint8_t* message, uint8_t* input_encoder);
/*!
* Extracts message bits (data + CRC) from the decoder output vector
* according to the positions specified in \a c->message_set, i.e. the same
* positions that srslte_subchannel_allocation() writes the message bits to.
* \param[in] c A pointer to the srslte_subchn_alloc_t structure containing the
* parameters needed by the subchannel allocation function.
* \param[in] output_decoder A pointer to the decoder output bit vector.
* \param[out] message A pointer to the vector with the message bits (data and CRC).
*/
void srslte_subchannel_deallocation(const srslte_subchn_alloc_t* c, const uint8_t* output_decoder, uint8_t* message);
#endif // SRSLTE_SUB_CHANNEL_ALLOC_H

@ -32,6 +32,7 @@
#include "srslte/config.h"
#include "srslte/phy/dft/dft.h"
#include <stdint.h>
typedef struct SRSLTE_API {
cf_t* input_fft;
@ -72,12 +73,12 @@ SRSLTE_API uint32_t srslte_conv_fft_cc_run_opt(srslte_conv_fft_cc_t* q,
cf_t* output);
SRSLTE_API uint32_t
srslte_conv_cc(const cf_t* input, const cf_t* filter, cf_t* output, uint32_t input_len, uint32_t filter_len);
srslte_conv_cc(const cf_t* input, const cf_t* filter, cf_t* output, uint32_t input_len, uint32_t filter_len);
SRSLTE_API uint32_t
srslte_conv_same_cf(cf_t* input, float* filter, cf_t* output, uint32_t input_len, uint32_t filter_len);
srslte_conv_same_cf(cf_t* input, float* filter, cf_t* output, uint32_t input_len, uint32_t filter_len);
SRSLTE_API uint32_t
srslte_conv_same_cc(cf_t* input, cf_t* filter, cf_t* output, uint32_t input_len, uint32_t filter_len);
srslte_conv_same_cc(cf_t* input, cf_t* filter, cf_t* output, uint32_t input_len, uint32_t filter_len);
#endif // SRSLTE_CONVOLUTION_H

@ -72,8 +72,14 @@ static inline float srslte_convert_dB_to_power(float v)
return powf(10.0f, v / 10.0f);
}
/*logical operations */
SRSLTE_API void srslte_vec_xor_bbb(int8_t* x, int8_t* y, int8_t* z, const uint32_t len);
/*!
* Computes the element-wise XOR \f$ z = x \oplus y \f$ of two binary vectors.
* \param[in] x A pointer to a vector of uint8_t with 0's and 1's.
* \param[in] y A pointer to a vector of uint8_t with 0's and 1's.
* \param[out] z A pointer to a vector of uint8_t with 0's and 1's.
* \param[in] len Length of vectors x, y and z.
*/
SRSLTE_API void srslte_vec_xor_bbb(const uint8_t* x, const uint8_t* y, uint8_t* z, const uint32_t len);
/** Return the sum of all the elements */
SRSLTE_API float srslte_vec_acc_ff(const float* x, const uint32_t len);
@ -194,6 +200,32 @@ SRSLTE_API uint32_t srslte_vec_max_fi(const float* x, const uint32_t len);
SRSLTE_API uint32_t srslte_vec_max_abs_fi(const float* x, const uint32_t len);
SRSLTE_API uint32_t srslte_vec_max_abs_ci(const cf_t* x, const uint32_t len);
/*!
* Quantizes an array of floats into an array of 16-bit signed integers. It is
* ensured that *-inf* and *inf* map to -32767 and 32767, respectively (useful
* when quantizing on fewer than 16 bits).
* \param[in] in Real values to be quantized.
* \param[out] out Quantized values (16-bit signed integers).
* \param[in] gain Quantization gain, controls the output range.
* \param[in] offset Quantization offset, for asymmetric quantization.
* \param[in] clip Saturation value.
* \param[in] len Number of values to be quantized.
*/
SRSLTE_API void srslte_vec_quant_fs(const float* in, int16_t* out, float gain, float offset, float clip, uint32_t len);
/*!
* Quantizes an array of floats into an array of 8-bit signed integers. It is
* ensured that *-inf* and *inf* map to -127 and 127, respectively (useful
* when quantizing on fewer than 8 bits).
* \param[in] in Real values to be quantized.
* \param[out] out Quantized values (8-bit signed integers).
* \param[in] gain Quantization gain, controls the output range.
* \param[in] offset Quantization offset, for asymmetric quantization.
* \param[in] clip Saturation value.
* \param[in] len Number of values to be quantized.
*/
SRSLTE_API void srslte_vec_quant_fc(const float* in, int8_t* out, float gain, float offset, float clip, uint32_t len);
/* Quantize a vector of floats or int16 values and convert it to uint8_t */
SRSLTE_API void srslte_vec_quant_fuc(const float* in,
uint8_t* out,

@ -31,7 +31,7 @@ extern "C" {
#include <stdio.h>
/*SIMD Logical operations*/
SRSLTE_API void srslte_vec_xor_bbb_simd(const int8_t* x, const int8_t* y, int8_t* z, int len);
SRSLTE_API void srslte_vec_xor_bbb_simd(const uint8_t* x, const uint8_t* y, uint8_t* z, int len);
/* SIMD Basic vector math */
SRSLTE_API void srslte_vec_sum_sss_simd(const int16_t* x, const int16_t* y, int16_t* z, int len);

@ -22,8 +22,10 @@ set(FEC_SOURCES
crc.c
softbuffer.c)
add_subdirectory(test)
add_subdirectory(convolutional)
add_subdirectory(ldpc)
add_subdirectory(polar)
add_subdirectory(test)
add_subdirectory(turbo)
add_library(srslte_fec OBJECT ${FEC_SOURCES})

@ -29,6 +29,4 @@ set(FEC_SOURCES ${FEC_SOURCES}
convolutional/viterbi37_sse.c
PARENT_SCOPE)
message(STATUS "aaaaa ${FEC_SOURCES}")
add_subdirectory(test)

@ -0,0 +1,39 @@
#
# Copyright 2013-2020 Software Radio Systems Limited
#
# This file is part of srsLTE
#
# srsLTE is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
#
# srsLTE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# A copy of the GNU Affero General Public License can be found in
# the LICENSE file in the top-level directory of this distribution
# and at http://www.gnu.org/licenses/.
#
# Append the LDPC sources to FEC_SOURCES and publish the updated list to the
# parent scope. Paths carry the ldpc/ prefix, i.e. they are relative to the
# parent directory.
# NOTE(review): the AVX2 sources are listed unconditionally; presumably they
# guard their contents with preprocessor checks - confirm.
set(FEC_SOURCES ${FEC_SOURCES}
ldpc/base_graph.c
ldpc/ldpc_dec_f.c
ldpc/ldpc_dec_s.c
ldpc/ldpc_dec_c.c
ldpc/ldpc_dec_c_flood.c
ldpc/ldpc_dec_c_avx2.c
ldpc/ldpc_dec_c_avx2long.c
ldpc/ldpc_dec_c_avx2_flood.c
ldpc/ldpc_dec_c_avx2long_flood.c
ldpc/ldpc_decoder.c
ldpc/ldpc_enc_c.c
ldpc/ldpc_enc_avx2.c
ldpc/ldpc_enc_avx2long.c
ldpc/ldpc_encoder.c
ldpc/ldpc_rm.c
PARENT_SCOPE)
# Process the test subdirectory of this directory.
add_subdirectory(test)

File diff suppressed because it is too large Load Diff

@ -0,0 +1,170 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_avx2_consts.h
* \brief Declaration of constants and masks for the AVX2-based implementation
* of the LDPC encoder and decoder.
*
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef LDPC_AVX2_CONSTS_H
#define LDPC_AVX2_CONSTS_H
#include <immintrin.h>
#include "../utils_avx2.h"
/*!
* \brief Packed 8-bit zeros (all bits of the 256-bit vector are cleared).
*/
static const __m256i zero_epi8 = {0, 0, 0, 0};
/*!
* \brief Packed 8-bit ones (every byte of the 256-bit vector is 0x01).
*/
static const __m256i one_epi8 = {0x0101010101010101LL,
0x0101010101010101LL,
0x0101010101010101LL,
0x0101010101010101LL};
/*!
* \brief Packed 8-bit 127 (that is \f$2^7 - 1\f$): every byte is 0x7F.
*/
static const __m256i infty8_epi8 = {0x7F7F7F7F7F7F7F7FLL,
0x7F7F7F7F7F7F7F7FLL,
0x7F7F7F7F7F7F7F7FLL,
0x7F7F7F7F7F7F7F7FLL};
/*!
* \brief Packed 8-bit -127 (that is \f$-2^7 + 1\f$): every byte is 0x81,
* the two's complement representation of -127.
*/
static const __m256i neg_infty8_epi8 = {0x8181818181818181LL, // NOLINT
0x8181818181818181LL, // NOLINT
0x8181818181818181LL, // NOLINT
0x8181818181818181LL}; // NOLINT
/*!
* \brief Packed 8-bit 63 (that is \f$2^6 - 1\f$): every byte is 0x3F.
*/
static const __m256i infty7_epi8 = {0x3F3F3F3F3F3F3F3FLL,
0x3F3F3F3F3F3F3F3FLL,
0x3F3F3F3F3F3F3F3FLL,
0x3F3F3F3F3F3F3F3FLL};
/*!
* \brief Packed 8-bit -63 (that is \f$-2^6 + 1\f$): every byte is 0xC1,
* the two's complement representation of -63.
*/
static const __m256i neg_infty7_epi8 = {0xC1C1C1C1C1C1C1C1LL, // NOLINT
0xC1C1C1C1C1C1C1C1LL, // NOLINT
0xC1C1C1C1C1C1C1C1LL, // NOLINT
0xC1C1C1C1C1C1C1C1LL}; // NOLINT
/*!
* \brief Identifies even-indexed 8-bit packets: within each 16-bit pair, the
* least significant byte is 0xFF and the most significant byte is 0x00.
*/
static const __m256i mask_even_epi8 = {0x00FF00FF00FF00FF,
0x00FF00FF00FF00FF,
0x00FF00FF00FF00FF,
0x00FF00FF00FF00FF}; // NOLINT
/*!
* \brief Mask needed for node rotation: mask_least_epi8[i] marks the bits
* corresponding to the \b i least significant chars.
*
* The table has one entry for each value of \b i from 0 (no char marked) to
* SRSLTE_AVX2_B_SIZE (all chars marked). Each entry lists its four 64-bit
* words starting from the least significant one.
*/
static const __m256i mask_least_epi8[SRSLTE_AVX2_B_SIZE + 1] = {
{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0x00000000000000FF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0x000000000000FFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0x0000000000FFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0x00000000FFFFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0x000000FFFFFFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0x0000FFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0x00FFFFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0x00000000000000FF, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0x000000000000FFFF, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0x0000000000FFFFFF, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0x000000FFFFFFFFFF, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0x0000FFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0x00FFFFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x00000000000000FF, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x000000000000FFFF, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000000000FFFFFF, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x000000FFFFFFFFFF, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000FFFFFFFFFFFF, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x00FFFFFFFFFFFFFF, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000000000000000}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x00000000000000FF}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x000000000000FFFF}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000000000FFFFFF}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x000000FFFFFFFFFF}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000FFFFFFFFFFFF}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x00FFFFFFFFFFFFFF}, // NOLINT
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}}; // NOLINT
/*!
* \brief Mask needed for node rotation: mask_most_epi8[i] marks the bits
* corresponding to the SRSLTE_AVX2_B_SIZE - \b i most significant chars.
*
* The table has one entry for each value of \b i from 0 (all chars marked) to
* SRSLTE_AVX2_B_SIZE (no char marked). Entry \b i is the bitwise complement of
* mask_least_epi8[i]. Each entry lists its four 64-bit words starting from the
* least significant one.
*/
static const __m256i mask_most_epi8[SRSLTE_AVX2_B_SIZE + 1] = {
{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0xFFFFFFFFFFFFFF00, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0xFFFFFFFFFFFF0000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0xFFFFFFFFFF000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0xFFFFFF0000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0xFFFF000000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0xFF00000000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0xFFFFFFFFFFFFFF00, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0xFFFFFFFFFFFF0000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0xFFFFFFFFFF000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0xFFFFFF0000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0xFFFF000000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0xFF00000000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFFFFFF00, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFFFF0000, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFF000000, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0xFFFFFF0000000000, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0xFFFF000000000000, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0xFF00000000000000, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFFFFFFFF}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFFFFFF00}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFFFF0000}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFF000000}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFFFFFF00000000}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFFFF0000000000}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFF000000000000}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFF00000000000000}, // NOLINT
{0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}; // NOLINT
#endif // LDPC_AVX2_CONSTS_H

@ -0,0 +1,602 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_all.h
* \brief Declaration of the LDPC decoder inner functions.
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef SRSLTE_LDPCDEC_ALL_H
#define SRSLTE_LDPCDEC_ALL_H
#include <srslte/phy/fec/ldpc/base_graph.h>
#include <stdint.h>
/*!
* Creates the registers used by the float-based implementation of the LDPC decoder.
* \param[in] bgN Codeword length.
* \param[in] bgM Number of check nodes.
* \param[in] ls Lifting size.
* \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
* \return An untyped pointer to the created registers (an ldpc_regs structure), to be
* destroyed with delete_ldpc_dec_f().
*/
void* create_ldpc_dec_f(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
/*!
* Destroys the inner registers of the float-based LDPC decoder.
* \param[in,out] p A pointer to the decoder registers to dismantle (an ldpc_regs structure,
* passed as an untyped pointer).
*/
void delete_ldpc_dec_f(void* p);
/*!
* Initializes the inner registers of the float-based LDPC decoder before
* carrying out the actual decoding.
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs structure).
* \param[in] llrs A pointer to the array of LLR values from the channel, given as floats.
* \param[in] ls The lifting size.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int init_ldpc_dec_f(void* p, const float* llrs, uint16_t ls);
/*!
* Updates the messages from variable nodes to check nodes (float version).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs structure).
* \param[in] i_layer The index of the layer whose variable-to-check messages are updated.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_var_to_check_f(void* p, int i_layer);
/*!
* Updates the messages from check nodes to variable nodes (float version).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs structure).
* \param[in] i_layer The index of the layer whose check-to-variable messages are updated.
* \param[in] this_pcm A pointer to the row of the parity check matrix (i.e. base
* graph) corresponding to the selected layer.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer. It is declared as a pointer to
* arrays of MAX_CNCT entries.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_check_to_var_f(void* p,
int i_layer,
const uint16_t* this_pcm,
const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Updates the current estimate of the (soft) bits of the codeword (float version).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs structure).
* \param[in] i_layer The index of the current layer.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer. It is declared as a pointer to
* arrays of MAX_CNCT entries.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_soft_bits_f(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Extracts the decoded message (hard bits) from the current soft bits (float version).
* \param[in] p A pointer to the decoder registers (an ldpc_regs structure).
* \param[out] message A pointer to the decoded message.
* \param[in] liftK The length of the decoded message.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int extract_ldpc_message_f(void* p, uint8_t* message, uint16_t liftK);
/*!
* Creates the registers used by the 16-bit-based implementation of the LDPC decoder.
* \param[in] bgN Codeword length.
* \param[in] bgM Number of check nodes.
* \param[in] ls Lifting size.
* \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
* \return An untyped pointer to the created registers (an ldpc_regs_s structure), to be
* destroyed with delete_ldpc_dec_s().
*/
void* create_ldpc_dec_s(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
/*!
* Destroys the inner registers of the 16-bit integer-based LDPC decoder.
* \param[in,out] p A pointer to the decoder registers to dismantle (an ldpc_regs_s structure,
* passed as an untyped pointer).
*/
void delete_ldpc_dec_s(void* p);
/*!
* Initializes the inner registers of the 16-bit integer-based LDPC decoder before
* carrying out the actual decoding.
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_s structure).
* \param[in] llrs A pointer to the array of LLR values from the channel, given as
* 16-bit signed integers.
* \param[in] ls The lifting size.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int init_ldpc_dec_s(void* p, const int16_t* llrs, uint16_t ls);
/*!
* Updates the messages from variable nodes to check nodes (16-bit version).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_s structure).
* \param[in] i_layer The index of the layer whose variable-to-check messages are updated.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_var_to_check_s(void* p, int i_layer);
/*!
* Updates the messages from check nodes to variable nodes (16-bit version).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_s structure).
* \param[in] i_layer The index of the layer whose check-to-variable messages are updated.
* \param[in] this_pcm A pointer to the row of the parity check matrix (i.e. base
* graph) corresponding to the selected layer.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer. It is declared as a pointer to
* arrays of MAX_CNCT entries.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_check_to_var_s(void* p,
int i_layer,
const uint16_t* this_pcm,
const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Updates the current estimate of the (soft) bits of the codeword (16-bit version).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_s structure).
* \param[in] i_layer The index of the current layer.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer. It is declared as a pointer to
* arrays of MAX_CNCT entries.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_soft_bits_s(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Extracts the decoded message (hard bits) from the current soft bits (16-bit version).
* \param[in] p A pointer to the decoder registers (an ldpc_regs_s structure).
* \param[out] message A pointer to the decoded message.
* \param[in] liftK The length of the decoded message.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int extract_ldpc_message_s(void* p, uint8_t* message, uint16_t liftK);
/*!
* Creates the registers used by the 8-bit-based implementation of the LDPC decoder.
* \param[in] bgN Codeword length.
* \param[in] bgM Number of check nodes.
* \param[in] ls Lifting size.
* \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
* \return An untyped pointer to the created registers (an ldpc_regs_c structure), to be
* destroyed with delete_ldpc_dec_c().
*/
void* create_ldpc_dec_c(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
/*!
* Destroys the inner registers of the 8-bit integer-based LDPC decoder.
* \param[in,out] p A pointer to the decoder registers to dismantle (an ldpc_regs_c structure,
* passed as an untyped pointer).
*/
void delete_ldpc_dec_c(void* p);
/*!
* Initializes the inner registers of the 8-bit integer-based LDPC decoder before
* carrying out the actual decoding.
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c structure).
* \param[in] llrs A pointer to the array of LLR values from the channel, given as
* 8-bit signed integers.
* \param[in] ls The lifting size.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int init_ldpc_dec_c(void* p, const int8_t* llrs, uint16_t ls);
/*!
* Updates the messages from variable nodes to check nodes (8-bit version).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c structure).
* \param[in] i_layer The index of the layer whose variable-to-check messages are updated.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_var_to_check_c(void* p, int i_layer);
/*!
* Updates the messages from check nodes to variable nodes (8-bit version).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c structure).
* \param[in] i_layer The index of the layer whose check-to-variable messages are updated.
* \param[in] this_pcm A pointer to the row of the parity check matrix (i.e. base
* graph) corresponding to the selected layer.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer. It is declared as a pointer to
* arrays of MAX_CNCT entries.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_check_to_var_c(void* p,
int i_layer,
const uint16_t* this_pcm,
const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Updates the current estimate of the (soft) bits of the codeword (8-bit version).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c structure).
* \param[in] i_layer The index of the current layer.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer. It is declared as a pointer to
* arrays of MAX_CNCT entries.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_soft_bits_c(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Extracts the decoded message (hard bits) from the current soft bits (8-bit version).
* \param[in] p A pointer to the decoder registers (an ldpc_regs_c structure).
* \param[out] message A pointer to the decoded message.
* \param[in] liftK The length of the decoded message.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int extract_ldpc_message_c(void* p, uint8_t* message, uint16_t liftK);
/*!
* Creates the registers used by the 8-bit-based implementation of the LDPC decoder (flooded scheduling).
* \param[in] bgN Codeword length.
* \param[in] bgM Number of check nodes.
* \param[in] ls Lifting size.
* \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
* \return An untyped pointer to the created registers (an ldpc_regs_c_flood structure), to be
* destroyed with delete_ldpc_dec_c_flood().
*/
void* create_ldpc_dec_c_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
/*!
* Destroys the inner registers of the 8-bit integer-based LDPC decoder (flooded scheduling).
* \param[in,out] p A pointer to the decoder registers to dismantle (an ldpc_regs_c_flood structure,
* passed as an untyped pointer).
*/
void delete_ldpc_dec_c_flood(void* p);
/*!
* Initializes the inner registers of the 8-bit integer-based LDPC decoder (flooded scheduling) before
* carrying out the actual decoding.
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_flood structure).
* \param[in] llrs A pointer to the array of LLR values from the channel, given as
* 8-bit signed integers.
* \param[in] ls The lifting size.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int init_ldpc_dec_c_flood(void* p, const int8_t* llrs, uint16_t ls);
/*!
* Updates the messages from variable nodes to check nodes (8-bit version, flooded scheduling).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_flood structure).
* \param[in] i_layer The index of the layer whose variable-to-check messages are updated.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_var_to_check_c_flood(void* p, int i_layer);
/*!
* Updates the messages from check nodes to variable nodes (8-bit version, flooded scheduling).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_flood structure).
* \param[in] i_layer The index of the layer whose check-to-variable messages are updated.
* \param[in] this_pcm A pointer to the row of the parity check matrix (i.e. base
* graph) corresponding to the selected layer.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer. It is declared as a pointer to
* arrays of MAX_CNCT entries.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_check_to_var_c_flood(void* p,
int i_layer,
const uint16_t* this_pcm,
const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Updates the current estimate of the (soft) bits of the codeword (8-bit version, flooded scheduling).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_flood structure).
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to each layer.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_soft_bits_c_flood(void* p, const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Returns the decoded message (hard bits) from the current soft bits.
* \param[in] p A pointer to the decoder registers (an ldpc_regs_c_flood structure).
* \param[out] message A pointer to the decoded message.
* \param[in] liftK The length of the decoded message.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int extract_ldpc_message_c_flood(void* p, uint8_t* message, uint16_t liftK);
/*!
* Creates the registers used by the optimized 8-bit-based implementation of the LDPC decoder
* (LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in] bgN Codeword length.
* \param[in] bgM Number of check nodes.
* \param[in] ls Lifting size.
* \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
* \return A pointer to the created registers (an ldpc_regs_c_avx2 structure).
*/
void* create_ldpc_dec_c_avx2(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
/*!
* Destroys the inner registers of the optimized 8-bit integer-based LDPC decoder (LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p A pointer to the dismantled decoder registers (an ldpc_regs_c_avx2 structure).
*/
void delete_ldpc_dec_c_avx2(void* p);
/*!
* Initializes the inner registers of the optimized 8-bit integer-based LDPC decoder before
* carrying out the actual decoding (LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
* \param[in] llrs A pointer to the array of LLR values from the channel.
* \param[in] ls The lifting size.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int init_ldpc_dec_c_avx2(void* p, const int8_t* llrs, uint16_t ls);
/*!
* Updates the messages from variable nodes to check nodes (optimized 8-bit version, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
* \param[in] i_layer The index of the variable-to-check layer to update.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_var_to_check_c_avx2(void* p, int i_layer);
/*!
* Updates the messages from check nodes to variable nodes (optimized 8-bit version, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
* \param[in] i_layer The index of the variable-to-check layer to update.
* \param[in] this_pcm A pointer to the row of the parity check matrix (i.e. base
* graph) corresponding to the selected layer.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_check_to_var_c_avx2(void* p,
int i_layer,
const uint16_t* this_pcm,
const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Updates the current estimate of the (soft) bits of the codeword
* (optimized 8-bit version, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
* \param[in] i_layer The index of the variable-to-check layer to update.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_soft_bits_c_avx2(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Returns the decoded message (hard bits) from the current soft bits
* (optimized 8-bit version, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
* \param[out] message A pointer to the decoded message.
* \param[in] liftK The length of the decoded message.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int extract_ldpc_message_c_avx2(void* p, uint8_t* message, uint16_t liftK);
/*!
* Creates the registers used by the optimized 8-bit-based implementation of the LDPC decoder
* (LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in] bgN Codeword length.
* \param[in] bgM Number of check nodes.
* \param[in] ls Lifting size.
* \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
* \return A pointer to the created registers (an ldpc_regs_c_avx2long structure).
*/
void* create_ldpc_dec_c_avx2long(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
/*!
* Destroys the inner registers of the optimized 8-bit integer-based LDPC decoder (LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p A pointer to the dismantled decoder registers (an ldpc_regs_c_avx2long structure).
*/
void delete_ldpc_dec_c_avx2long(void* p);
/*!
* Initializes the inner registers of the optimized 8-bit integer-based LDPC decoder before
* carrying out the actual decoding (LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2long structure).
* \param[in] llrs A pointer to the array of LLR values from the channel.
* \param[in] ls The lifting size.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int init_ldpc_dec_c_avx2long(void* p, const int8_t* llrs, uint16_t ls);
/*!
* Updates the messages from variable nodes to check nodes (optimized 8-bit version, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2long structure).
* \param[in] i_layer The index of the variable-to-check layer to update.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_var_to_check_c_avx2long(void* p, int i_layer);
/*!
* Updates the messages from check nodes to variable nodes (optimized 8-bit version, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2long structure).
* \param[in] i_layer The index of the variable-to-check layer to update.
* \param[in] this_pcm A pointer to the row of the parity check matrix (i.e. base
* graph) corresponding to the selected layer.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_check_to_var_c_avx2long(void* p,
int i_layer,
const uint16_t* this_pcm,
const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Updates the current estimate of the (soft) bits of the codeword
* (optimized 8-bit version, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2long structure).
* \param[in] i_layer The index of the variable-to-check layer to update.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_soft_bits_c_avx2long(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Returns the decoded message (hard bits) from the current soft bits
* (optimized 8-bit version, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p A pointer to the decoder registers (an ldpc_regs_c_avx2long structure).
* \param[out] message A pointer to the decoded message.
* \param[in] liftK The length of the decoded message.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int extract_ldpc_message_c_avx2long(void* p, uint8_t* message, uint16_t liftK);
/*!
* Creates the registers used by the optimized 8-bit-based implementation of the LDPC decoder
* (flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in] bgN Codeword length.
* \param[in] bgM Number of check nodes.
* \param[in] ls Lifting size.
* \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
* \return A pointer to the created registers (an ldpc_regs_c_avx2_flood structure).
*/
void* create_ldpc_dec_c_avx2_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
/*!
* Destroys the inner registers of the optimized 8-bit integer-based LDPC decoder
* (flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p A pointer to the dismantled decoder registers (an ldpc_regs_c_avx2_flood structure).
*/
void delete_ldpc_dec_c_avx2_flood(void* p);
/*!
* Initializes the inner registers of the optimized 8-bit integer-based LDPC decoder before
* carrying out the actual decoding (flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2_flood structure).
* \param[in] llrs A pointer to the array of LLR values from the channel.
* \param[in] ls The lifting size.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int init_ldpc_dec_c_avx2_flood(void* p, const int8_t* llrs, uint16_t ls);
/*!
* Updates the messages from variable nodes to check nodes
* (optimized 8-bit version, flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2_flood structure).
* \param[in] i_layer The index of the variable-to-check layer to update.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_var_to_check_c_avx2_flood(void* p, int i_layer);
/*!
* Updates the messages from check nodes to variable nodes
* (optimized 8-bit version, flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2_flood structure).
* \param[in] i_layer The index of the variable-to-check layer to update.
* \param[in] this_pcm A pointer to the row of the parity check matrix (i.e. base
* graph) corresponding to the selected layer.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_check_to_var_c_avx2_flood(void* p,
int i_layer,
const uint16_t* this_pcm,
const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Updates the current estimate of the (soft) bits of the codeword
* (optimized 8-bit version, flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2_flood structure).
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to each layer.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_soft_bits_c_avx2_flood(void* p, const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Returns the decoded message (hard bits) from the current soft bits
* (flooded scheduling, optimized 8-bit version, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p A pointer to the decoder registers (an ldpc_regs_c_avx2_flood structure).
* \param[out] message A pointer to the decoded message.
* \param[in] liftK The length of the decoded message.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int extract_ldpc_message_c_avx2_flood(void* p, uint8_t* message, uint16_t liftK);
/*!
* Creates the registers used by the optimized 8-bit-based implementation of the LDPC decoder
* (flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in] bgN Codeword length.
* \param[in] bgM Number of check nodes.
* \param[in] ls Lifting size.
* \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
* \return A pointer to the created registers (an ldpc_regs_c_avx2long_flood structure).
*/
void* create_ldpc_dec_c_avx2long_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
/*!
* Destroys the inner registers of the optimized 8-bit integer-based LDPC decoder
* (flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p A pointer to the dismantled decoder registers (an ldpc_regs_c_avx2long_flood structure).
*/
void delete_ldpc_dec_c_avx2long_flood(void* p);
/*!
* Initializes the inner registers of the optimized 8-bit integer-based LDPC decoder before
* carrying out the actual decoding (flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2long_flood structure).
* \param[in] llrs A pointer to the array of LLR values from the channel.
* \param[in] ls The lifting size.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int init_ldpc_dec_c_avx2long_flood(void* p, const int8_t* llrs, uint16_t ls);
/*!
* Updates the messages from variable nodes to check nodes (optimized 8-bit version,
* flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2long_flood structure).
* \param[in] i_layer The index of the variable-to-check layer to update.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_var_to_check_c_avx2long_flood(void* p, int i_layer);
/*!
* Updates the messages from check nodes to variable nodes (optimized 8-bit version,
* flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2long_flood structure).
* \param[in] i_layer The index of the variable-to-check layer to update.
* \param[in] this_pcm A pointer to the row of the parity check matrix (i.e. base
* graph) corresponding to the selected layer.
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to the current layer.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_check_to_var_c_avx2long_flood(void* p,
int i_layer,
const uint16_t* this_pcm,
const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Updates the current estimate of the (soft) bits of the codeword (optimized 8-bit version,
* flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] p A pointer to the decoder registers (an ldpc_regs_c_avx2long_flood structure).
* \param[in] these_var_indices
* Contains the indices of the variable nodes connected
* to each layer.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int update_ldpc_soft_bits_c_avx2long_flood(void* p, const int8_t (*these_var_indices)[MAX_CNCT]);
/*!
* Returns the decoded message (hard bits) from the current soft bits (optimized 8-bit version,
* flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p A pointer to the decoder registers (an ldpc_regs_c_avx2long_flood structure).
* \param[out] message A pointer to the decoded message.
* \param[in] liftK The length of the decoded message.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int extract_ldpc_message_c_avx2long_flood(void* p, uint8_t* message, uint16_t liftK);
#endif // SRSLTE_LDPCDEC_ALL_H

@ -0,0 +1,363 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_c.c
* \brief Definition of the LDPC decoder inner functions working
* with 8-bit integer-valued LLRs.
*
* Even if the inner representation is based on 8 bits, check-to-variable and
* variable-to-check messages are actually represented with 7 bits, the
* remaining bit is used to represent infinity.
*
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <stdint.h>
#include <stdlib.h>
#include <strings.h>
#include "ldpc_dec_all.h"
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/utils/vector.h"
#define F2I 100 /*!< \brief Used for float to int conversion---float f is stored as (int)(f*F2I). */
/*!
* \brief Maximum message magnitude (\f$2^6 - 1 = 63\f$).
*
* Messages use a 7-bit quantization: check-to-variable and variable-to-check messages are
* saturated to the interval [-infinity7, +infinity7]. Soft bits use the remaining bit to
* denote infinity.
*/
static const int8_t infinity7 = (1U << 6U) - 1;
/*!
* \brief Inner registers for the LDPC decoder that works with 8-bit integer-valued LLRs.
*
* The buffers are allocated by create_ldpc_dec_c() and released by delete_ldpc_dec_c().
*/
struct ldpc_regs_c {
int8_t* soft_bits; /*!< \brief A-posteriori log-likelihood ratios (liftN entries). */
int8_t* check_to_var; /*!< \brief Check-to-variable messages (hrrN + ls entries for each of the bgM layers). */
int8_t* var_to_check; /*!< \brief Variable-to-check messages of the layer being processed (hrrN + ls entries). */
int8_t (*min_v2c)[2]; /*!< \brief Helper register: the two smallest variable-to-check magnitudes seen by each lifted check node (ls entries). */
int* min_v_index; /*!< \brief Helper register: position of the smallest variable-to-check magnitude for each lifted check node (ls entries). */
int* prod_v2c; /*!< \brief Helper register: product of the signs of the variable-to-check messages for each lifted check node (ls entries). */
uint16_t liftN; /*!< \brief Total number of variable nodes (after lifting). */
uint16_t hrrN; /*!< \brief Number of variable nodes in the high-rate region (after lifting). */
uint8_t bgM; /*!< \brief Number of check nodes (before lifting). */
uint16_t ls; /*!< \brief Lifting size. */
int scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm, stored as (int)(f * F2I). */
};
/*!
* Carries out the actual update of the variable-to-check messages. It basically
* consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
* \f$\lvert x[i] \rvert = 2^{7}-1 \f$ (our representation of infinity) to
* ensure it is properly propagated. Also, the subtraction is saturated between
* \f$- clip\f$ and \f$+ clip\f$.
* \param[in] x Minuend: array we subtract from (in practice, the soft bits).
* \param[in] y Subtrahend: array to be subtracted (in practice, the
* check-to-variable messages).
* \param[out] z Resulting difference array(in practice, the updated
* variable-to-check messages).
* \param[in] clip The saturation value.
* \param[in] len The length of the vectors.
*/
static void inner_var_to_check_c(const int8_t* x, const int8_t* y, int8_t* z, uint8_t clip, uint32_t len);
// Allocates the register set of the 8-bit LDPC decoder.
// All buffers are sized from the base-graph dimensions (bgN, bgM) and the lifting size ls.
// If any allocation fails, everything obtained so far is released and NULL is returned.
void* create_ldpc_dec_c(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
{
  const uint8_t  bgK   = bgN - bgM;
  const uint16_t liftN = bgN * ls;
  const uint16_t hrrN  = (bgK + 4) * ls;

  struct ldpc_regs_c* regs = malloc(sizeof(struct ldpc_regs_c));
  if (regs == NULL) {
    return NULL;
  }

  // Clear every buffer pointer first, so that the shared failure path can free them all
  // regardless of how far the allocation sequence got (free(NULL) is a no-op).
  regs->soft_bits    = NULL;
  regs->check_to_var = NULL;
  regs->var_to_check = NULL;
  regs->min_v2c      = NULL;
  regs->min_v_index  = NULL;
  regs->prod_v2c     = NULL;

  regs->soft_bits = srslte_vec_i8_malloc(liftN);
  if (regs->soft_bits == NULL) {
    goto alloc_failed;
  }

  // One block of (hrrN + ls) messages per check node of the base graph.
  regs->check_to_var = srslte_vec_i8_malloc((hrrN + ls) * bgM);
  if (regs->check_to_var == NULL) {
    goto alloc_failed;
  }

  regs->var_to_check = srslte_vec_i8_malloc((hrrN + ls));
  if (regs->var_to_check == NULL) {
    goto alloc_failed;
  }

  regs->min_v2c = malloc(ls * sizeof(int8_t[2]));
  if (regs->min_v2c == NULL) {
    goto alloc_failed;
  }

  regs->min_v_index = srslte_vec_i32_malloc(ls);
  if (regs->min_v_index == NULL) {
    goto alloc_failed;
  }

  regs->prod_v2c = srslte_vec_i32_malloc(ls);
  if (regs->prod_v2c == NULL) {
    goto alloc_failed;
  }

  regs->bgM   = bgM;
  regs->liftN = liftN;
  regs->hrrN  = hrrN;
  regs->ls    = ls;
  // The float scaling factor is kept as a fixed-point integer.
  regs->scaling_fctr = (int)(scaling_fctr * F2I);

  return regs;

alloc_failed:
  free(regs->prod_v2c);
  free(regs->min_v_index);
  free(regs->min_v2c);
  free(regs->var_to_check);
  free(regs->check_to_var);
  free(regs->soft_bits);
  free(regs);
  return NULL;
}
// Releases the register set created by create_ldpc_dec_c(). A NULL pointer is ignored.
void delete_ldpc_dec_c(void* p)
{
  struct ldpc_regs_c* regs = p;

  if (regs == NULL) {
    return;
  }

  // Buffers are released in the reverse order of their allocation, the container last.
  free(regs->prod_v2c);
  free(regs->min_v_index);
  free(regs->min_v2c);
  free(regs->var_to_check);
  free(regs->check_to_var);
  free(regs->soft_bits);
  free(regs);
}
// Loads the channel LLRs into the soft-bit register and clears all messages.
//
// The first 2 * ls soft bits are set to zero and the remaining liftN - 2 * ls entries are
// copied from llrs. Returns 0 on success and -1 if p or llrs is NULL, or if 2 * ls exceeds
// the size of the soft-bit register (which would otherwise be overrun by the zeroing below).
int init_ldpc_dec_c(void* p, const int8_t* llrs, uint16_t ls)
{
  struct ldpc_regs_c* vp = p;

  if ((vp == NULL) || (llrs == NULL)) {
    return -1;
  }

  const int skip = 2 * ls;

  // soft_bits holds exactly liftN entries: never zero more than that.
  if (skip > vp->liftN) {
    return -1;
  }

  bzero(vp->soft_bits, skip * sizeof(int8_t));
  for (int i = skip; i < vp->liftN; i++) {
    vp->soft_bits[i] = llrs[i - skip];
  }

  bzero(vp->check_to_var, (vp->hrrN + vp->ls) * vp->bgM * sizeof(int8_t));
  bzero(vp->var_to_check, (vp->hrrN + vp->ls) * sizeof(int8_t));

  return 0;
}
// Computes the variable-to-check messages for layer i_layer by subtracting the layer's
// check-to-variable messages from the soft bits. Returns 0 on success, -1 if p is NULL.
int update_ldpc_var_to_check_c(void* p, int i_layer)
{
  struct ldpc_regs_c* regs = p;

  if (regs == NULL) {
    return -1;
  }

  // Each layer owns a contiguous block of (hrrN + ls) check-to-variable messages.
  int8_t* layer_c2v = regs->check_to_var + i_layer * (regs->hrrN + regs->ls);

  // High-rate region.
  inner_var_to_check_c(regs->soft_bits, layer_c2v, regs->var_to_check, infinity7, regs->hrrN);

  if (i_layer < 4) {
    return 0;
  }

  // Extension region: from the fifth layer on, one extra block of ls soft bits is involved.
  const int8_t* ext_soft_bits = regs->soft_bits + regs->hrrN + (i_layer - 4) * regs->ls;
  inner_var_to_check_c(ext_soft_bits, layer_c2v + regs->hrrN, regs->var_to_check + regs->hrrN, infinity7, regs->ls);

  return 0;
}
// Check-to-variable update of the normalized min-sum algorithm for one layer (8-bit version).
//
// The row of the base graph is visited twice. The first pass gathers, for each of the ls
// lifted check nodes, the two smallest variable-to-check magnitudes, the position of the
// smallest one and the product of the signs. The second pass turns them into the scaled
// check-to-variable messages of layer i_layer.
//
// The list in *these_var_indices is read up to the first -1 entry or up to MAX_CNCT entries,
// whichever comes first; no element beyond the row is ever accessed.
// Returns 0 on success, -1 if p is NULL.
int update_ldpc_check_to_var_c(void*           p,
                               int             i_layer,
                               const uint16_t* this_pcm,
                               const int8_t (*these_var_indices)[MAX_CNCT])
{
  struct ldpc_regs_c* vp = p;
  if (p == NULL) {
    return -1;
  }

  int i = 0;
  int j = 0;

  for (i = 0; i < vp->ls; i++) {
    vp->prod_v2c[i] = 1;
    for (j = 0; j < 2; j++) {
      vp->min_v2c[i][j] = INT8_MAX;
    }
    // Message positions are never negative, so -1 means "no minimum recorded yet". Without
    // this, the second pass could read an indeterminate value when no magnitude is
    // strictly smaller than INT8_MAX.
    vp->min_v_index[i] = -1;
  }

  uint16_t shift             = 0;
  int      index             = 0;
  int8_t   this_v2c          = 0;
  int      is_min            = 0;
  int      i_v2c             = 0;
  int      i_v2c_base        = 0;
  int8_t   current_var_index = 0;

  // First pass: collect minima, position of the minimum and sign product.
  for (i = 0; i < MAX_CNCT; i++) {
    current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }

    shift      = this_pcm[current_var_index];
    i_v2c_base = current_var_index * vp->ls;
    // All the extension-region nodes share the last block of var_to_check.
    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;

    for (j = 0; j < vp->ls; j++) {
      // Circular shift dictated by the parity-check matrix entry.
      index    = (j + vp->ls - shift) % vp->ls;
      i_v2c    = i_v2c_base + j;
      this_v2c = abs(vp->var_to_check[i_v2c]);
      is_min   = this_v2c < vp->min_v2c[index][0];
      // The second minimum is either unchanged, the old first minimum or the new value.
      vp->min_v2c[index][1] =
          (this_v2c >= vp->min_v2c[index][1]) ? vp->min_v2c[index][1] : (is_min ? vp->min_v2c[index][0] : this_v2c);
      vp->min_v2c[index][0]  = is_min ? this_v2c : vp->min_v2c[index][0];
      vp->min_v_index[index] = is_min ? i_v2c : vp->min_v_index[index];
      vp->prod_v2c[index] *= (vp->var_to_check[i_v2c] >= 0) ? 1 : -1;
    }
  }

  int8_t* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);

  // Second pass: build the outgoing messages.
  for (i = 0; i < MAX_CNCT; i++) {
    current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }

    shift      = this_pcm[current_var_index];
    i_v2c_base = current_var_index * vp->ls;
    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;

    for (j = 0; j < vp->ls; j++) {
      index = (j + vp->ls - shift) % vp->ls;
      i_v2c = i_v2c_base + j;
      // The node that provided the minimum receives the second minimum instead.
      this_check_to_var[i_v2c] = (i_v2c != vp->min_v_index[index]) ? vp->min_v2c[index][0] : vp->min_v2c[index][1];
      this_check_to_var[i_v2c] = this_check_to_var[i_v2c] * vp->scaling_fctr / F2I;
      // Multiplying the total sign product by the node's own sign removes its contribution.
      this_check_to_var[i_v2c] *= vp->prod_v2c[index] * ((vp->var_to_check[i_v2c] >= 0) ? 1 : -1);
    }
  }

  return 0;
}
// Refreshes the soft bits of the variable nodes connected to layer i_layer as the sum of the
// layer's check-to-variable and variable-to-check messages.
//
// A sum whose magnitude exceeds infinity7 is stored as +/-INT8_MAX, the 8-bit representation
// of infinity. The list in *these_var_indices is read up to the first -1 entry or up to
// MAX_CNCT entries, whichever comes first; no element beyond the row is ever accessed.
// Returns 0 on success, -1 if p is NULL.
int update_ldpc_soft_bits_c(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT])
{
  struct ldpc_regs_c* vp = p;
  if (p == NULL) {
    return -1;
  }

  int8_t* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);
  int8_t* this_var_to_check = vp->var_to_check;

  for (int i = 0; i < MAX_CNCT; i++) {
    const int8_t current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }

    const int current_var_index_ext = current_var_index * vp->ls;

    for (int j = 0; j < vp->ls; j++) {
      const int i_bit = current_var_index_ext + j;
      // Messages of the extension region are all stored in the block starting at hrrN.
      const int i_bit_tmp = (current_var_index_ext <= vp->hrrN) ? i_bit : vp->hrrN + j;

      long tmp = (long)this_check_to_var[i_bit_tmp] + this_var_to_check[i_bit_tmp];
      if (tmp > infinity7) {
        tmp = INT8_MAX;
      }
      if (tmp < -infinity7) {
        tmp = -INT8_MAX;
      }
      vp->soft_bits[i_bit] = (int8_t)tmp;
    }
  }

  return 0;
}
int extract_ldpc_message_c(void* p, uint8_t* message, uint16_t liftK)
{
if (p == NULL) {
return -1;
}
struct ldpc_regs_c* vp = p;
for (int i = 0; i < liftK; i++) {
message[i] = (vp->soft_bits[i] < 0);
}
return 0;
}
// Element-wise z = x - y over len entries, saturated to [-clip, +clip].
// Entries of x at or beyond +/-127 (the 8-bit representation of infinity) are not subtracted:
// the corresponding output is set to +/-127 so that infinity propagates.
void inner_var_to_check_c(const int8_t* x, const int8_t* y, int8_t* z, const uint8_t clip, const uint32_t len)
{
  const long infinity8 = (1U << 7U) - 1; // Max positive value in 8-bit representation
  const long upper     = clip;
  const long lower     = -(long)clip;

  for (uint32_t k = 0; k < len; k++) {
    const long minuend = x[k];

    if (minuend >= infinity8) {
      z[k] = (int8_t)infinity8;
    } else if (minuend <= -infinity8) {
      z[k] = (int8_t)(-infinity8);
    } else {
      long diff = minuend - y[k];
      diff      = (diff > upper) ? upper : diff;
      diff      = (diff < lower) ? lower : diff;
      z[k]      = (int8_t)diff;
    }
  }
}

@ -0,0 +1,545 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_c_avx2.c
* \brief Definition of the LDPC decoder inner functions working
* with 8-bit integer-valued LLRs (AVX2 version).
*
* Even if the inner representation is based on 8 bits, check-to-variable and
* variable-to-check messages are actually represented with 7 bits, the
* remaining bit is used to represent infinity.
*
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <stdint.h>
#include <stdlib.h>
#include <strings.h>
#include "../utils_avx2.h"
#include "ldpc_dec_all.h"
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/utils/vector.h"
#ifdef LV_HAVE_AVX2
#include <immintrin.h>
#include "ldpc_avx2_consts.h"
#define F2I 65535 /*!< \brief Used for float to int conversion---float f is stored as (int)(f*F2I). */
/*!
* \brief Represents a node of the base factor graph.
*
* The two members alias the same storage: \b c gives access to the individual 8-bit values,
* \b v gives access to all of them at once as a single AVX2 register.
*/
typedef union bg_node_t {
int8_t c[SRSLTE_AVX2_B_SIZE]; /*!< Each base node may contain up to \ref SRSLTE_AVX2_B_SIZE lifted nodes. */
__m256i v; /*!< All the lifted nodes of the current base node as a 256-bit line. */
} bg_node_t;
/*!
* \brief Maximum message magnitude (\f$2^6 - 1 = 63\f$).
*
* Messages use a 7-bit quantization. Soft bits use the remaining bit to denote infinity.
* This value is passed as the saturation level when updating the variable-to-check messages.
*/
static const int8_t infinity7 = (1U << 6U) - 1;
/*!
* \brief Inner registers for the LDPC decoder that works with 8-bit integer-valued LLRs.
*
* Each node of the base graph is stored in one 256-bit line. The buffers are allocated by
* create_ldpc_dec_c_avx2() and released by delete_ldpc_dec_c_avx2().
*/
struct ldpc_regs_c_avx2 {
__m256i scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm, replicated as 16-bit integers (f * F2I). */
bg_node_t* soft_bits; /*!< \brief A-posteriori log-likelihood ratios (bgN nodes). */
__m256i* check_to_var; /*!< \brief Check-to-variable messages (hrr + 1 nodes for each of the bgM layers). */
__m256i* var_to_check; /*!< \brief Variable-to-check messages (hrr + 1 nodes). */
__m256i* rotated_v2c; /*!< \brief To store a rotated version of the variable-to-check messages (hrr + 1 nodes). */
uint16_t ls; /*!< \brief Lifting size. */
uint8_t hrr; /*!< \brief Number of variable nodes in the high-rate region (before lifting). */
uint8_t bgM; /*!< \brief Number of check nodes (before lifting). */
uint8_t bgN; /*!< \brief Number of variable nodes (before lifting). */
};
/*!
* Carries out the actual update of the variable-to-check messages. It basically
* consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
* \f$\lvert x[i] \rvert = 2^{7}-1 \f$ (our representation of infinity) to
* ensure it is properly propagated. Also, the subtraction is saturated between
* \f$- clip\f$ and \f$+ clip\f$.
* \param[in] x Minuend: array we subtract from (in practice, the soft bits).
* \param[in] y Subtrahend: array to be subtracted (in practice, the
* check-to-variable messages).
* \param[out] z Resulting difference array(in practice, the updated
* variable-to-check messages).
* \param[in] clip The saturation value.
* \param[in] len The length of the vectors.
*/
static void inner_var_to_check_c_avx2(const __m256i* x, const __m256i* y, __m256i* z, uint8_t clip, uint32_t len);
/*!
* Rotate the content of an __m256i vector (first input) towards the left by
* the number of chars specified by the second input (i.e., the \b imm * 8 least
* significant bits become the \b imm * 8 most significant bits).
* \param[in] a Vector to circularly shift.
* \param[in] imm The shift order in chars.
* \return The shifted vector.
*/
static __m256i _mm256_rotatelli_si256(__m256i a, int imm);
/*!
* Rotate the content of an __m256i vector (first input) towards the right by
* the number of chars specified by the second input (i.e., the \b imm * 8 most
* significant bits become the \b imm * 8 least significant bits).
* \param[in] a Vector to circularly shift.
* \param[in] imm The shift order in chars.
* \return The shifted vector.
*/
static __m256i _mm256_rotaterli_si256(__m256i a, int imm);
/*!
* Rotate the contents of a node towards the left by \b imm chars, that is the
* \b imm * 8 most significant bits become the least significant ones.
* \param[in] a The node to rotate.
* \param[in] imm The order of the rotation in number of chars.
* \param[in] ls The size of the node (lifting size).
* \return The rotated node.
*/
static __m256i rotate_node_left(__m256i a, int imm, uint16_t ls);
/*!
* Rotate the contents of a node towards the right by \b imm chars, that is the
* \b imm * 8 most significant bits become the least significant ones.
* \param[in] a The node to rotate.
* \param[in] imm The order of the rotation in number of chars.
* \param[in] ls The size of the node (lifting size).
* \return The rotated node.
*/
static __m256i rotate_node_right(__m256i a, int imm, uint16_t ls);
/*!
* Scale packed 8-bit integers in \b a by the scaling factor \b sf / #F2I.
* \param[in] a Vector of packed 8-bit integers.
* \param[in] sf Scaling factor.
* \return Vector of packed 8-bit integers with the scaling result.
*/
static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf);
// Allocates the register set of the AVX2 8-bit LDPC decoder (one 256-bit line per base-graph
// node). If any allocation fails, everything obtained so far is released and NULL is returned.
void* create_ldpc_dec_c_avx2(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
{
  const uint8_t  bgK = bgN - bgM;
  const uint16_t hrr = bgK + 4;

  struct ldpc_regs_c_avx2* regs = srslte_vec_malloc(sizeof(struct ldpc_regs_c_avx2));
  if (regs == NULL) {
    return NULL;
  }

  // Clear every buffer pointer first, so that the shared failure path can free them all
  // regardless of how far the allocation sequence got (free(NULL) is a no-op).
  regs->soft_bits    = NULL;
  regs->check_to_var = NULL;
  regs->var_to_check = NULL;
  regs->rotated_v2c  = NULL;

  regs->soft_bits = srslte_vec_malloc(bgN * sizeof(bg_node_t));
  if (regs->soft_bits == NULL) {
    goto alloc_failed;
  }

  // One block of (hrr + 1) nodes per check node of the base graph.
  regs->check_to_var = srslte_vec_malloc((hrr + 1) * bgM * sizeof(__m256i));
  if (regs->check_to_var == NULL) {
    goto alloc_failed;
  }

  regs->var_to_check = srslte_vec_malloc((hrr + 1) * sizeof(__m256i));
  if (regs->var_to_check == NULL) {
    goto alloc_failed;
  }

  regs->rotated_v2c = srslte_vec_malloc((hrr + 1) * sizeof(__m256i));
  if (regs->rotated_v2c == NULL) {
    goto alloc_failed;
  }

  regs->bgM = bgM;
  regs->bgN = bgN;
  regs->hrr = hrr;
  regs->ls  = ls;
  // The float scaling factor is kept as a 16-bit fixed-point value replicated over the register.
  regs->scaling_fctr = _mm256_set1_epi16((uint16_t)(scaling_fctr * F2I));

  return regs;

alloc_failed:
  free(regs->rotated_v2c);
  free(regs->var_to_check);
  free(regs->check_to_var);
  free(regs->soft_bits);
  free(regs);
  return NULL;
}
// Releases the register set created by create_ldpc_dec_c_avx2(). A NULL pointer is ignored.
void delete_ldpc_dec_c_avx2(void* p)
{
  struct ldpc_regs_c_avx2* regs = p;

  if (regs == NULL) {
    return;
  }

  // Buffers are released in the reverse order of their allocation, the container last.
  free(regs->rotated_v2c);
  free(regs->var_to_check);
  free(regs->check_to_var);
  free(regs->soft_bits);
  free(regs);
}
// Loads the channel LLRs into the soft-bit register (one base-graph node per 256-bit line)
// and clears all messages.
//
// Nodes 0 and 1 are set to zero; each following node receives ls consecutive LLRs and its
// remaining SRSLTE_AVX2_B_SIZE - ls entries are zeroed.
// Returns 0 on success and -1 if p or llrs is NULL, or if ls does not fit in a node
// (ls > SRSLTE_AVX2_B_SIZE), which would otherwise overrun the node and hand a wrapped-around
// length to bzero.
int init_ldpc_dec_c_avx2(void* p, const int8_t* llrs, uint16_t ls)
{
  struct ldpc_regs_c_avx2* vp = p;
  int                      i  = 0;
  int                      j  = 0;

  if ((vp == NULL) || (llrs == NULL)) {
    return -1;
  }

  // This implementation stores a whole lifted node in a single 256-bit line.
  if (ls > SRSLTE_AVX2_B_SIZE) {
    return -1;
  }

  // the first 2 x LS bits of the codeword are not sent
  vp->soft_bits[0].v = _mm256_set1_epi8(0);
  vp->soft_bits[1].v = _mm256_set1_epi8(0);

  for (i = 2; i < vp->bgN; i++) {
    for (j = 0; j < ls; j++) {
      vp->soft_bits[i].c[j] = llrs[(i - 2) * ls + j];
    }
    // Pad the unused tail of the node with zeros.
    bzero(&(vp->soft_bits[i].c[ls]), (SRSLTE_AVX2_B_SIZE - ls) * sizeof(int8_t));
  }

  bzero(vp->check_to_var, (vp->hrr + 1) * vp->bgM * sizeof(__m256i));
  bzero(vp->var_to_check, (vp->hrr + 1) * sizeof(__m256i));

  return 0;
}
/*!
 * Updates the variable-to-check messages of one layer by subtracting the
 * layer's check-to-variable messages from the soft bits (clipped to
 * +/- infinity7).
 *
 * The high-rate region is always updated; from layer 4 onwards the single
 * extension node at position hrr + i_layer - 4 is updated as well.
 *
 * \param[in,out] p       Pointer to the decoder registers.
 * \param[in]     i_layer Index of the current layer.
 * \return 0 on success, -1 if \b p is NULL.
 */
int update_ldpc_var_to_check_c_avx2(void* p, int i_layer)
{
  if (p == NULL) {
    return -1;
  }

  struct ldpc_regs_c_avx2* regs = p;

  const __m256i* bits    = &(regs->soft_bits[0].v);
  const __m256i* c2v_row = regs->check_to_var + i_layer * (regs->hrr + 1);

  // High-rate region.
  inner_var_to_check_c_avx2(bits, c2v_row, regs->var_to_check, infinity7, regs->hrr);

  if (i_layer < 4) {
    return 0;
  }

  // Extension region: a single node per layer.
  inner_var_to_check_c_avx2(
      bits + regs->hrr + i_layer - 4, c2v_row + regs->hrr, regs->var_to_check + regs->hrr, infinity7, 1);

  return 0;
}
/*!
 * Updates the check-to-variable messages of one layer with the normalized
 * min-sum rule.
 *
 * A first pass over the connected variable nodes rotates each
 * variable-to-check message and tracks, lane by lane, the smallest magnitude,
 * the second smallest magnitude, the position of the smallest one and the sign
 * of the product of all messages. A second pass builds each outgoing message
 * from the minimum of the other magnitudes, scales it, applies the sign and
 * rotates it back.
 *
 * \param[in,out] p                 Pointer to the decoder registers.
 * \param[in]     i_layer           Index of the current layer.
 * \param[in]     this_pcm          Rotation applied to each variable node in
 *                                  this layer, indexed by variable node index.
 * \param[in]     these_var_indices Indices of the variable nodes connected to
 *                                  this check node; the list ends at the first
 *                                  -1 or after MAX_CNCT entries.
 * \return 0 on success, -1 if \b p is NULL.
 */
int update_ldpc_check_to_var_c_avx2(void*           p,
                                    int             i_layer,
                                    const uint16_t* this_pcm,
                                    const int8_t (*these_var_indices)[MAX_CNCT])
{
  struct ldpc_regs_c_avx2* vp = p;

  if (p == NULL) {
    return -1;
  }

  __m256i minp_v2c_epi8 = _mm256_set1_epi8(INT8_MAX); // smallest magnitude so far
  __m256i mins_v2c_epi8 = _mm256_set1_epi8(INT8_MAX); // second smallest magnitude so far
  __m256i prod_v2c_epi8 = _mm256_set1_epi8(0);        // sign mask of the product of all messages
  // Explicitly initialized: a lane in which no magnitude ever drops below
  // INT8_MAX never gets an index assigned. In such a lane both minima are
  // equal, so the value chosen here does not change the result, but the
  // register must not be read while indeterminate.
  __m256i min_ix_epi8 = _mm256_set1_epi8(0);

  // First pass: rotate the messages and gather minima and signs. The index is
  // read at the top of each iteration so that the row is never accessed past
  // its MAX_CNCT entries.
  for (int i = 0; i < MAX_CNCT; i++) {
    const int8_t current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }

    const uint16_t shift = this_pcm[current_var_index];
    // all the extension nodes share the last slot of the message buffers
    const int     i_v2c_base      = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
    const __m256i current_ix_epi8 = _mm256_set1_epi8((int8_t)i);

    __m256i* this_rotated_v2c = vp->rotated_v2c + i;
    *this_rotated_v2c         = rotate_node_right(vp->var_to_check[i_v2c_base], shift, vp->ls);

    // mask_sign is 1 if this_rotated_v2c is strictly negative
    const __m256i mask_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, *this_rotated_v2c);
    prod_v2c_epi8                = _mm256_xor_si256(prod_v2c_epi8, mask_sign_epi8);

    const __m256i this_abs_v2c_epi8 = _mm256_abs_epi8(*this_rotated_v2c);

    // mask_min is 1 if this_abs_v2c is strictly smaller than minp_v2c
    __m256i mask_min_epi8 = _mm256_cmpgt_epi8(minp_v2c_epi8, this_abs_v2c_epi8);
    // candidate for the second minimum: the old minimum if it has just been
    // replaced, the current magnitude otherwise
    const __m256i help_min_epi8 = _mm256_blendv_epi8(this_abs_v2c_epi8, minp_v2c_epi8, mask_min_epi8);
    minp_v2c_epi8               = _mm256_blendv_epi8(minp_v2c_epi8, this_abs_v2c_epi8, mask_min_epi8);
    min_ix_epi8                 = _mm256_blendv_epi8(min_ix_epi8, current_ix_epi8, mask_min_epi8);

    // mask_min is 1 if this_abs_v2c is strictly smaller than mins_v2c
    mask_min_epi8 = _mm256_cmpgt_epi8(mins_v2c_epi8, this_abs_v2c_epi8);
    mins_v2c_epi8 = _mm256_blendv_epi8(mins_v2c_epi8, help_min_epi8, mask_min_epi8);
  }

  __m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1);

  // Second pass: build, scale, sign and rotate back each outgoing message.
  for (int i = 0; i < MAX_CNCT; i++) {
    const int8_t current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }

    const uint16_t shift      = this_pcm[current_var_index];
    const int      i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;

    const __m256i* this_rotated_v2c = vp->rotated_v2c + i;

    // sign of the product of all the other messages: the overall sign with the
    // contribution of this message removed
    __m256i final_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, *this_rotated_v2c);
    final_sign_epi8         = _mm256_xor_si256(final_sign_epi8, prod_v2c_epi8);

    // the node holding the minimum receives the second minimum
    const __m256i current_ix_epi8  = _mm256_set1_epi8((int8_t)i);
    const __m256i mask_is_min_epi8 = _mm256_cmpeq_epi8(current_ix_epi8, min_ix_epi8);
    __m256i       this_c2v_epi8    = _mm256_blendv_epi8(minp_v2c_epi8, mins_v2c_epi8, mask_is_min_epi8);
    this_c2v_epi8                  = _mm256_scalei_epi8(this_c2v_epi8, vp->scaling_fctr);

    // negate the lanes whose final sign is negative, leave the others untouched
    const __m256i help_c2v_epi8 = _mm256_sign_epi8(this_c2v_epi8, final_sign_epi8);
    this_c2v_epi8               = _mm256_blendv_epi8(this_c2v_epi8, help_c2v_epi8, final_sign_epi8);

    this_check_to_var[i_v2c_base] = rotate_node_left(this_c2v_epi8, shift, vp->ls);
  }

  return 0;
}
/*!
 * Updates the soft bits of the variable nodes connected to one layer: each
 * soft bit becomes the saturated sum of the corresponding check-to-variable
 * and variable-to-check messages.
 *
 * \param[in,out] p                 Pointer to the decoder registers.
 * \param[in]     i_layer           Index of the current layer.
 * \param[in]     these_var_indices Indices of the variable nodes connected to
 *                                  this check node; the list ends at the first
 *                                  -1 or after MAX_CNCT entries.
 * \return 0 on success, -1 if \b p is NULL.
 */
int update_ldpc_soft_bits_c_avx2(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT])
{
  struct ldpc_regs_c_avx2* vp = p;

  if (p == NULL) {
    return -1;
  }

  const __m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1);

  // The index is read at the top of each iteration so that the row is never
  // accessed past its MAX_CNCT entries.
  for (int i = 0; i < MAX_CNCT; i++) {
    const int8_t current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }

    // all the extension nodes share the last slot of the message buffers
    const int i_bit_tmp_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;

    __m256i tmp_epi8 = _mm256_adds_epi8(this_check_to_var[i_bit_tmp_base], vp->var_to_check[i_bit_tmp_base]);

    // tmp = (tmp > infty7) ? infty8 : tmp
    __m256i mask_epi8 = _mm256_cmpgt_epi8(tmp_epi8, infty7_epi8);
    tmp_epi8          = _mm256_blendv_epi8(tmp_epi8, infty8_epi8, mask_epi8);

    // tmp = (tmp < -infty7) ? -infty8 : tmp
    mask_epi8                          = _mm256_cmpgt_epi8(neg_infty7_epi8, tmp_epi8);
    vp->soft_bits[current_var_index].v = _mm256_blendv_epi8(tmp_epi8, neg_infty8_epi8, mask_epi8);
  }

  return 0;
}
int extract_ldpc_message_c_avx2(void* p, uint8_t* message, uint16_t liftK)
{
if (p == NULL) {
return -1;
}
struct ldpc_regs_c_avx2* vp = p;
int j = 0;
for (int i = 0; i < liftK / vp->ls; i++) {
for (j = 0; j < vp->ls; j++) {
message[i * vp->ls + j] = (vp->soft_bits[i].c[j] < 0);
}
}
return 0;
}
/*!
 * Computes z = x - y lane by lane with saturation to [-clip, +clip], then
 * forces z to +/- infinity wherever x is not strictly inside the
 * (neg_infty8, infty8) range, so that infinite soft bits are propagated.
 */
static void
inner_var_to_check_c_avx2(const __m256i* x, const __m256i* y, __m256i* z, const uint8_t clip, const uint32_t len)
{
  const __m256i upper_epi8 = _mm256_set1_epi8(clip);
  const __m256i lower_epi8 = _mm256_set1_epi8((char)(-clip));

  for (uint32_t k = 0; k < len; k++) {
    const __m256i minuend_epi8 = x[k];

    // saturated difference, then clamped to [-clip, clip]
    __m256i diff_epi8 = _mm256_subs_epi8(minuend_epi8, y[k]);
    diff_epi8         = _mm256_min_epi8(diff_epi8, upper_epi8);
    diff_epi8         = _mm256_max_epi8(diff_epi8, lower_epi8);

    // keep the difference only where x < infinity, otherwise output infinity
    const __m256i below_inf_epi8 = _mm256_cmpgt_epi8(infty8_epi8, minuend_epi8);
    diff_epi8                    = _mm256_blendv_epi8(infty8_epi8, diff_epi8, below_inf_epi8);

    // keep the result only where x > -infinity, otherwise output -infinity
    const __m256i above_neg_inf_epi8 = _mm256_cmpgt_epi8(minuend_epi8, neg_infty8_epi8);
    z[k]                             = _mm256_blendv_epi8(neg_infty8_epi8, diff_epi8, above_neg_inf_epi8);
  }
}
/*!
 * Rotates the 32 chars of \b a towards the left (towards the most significant
 * end) by \b imm chars; the chars pushed out at the top re-enter at the bottom.
 * The rotation is split into a rotation by whole 64-bit blocks followed by a
 * shift inside each block, with the missing chars carried over from the
 * neighbouring block. \b imm must be in the range 0..31.
 */
static __m256i _mm256_rotatelli_si256(__m256i a, int imm)
{
  // by_blocks[k] is a rotated left by k 64-bit blocks
  const __m256i by_blocks[4] = {
      a,                                // blocks 0 - 1 - 2 - 3
      _mm256_permute4x64_epi64(a, 147), // 3 - 0 - 1 - 2
      _mm256_permute4x64_epi64(a, 78),  // 2 - 3 - 0 - 1
      _mm256_permute4x64_epi64(a, 57),  // 1 - 2 - 3 - 0
  };

  const int whole_blocks = imm / 8;               // coarse rotation, in blocks
  const int extra_chars  = imm % 8;               // fine rotation, in chars
  const int carry_blocks = (whole_blocks + 1) % 4; // rotation providing the carry-over

  // shift left inside each block...
  const __m256i shifted_epi64 = _mm256_slli_epi64(by_blocks[whole_blocks], extra_chars * 8);
  // ...and bring in the chars that crossed a block boundary (when extra_chars
  // is 0 the shift count is 64, for which the intrinsic returns all zeros)
  const __m256i carried_epi64 = _mm256_srli_epi64(by_blocks[carry_blocks], (8 - extra_chars) * 8);

  return _mm256_xor_si256(shifted_epi64, carried_epi64);
}
/*!
 * Rotates the 32 chars of \b a towards the right (towards the least
 * significant end) by \b imm chars; the chars pushed out at the bottom
 * re-enter at the top. The rotation is split into a rotation by whole 64-bit
 * blocks followed by a shift inside each block, with the missing chars carried
 * over from the neighbouring block. \b imm must be in the range 0..31.
 */
static __m256i _mm256_rotaterli_si256(__m256i a, int imm)
{
  // by_blocks[k] is a rotated right by k 64-bit blocks
  const __m256i by_blocks[4] = {
      a,                                // blocks 0 - 1 - 2 - 3
      _mm256_permute4x64_epi64(a, 57),  // 1 - 2 - 3 - 0
      _mm256_permute4x64_epi64(a, 78),  // 2 - 3 - 0 - 1
      _mm256_permute4x64_epi64(a, 147), // 3 - 0 - 1 - 2
  };

  const int whole_blocks = imm / 8;               // coarse rotation, in blocks
  const int extra_chars  = imm % 8;               // fine rotation, in chars
  const int carry_blocks = (whole_blocks + 1) % 4; // rotation providing the carry-over

  // shift right inside each block...
  const __m256i shifted_epi64 = _mm256_srli_epi64(by_blocks[whole_blocks], extra_chars * 8);
  // ...and bring in the chars that crossed a block boundary (when extra_chars
  // is 0 the shift count is 64, for which the intrinsic returns all zeros)
  const __m256i carried_epi64 = _mm256_slli_epi64(by_blocks[carry_blocks], (8 - extra_chars) * 8);

  return _mm256_xor_si256(shifted_epi64, carried_epi64);
}
static __m256i rotate_node_left(__m256i a, int imm, uint16_t ls)
{
if (imm == 0) {
return a;
}
__m256i step1 = _mm256_rotatelli_si256(a, imm);
if (ls == SRSLTE_AVX2_B_SIZE) {
return step1;
}
__m256i step2 = _mm256_rotaterli_si256(a, ls - imm);
step1 = _mm256_and_si256(step1, mask_most_epi8[imm]);
step2 = _mm256_and_si256(step2, mask_least_epi8[imm]);
step1 = _mm256_xor_si256(step1, step2);
return step1;
;
}
static __m256i rotate_node_right(__m256i a, int imm, uint16_t ls)
{
if (imm == 0) {
return a;
}
__m256i step1 = _mm256_rotaterli_si256(a, imm);
if (ls == SRSLTE_AVX2_B_SIZE) {
return step1;
}
__m256i step2 = _mm256_rotatelli_si256(a, ls - imm);
step1 = _mm256_and_si256(step1, mask_least_epi8[ls - imm]);
step2 = _mm256_and_si256(step2, mask_most_epi8[ls - imm]);
step1 = _mm256_xor_si256(step1, step2);
return step1;
}
/*!
 * Scales the packed 8-bit values in \b a by \b sf. The bytes at even and odd
 * positions are processed separately as unsigned 16-bit lanes: each one is
 * multiplied by \b sf and only the upper 16 bits of the product are kept.
 * The two partial results are then merged back into packed bytes.
 */
static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf)
{
  // low byte of each 16-bit lane, selected by mask_even_epi8
  const __m256i low_epi16 = _mm256_and_si256(a, mask_even_epi8);
  // high byte of each 16-bit lane, moved down to the low position
  const __m256i high_epi16 = _mm256_srli_epi16(a, 8);

  const __m256i scaled_low_epi16 = _mm256_mulhi_epu16(low_epi16, sf);
  // scaled high bytes, moved back to the high position
  const __m256i scaled_high_epi16 = _mm256_slli_epi16(_mm256_mulhi_epu16(high_epi16, sf), 8);

  return _mm256_xor_si256(scaled_low_epi16, scaled_high_epi16);
}
#endif // LV_HAVE_AVX2

@ -0,0 +1,572 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_c_avx2_flood.c
* \brief Definition of the LDPC decoder inner functions working
* with 8-bit integer-valued LLRs (AVX2 version, flooded scheduling).
*
* Even if the inner representation is based on 8 bits, check-to-variable and
* variable-to-check messages are actually represented with 7 bits, the
* remaining bit is used to represent infinity.
*
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <stdint.h>
#include <stdlib.h>
#include <strings.h>
#include "../utils_avx2.h"
#include "ldpc_dec_all.h"
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/utils/vector.h"
#ifdef LV_HAVE_AVX2
#include <immintrin.h>
#include "ldpc_avx2_consts.h"
#define F2I 65535 /*!< \brief Used for float to int conversion---float f is stored as (uint16_t)(f*F2I). */
/*!
* \brief Represents a node of the base factor graph.
*
* Both members share the same storage: \b c gives access to the individual
* lifted nodes, \b v to the whole node as a single AVX2 vector.
*/
typedef union bg_node_t {
int8_t c[SRSLTE_AVX2_B_SIZE]; /*!< Each base node may contain up to \ref SRSLTE_AVX2_B_SIZE lifted nodes, one per char. */
__m256i v; /*!< All the lifted nodes of the current base node as a 256-bit line. */
} bg_node_t;
/*!
* \brief Maximum message magnitude, equal to 2^6 - 1 = 63.
* Messages use a 7-bit quantization. Soft bits use the remaining bit to denote infinity.
* It is the clipping value passed to inner_var_to_check_c_avx2() when the
* variable-to-check messages are updated.
*/
static const int8_t infinity7 = (1U << 6U) - 1;
/*!
* \brief Inner registers for the LDPC decoder that works with 8-bit integer-valued LLRs
* (flooded scheduling).
*
* The buffer sizes given below are those allocated by create_ldpc_dec_c_avx2_flood().
*/
struct ldpc_regs_c_avx2_flood {
__m256i scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm, as packed 16-bit integers. */
bg_node_t* soft_bits; /*!< \brief A-posteriori log-likelihood ratios (bgN nodes). */
__m256i* llrs; /*!< \brief A-priori log-likelihood ratios (bgN vectors). */
__m256i* check_to_var; /*!< \brief Check-to-variable messages ((hrr + 1) vectors for each of the bgM layers). */
__m256i* var_to_check; /*!< \brief Variable-to-check messages ((hrr + 1) vectors for each of the bgM layers). */
__m256i* rotated_v2c; /*!< \brief To store a rotated version of the variable-to-check messages (hrr + 1 vectors). */
uint16_t ls; /*!< \brief Lifting size. */
uint8_t hrr; /*!< \brief Number of variable nodes in the high-rate region (before lifting), set to bgN - bgM + 4. */
uint8_t bgM; /*!< \brief Number of check nodes (before lifting). */
uint8_t bgN; /*!< \brief Number of variable nodes (before lifting). */
};
/*!
* Carries out the actual update of the variable-to-check messages. It basically
* consists in \f$ z = x - y \f$ (as vectors). The subtraction is saturated
* between \f$- clip\f$ and \f$+ clip\f$. Moreover, wherever \f$ x[i] \f$ is
* equal to our representation of \f$\pm\f$ infinity (\f$\lvert x[i] \rvert =
* 2^{7}-1 \f$), the output is forced to the same infinity, to ensure it is
* properly propagated.
* \param[in] x Minuend: array we subtract from (in practice, the soft bits).
* \param[in] y Subtrahend: array to be subtracted (in practice, the
* check-to-variable messages).
* \param[out] z Resulting difference array (in practice, the updated
* variable-to-check messages).
* \param[in] clip The saturation value.
* \param[in] len The length of the vectors.
*/
static void inner_var_to_check_c_avx2(const __m256i* x, const __m256i* y, __m256i* z, uint8_t clip, uint32_t len);
/*!
* Rotate the content of an __m256i vector (first input) towards the left by
* the number of chars specified by the second input (i.e., the \b imm * 8 most
* significant bits become the \b imm * 8 least significant bits).
* \param[in] a Vector to circularly shift.
* \param[in] imm The shift order in chars (0 to 31).
* \return The shifted vector.
*/
static __m256i _mm256_rotatelli_si256(__m256i a, int imm);
/*!
* Rotate the content of an __m256i vector (first input) towards the right by
* the number of chars specified by the second input (i.e., the \b imm * 8 least
* significant bits become the \b imm * 8 most significant bits).
* \param[in] a Vector to circularly shift.
* \param[in] imm The shift order in chars (0 to 31).
* \return The shifted vector.
*/
static __m256i _mm256_rotaterli_si256(__m256i a, int imm);
/*!
* Rotate the contents of a node towards the left by \b imm chars, that is the
* \b imm * 8 most significant bits of the node become the least significant ones.
* Only the first \b ls chars of the vector take part in the rotation.
* \param[in] a The node to rotate.
* \param[in] imm The order of the rotation in number of chars.
* \param[in] ls The size of the node (lifting size).
* \return The rotated node.
*/
static __m256i rotate_node_left(__m256i a, int imm, uint16_t ls);
/*!
* Rotate the contents of a node towards the right by \b imm chars, that is the
* \b imm * 8 least significant bits of the node become the most significant ones.
* Only the first \b ls chars of the vector take part in the rotation.
* \param[in] a The node to rotate.
* \param[in] imm The order of the rotation in number of chars.
* \param[in] ls The size of the node (lifting size).
* \return The rotated node.
*/
static __m256i rotate_node_right(__m256i a, int imm, uint16_t ls);
/*!
* Scale packed 8-bit integers in \b a by the scaling factor \b sf / #F2I.
* The bytes of \b a are treated as unsigned values.
* \param[in] a Vector of packed 8-bit integers.
* \param[in] sf Scaling factor, as packed 16-bit integers.
* \return Vector of packed 8-bit integers with the scaling result.
*/
static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf);
/*!
 * Allocates the register set of the 8-bit AVX2 LDPC decoder with flooded
 * scheduling, together with all of its message buffers.
 * \param[in] bgN          Number of variable nodes of the base graph.
 * \param[in] bgM          Number of check nodes of the base graph.
 * \param[in] ls           Lifting size.
 * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm,
 *                         stored as (uint16_t)(scaling_fctr * F2I).
 * \return A pointer to the new register set, or NULL if any allocation fails
 *         (in which case everything allocated so far is released).
 */
void* create_ldpc_dec_c_avx2_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
{
  // The high-rate region spans the bgN - bgM leading base nodes plus 4 more.
  const uint8_t  n_info  = bgN - bgM;
  const uint16_t n_hrr   = n_info + 4;
  const int      row_len = n_hrr + 1; // one extra slot for the extension node

  struct ldpc_regs_c_avx2_flood* regs = srslte_vec_malloc(sizeof(struct ldpc_regs_c_avx2_flood));
  if (regs == NULL) {
    return NULL;
  }

  regs->llrs = srslte_vec_malloc(bgN * sizeof(__m256i));
  if (regs->llrs == NULL) {
    goto release_regs;
  }

  regs->soft_bits = srslte_vec_malloc(bgN * sizeof(bg_node_t));
  if (regs->soft_bits == NULL) {
    goto release_llrs;
  }

  regs->check_to_var = srslte_vec_malloc(row_len * bgM * sizeof(__m256i));
  if (regs->check_to_var == NULL) {
    goto release_soft_bits;
  }

  // with flooded scheduling every layer keeps its own variable-to-check messages
  regs->var_to_check = srslte_vec_malloc(row_len * bgM * sizeof(__m256i));
  if (regs->var_to_check == NULL) {
    goto release_check_to_var;
  }

  regs->rotated_v2c = srslte_vec_malloc(row_len * sizeof(__m256i));
  if (regs->rotated_v2c == NULL) {
    goto release_var_to_check;
  }

  regs->bgM          = bgM;
  regs->bgN          = bgN;
  regs->hrr          = n_hrr;
  regs->ls           = ls;
  regs->scaling_fctr = _mm256_set1_epi16((uint16_t)(scaling_fctr * F2I));
  return regs;

  // Failure paths: undo the allocations in reverse order.
release_var_to_check:
  free(regs->var_to_check);
release_check_to_var:
  free(regs->check_to_var);
release_soft_bits:
  free(regs->soft_bits);
release_llrs:
  free(regs->llrs);
release_regs:
  free(regs);
  return NULL;
}
/*!
 * Releases a register set created by create_ldpc_dec_c_avx2_flood() and all
 * the buffers it owns. A NULL pointer is accepted and ignored.
 * \param[in] p Pointer to the register set (may be NULL).
 */
void delete_ldpc_dec_c_avx2_flood(void* p)
{
  if (p == NULL) {
    return;
  }

  struct ldpc_regs_c_avx2_flood* regs = p;

  // Buffers are released in the reverse order of their allocation.
  free(regs->rotated_v2c);
  free(regs->var_to_check);
  free(regs->check_to_var);
  free(regs->soft_bits);
  free(regs->llrs);
  free(regs);
}
/*!
 * Loads the channel LLRs into both the soft bits and the a-priori LLR buffer,
 * and clears the messages.
 *
 * Each base node occupies one 256-bit line: the first \b ls chars receive the
 * LLRs of the lifted nodes and the remaining ones are set to zero. The first
 * two base nodes are set to zero since the corresponding bits are not sent.
 *
 * \param[in,out] p    Pointer to the decoder registers.
 * \param[in]     llrs Input LLRs, (bgN - 2) * ls values.
 * \param[in]     ls   Lifting size; must not exceed \ref SRSLTE_AVX2_B_SIZE.
 * \return 0 on success, -1 if \b p or \b llrs is NULL or \b ls does not fit in
 *         a single AVX2 line.
 */
int init_ldpc_dec_c_avx2_flood(void* p, const int8_t* llrs, uint16_t ls)
{
  struct ldpc_regs_c_avx2_flood* vp = p;

  if ((p == NULL) || (llrs == NULL)) {
    return -1;
  }

  // A node holds at most SRSLTE_AVX2_B_SIZE chars: a larger lifting size would
  // write past the node and make the padding length below wrap around.
  if (ls > SRSLTE_AVX2_B_SIZE) {
    return -1;
  }

  // the first 2 x LS bits of the codeword are not sent
  vp->soft_bits[0].v = _mm256_set1_epi8(0);
  vp->soft_bits[1].v = _mm256_set1_epi8(0);
  vp->llrs[0]        = _mm256_set1_epi8(0);
  vp->llrs[1]        = _mm256_set1_epi8(0);

  for (int i = 2; i < vp->bgN; i++) {
    const int8_t* node_llrs = llrs + (i - 2) * ls;
    for (int j = 0; j < ls; j++) {
      vp->soft_bits[i].c[j] = node_llrs[j];
    }
    // zero the unused tail of the 256-bit line
    bzero(&(vp->soft_bits[i].c[ls]), (SRSLTE_AVX2_B_SIZE - ls) * sizeof(int8_t));
    // keep a copy of the channel LLRs: the soft bits are rebuilt from it
    vp->llrs[i] = vp->soft_bits[i].v;
  }

  bzero(vp->check_to_var, (vp->hrr + 1) * vp->bgM * sizeof(__m256i));
  bzero(vp->var_to_check, (vp->hrr + 1) * vp->bgM * sizeof(__m256i));

  return 0;
}
/*!
 * Updates the variable-to-check messages of one layer by subtracting the
 * layer's check-to-variable messages from the soft bits (clipped to
 * +/- infinity7). Each layer has its own row of hrr + 1 messages.
 *
 * The high-rate region is always updated; from layer 4 onwards the single
 * extension node at position hrr + i_layer - 4 is updated as well.
 *
 * \param[in,out] p       Pointer to the decoder registers.
 * \param[in]     i_layer Index of the current layer.
 * \return 0 on success, -1 if \b p is NULL.
 */
int update_ldpc_var_to_check_c_avx2_flood(void* p, int i_layer)
{
  if (p == NULL) {
    return -1;
  }

  struct ldpc_regs_c_avx2_flood* regs = p;

  const int      row_offset = i_layer * (regs->hrr + 1);
  const __m256i* bits       = &(regs->soft_bits[0].v);
  const __m256i* c2v_row    = regs->check_to_var + row_offset;
  __m256i*       v2c_row    = regs->var_to_check + row_offset;

  // High-rate region.
  inner_var_to_check_c_avx2(bits, c2v_row, v2c_row, infinity7, regs->hrr);

  if (i_layer < 4) {
    return 0;
  }

  // Extension region: a single node per layer.
  inner_var_to_check_c_avx2(bits + regs->hrr + i_layer - 4, c2v_row + regs->hrr, v2c_row + regs->hrr, infinity7, 1);

  return 0;
}
/*!
 * Updates the check-to-variable messages of one layer with the normalized
 * min-sum rule (flooded scheduling: the variable-to-check messages are read
 * from the row belonging to this layer).
 *
 * A first pass over the connected variable nodes rotates each
 * variable-to-check message and tracks, lane by lane, the smallest magnitude,
 * the second smallest magnitude, the position of the smallest one and the sign
 * of the product of all messages. A second pass builds each outgoing message
 * from the minimum of the other magnitudes, scales it, applies the sign and
 * rotates it back.
 *
 * \param[in,out] p                 Pointer to the decoder registers.
 * \param[in]     i_layer           Index of the current layer.
 * \param[in]     this_pcm          Rotation applied to each variable node in
 *                                  this layer, indexed by variable node index.
 * \param[in]     these_var_indices Indices of the variable nodes connected to
 *                                  this check node; the list ends at the first
 *                                  -1 or after MAX_CNCT entries.
 * \return 0 on success, -1 if \b p is NULL.
 */
int update_ldpc_check_to_var_c_avx2_flood(void*           p,
                                          int             i_layer,
                                          const uint16_t* this_pcm,
                                          const int8_t (*these_var_indices)[MAX_CNCT])
{
  struct ldpc_regs_c_avx2_flood* vp = p;

  if (p == NULL) {
    return -1;
  }

  __m256i minp_v2c_epi8 = _mm256_set1_epi8(INT8_MAX); // smallest magnitude so far
  __m256i mins_v2c_epi8 = _mm256_set1_epi8(INT8_MAX); // second smallest magnitude so far
  __m256i prod_v2c_epi8 = _mm256_set1_epi8(0);        // sign mask of the product of all messages
  // Explicitly initialized: a lane in which no magnitude ever drops below
  // INT8_MAX never gets an index assigned. In such a lane both minima are
  // equal, so the value chosen here does not change the result, but the
  // register must not be read while indeterminate.
  __m256i min_ix_epi8 = _mm256_set1_epi8(0);

  const __m256i* this_var_to_check = vp->var_to_check + i_layer * (vp->hrr + 1);

  // First pass: rotate the messages and gather minima and signs. The index is
  // read at the top of each iteration so that the row is never accessed past
  // its MAX_CNCT entries.
  for (int i = 0; i < MAX_CNCT; i++) {
    const int8_t current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }

    const uint16_t shift = this_pcm[current_var_index];
    // all the extension nodes share the last slot of the message buffers
    const int     i_v2c_base      = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
    const __m256i current_ix_epi8 = _mm256_set1_epi8((int8_t)i);

    __m256i* this_rotated_v2c = vp->rotated_v2c + i;
    *this_rotated_v2c         = rotate_node_right(this_var_to_check[i_v2c_base], shift, vp->ls);

    // mask_sign is 1 if this_rotated_v2c is strictly negative
    const __m256i mask_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, *this_rotated_v2c);
    prod_v2c_epi8                = _mm256_xor_si256(prod_v2c_epi8, mask_sign_epi8);

    const __m256i this_abs_v2c_epi8 = _mm256_abs_epi8(*this_rotated_v2c);

    // mask_min is 1 if this_abs_v2c is strictly smaller than minp_v2c
    __m256i mask_min_epi8 = _mm256_cmpgt_epi8(minp_v2c_epi8, this_abs_v2c_epi8);
    // candidate for the second minimum: the old minimum if it has just been
    // replaced, the current magnitude otherwise
    const __m256i help_min_epi8 = _mm256_blendv_epi8(this_abs_v2c_epi8, minp_v2c_epi8, mask_min_epi8);
    minp_v2c_epi8               = _mm256_blendv_epi8(minp_v2c_epi8, this_abs_v2c_epi8, mask_min_epi8);
    min_ix_epi8                 = _mm256_blendv_epi8(min_ix_epi8, current_ix_epi8, mask_min_epi8);

    // mask_min is 1 if this_abs_v2c is strictly smaller than mins_v2c
    mask_min_epi8 = _mm256_cmpgt_epi8(mins_v2c_epi8, this_abs_v2c_epi8);
    mins_v2c_epi8 = _mm256_blendv_epi8(mins_v2c_epi8, help_min_epi8, mask_min_epi8);
  }

  __m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1);

  // Second pass: build, scale, sign and rotate back each outgoing message.
  for (int i = 0; i < MAX_CNCT; i++) {
    const int8_t current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }

    const uint16_t shift      = this_pcm[current_var_index];
    const int      i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;

    const __m256i* this_rotated_v2c = vp->rotated_v2c + i;

    // sign of the product of all the other messages: the overall sign with the
    // contribution of this message removed
    __m256i final_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, *this_rotated_v2c);
    final_sign_epi8         = _mm256_xor_si256(final_sign_epi8, prod_v2c_epi8);

    // the node holding the minimum receives the second minimum
    const __m256i current_ix_epi8  = _mm256_set1_epi8((int8_t)i);
    const __m256i mask_is_min_epi8 = _mm256_cmpeq_epi8(current_ix_epi8, min_ix_epi8);
    __m256i       this_c2v_epi8    = _mm256_blendv_epi8(minp_v2c_epi8, mins_v2c_epi8, mask_is_min_epi8);
    this_c2v_epi8                  = _mm256_scalei_epi8(this_c2v_epi8, vp->scaling_fctr);

    // negate the lanes whose final sign is negative, leave the others untouched
    const __m256i help_c2v_epi8 = _mm256_sign_epi8(this_c2v_epi8, final_sign_epi8);
    this_c2v_epi8               = _mm256_blendv_epi8(this_c2v_epi8, help_c2v_epi8, final_sign_epi8);

    this_check_to_var[i_v2c_base] = rotate_node_left(this_c2v_epi8, shift, vp->ls);
  }

  return 0;
}
/*!
 * Recomputes all the soft bits (flooded scheduling): every soft bit is reset
 * to its a-priori LLR and then the check-to-variable messages of all the
 * layers are accumulated on it with saturation.
 *
 * \param[in,out] p                 Pointer to the decoder registers.
 * \param[in]     these_var_indices For each of the bgM layers, the indices of
 *                                  the connected variable nodes; each list
 *                                  ends at the first -1 or after MAX_CNCT
 *                                  entries.
 * \return 0 on success, -1 if \b p is NULL.
 */
int update_ldpc_soft_bits_c_avx2_flood(void* p, const int8_t (*these_var_indices)[MAX_CNCT])
{
  struct ldpc_regs_c_avx2_flood* vp = p;

  if (p == NULL) {
    return -1;
  }

  // start again from the channel LLRs
  for (int i = 0; i < vp->bgN; i++) {
    vp->soft_bits[i].v = vp->llrs[i];
  }

  for (int i_layer = 0; i_layer < vp->bgM; i_layer++) {
    const __m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1);

    // The index is read at the top of each iteration so that the row is never
    // accessed past its MAX_CNCT entries.
    for (int i = 0; i < MAX_CNCT; i++) {
      const int8_t current_var_index = these_var_indices[i_layer][i];
      if (current_var_index == -1) {
        break;
      }

      // all the extension nodes share the last slot of the message buffers
      const int i_bit_tmp_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;

      __m256i tmp_epi8 = _mm256_adds_epi8(this_check_to_var[i_bit_tmp_base], vp->soft_bits[current_var_index].v);

      // tmp = (tmp > infty7) ? infty8 : tmp
      __m256i mask_epi8 = _mm256_cmpgt_epi8(tmp_epi8, infty7_epi8);
      tmp_epi8          = _mm256_blendv_epi8(tmp_epi8, infty8_epi8, mask_epi8);

      // tmp = (tmp < -infty7) ? -infty8 : tmp
      mask_epi8                          = _mm256_cmpgt_epi8(neg_infty7_epi8, tmp_epi8);
      vp->soft_bits[current_var_index].v = _mm256_blendv_epi8(tmp_epi8, neg_infty8_epi8, mask_epi8);
    }
  }

  return 0;
}
int extract_ldpc_message_c_avx2_flood(void* p, uint8_t* message, uint16_t liftK)
{
if (p == NULL) {
return -1;
}
struct ldpc_regs_c_avx2_flood* vp = p;
int j = 0;
for (int i = 0; i < liftK / vp->ls; i++) {
for (j = 0; j < vp->ls; j++) {
message[i * vp->ls + j] = (vp->soft_bits[i].c[j] < 0);
}
}
return 0;
}
/*
 * Computes z = x - y lane by lane with saturation to [-clip, +clip], then
 * forces z to +/- infinity wherever x is not strictly inside the
 * (neg_infty8, infty8) range, so that infinite soft bits are propagated.
 */
static void
inner_var_to_check_c_avx2(const __m256i* x, const __m256i* y, __m256i* z, const uint8_t clip, const uint32_t len)
{
  const __m256i upper_epi8 = _mm256_set1_epi8(clip);
  const __m256i lower_epi8 = _mm256_set1_epi8((char)(-clip));

  for (uint32_t k = 0; k < len; k++) {
    const __m256i minuend_epi8 = x[k];

    // saturated difference, then clamped to [-clip, clip]
    __m256i diff_epi8 = _mm256_subs_epi8(minuend_epi8, y[k]);
    diff_epi8         = _mm256_min_epi8(diff_epi8, upper_epi8);
    diff_epi8         = _mm256_max_epi8(diff_epi8, lower_epi8);

    // keep the difference only where x < infinity, otherwise output infinity
    const __m256i below_inf_epi8 = _mm256_cmpgt_epi8(infty8_epi8, minuend_epi8);
    diff_epi8                    = _mm256_blendv_epi8(infty8_epi8, diff_epi8, below_inf_epi8);

    // keep the result only where x > -infinity, otherwise output -infinity
    const __m256i above_neg_inf_epi8 = _mm256_cmpgt_epi8(minuend_epi8, neg_infty8_epi8);
    z[k]                             = _mm256_blendv_epi8(neg_infty8_epi8, diff_epi8, above_neg_inf_epi8);
  }
}
/*
 * Left rotation (towards the most significant end) of the 32 chars of a by imm
 * chars, with imm in 0..31: a rotation by whole 64-bit blocks followed by a
 * shift inside each block, the missing chars being carried over from the
 * neighbouring block.
 */
static __m256i _mm256_rotatelli_si256(__m256i a, int imm)
{
  // by_blocks[k] is a rotated left by k 64-bit blocks
  const __m256i by_blocks[4] = {
      a,                                // blocks 0 - 1 - 2 - 3
      _mm256_permute4x64_epi64(a, 147), // 3 - 0 - 1 - 2
      _mm256_permute4x64_epi64(a, 78),  // 2 - 3 - 0 - 1
      _mm256_permute4x64_epi64(a, 57),  // 1 - 2 - 3 - 0
  };

  const int whole_blocks = imm / 8;               // coarse rotation, in blocks
  const int extra_chars  = imm % 8;               // fine rotation, in chars
  const int carry_blocks = (whole_blocks + 1) % 4; // rotation providing the carry-over

  // shift left inside each block...
  const __m256i shifted_epi64 = _mm256_slli_epi64(by_blocks[whole_blocks], extra_chars * 8);
  // ...and bring in the chars that crossed a block boundary (when extra_chars
  // is 0 the shift count is 64, for which the intrinsic returns all zeros)
  const __m256i carried_epi64 = _mm256_srli_epi64(by_blocks[carry_blocks], (8 - extra_chars) * 8);

  return _mm256_xor_si256(shifted_epi64, carried_epi64);
}
/*
 * Right rotation (towards the least significant end) of the 32 chars of a by
 * imm chars, with imm in 0..31: a rotation by whole 64-bit blocks followed by
 * a shift inside each block, the missing chars being carried over from the
 * neighbouring block.
 */
static __m256i _mm256_rotaterli_si256(__m256i a, int imm)
{
  // by_blocks[k] is a rotated right by k 64-bit blocks
  const __m256i by_blocks[4] = {
      a,                                // blocks 0 - 1 - 2 - 3
      _mm256_permute4x64_epi64(a, 57),  // 1 - 2 - 3 - 0
      _mm256_permute4x64_epi64(a, 78),  // 2 - 3 - 0 - 1
      _mm256_permute4x64_epi64(a, 147), // 3 - 0 - 1 - 2
  };

  const int whole_blocks = imm / 8;               // coarse rotation, in blocks
  const int extra_chars  = imm % 8;               // fine rotation, in chars
  const int carry_blocks = (whole_blocks + 1) % 4; // rotation providing the carry-over

  // shift right inside each block...
  const __m256i shifted_epi64 = _mm256_srli_epi64(by_blocks[whole_blocks], extra_chars * 8);
  // ...and bring in the chars that crossed a block boundary (when extra_chars
  // is 0 the shift count is 64, for which the intrinsic returns all zeros)
  const __m256i carried_epi64 = _mm256_slli_epi64(by_blocks[carry_blocks], (8 - extra_chars) * 8);

  return _mm256_xor_si256(shifted_epi64, carried_epi64);
}
static __m256i rotate_node_left(__m256i a, int imm, uint16_t ls)
{
if (imm == 0) {
return a;
}
__m256i step1 = _mm256_rotatelli_si256(a, imm);
if (ls == SRSLTE_AVX2_B_SIZE) {
return step1;
}
__m256i step2 = _mm256_rotaterli_si256(a, ls - imm);
step1 = _mm256_and_si256(step1, mask_most_epi8[imm]);
step2 = _mm256_and_si256(step2, mask_least_epi8[imm]);
step1 = _mm256_xor_si256(step1, step2);
return step1;
;
}
static __m256i rotate_node_right(__m256i a, int imm, uint16_t ls)
{
if (imm == 0) {
return a;
}
__m256i step1 = _mm256_rotaterli_si256(a, imm);
if (ls == SRSLTE_AVX2_B_SIZE) {
return step1;
}
__m256i step2 = _mm256_rotatelli_si256(a, ls - imm);
step1 = _mm256_and_si256(step1, mask_least_epi8[ls - imm]);
step2 = _mm256_and_si256(step2, mask_most_epi8[ls - imm]);
step1 = _mm256_xor_si256(step1, step2);
return step1;
}
/*
 * Scales the packed 8-bit values in a by sf. The bytes at even and odd
 * positions are processed separately as unsigned 16-bit lanes: each one is
 * multiplied by sf and only the upper 16 bits of the product are kept. The two
 * partial results are then merged back into packed bytes.
 */
static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf)
{
  // low byte of each 16-bit lane, selected by mask_even_epi8
  const __m256i low_epi16 = _mm256_and_si256(a, mask_even_epi8);
  // high byte of each 16-bit lane, moved down to the low position
  const __m256i high_epi16 = _mm256_srli_epi16(a, 8);

  const __m256i scaled_low_epi16 = _mm256_mulhi_epu16(low_epi16, sf);
  // scaled high bytes, moved back to the high position
  const __m256i scaled_high_epi16 = _mm256_slli_epi16(_mm256_mulhi_epu16(high_epi16, sf), 8);

  return _mm256_xor_si256(scaled_low_epi16, scaled_high_epi16);
}
#endif // LV_HAVE_AVX2

@ -0,0 +1,541 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_c_avx2long.c
* \brief Definition of the LDPC decoder inner functions working
* with 8-bit integer-valued LLRs (AVX2 version, large lifting size).
*
* Even if the inner representation is based on 8 bits, check-to-variable and
* variable-to-check messages are actually represented with 7 bits, the
* remaining bit is used to represent infinity.
*
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <stdint.h>
#include <stdlib.h>
#include <strings.h>
#include "../utils_avx2.h"
#include "ldpc_dec_all.h"
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/utils/vector.h"
#ifdef LV_HAVE_AVX2
#include <immintrin.h>
#include "ldpc_avx2_consts.h"
#define F2I 65535 /*!< \brief Used for float to int conversion---float f is stored as the integer part of f*F2I. NOTE(review): presumably applied to the scaling factor, confirm the cast at the use site. */
/*!
* \brief Represents a node of the base factor graph.
*
* Both members share the same storage: \b c gives access to the individual
* lifted nodes, \b v to the same data as a single AVX2 vector.
*/
typedef union bg_node_t {
int8_t c[SRSLTE_AVX2_B_SIZE]; /*!< Up to \ref SRSLTE_AVX2_B_SIZE lifted nodes, one per char. */
__m256i v; /*!< The same lifted nodes as a 256-bit line. */
} bg_node_t;
/*!
* \brief Maximum message magnitude, equal to 2^6 - 1 = 63.
* Messages use a 7-bit quantization. Soft bits use the remaining bit to denote infinity.
*/
static const int8_t infinity7 = (1U << 6U) - 1;
/*!
* \brief Inner registers for the LDPC decoder that works with 8-bit integer-valued LLRs
* (large lifting size).
*
* Each base node is split into n_subnodes AVX2 lines. The buffer sizes given
* below are those allocated by create_ldpc_dec_c_avx2long().
*/
struct ldpc_regs_c_avx2long {
__m256i scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
bg_node_t* soft_bits; /*!< \brief A-posteriori log-likelihood ratios (bgN * n_subnodes lines). */
__m256i* check_to_var; /*!< \brief Check-to-variable messages ((hrr + 1) * bgM * n_subnodes lines). */
__m256i* var_to_check; /*!< \brief Variable-to-check messages ((hrr + 1) * n_subnodes lines). */
__m256i* rotated_v2c; /*!< \brief To store a rotated version of the variable-to-check messages ((hrr + 1) * n_subnodes lines). */
__m256i* this_c2v_epi8; /*!< \brief Helper register for the current c2v node (n_subnodes lines). */
__m256i* minp_v2c_epi8; /*!< \brief Helper register for the minimum v2c message (n_subnodes lines). */
__m256i* mins_v2c_epi8; /*!< \brief Helper register for the second minimum v2c message (n_subnodes lines). */
__m256i* prod_v2c_epi8; /*!< \brief Helper register for the sign of the product of all v2c messages (n_subnodes lines). */
__m256i* min_ix_epi8; /*!< \brief Helper register for the index of the minimum v2c message (n_subnodes lines). */
uint16_t ls; /*!< \brief Lifting size. */
uint8_t hrr; /*!< \brief Number of variable nodes in the high-rate region (before lifting), set to bgN - bgM + 4. */
uint8_t bgM; /*!< \brief Number of check nodes (before lifting). */
uint8_t bgN; /*!< \brief Number of variable nodes (before lifting). */
uint8_t n_subnodes; /*!< \brief Number of subnodes, i.e. AVX2 lines per node: ls / SRSLTE_AVX2_B_SIZE rounded up. */
};
/*!
* Carries out the actual update of the variable-to-check messages. It basically
* consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
* \f$\lvert x[i] \rvert = 2^{7}-1 \f$ (our representation of infinity) to
* ensure it is properly propagated. Also, the subtraction is saturated between
* \f$- clip\f$ and \f$+ clip\f$.
* \param[in] x Minuend: array we subtract from (in practice, the soft bits).
* \param[in] y Subtrahend: array to be subtracted (in practice, the
* check-to-variable messages).
* \param[out] z Resulting difference array (in practice, the updated
* variable-to-check messages).
* \param[in] clip The saturation value.
* \param[in] len The length of the vectors.
*/
static void inner_var_to_check_c_avx2long(const __m256i* x, const __m256i* y, __m256i* z, uint8_t clip, uint32_t len);
/*!
* Rotate the contents of a node towards the right by \b shift chars.
* The rotated node is written to \b out; the function itself returns nothing.
* NOTE(review): for a right rotation one would expect the \b shift least
* significant chars to wrap around to the most significant positions; confirm
* the direction against the definition.
* \param[in] in_256i The node to rotate.
* \param[out] out The rotated node.
* \param[in] shift The order of the rotation in number of chars.
* \param[in] ls The size of the node (lifting size).
* \param[in] n_subnodes The number of subnodes in each node.
*/
static void rotate_node_right(const __m256i* in_256i, __m256i* out, uint16_t shift, uint16_t ls, int8_t n_subnodes);
/*!
* Scale packed 8-bit integers in \b a by the scaling factor \b sf / #F2I.
* \param[in] a Vector of packed 8-bit integers.
* \param[in] sf Scaling factor.
* \return Vector of packed 8-bit integers with the scaling result.
*/
static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf);
void* create_ldpc_dec_c_avx2long(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
{
struct ldpc_regs_c_avx2long* vp = NULL;
uint8_t bgK = bgN - bgM;
uint16_t hrr = bgK + 4;
if ((vp = srslte_vec_malloc(sizeof(struct ldpc_regs_c_avx2long))) == NULL) {
return NULL;
}
// compute number of subnodes
int left_out = ls % SRSLTE_AVX2_B_SIZE;
int n_subnodes = ls / SRSLTE_AVX2_B_SIZE + (left_out > 0);
if ((vp->soft_bits = srslte_vec_malloc(bgN * n_subnodes * sizeof(bg_node_t))) == NULL) {
free(vp);
return NULL;
}
if ((vp->check_to_var = srslte_vec_malloc((hrr + 1) * bgM * n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->soft_bits);
free(vp);
return NULL;
}
if ((vp->var_to_check = srslte_vec_malloc((hrr + 1) * n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->check_to_var);
free(vp->soft_bits);
free(vp);
return NULL;
}
if ((vp->minp_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->var_to_check);
free(vp->check_to_var);
free(vp->soft_bits);
free(vp);
return NULL;
}
if ((vp->mins_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->minp_v2c_epi8);
free(vp->var_to_check);
free(vp->check_to_var);
free(vp->soft_bits);
free(vp);
return NULL;
}
if ((vp->prod_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->mins_v2c_epi8);
free(vp->minp_v2c_epi8);
free(vp->var_to_check);
free(vp->check_to_var);
free(vp->soft_bits);
free(vp);
return NULL;
}
if ((vp->min_ix_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->prod_v2c_epi8);
free(vp->mins_v2c_epi8);
free(vp->minp_v2c_epi8);
free(vp->var_to_check);
free(vp->check_to_var);
free(vp->soft_bits);
free(vp);
return NULL;
}
if ((vp->rotated_v2c = srslte_vec_malloc((hrr + 1) * n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->min_ix_epi8);
free(vp->prod_v2c_epi8);
free(vp->mins_v2c_epi8);
free(vp->minp_v2c_epi8);
free(vp->var_to_check);
free(vp->check_to_var);
free(vp->soft_bits);
free(vp);
return NULL;
}
if ((vp->this_c2v_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->rotated_v2c);
free(vp->min_ix_epi8);
free(vp->prod_v2c_epi8);
free(vp->mins_v2c_epi8);
free(vp->minp_v2c_epi8);
free(vp->var_to_check);
free(vp->check_to_var);
free(vp->soft_bits);
free(vp);
return NULL;
}
vp->bgM = bgM;
vp->bgN = bgN;
vp->hrr = hrr;
vp->ls = ls;
vp->n_subnodes = n_subnodes;
vp->scaling_fctr = _mm256_set1_epi16((uint16_t)(scaling_fctr * F2I));
return vp;
}
/*!
 * Releases the decoder registers created by create_ldpc_dec_c_avx2long().
 * \param[in] p The registers to destroy; NULL is accepted and ignored.
 */
void delete_ldpc_dec_c_avx2long(void* p)
{
  if (p == NULL) {
    return;
  }

  struct ldpc_regs_c_avx2long* regs = p;

  // Buffers owned by the registers, listed in the order they are released.
  void* const owned[] = {regs->this_c2v_epi8,
                         regs->rotated_v2c,
                         regs->min_ix_epi8,
                         regs->prod_v2c_epi8,
                         regs->mins_v2c_epi8,
                         regs->minp_v2c_epi8,
                         regs->var_to_check,
                         regs->check_to_var,
                         regs->soft_bits};

  for (size_t n = 0; n < sizeof(owned) / sizeof(owned[0]); n++) {
    free(owned[n]);
  }
  free(regs);
}
int init_ldpc_dec_c_avx2long(void* p, const int8_t* llrs, uint16_t ls)
{
struct ldpc_regs_c_avx2long* vp = p;
int i = 0;
int j = 0;
int k = 0;
if (p == NULL) {
return -1;
}
for (k = 0; k < vp->n_subnodes; k++) {
vp->soft_bits[k].v = _mm256_set1_epi8(0);
vp->soft_bits[vp->n_subnodes + k].v = _mm256_set1_epi8(0);
}
for (i = 2; i < vp->bgN; i++) {
for (j = 0; j < vp->n_subnodes; j++) {
for (k = 0; (k < SRSLTE_AVX2_B_SIZE) && (j * SRSLTE_AVX2_B_SIZE + k < ls); k++) {
vp->soft_bits[i * vp->n_subnodes + j].c[k] = llrs[(i - 2) * ls + j * SRSLTE_AVX2_B_SIZE + k];
}
}
bzero(&(vp->soft_bits[i * vp->n_subnodes + j - 1].c[k]), (SRSLTE_AVX2_B_SIZE - k) * sizeof(int8_t));
}
bzero(vp->check_to_var, (vp->hrr + 1) * vp->bgM * vp->n_subnodes * sizeof(__m256i));
bzero(vp->var_to_check, (vp->hrr + 1) * vp->n_subnodes * sizeof(__m256i));
return 0;
}
/*!
 * Updates the variable-to-check messages of layer \b i_layer, that is soft bits
 * minus the check-to-variable messages of the same layer (see
 * inner_var_to_check_c_avx2long()).
 *
 * The high-rate region is always processed; from the fifth layer on
 * (\b i_layer >= 4) one extra node of the extension region is processed too.
 * \param[in,out] p       The decoder registers.
 * \param[in]     i_layer Index of the layer to update.
 * \return 0 on success, -1 if \b p is NULL.
 */
int update_ldpc_var_to_check_c_avx2long(void* p, int i_layer)
{
  if (p == NULL) {
    return -1;
  }

  struct ldpc_regs_c_avx2long* regs = p;

  const int n_sub   = regs->n_subnodes;
  const int hrr_len = regs->hrr * n_sub; // size of the high-rate region, in 256-bit words

  __m256i* soft      = &(regs->soft_bits[0].v);
  __m256i* layer_c2v = regs->check_to_var + i_layer * (regs->hrr + 1) * n_sub;

  // Update the high-rate region.
  inner_var_to_check_c_avx2long(soft, layer_c2v, regs->var_to_check, infinity7, hrr_len);

  if (i_layer < 4) {
    return 0;
  }

  // Update the extension region: a single node, stored right after the high-rate region in the message buffers.
  inner_var_to_check_c_avx2long(soft + (regs->hrr + i_layer - 4) * n_sub,
                                layer_c2v + hrr_len,
                                regs->var_to_check + hrr_len,
                                infinity7,
                                n_sub);

  return 0;
}
/*!
 * Updates the check-to-variable messages of layer \b i_layer with a scaled
 * min-sum rule.
 *
 * A first pass over the variable nodes connected to the check node rotates each
 * variable-to-check node by its circular shift and accumulates, lane by lane,
 * the parity of the signs, the smallest magnitude (together with the position
 * of the connection that holds it) and the second smallest magnitude. A second
 * pass builds the outgoing message of each connection: the second minimum for
 * the connection holding the minimum and the minimum for all the others, scaled
 * by the scaling factor and with the sign given by the global parity once the
 * sign of the connection itself is removed. Each message is finally rotated
 * back and stored in the check-to-variable buffer of the layer.
 *
 * \param[in,out] p                 The decoder registers.
 * \param[in]     i_layer           Index of the layer to update.
 * \param[in]     this_pcm          Circular shifts of the current layer, indexed by variable node.
 * \param[in]     these_var_indices Indices of the variable nodes connected to the current check
 *                                  node; the list ends at the first -1.
 * \return 0 on success, -1 if \b p is NULL.
 */
int update_ldpc_check_to_var_c_avx2long(void*           p,
                                        int             i_layer,
                                        const uint16_t* this_pcm,
                                        const int8_t (*these_var_indices)[MAX_CNCT])
{
  struct ldpc_regs_c_avx2long* vp = p;

  if (p == NULL) {
    return -1;
  }

  int i = 0;
  int j = 0;

  uint16_t shift      = 0;
  int      i_v2c_base = 0;

  __m256i* this_rotated_v2c = NULL;

  __m256i this_abs_v2c_epi8;
  __m256i mask_sign_epi8;
  __m256i mask_min_epi8;
  __m256i help_min_epi8;
  __m256i current_ix_epi8;

  for (j = 0; j < vp->n_subnodes; j++) {
    vp->minp_v2c_epi8[j] = _mm256_set1_epi8(INT8_MAX);
    vp->mins_v2c_epi8[j] = _mm256_set1_epi8(INT8_MAX);
    vp->prod_v2c_epi8[j] = _mm256_set1_epi8(0);
    // The index register is blended (hence read) below before being written: give it a defined value
    // that matches no connection. Lanes that keep it have minp == mins, so the result does not change.
    vp->min_ix_epi8[j] = _mm256_set1_epi8(-1);
  }

  // First pass: sign parity, minimum, second minimum and position of the minimum.
  int8_t current_var_index = (*these_var_indices)[0];

  for (i = 0; (current_var_index != -1) && (i < MAX_CNCT); i++) {
    shift = this_pcm[current_var_index];
    // All nodes of the extension region share the last slot of the message buffers.
    i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
    i_v2c_base *= vp->n_subnodes;

    current_ix_epi8 = _mm256_set1_epi8((int8_t)i);

    this_rotated_v2c = vp->rotated_v2c + i * vp->n_subnodes;
    rotate_node_right(vp->var_to_check + i_v2c_base, this_rotated_v2c, shift, vp->ls, vp->n_subnodes);

    for (j = 0; j < vp->n_subnodes; j++) {
      // mask_sign is 1 if this_v2c_epi8 is strictly negative
      mask_sign_epi8       = _mm256_cmpgt_epi8(zero_epi8, this_rotated_v2c[j]);
      vp->prod_v2c_epi8[j] = _mm256_xor_si256(vp->prod_v2c_epi8[j], mask_sign_epi8);

      this_abs_v2c_epi8 = _mm256_abs_epi8(this_rotated_v2c[j]);
      // mask_min is 1 if this_abs_v2c is strictly smaller than minp_v2c
      mask_min_epi8 = _mm256_cmpgt_epi8(vp->minp_v2c_epi8[j], this_abs_v2c_epi8);
      // help_min is the candidate for the second minimum: the old minimum if it has just been
      // replaced, the new magnitude otherwise
      help_min_epi8        = _mm256_blendv_epi8(this_abs_v2c_epi8, vp->minp_v2c_epi8[j], mask_min_epi8);
      vp->minp_v2c_epi8[j] = _mm256_blendv_epi8(vp->minp_v2c_epi8[j], this_abs_v2c_epi8, mask_min_epi8);
      vp->min_ix_epi8[j]   = _mm256_blendv_epi8(vp->min_ix_epi8[j], current_ix_epi8, mask_min_epi8);

      // mask_min is 1 if this_abs_v2c is strictly smaller than mins_v2c
      mask_min_epi8        = _mm256_cmpgt_epi8(vp->mins_v2c_epi8[j], this_abs_v2c_epi8);
      vp->mins_v2c_epi8[j] = _mm256_blendv_epi8(vp->mins_v2c_epi8[j], help_min_epi8, mask_min_epi8);
    }
    current_var_index = (*these_var_indices)[i + 1];
  }

  // Second pass: build, rotate back and store the outgoing messages.
  __m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1) * vp->n_subnodes;
  current_var_index          = (*these_var_indices)[0];

  __m256i mask_is_min_epi8;
  __m256i help_c2v_epi8;
  __m256i final_sign_epi8;

  for (i = 0; (current_var_index != -1) && (i < MAX_CNCT); i++) {
    shift = this_pcm[current_var_index];
    i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
    i_v2c_base *= vp->n_subnodes;

    this_rotated_v2c = vp->rotated_v2c + i * vp->n_subnodes;
    current_ix_epi8  = _mm256_set1_epi8((int8_t)i);

    for (j = 0; j < vp->n_subnodes; j++) {
      // final_sign is 1 if the product of the signs of all the other messages is negative
      final_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, this_rotated_v2c[j]);
      final_sign_epi8 = _mm256_xor_si256(final_sign_epi8, vp->prod_v2c_epi8[j]);

      // the connection that holds the minimum receives the second minimum
      mask_is_min_epi8     = _mm256_cmpeq_epi8(current_ix_epi8, vp->min_ix_epi8[j]);
      vp->this_c2v_epi8[j] = _mm256_blendv_epi8(vp->minp_v2c_epi8[j], vp->mins_v2c_epi8[j], mask_is_min_epi8);
      vp->this_c2v_epi8[j] = _mm256_scalei_epi8(vp->this_c2v_epi8[j], vp->scaling_fctr);
      // negate the magnitude in the lanes where final_sign is set
      help_c2v_epi8        = _mm256_sign_epi8(vp->this_c2v_epi8[j], final_sign_epi8);
      vp->this_c2v_epi8[j] = _mm256_blendv_epi8(vp->this_c2v_epi8[j], help_c2v_epi8, final_sign_epi8);
    }

    // rotating right LS - shift positions is the same as rotating left shift positions
    rotate_node_right(vp->this_c2v_epi8, this_check_to_var + i_v2c_base, vp->ls - shift, vp->ls, vp->n_subnodes);

    current_var_index = (*these_var_indices)[i + 1];
  }

  return 0;
}
int update_ldpc_soft_bits_c_avx2long(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT])
{
struct ldpc_regs_c_avx2long* vp = p;
if (p == NULL) {
return -1;
}
int j = 0;
__m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1) * vp->n_subnodes;
int i_bit_tmp_base = 0;
int i_bit_subnode = 0;
__m256i tmp_epi8;
__m256i mask_epi8;
int8_t current_var_index = (*these_var_indices)[0];
int current_var_index_subnode = 0;
for (int i = 0; (current_var_index != -1) && (i < MAX_CNCT); i++) {
current_var_index_subnode = current_var_index * vp->n_subnodes;
for (j = 0; j < vp->n_subnodes; j++) {
i_bit_tmp_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
i_bit_subnode = i_bit_tmp_base * vp->n_subnodes + j;
tmp_epi8 = _mm256_adds_epi8(this_check_to_var[i_bit_subnode], vp->var_to_check[i_bit_subnode]);
mask_epi8 = _mm256_cmpgt_epi8(tmp_epi8, infty7_epi8);
tmp_epi8 = _mm256_blendv_epi8(tmp_epi8, infty8_epi8, mask_epi8);
mask_epi8 = _mm256_cmpgt_epi8(neg_infty7_epi8, tmp_epi8);
vp->soft_bits[current_var_index_subnode + j].v = _mm256_blendv_epi8(tmp_epi8, neg_infty8_epi8, mask_epi8);
}
current_var_index = (*these_var_indices)[i + 1];
}
return 0;
}
int extract_ldpc_message_c_avx2long(void* p, uint8_t* message, uint16_t liftK)
{
if (p == NULL) {
return -1;
}
struct ldpc_regs_c_avx2long* vp = p;
int j = 0;
int k = 0;
for (int i = 0; i < liftK / vp->ls; i++) {
for (j = 0; j < vp->n_subnodes; j++) {
for (k = 0; (k < SRSLTE_AVX2_B_SIZE) && (j * SRSLTE_AVX2_B_SIZE + k < vp->ls); k++) {
message[i * vp->ls + j * SRSLTE_AVX2_B_SIZE + k] = (vp->soft_bits[i * vp->n_subnodes + j].c[k] < 0);
}
}
}
return 0;
}
/*!
 * Computes z = x - y on \b len 256-bit words of packed 8-bit integers.
 *
 * The difference is saturated to [-clip, +clip]. Afterwards, lanes where x is
 * not smaller than \c infty8_epi8 are forced to \c infty8_epi8 and lanes where
 * x is not larger than \c neg_infty8_epi8 are forced to \c neg_infty8_epi8, so
 * that infinite soft bits are propagated unchanged.
 */
static void
inner_var_to_check_c_avx2long(const __m256i* x, const __m256i* y, __m256i* z, const uint8_t clip, const uint32_t len)
{
  const __m256i upper_epi8 = _mm256_set1_epi8(clip);
  const __m256i lower_epi8 = _mm256_set1_epi8((char)(-clip));

  for (unsigned n = 0; n < len; n++) {
    const __m256i minuend_epi8 = x[n];

    __m256i diff_epi8 = _mm256_subs_epi8(minuend_epi8, y[n]);

    // clip the difference from above, then from below
    diff_epi8 = _mm256_blendv_epi8(diff_epi8, upper_epi8, _mm256_cmpgt_epi8(diff_epi8, upper_epi8));
    diff_epi8 = _mm256_blendv_epi8(diff_epi8, lower_epi8, _mm256_cmpgt_epi8(lower_epi8, diff_epi8));

    // keep the difference only where the minuend is finite
    diff_epi8 = _mm256_blendv_epi8(infty8_epi8, diff_epi8, _mm256_cmpgt_epi8(infty8_epi8, minuend_epi8));
    z[n]      = _mm256_blendv_epi8(neg_infty8_epi8, diff_epi8, _mm256_cmpgt_epi8(minuend_epi8, neg_infty8_epi8));
  }
}
/*
* Writes to out the node in_256i rotated by shift chars: output subnodes are
* filled first with the chars found from position shift onwards, then with the
* chars at the beginning of the node (wrap-around). One output subnode generally
* mixes both parts and is assembled with a blend.
*
* NOTE(review): the unaligned loads below read 32 chars starting at
* (in + shift + i * SRSLTE_AVX2_B_SIZE) and at (in - gap). The former may extend
* past the end of the node and the latter starts before its first char whenever
* gap > 0. The chars coming from outside the node are presumably discarded by
* the blend, but the memory is still read -- confirm that every caller passes a
* node with readable memory on both sides.
*/
static void rotate_node_right(const __m256i* in_256i, __m256i* out, uint16_t shift, uint16_t ls, int8_t n_subnodes)
{
// Byte-wise view of the input node, needed to address it at char granularity.
const int8_t* in = (const int8_t*)in_256i;
// Number of leading output subnodes taken entirely from the shifted part
// (one less when the node is exactly one subnode long).
int16_t n_type1 = (ls - shift) / SRSLTE_AVX2_B_SIZE - (ls == SRSLTE_AVX2_B_SIZE);
// Number of trailing output subnodes taken entirely from the wrapped-around part.
int16_t n_type2 = n_subnodes - n_type1 - 1 - (ls == SRSLTE_AVX2_B_SIZE);
// Chars of the shifted part that do not fill a whole subnode.
int16_t gap = (ls - shift) % SRSLTE_AVX2_B_SIZE;
int16_t i = 0;
for (; i < n_type1; i++) {
out[i] = _mm256_loadu_si256((const __m256i*)(in + shift + i * SRSLTE_AVX2_B_SIZE));
}
// Mixed subnode: tmp1 holds the end of the shifted part, tmp2 the beginning of the
// node placed gap chars further. mask_most_epi8 is defined elsewhere; presumably
// entry [gap] keeps the first gap chars of tmp1 and takes the others from tmp2.
__m256i tmp1 = _mm256_loadu_si256((const __m256i*)(in + shift + i * SRSLTE_AVX2_B_SIZE));
__m256i tmp2 = _mm256_loadu_si256((const __m256i*)(in - gap));
out[i] = _mm256_blendv_epi8(tmp1, tmp2, mask_most_epi8[gap]);
// Remaining subnodes: continue reading the wrapped-around part.
for (i = 1; i <= n_type2; i++) {
out[n_type1 + i] = _mm256_loadu_si256((const __m256i*)(in - gap + i * SRSLTE_AVX2_B_SIZE));
}
}
/*!
 * Scales the packed 8-bit integers of \b a by \b sf: each byte is widened to an
 * unsigned 16-bit lane, multiplied by \b sf keeping the high half of the
 * product, and the results are packed back into bytes.
 */
static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf)
{
  // Bytes selected by mask_even_epi8 (presumably the low byte of every 16-bit lane).
  __m256i scaled_low_epi16 = _mm256_mulhi_epu16(_mm256_and_si256(a, mask_even_epi8), sf);

  // High byte of every 16-bit lane, moved down with a logical shift.
  __m256i scaled_high_epi16 = _mm256_mulhi_epu16(_mm256_srli_epi16(a, 8), sf);

  // Put the high bytes back in place and merge the two halves.
  return _mm256_xor_si256(scaled_low_epi16, _mm256_slli_epi16(scaled_high_epi16, 8));
}
#endif // LV_HAVE_AVX2

@ -0,0 +1,576 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_c_avx2long_flood.c
* \brief Definition LDPC decoder inner functions working
* with 8-bit integer-valued LLRs (flooded scheduling, AVX2 version, large lifting size).
*
* Even if the inner representation is based on 8 bits, check-to-variable and
* variable-to-check messages are actually represented with 7 bits, the
* remaining bit is used to represent infinity.
*
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <stdint.h>
#include <stdlib.h>
#include <strings.h>
#include "../utils_avx2.h"
#include "ldpc_dec_all.h"
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/utils/vector.h"
#ifdef LV_HAVE_AVX2
#include <immintrin.h>
#include "ldpc_avx2_consts.h"
/* In this file the scaling factor is converted with (uint16_t)(scaling_fctr * F2I), replicated in all
 * 16-bit lanes and applied through an unsigned high-half multiplication (see _mm256_scalei_epi8). */
#define F2I 65535 /*!< \brief Used for float to int conversion---float f is stored as (int)(f*F2I). */
/*!
* \brief Represents a node of the base factor graph.
*
* The union gives both char-wise (\b c) and vector-wise (\b v) access to the same
* 256 bits. In this decoder a base-graph node spans \c n_subnodes consecutive
* elements of this type (the soft-bit buffer is indexed as
* node * n_subnodes + subnode).
*/
typedef union bg_node_t {
int8_t c[SRSLTE_AVX2_B_SIZE]; /*!< Each base node may contain up to \ref SRSLTE_AVX2_B_SIZE lifted nodes. */
__m256i v; /*!< All the lifted nodes of the current base node as a 256-bit line. */
} bg_node_t;
/*!
* \brief Maximum message magnitude.
* Messages use a 7-bit quantization. Soft bits use the remaining bit to denote infinity.
* The constant evaluates to 63, the largest magnitude of a signed 7-bit integer;
* it is the clip value passed to the variable-to-check update.
*/
static const int8_t infinity7 = (1U << 6U) - 1;
/*!
* \brief Inner registers for the LDPC decoder that works with 8-bit integer-valued LLRs.
*
* The buffer sizes given below are those allocated by create_ldpc_dec_c_avx2long_flood()
* and are expressed in 256-bit words.
*/
struct ldpc_regs_c_avx2long_flood {
__m256i scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
bg_node_t* soft_bits; /*!< \brief A-posteriori log-likelihood ratios (bgN * n_subnodes words). */
__m256i* llrs; /*!< \brief A-priori log-likelihood ratios (bgN * n_subnodes words). */
__m256i* check_to_var; /*!< \brief Check-to-variable messages ((hrr + 1) * bgM * n_subnodes words). */
__m256i* var_to_check; /*!< \brief Variable-to-check messages ((hrr + 1) * bgM * n_subnodes words). */
__m256i* rotated_v2c; /*!< \brief To store a rotated version of the variable-to-check messages ((hrr + 1) * n_subnodes words). */
__m256i* this_c2v_epi8; /*!< \brief Helper register for the current c2v node (n_subnodes words). */
__m256i* minp_v2c_epi8; /*!< \brief Helper register for the minimum v2c message (n_subnodes words). */
__m256i* mins_v2c_epi8; /*!< \brief Helper register for the second minimum v2c message (n_subnodes words). */
__m256i* prod_v2c_epi8; /*!< \brief Helper register for the sign of the product of all v2c messages (n_subnodes words). */
__m256i* min_ix_epi8; /*!< \brief Helper register for the index of the minimum v2c message (n_subnodes words). */
uint16_t ls; /*!< \brief Lifting size. */
uint8_t n_subnodes; /*!< \brief Number of subnodes, i.e. 256-bit words needed to store ls chars. */
uint8_t hrr; /*!< \brief Number of variable nodes in the high-rate region (before lifting). */
uint8_t bgM; /*!< \brief Number of check nodes (before lifting). */
uint8_t bgN; /*!< \brief Number of variable nodes (before lifting). */
};
/*!
* Carries out the actual update of the variable-to-check messages. It basically
* consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
* \f$\lvert x[i] \rvert = 2^{7}-1 \f$ (our representation of infinity) to
* ensure it is properly propagated. Also, the subtraction is saturated between
* \f$- clip\f$ and \f$+ clip\f$.
* \param[in] x Minuend: array we subtract from (in practice, the soft bits).
* \param[in] y Subtrahend: array to be subtracted (in practice, the
* check-to-variable messages).
* \param[out] z Resulting difference array (in practice, the updated
* variable-to-check messages).
* \param[in] clip The saturation value.
* \param[in] len The length of the vectors, in 256-bit words.
*/
static void inner_var_to_check_c_avx2(const __m256i* x, const __m256i* y, __m256i* z, uint8_t clip, uint32_t len);
/*!
* Rotate the contents of a node towards the right by \b shift chars: the chars
* stored from position \b shift onwards are moved to the beginning of the
* output node, while the first \b shift chars wrap around to its end.
* The function does not return a value: the rotated node is written to \b out.
* \param[in] in_256i The node to rotate.
* \param[out] out The rotated node.
* \param[in] shift The order of the rotation in number of chars.
* \param[in] ls The size of the node (lifting size).
* \param[in] n_subnodes The number of subnodes in each node.
*/
static void rotate_node_right(const __m256i* in_256i, __m256i* out, uint16_t shift, uint16_t ls, int8_t n_subnodes);
/*!
* Scale packed 8-bit integers in \b a by the scaling factor \b sf / #F2I.
* The implementation widens the bytes to 16 bits with a logical shift/mask and
* multiplies them with an unsigned high-half product, so the bytes of \b a are
* handled as unsigned values.
* NOTE(review): callers appear to pass magnitudes only (non-negative values) --
* confirm before reusing this helper with signed inputs.
* \param[in] a Vector of packed 8-bit integers.
* \param[in] sf Scaling factor (16-bit lanes).
* \return Vector of packed 8-bit integers with the scaling result.
*/
static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf);
void* create_ldpc_dec_c_avx2long_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
{
struct ldpc_regs_c_avx2long_flood* vp = NULL;
uint8_t bgK = bgN - bgM;
uint16_t hrr = bgK + 4;
if ((vp = srslte_vec_malloc(sizeof(struct ldpc_regs_c_avx2long_flood))) == NULL) {
return NULL;
}
// compute number of subnodes
int left_out = ls % SRSLTE_AVX2_B_SIZE;
int n_subnodes = ls / SRSLTE_AVX2_B_SIZE + (left_out > 0);
if ((vp->llrs = srslte_vec_malloc(bgN * n_subnodes * sizeof(__m256i))) == NULL) {
free(vp);
return NULL;
}
if ((vp->soft_bits = srslte_vec_malloc(bgN * n_subnodes * sizeof(bg_node_t))) == NULL) {
free(vp->llrs);
free(vp);
return NULL;
}
if ((vp->check_to_var = srslte_vec_malloc((hrr + 1) * bgM * n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->soft_bits);
free(vp->llrs);
free(vp);
return NULL;
}
if ((vp->var_to_check = srslte_vec_malloc((hrr + 1) * bgM * n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->check_to_var);
free(vp->soft_bits);
free(vp->llrs);
free(vp);
return NULL;
}
if ((vp->minp_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->var_to_check);
free(vp->check_to_var);
free(vp->soft_bits);
free(vp->llrs);
free(vp);
return NULL;
}
if ((vp->mins_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->minp_v2c_epi8);
free(vp->var_to_check);
free(vp->check_to_var);
free(vp->soft_bits);
free(vp->llrs);
free(vp);
return NULL;
}
if ((vp->prod_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->mins_v2c_epi8);
free(vp->minp_v2c_epi8);
free(vp->var_to_check);
free(vp->check_to_var);
free(vp->soft_bits);
free(vp->llrs);
free(vp);
return NULL;
}
if ((vp->min_ix_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->prod_v2c_epi8);
free(vp->mins_v2c_epi8);
free(vp->minp_v2c_epi8);
free(vp->var_to_check);
free(vp->check_to_var);
free(vp->soft_bits);
free(vp->llrs);
free(vp);
return NULL;
}
if ((vp->rotated_v2c = srslte_vec_malloc((hrr + 1) * n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->min_ix_epi8);
free(vp->prod_v2c_epi8);
free(vp->mins_v2c_epi8);
free(vp->minp_v2c_epi8);
free(vp->var_to_check);
free(vp->check_to_var);
free(vp->soft_bits);
free(vp->llrs);
free(vp);
return NULL;
}
if ((vp->this_c2v_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
free(vp->rotated_v2c);
free(vp->min_ix_epi8);
free(vp->prod_v2c_epi8);
free(vp->mins_v2c_epi8);
free(vp->minp_v2c_epi8);
free(vp->var_to_check);
free(vp->check_to_var);
free(vp->soft_bits);
free(vp->llrs);
free(vp);
return NULL;
}
vp->bgM = bgM;
vp->bgN = bgN;
vp->hrr = hrr;
vp->ls = ls;
vp->n_subnodes = n_subnodes;
vp->scaling_fctr = _mm256_set1_epi16((uint16_t)(scaling_fctr * F2I));
return vp;
}
/*!
 * Releases the decoder registers created by create_ldpc_dec_c_avx2long_flood().
 * \param[in] p The registers to destroy; NULL is accepted and ignored.
 */
void delete_ldpc_dec_c_avx2long_flood(void* p)
{
  if (p == NULL) {
    return;
  }

  struct ldpc_regs_c_avx2long_flood* regs = p;

  // Buffers owned by the registers, listed in the order they are released.
  void* const owned[] = {regs->this_c2v_epi8,
                         regs->rotated_v2c,
                         regs->min_ix_epi8,
                         regs->prod_v2c_epi8,
                         regs->mins_v2c_epi8,
                         regs->minp_v2c_epi8,
                         regs->var_to_check,
                         regs->check_to_var,
                         regs->soft_bits,
                         regs->llrs};

  for (size_t n = 0; n < sizeof(owned) / sizeof(owned[0]); n++) {
    free(owned[n]);
  }
  free(regs);
}
int init_ldpc_dec_c_avx2long_flood(void* p, const int8_t* llrs, uint16_t ls)
{
struct ldpc_regs_c_avx2long_flood* vp = p;
int i = 0;
int j = 0;
int k = 0;
if (p == NULL) {
return -1;
}
for (k = 0; k < vp->n_subnodes; k++) {
vp->soft_bits[k].v = _mm256_set1_epi8(0);
vp->soft_bits[vp->n_subnodes + k].v = _mm256_set1_epi8(0);
vp->llrs[k] = _mm256_set1_epi8(0);
vp->llrs[vp->n_subnodes + k] = _mm256_set1_epi8(0);
}
for (i = 2; i < vp->bgN; i++) {
for (j = 0; j < vp->n_subnodes; j++) {
for (k = 0; (k < SRSLTE_AVX2_B_SIZE) && (j * SRSLTE_AVX2_B_SIZE + k < ls); k++) {
vp->soft_bits[i * vp->n_subnodes + j].c[k] = llrs[(i - 2) * ls + j * SRSLTE_AVX2_B_SIZE + k];
}
vp->llrs[i * vp->n_subnodes + j] = vp->soft_bits[i * vp->n_subnodes + j].v;
}
bzero(&(vp->soft_bits[i * vp->n_subnodes + j - 1].c[k]), (SRSLTE_AVX2_B_SIZE - k) * sizeof(int8_t));
bzero((int8_t*)(vp->llrs + i * vp->n_subnodes + j - 1) + k, (SRSLTE_AVX2_B_SIZE - k) * sizeof(int8_t));
}
bzero(vp->check_to_var, (vp->hrr + 1) * vp->bgM * vp->n_subnodes * sizeof(__m256i));
bzero(vp->var_to_check, (vp->hrr + 1) * vp->bgM * vp->n_subnodes * sizeof(__m256i));
return 0;
}
/*!
 * Updates the variable-to-check messages of layer \b i_layer, that is soft bits
 * minus the check-to-variable messages of the same layer (see
 * inner_var_to_check_c_avx2()). Each layer has its own slice of the
 * variable-to-check buffer.
 *
 * The high-rate region is always processed; from the fifth layer on
 * (\b i_layer >= 4) one extra node of the extension region is processed too.
 * \param[in,out] p       The decoder registers.
 * \param[in]     i_layer Index of the layer to update.
 * \return 0 on success, -1 if \b p is NULL.
 */
int update_ldpc_var_to_check_c_avx2long_flood(void* p, int i_layer)
{
  if (p == NULL) {
    return -1;
  }

  struct ldpc_regs_c_avx2long_flood* regs = p;

  const int n_sub        = regs->n_subnodes;
  const int hrr_len      = regs->hrr * n_sub; // size of the high-rate region, in 256-bit words
  const int layer_offset = i_layer * (regs->hrr + 1) * n_sub;

  __m256i* soft      = &(regs->soft_bits[0].v);
  __m256i* layer_c2v = regs->check_to_var + layer_offset;
  __m256i* layer_v2c = regs->var_to_check + layer_offset;

  // Update the high-rate region.
  inner_var_to_check_c_avx2(soft, layer_c2v, layer_v2c, infinity7, hrr_len);

  if (i_layer < 4) {
    return 0;
  }

  // Update the extension region: a single node, stored right after the high-rate region in the message buffers.
  inner_var_to_check_c_avx2(
      soft + (regs->hrr + i_layer - 4) * n_sub, layer_c2v + hrr_len, layer_v2c + hrr_len, infinity7, n_sub);

  return 0;
}
int update_ldpc_check_to_var_c_avx2long_flood(void* p,
int i_layer,
const uint16_t* this_pcm,
const int8_t (*these_var_indices)[MAX_CNCT])
{
struct ldpc_regs_c_avx2long_flood* vp = p;
if (p == NULL) {
return -1;
}
int i = 0;
int j = 0;
uint16_t shift = 0;
int i_v2c_base = 0;
__m256i* this_rotated_v2c = NULL;
__m256i* this_var_to_check = vp->var_to_check + i_layer * (vp->hrr + 1) * vp->n_subnodes;
__m256i this_abs_v2c_epi8;
__m256i mask_sign_epi8;
__m256i mask_min_epi8;
__m256i help_min_epi8;
__m256i current_ix_epi8;
for (j = 0; j < vp->n_subnodes; j++) {
vp->minp_v2c_epi8[j] = _mm256_set1_epi8(INT8_MAX);
vp->mins_v2c_epi8[j] = _mm256_set1_epi8(INT8_MAX);
vp->prod_v2c_epi8[j] = _mm256_set1_epi8(0);
}
int8_t current_var_index = (*these_var_indices)[0];
for (i = 0; (current_var_index != -1) && (i < MAX_CNCT); i++) {
shift = this_pcm[current_var_index];
i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
i_v2c_base *= vp->n_subnodes;
current_ix_epi8 = _mm256_set1_epi8((int8_t)i);
this_rotated_v2c = vp->rotated_v2c + i * vp->n_subnodes;
rotate_node_right(this_var_to_check + i_v2c_base, this_rotated_v2c, shift, vp->ls, vp->n_subnodes);
for (j = 0; j < vp->n_subnodes; j++) {
// mask_sign is 1 if this_v2c_epi8 is strictly negative
mask_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, this_rotated_v2c[j]);
vp->prod_v2c_epi8[j] = _mm256_xor_si256(vp->prod_v2c_epi8[j], mask_sign_epi8);
this_abs_v2c_epi8 = _mm256_abs_epi8(this_rotated_v2c[j]);
// mask_min is 1 if this_abs_v2c is strictly smaller tha minp_v2c
mask_min_epi8 = _mm256_cmpgt_epi8(vp->minp_v2c_epi8[j], this_abs_v2c_epi8);
help_min_epi8 = _mm256_blendv_epi8(this_abs_v2c_epi8, vp->minp_v2c_epi8[j], mask_min_epi8);
vp->minp_v2c_epi8[j] = _mm256_blendv_epi8(vp->minp_v2c_epi8[j], this_abs_v2c_epi8, mask_min_epi8);
vp->min_ix_epi8[j] = _mm256_blendv_epi8(vp->min_ix_epi8[j], current_ix_epi8, mask_min_epi8);
// mask_min is 1 if this_abs_v2c is strictly smaller tha mins_v2c
mask_min_epi8 = _mm256_cmpgt_epi8(vp->mins_v2c_epi8[j], this_abs_v2c_epi8);
vp->mins_v2c_epi8[j] = _mm256_blendv_epi8(vp->mins_v2c_epi8[j], help_min_epi8, mask_min_epi8);
}
current_var_index = (*these_var_indices)[i + 1];
}
__m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1) * vp->n_subnodes;
current_var_index = (*these_var_indices)[0];
__m256i mask_is_min_epi8;
__m256i help_c2v_epi8;
__m256i final_sign_epi8;
for (i = 0; (current_var_index != -1) && (i < MAX_CNCT); i++) {
shift = this_pcm[current_var_index];
i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
i_v2c_base *= vp->n_subnodes;
this_rotated_v2c = vp->rotated_v2c + i * vp->n_subnodes;
for (j = 0; j < vp->n_subnodes; j++) {
// mask_sign is 1 if this_v2c_epi8 is strictly negative
final_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, this_rotated_v2c[j]);
final_sign_epi8 = _mm256_xor_si256(final_sign_epi8, vp->prod_v2c_epi8[j]);
current_ix_epi8 = _mm256_set1_epi8((int8_t)i);
mask_is_min_epi8 = _mm256_cmpeq_epi8(current_ix_epi8, vp->min_ix_epi8[j]);
vp->this_c2v_epi8[j] = _mm256_blendv_epi8(vp->minp_v2c_epi8[j], vp->mins_v2c_epi8[j], mask_is_min_epi8);
vp->this_c2v_epi8[j] = _mm256_scalei_epi8(vp->this_c2v_epi8[j], vp->scaling_fctr);
help_c2v_epi8 = _mm256_sign_epi8(vp->this_c2v_epi8[j], final_sign_epi8);
vp->this_c2v_epi8[j] = _mm256_blendv_epi8(vp->this_c2v_epi8[j], help_c2v_epi8, final_sign_epi8);
}
// rotating right LS - shift positions is the same as rotating left shift positions
rotate_node_right(vp->this_c2v_epi8, this_check_to_var + i_v2c_base, vp->ls - shift, vp->ls, vp->n_subnodes);
current_var_index = (*these_var_indices)[i + 1];
}
return 0;
}
int update_ldpc_soft_bits_c_avx2long_flood(void* p, const int8_t (*these_var_indices)[MAX_CNCT])
{
struct ldpc_regs_c_avx2long_flood* vp = p;
if (p == NULL) {
return -1;
}
int i_layer = 0;
int i = 0;
int j = 0;
__m256i* this_check_to_var = NULL;
int i_bit_tmp_base = 0;
int i_bit_subnode = 0;
__m256i tmp_epi8;
__m256i mask_epi8;
int8_t current_var_index = 0;
int current_var_index_subnode = 0;
for (i = 0; i < vp->bgN; i++) {
for (j = 0; j < vp->n_subnodes; j++) {
vp->soft_bits[i * vp->n_subnodes + j].v = vp->llrs[i * vp->n_subnodes + j];
}
}
for (i_layer = 0; i_layer < vp->bgM; i_layer++) {
current_var_index = these_var_indices[i_layer][0];
this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1) * vp->n_subnodes;
for (i = 0; (current_var_index != -1) && (i < MAX_CNCT); i++) {
current_var_index_subnode = current_var_index * vp->n_subnodes;
for (j = 0; j < vp->n_subnodes; j++) {
i_bit_tmp_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
i_bit_subnode = i_bit_tmp_base * vp->n_subnodes + j;
tmp_epi8 = _mm256_adds_epi8(this_check_to_var[i_bit_subnode], vp->soft_bits[current_var_index_subnode + j].v);
mask_epi8 = _mm256_cmpgt_epi8(tmp_epi8, infty7_epi8);
tmp_epi8 = _mm256_blendv_epi8(tmp_epi8, infty8_epi8, mask_epi8);
mask_epi8 = _mm256_cmpgt_epi8(neg_infty7_epi8, tmp_epi8);
vp->soft_bits[current_var_index_subnode + j].v = _mm256_blendv_epi8(tmp_epi8, neg_infty8_epi8, mask_epi8);
}
current_var_index = these_var_indices[i_layer][i + 1];
}
}
return 0;
}
int extract_ldpc_message_c_avx2long_flood(void* p, uint8_t* message, uint16_t liftK)
{
if (p == NULL) {
return -1;
}
struct ldpc_regs_c_avx2long_flood* vp = p;
int j = 0;
int k = 0;
for (int i = 0; i < liftK / vp->ls; i++) {
for (j = 0; j < vp->n_subnodes; j++) {
for (k = 0; (k < SRSLTE_AVX2_B_SIZE) && (j * SRSLTE_AVX2_B_SIZE + k < vp->ls); k++) {
message[i * vp->ls + j * SRSLTE_AVX2_B_SIZE + k] = (vp->soft_bits[i * vp->n_subnodes + j].c[k] < 0);
}
}
}
return 0;
}
/*!
 * Computes z = x - y on \b len 256-bit words of packed 8-bit integers.
 *
 * The difference is saturated to [-clip, +clip]. Afterwards, lanes where x is
 * not smaller than \c infty8_epi8 are forced to \c infty8_epi8 and lanes where
 * x is not larger than \c neg_infty8_epi8 are forced to \c neg_infty8_epi8, so
 * that infinite soft bits are propagated unchanged.
 */
static void
inner_var_to_check_c_avx2(const __m256i* x, const __m256i* y, __m256i* z, const uint8_t clip, const uint32_t len)
{
  const __m256i upper_epi8 = _mm256_set1_epi8(clip);
  const __m256i lower_epi8 = _mm256_set1_epi8((char)(-clip));

  for (unsigned n = 0; n < len; n++) {
    const __m256i minuend_epi8 = x[n];

    __m256i diff_epi8 = _mm256_subs_epi8(minuend_epi8, y[n]);

    // clip the difference from above, then from below
    diff_epi8 = _mm256_blendv_epi8(diff_epi8, upper_epi8, _mm256_cmpgt_epi8(diff_epi8, upper_epi8));
    diff_epi8 = _mm256_blendv_epi8(diff_epi8, lower_epi8, _mm256_cmpgt_epi8(lower_epi8, diff_epi8));

    // keep the difference only where the minuend is finite
    diff_epi8 = _mm256_blendv_epi8(infty8_epi8, diff_epi8, _mm256_cmpgt_epi8(infty8_epi8, minuend_epi8));
    z[n]      = _mm256_blendv_epi8(neg_infty8_epi8, diff_epi8, _mm256_cmpgt_epi8(minuend_epi8, neg_infty8_epi8));
  }
}
/*
* Writes to out the node in_256i rotated by shift chars: output subnodes are
* filled first with the chars found from position shift onwards, then with the
* chars at the beginning of the node (wrap-around). One output subnode generally
* mixes both parts and is assembled with a blend.
*
* NOTE(review): the unaligned loads below read 32 chars starting at
* (in + shift + i * SRSLTE_AVX2_B_SIZE) and at (in - gap). The former may extend
* past the end of the node and the latter starts before its first char whenever
* gap > 0. The chars coming from outside the node are presumably discarded by
* the blend, but the memory is still read -- confirm that every caller passes a
* node with readable memory on both sides.
*/
static void rotate_node_right(const __m256i* in_256i, __m256i* out, uint16_t shift, uint16_t ls, int8_t n_subnodes)
{
// Byte-wise view of the input node, needed to address it at char granularity.
const int8_t* in = (const int8_t*)in_256i;
// Number of leading output subnodes taken entirely from the shifted part
// (one less when the node is exactly one subnode long).
int16_t n_type1 = (ls - shift) / SRSLTE_AVX2_B_SIZE - (ls == SRSLTE_AVX2_B_SIZE);
// Number of trailing output subnodes taken entirely from the wrapped-around part.
int16_t n_type2 = n_subnodes - n_type1 - 1 - (ls == SRSLTE_AVX2_B_SIZE);
// Chars of the shifted part that do not fill a whole subnode.
int16_t gap = (ls - shift) % SRSLTE_AVX2_B_SIZE;
int16_t i = 0;
for (; i < n_type1; i++) {
out[i] = _mm256_loadu_si256((const __m256i*)(in + shift + i * SRSLTE_AVX2_B_SIZE));
}
// Mixed subnode: tmp1 holds the end of the shifted part, tmp2 the beginning of the
// node placed gap chars further. mask_most_epi8 is defined elsewhere; presumably
// entry [gap] keeps the first gap chars of tmp1 and takes the others from tmp2.
__m256i tmp1 = _mm256_loadu_si256((const __m256i*)(in + shift + i * SRSLTE_AVX2_B_SIZE));
__m256i tmp2 = _mm256_loadu_si256((const __m256i*)(in - gap));
out[i] = _mm256_blendv_epi8(tmp1, tmp2, mask_most_epi8[gap]);
// Remaining subnodes: continue reading the wrapped-around part.
for (i = 1; i <= n_type2; i++) {
out[n_type1 + i] = _mm256_loadu_si256((const __m256i*)(in - gap + i * SRSLTE_AVX2_B_SIZE));
}
}
/*!
 * Scales the packed 8-bit integers of \b a by \b sf: each byte is widened to an
 * unsigned 16-bit lane, multiplied by \b sf keeping the high half of the
 * product, and the results are packed back into bytes.
 */
static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf)
{
  // Bytes selected by mask_even_epi8 (presumably the low byte of every 16-bit lane).
  __m256i scaled_low_epi16 = _mm256_mulhi_epu16(_mm256_and_si256(a, mask_even_epi8), sf);

  // High byte of every 16-bit lane, moved down with a logical shift.
  __m256i scaled_high_epi16 = _mm256_mulhi_epu16(_mm256_srli_epi16(a, 8), sf);

  // Put the high bytes back in place and merge the two halves.
  return _mm256_xor_si256(scaled_low_epi16, _mm256_slli_epi16(scaled_high_epi16, 8));
}
#endif // LV_HAVE_AVX2

@ -0,0 +1,391 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_c_flood.c
* \brief Definition of the LDPC decoder inner functions working
* with 8-bit integer-valued LLRs. Flooded scheduling.
*
* Even if the inner representation is based on 8 bits, check-to-variable and
* variable-to-check messages are actually represented with 7 bits, the
* remaining bit is used to represent infinity.
*
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <stdint.h>
#include <stdlib.h>
#include <strings.h>
#include "ldpc_dec_all.h"
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/utils/vector.h"
#define F2I 100 /*!< \brief Used for float to int conversion---float f is stored as (int)(f*F2I). */
/*!
* \brief Maximum message magnitude.
* Messages use a 7-bit quantization. Soft bits use the remaining bit to denote infinity.
* The constant evaluates to 63, the largest magnitude of a signed 7-bit integer.
*/
static const int8_t infinity7 = (1U << 6U) - 1;
/*!
* \brief Inner registers for the LDPC decoder that works with 8-bit integer-valued LLRs (flooded scheduling).
*
* The buffer sizes given below are those requested by create_ldpc_dec_c_flood().
*/
struct ldpc_regs_c_flood {
int8_t* llrs; /*!< \brief A-priori log-likelihood ratios (bgN * ls values). */
int8_t* soft_bits; /*!< \brief A-posteriori log-likelihood ratios (bgN * ls values). */
int8_t* check_to_var; /*!< \brief Check-to-variable messages ((hrrN + ls) * bgM values). */
int8_t* var_to_check; /*!< \brief Variable-to-check messages ((hrrN + ls) * bgM values). */
int8_t (*min_v2c)[2]; /*!< \brief Helper register for computing check-to-variable messages (ls pairs). */
int* min_v_index; /*!< \brief Helper register for computing check-to-variable messages (ls values). */
int* prod_v2c; /*!< \brief Helper register for computing check-to-variable messages. */
uint16_t liftN; /*!< \brief Total number of variable nodes (after lifting). */
uint16_t hrrN; /*!< \brief Number of variable nodes in the high-rate region (after lifting). */
uint8_t bgM; /*!< \brief Number of check nodes (before lifting). */
uint16_t ls; /*!< \brief Lifting size. */
int scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
};
/*!
 * Carries out the actual update of the variable-to-check messages. It basically
 * consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
 * \f$\lvert x[i] \rvert = 2^{7}-1 \f$ (our representation of infinity) to
 * ensure it is properly propagated: in that case \f$ z[i] = \pm (2^{7}-1) \f$ and
 * neither the subtraction nor the clipping is applied. Otherwise, the subtraction
 * is saturated between \f$- clip\f$ and \f$+ clip\f$.
 * \param[in]  x    Minuend: array we subtract from (in practice, the soft bits).
 * \param[in]  y    Subtrahend: array to be subtracted (in practice, the
 *                  check-to-variable messages).
 * \param[out] z    Resulting difference array (in practice, the updated
 *                  variable-to-check messages).
 * \param[in]  clip The saturation value.
 * \param[in]  len  The length of the vectors.
 */
static void inner_var_to_check_c(const int8_t* x, const int8_t* y, int8_t* z, uint8_t clip, uint32_t len);
/*!
 * Allocates the registers of the 8-bit flooded LDPC decoder.
 *
 * \param[in] bgN          Number of variable nodes of the base graph.
 * \param[in] bgM          Number of check nodes of the base graph.
 * \param[in] ls           Lifting size.
 * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm
 *                         (stored internally as (int)(scaling_fctr * F2I)).
 * \return A pointer to the newly created registers, or NULL if any allocation fails
 *         (in which case everything already allocated is released).
 */
void* create_ldpc_dec_c_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
{
  // Dimensions derived from the base graph and the lifting size.
  uint8_t  bgK   = bgN - bgM;
  uint16_t liftN = bgN * ls;
  uint16_t hrrN  = (bgK + 4) * ls;

  struct ldpc_regs_c_flood* regs = malloc(sizeof(struct ldpc_regs_c_flood));
  if (regs == NULL) {
    return NULL;
  }

  // All buffers start as NULL so that one cleanup path can release whatever was obtained.
  regs->llrs         = NULL;
  regs->soft_bits    = NULL;
  regs->check_to_var = NULL;
  regs->var_to_check = NULL;
  regs->min_v2c      = NULL;
  regs->min_v_index  = NULL;
  regs->prod_v2c     = NULL;

  regs->llrs = srslte_vec_i8_malloc(liftN);
  if (regs->llrs == NULL) {
    goto alloc_failed;
  }
  regs->soft_bits = srslte_vec_i8_malloc(liftN);
  if (regs->soft_bits == NULL) {
    goto alloc_failed;
  }
  // One (hrrN + ls)-long slice of messages per layer.
  regs->check_to_var = srslte_vec_i8_malloc((hrrN + ls) * bgM);
  if (regs->check_to_var == NULL) {
    goto alloc_failed;
  }
  regs->var_to_check = srslte_vec_i8_malloc((hrrN + ls) * bgM);
  if (regs->var_to_check == NULL) {
    goto alloc_failed;
  }
  regs->min_v2c = malloc(ls * sizeof(int8_t[2]));
  if (regs->min_v2c == NULL) {
    goto alloc_failed;
  }
  regs->min_v_index = srslte_vec_i32_malloc(ls);
  if (regs->min_v_index == NULL) {
    goto alloc_failed;
  }
  regs->prod_v2c = srslte_vec_i32_malloc(ls);
  if (regs->prod_v2c == NULL) {
    goto alloc_failed;
  }

  regs->scaling_fctr = (int)(scaling_fctr * F2I);
  regs->ls           = ls;
  regs->hrrN         = hrrN;
  regs->liftN        = liftN;
  regs->bgM          = bgM;
  return regs;

alloc_failed:
  // free(NULL) is a no-op, hence buffers that were never allocated are skipped safely.
  free(regs->prod_v2c);
  free(regs->min_v_index);
  free(regs->min_v2c);
  free(regs->var_to_check);
  free(regs->check_to_var);
  free(regs->soft_bits);
  free(regs->llrs);
  free(regs);
  return NULL;
}
/*!
 * Releases the registers of the 8-bit flooded LDPC decoder.
 * \param[in] p Pointer to the registers to destroy (a NULL pointer is ignored).
 */
void delete_ldpc_dec_c_flood(void* p)
{
  struct ldpc_regs_c_flood* regs = p;
  if (regs == NULL) {
    return;
  }

  // Release every inner buffer before the container itself.
  free(regs->llrs);
  free(regs->soft_bits);
  free(regs->check_to_var);
  free(regs->var_to_check);
  free(regs->min_v2c);
  free(regs->min_v_index);
  free(regs->prod_v2c);
  free(regs);
}
/*!
 * Loads a new set of LLRs into the decoder and clears all messages.
 *
 * The first 2 * ls entries of both the a-priori LLRs and the soft bits are set to zero;
 * the remaining liftN - 2 * ls entries are copied from the input.
 *
 * \param[in,out] p    Pointer to the decoder registers.
 * \param[in]     llrs Input LLRs (liftN - 2 * ls values are read).
 * \param[in]     ls   Lifting size.
 * \return 0 on success, -1 if \a p is NULL.
 */
int init_ldpc_dec_c_flood(void* p, const int8_t* llrs, uint16_t ls)
{
  if (p == NULL) {
    return -1;
  }

  struct ldpc_regs_c_flood* regs        = p;
  const int                 n_skipped   = 2 * ls;
  const int                 n_messages  = (regs->hrrN + regs->ls) * regs->bgM;

  // Leading entries carry no channel information.
  bzero(regs->llrs, n_skipped * sizeof(int8_t));
  bzero(regs->soft_bits, n_skipped * sizeof(int8_t));

  // The rest comes straight from the input, both as a-priori and as initial a-posteriori value.
  for (int i_in = 0; i_in + n_skipped < regs->liftN; i_in++) {
    const int i_node       = i_in + n_skipped;
    regs->llrs[i_node]      = llrs[i_in];
    regs->soft_bits[i_node] = llrs[i_in];
  }

  bzero(regs->check_to_var, n_messages * sizeof(int8_t));
  bzero(regs->var_to_check, n_messages * sizeof(int8_t));
  return 0;
}
/*!
 * Updates the variable-to-check messages of one layer (8-bit LLRs, flooded scheduling).
 *
 * \param[in,out] p       Pointer to the decoder registers.
 * \param[in]     i_layer Index of the layer whose messages are updated.
 * \return 0 on success, -1 if \a p is NULL.
 */
int update_ldpc_var_to_check_c_flood(void* p, int i_layer)
{
  if (p == NULL) {
    return -1;
  }
  struct ldpc_regs_c_flood* regs = p;

  // Both message arrays store one (hrrN + ls)-long slice per layer.
  const int layer_offset = i_layer * (regs->hrrN + regs->ls);
  int8_t*   layer_c2v    = regs->check_to_var + layer_offset;
  int8_t*   layer_v2c    = regs->var_to_check + layer_offset;

  // High-rate region: always updated.
  inner_var_to_check_c(regs->soft_bits, layer_c2v, layer_v2c, infinity7, regs->hrrN);

  if (i_layer < 4) {
    return 0;
  }

  // Extension region: from the fifth layer on, each layer owns one extra block of ls
  // soft bits located after the high-rate region.
  inner_var_to_check_c(regs->soft_bits + regs->hrrN + (i_layer - 4) * regs->ls,
                       layer_c2v + regs->hrrN,
                       layer_v2c + regs->hrrN,
                       infinity7,
                       regs->ls);
  return 0;
}
int update_ldpc_check_to_var_c_flood(void* p,
int i_layer,
const uint16_t* this_pcm,
const int8_t (*these_var_indices)[MAX_CNCT])
{
struct ldpc_regs_c_flood* vp = p;
if (p == NULL) {
return -1;
}
int i = 0;
int j = 0;
for (i = 0; i < vp->ls; i++) {
vp->prod_v2c[i] = 1;
for (j = 0; j < 2; j++) {
vp->min_v2c[i][j] = INT8_MAX;
}
}
uint16_t shift = 0;
int index = 0;
int8_t this_v2c = 0;
int is_min = 0;
int i_v2c = 0;
int i_v2c_base = 0;
int8_t current_var_index = (*these_var_indices)[0];
int8_t* this_var_to_check = vp->var_to_check + i_layer * (vp->hrrN + vp->ls);
for (i = 0; (current_var_index != -1) && (i < MAX_CNCT); i++) {
shift = this_pcm[current_var_index];
i_v2c_base = current_var_index * vp->ls;
i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;
for (j = 0; j < vp->ls; j++) {
index = (j + vp->ls - shift) % vp->ls;
i_v2c = i_v2c_base + j;
this_v2c = abs(this_var_to_check[i_v2c]);
is_min = this_v2c < vp->min_v2c[index][0];
vp->min_v2c[index][1] =
(this_v2c >= vp->min_v2c[index][1]) ? vp->min_v2c[index][1] : (is_min ? vp->min_v2c[index][0] : this_v2c);
vp->min_v2c[index][0] = is_min ? this_v2c : vp->min_v2c[index][0];
vp->min_v_index[index] = is_min ? i_v2c : vp->min_v_index[index];
vp->prod_v2c[index] *= (this_var_to_check[i_v2c] >= 0) ? 1 : -1;
}
current_var_index = (*these_var_indices)[i + 1];
}
int8_t* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);
current_var_index = (*these_var_indices)[0];
for (i = 0; (current_var_index != -1) && (i < MAX_CNCT); i++) {
shift = this_pcm[current_var_index];
i_v2c_base = current_var_index * vp->ls;
i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;
for (j = 0; j < vp->ls; j++) {
index = (j + vp->ls - shift) % vp->ls;
i_v2c = i_v2c_base + j;
this_check_to_var[i_v2c] = (i_v2c != vp->min_v_index[index]) ? vp->min_v2c[index][0] : vp->min_v2c[index][1];
this_check_to_var[i_v2c] = this_check_to_var[i_v2c] * vp->scaling_fctr / F2I;
this_check_to_var[i_v2c] *= vp->prod_v2c[index] * ((this_var_to_check[i_v2c] >= 0) ? 1 : -1);
}
current_var_index = (*these_var_indices)[i + 1];
}
return 0;
}
/*!
 * Recomputes all soft bits from the a-priori LLRs and the check-to-variable messages of
 * every layer (8-bit LLRs, flooded scheduling).
 *
 * The soft bits are first reset to the a-priori LLRs; then, layer by layer, the
 * check-to-variable message of each connected variable node is accumulated. Sums whose
 * magnitude exceeds infinity7 are replaced by +/-INT8_MAX.
 *
 * \param[in,out] p                 Pointer to the decoder registers.
 * \param[in]     these_var_indices For each of the bgM layers, the list of connected
 *                                  base-graph variable nodes; each list ends at the first -1
 *                                  or after MAX_CNCT entries, whichever comes first.
 * \return 0 on success, -1 if \a p is NULL.
 */
int update_ldpc_soft_bits_c_flood(void* p, const int8_t (*these_var_indices)[MAX_CNCT])
{
  struct ldpc_regs_c_flood* vp = p;
  if (p == NULL) {
    return -1;
  }

  for (int i = 0; i < vp->liftN; i++) {
    vp->soft_bits[i] = vp->llrs[i];
  }

  for (int i_layer = 0; i_layer < vp->bgM; i_layer++) {
    const int8_t* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);

    // The entry is read only after checking i < MAX_CNCT, so a row without a -1 terminator
    // is never read past its end.
    for (int i = 0; i < MAX_CNCT; i++) {
      int8_t current_var_index = these_var_indices[i_layer][i];
      if (current_var_index == -1) {
        break;
      }
      int current_var_index_ext = current_var_index * vp->ls;

      for (int j = 0; j < vp->ls; j++) {
        int i_bit = current_var_index_ext + j;
        // Messages of nodes beyond the high-rate region live in the extension slot of the layer.
        int i_bit_tmp = (current_var_index_ext <= vp->hrrN) ? i_bit : vp->hrrN + j;

        long tmp = (long)this_check_to_var[i_bit_tmp] + vp->soft_bits[i_bit];
        if (tmp > infinity7) {
          tmp = INT8_MAX;
        }
        if (tmp < -infinity7) {
          tmp = -INT8_MAX;
        }
        vp->soft_bits[i_bit] = (int8_t)tmp;
      }
    }
  }

  return 0;
}
/*!
 * Takes hard decisions on the first \a liftK soft bits: a bit is 1 when the corresponding
 * soft bit is negative, 0 otherwise.
 *
 * \param[in]  p       Pointer to the decoder registers.
 * \param[out] message Output bits (\a liftK entries are written).
 * \param[in]  liftK   Number of bits to extract.
 * \return 0 on success, -1 if \a p is NULL.
 */
int extract_ldpc_message_c_flood(void* p, uint8_t* message, uint16_t liftK)
{
  struct ldpc_regs_c_flood* regs = p;
  if (regs == NULL) {
    return -1;
  }

  for (int i_bit = 0; i_bit < liftK; i_bit++) {
    message[i_bit] = (regs->soft_bits[i_bit] < 0) ? 1 : 0;
  }
  return 0;
}
/*
 * Computes z = x - y element by element. Entries of x equal to (or beyond) +/-127 are
 * treated as infinite and copied to z as +/-127; every other difference is saturated
 * to the interval [-clip, +clip].
 */
void inner_var_to_check_c(const int8_t* x, const int8_t* y, int8_t* z, const uint8_t clip, const uint32_t len)
{
  const long infinity8 = INT8_MAX; // Max positive value in 8-bit representation
  const long upper     = clip;
  const long lower     = -upper;

  for (uint32_t k = 0; k < len; k++) {
    const long minuend = x[k];
    long       result  = 0;

    if (minuend >= infinity8) {
      // Plus infinity is propagated as is.
      result = infinity8;
    } else if (minuend <= -infinity8) {
      // Minus infinity is propagated as is.
      result = -infinity8;
    } else {
      result = minuend - y[k];
      if (result > upper) {
        result = upper;
      } else if (result < lower) {
        result = lower;
      }
    }
    z[k] = (int8_t)result;
  }
}

@ -0,0 +1,302 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_f.c
* \brief Definition of the LDPC decoder inner functions working
* with float-valued LLRs.
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include "ldpc_dec_all.h"
#include "math.h"
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/utils/vector.h"
/*!
* \brief Inner registers for the LDPC decoder that works with real-valued LLRs.
*/
struct ldpc_regs {
float* soft_bits; /*!< \brief A-posteriori log-likelihood ratios (liftN entries). */
float* check_to_var; /*!< \brief Check-to-variable messages: bgM slices of (hrrN + ls) entries, one per layer. */
float* var_to_check; /*!< \brief Variable-to-check messages of the layer being processed (hrrN + ls entries). */
float (*min_v2c)[2]; /*!< \brief Helper register (ls pairs): [0] smallest and [1] second-smallest
                          variable-to-check magnitude seen by each lifted check node. */
int* min_v_index; /*!< \brief Helper register (ls entries): position of the message attaining min_v2c[.][0]. */
int* prod_v2c; /*!< \brief Helper register (ls entries): product (+1/-1) of the signs of the
                    variable-to-check messages of each lifted check node. */
uint16_t liftN; /*!< \brief Total number of variable nodes (after lifting). */
uint16_t hrrN; /*!< \brief Number of variable nodes in the high-rate region (after lifting). */
uint8_t bgM; /*!< \brief Number of check nodes (before lifting). */
uint16_t ls; /*!< \brief Lifting size. */
float scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
};
/*!
 * Allocates the registers of the float-valued LDPC decoder.
 *
 * \param[in] bgN          Number of variable nodes of the base graph.
 * \param[in] bgM          Number of check nodes of the base graph.
 * \param[in] ls           Lifting size.
 * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
 * \return A pointer to the newly created registers, or NULL if any allocation fails
 *         (in which case everything already allocated is released).
 */
void* create_ldpc_dec_f(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
{
  // Dimensions derived from the base graph and the lifting size.
  uint8_t  bgK   = bgN - bgM;
  uint16_t liftN = bgN * ls;
  uint16_t hrrN  = (bgK + 4) * ls;

  struct ldpc_regs* regs = malloc(sizeof(struct ldpc_regs));
  if (regs == NULL) {
    return NULL;
  }

  // All buffers start as NULL so that one cleanup path can release whatever was obtained.
  regs->soft_bits    = NULL;
  regs->check_to_var = NULL;
  regs->var_to_check = NULL;
  regs->min_v2c      = NULL;
  regs->min_v_index  = NULL;
  regs->prod_v2c     = NULL;

  regs->soft_bits = srslte_vec_f_malloc(liftN);
  if (regs->soft_bits == NULL) {
    goto alloc_failed;
  }
  // Check-to-variable messages are kept for every layer...
  regs->check_to_var = srslte_vec_f_malloc((hrrN + ls) * bgM);
  if (regs->check_to_var == NULL) {
    goto alloc_failed;
  }
  // ...whereas variable-to-check messages are kept for a single layer only.
  regs->var_to_check = srslte_vec_f_malloc((hrrN + ls));
  if (regs->var_to_check == NULL) {
    goto alloc_failed;
  }
  regs->min_v2c = malloc(ls * sizeof(float[2]));
  if (regs->min_v2c == NULL) {
    goto alloc_failed;
  }
  regs->min_v_index = srslte_vec_i32_malloc(ls);
  if (regs->min_v_index == NULL) {
    goto alloc_failed;
  }
  regs->prod_v2c = srslte_vec_i32_malloc(ls);
  if (regs->prod_v2c == NULL) {
    goto alloc_failed;
  }

  regs->scaling_fctr = scaling_fctr;
  regs->ls           = ls;
  regs->hrrN         = hrrN;
  regs->liftN        = liftN;
  regs->bgM          = bgM;
  return regs;

alloc_failed:
  // free(NULL) is a no-op, hence buffers that were never allocated are skipped safely.
  free(regs->prod_v2c);
  free(regs->min_v_index);
  free(regs->min_v2c);
  free(regs->var_to_check);
  free(regs->check_to_var);
  free(regs->soft_bits);
  free(regs);
  return NULL;
}
/*!
 * Releases the registers of the float-valued LDPC decoder.
 * \param[in] p Pointer to the registers to destroy (a NULL pointer is ignored).
 */
void delete_ldpc_dec_f(void* p)
{
  struct ldpc_regs* regs = p;
  if (regs == NULL) {
    return;
  }

  // Release every inner buffer before the container itself.
  free(regs->soft_bits);
  free(regs->check_to_var);
  free(regs->var_to_check);
  free(regs->min_v2c);
  free(regs->min_v_index);
  free(regs->prod_v2c);
  free(regs);
}
/*!
 * Loads a new set of LLRs into the decoder and clears all messages.
 *
 * The first 2 * ls soft bits are set to zero; the remaining liftN - 2 * ls are copied
 * from the input.
 *
 * \param[in,out] p    Pointer to the decoder registers.
 * \param[in]     llrs Input LLRs (liftN - 2 * ls values are read).
 * \param[in]     ls   Lifting size.
 * \return 0 on success, -1 if \a p is NULL.
 */
int init_ldpc_dec_f(void* p, const float* llrs, uint16_t ls)
{
  if (p == NULL) {
    return -1;
  }

  struct ldpc_regs* regs      = p;
  const int         n_skipped = 2 * ls;

  // Leading entries carry no channel information.
  bzero(regs->soft_bits, n_skipped * sizeof(float));

  for (int i_in = 0; i_in + n_skipped < regs->liftN; i_in++) {
    regs->soft_bits[i_in + n_skipped] = llrs[i_in];
  }

  bzero(regs->check_to_var, (regs->hrrN + regs->ls) * regs->bgM * sizeof(float));
  bzero(regs->var_to_check, (regs->hrrN + regs->ls) * sizeof(float));
  return 0;
}
/*!
 * Updates the variable-to-check messages of one layer (float LLRs) by subtracting the
 * check-to-variable messages of the layer from the current soft bits.
 *
 * \param[in,out] p       Pointer to the decoder registers.
 * \param[in]     i_layer Index of the layer whose messages are updated.
 * \return 0 on success, -1 if \a p is NULL.
 */
int update_ldpc_var_to_check_f(void* p, int i_layer)
{
  if (p == NULL) {
    return -1;
  }
  struct ldpc_regs* regs = p;

  // Check-to-variable messages store one (hrrN + ls)-long slice per layer.
  float* layer_c2v = regs->check_to_var + i_layer * (regs->hrrN + regs->ls);

  // High-rate region: always updated.
  srslte_vec_sub_fff(regs->soft_bits, layer_c2v, regs->var_to_check, regs->hrrN);

  if (i_layer < 4) {
    return 0;
  }

  // Extension region: from the fifth layer on, each layer owns one extra block of ls
  // soft bits located after the high-rate region.
  srslte_vec_sub_fff(regs->soft_bits + regs->hrrN + (i_layer - 4) * regs->ls,
                     layer_c2v + regs->hrrN,
                     regs->var_to_check + regs->hrrN,
                     regs->ls);
  return 0;
}
/*!
 * Updates the check-to-variable messages of one layer with the normalized min-sum rule
 * (float LLRs).
 *
 * A first pass over the variable nodes connected to the layer computes, for each of the
 * ls lifted check nodes, the smallest and second-smallest magnitude of the incoming
 * variable-to-check messages, the position of the smallest one and the product of all
 * signs. A second pass writes the outgoing messages: the magnitude is the smallest
 * incoming magnitude among the *other* variable nodes, multiplied by scaling_fctr, and
 * the sign is the product of the signs of the other incoming messages.
 *
 * \param[in,out] p                 Pointer to the decoder registers.
 * \param[in]     i_layer           Index of the layer to update.
 * \param[in]     this_pcm          Shifts of this layer, indexed by base-graph variable node.
 * \param[in]     these_var_indices Pointer to the list of base-graph variable nodes connected
 *                                  to this layer; the list ends at the first -1 or after
 *                                  MAX_CNCT entries, whichever comes first.
 * \return 0 on success, -1 if \a p is NULL.
 */
int update_ldpc_check_to_var_f(void*           p,
                               int             i_layer,
                               const uint16_t* this_pcm,
                               const int8_t (*these_var_indices)[MAX_CNCT])
{
  struct ldpc_regs* vp = p;
  if (p == NULL) {
    return -1;
  }

  int i = 0;
  int j = 0;

  // Reset the helper registers. min_v_index is reset to an impossible position so that it
  // is never read uninitialized (or stale from a previous layer) when no incoming message
  // is strictly smaller than INFINITY.
  for (i = 0; i < vp->ls; i++) {
    vp->prod_v2c[i]    = 1;
    vp->min_v_index[i] = -1;
    for (j = 0; j < 2; j++) {
      vp->min_v2c[i][j] = INFINITY;
    }
  }

  uint16_t shift             = 0;
  int      index             = 0;
  float    this_v2c          = NAN;
  int      is_min            = 0;
  int      i_v2c_base        = 0;
  int      i_v2c             = 0;
  int8_t   current_var_index = 0;

  // First pass: gather minima, position of the minimum and sign product.
  // The entry is read only after checking i < MAX_CNCT, so a row without a -1 terminator
  // is never read past its end.
  for (i = 0; i < MAX_CNCT; i++) {
    current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }
    shift      = this_pcm[current_var_index];
    i_v2c_base = current_var_index * vp->ls;
    // Variable nodes beyond the high-rate region share the single extension slot.
    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;

    for (j = 0; j < vp->ls; j++) {
      // Lifted check node reached by the j-th copy of the variable node.
      index    = (j + vp->ls - shift) % vp->ls;
      i_v2c    = i_v2c_base + j;
      this_v2c = fabsf(vp->var_to_check[i_v2c]);
      is_min   = this_v2c < vp->min_v2c[index][0];
      // The second minimum must be updated before the first one is overwritten.
      vp->min_v2c[index][1] =
          (this_v2c >= vp->min_v2c[index][1]) ? vp->min_v2c[index][1] : (is_min ? vp->min_v2c[index][0] : this_v2c);
      vp->min_v2c[index][0]  = is_min ? this_v2c : vp->min_v2c[index][0];
      vp->min_v_index[index] = is_min ? i_v2c : vp->min_v_index[index];
      vp->prod_v2c[index] *= (vp->var_to_check[i_v2c] >= 0) ? 1 : -1;
    }
  }

  float* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);

  // Second pass: write the outgoing messages.
  for (i = 0; i < MAX_CNCT; i++) {
    current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }
    shift      = this_pcm[current_var_index];
    i_v2c_base = current_var_index * vp->ls;
    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;

    for (j = 0; j < vp->ls; j++) {
      index = (j + vp->ls - shift) % vp->ls;
      i_v2c = i_v2c_base + j;
      // The node that provided the minimum receives the second minimum instead.
      this_check_to_var[i_v2c] = (i_v2c != vp->min_v_index[index]) ? vp->min_v2c[index][0] : vp->min_v2c[index][1];
      this_check_to_var[i_v2c] *= vp->scaling_fctr;
      // Multiplying the overall sign product by the own sign removes the own contribution.
      this_check_to_var[i_v2c] *= (float)vp->prod_v2c[index] * ((vp->var_to_check[i_v2c] >= 0) ? 1.F : -1.F);
    }
  }

  return 0;
}
/*!
 * Updates the soft bits of the variable nodes connected to one layer (float LLRs): each
 * soft bit becomes the sum of the check-to-variable and the variable-to-check message
 * exchanged with this layer.
 *
 * \param[in,out] p                 Pointer to the decoder registers.
 * \param[in]     i_layer           Index of the layer.
 * \param[in]     these_var_indices Pointer to the list of base-graph variable nodes connected
 *                                  to this layer; the list ends at the first -1 or after
 *                                  MAX_CNCT entries, whichever comes first.
 * \return 0 on success, -1 if \a p is NULL.
 */
int update_ldpc_soft_bits_f(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT])
{
  struct ldpc_regs* vp = p;
  if (p == NULL) {
    return -1;
  }

  const float* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);
  const float* this_var_to_check = vp->var_to_check;

  // The entry is read only after checking i < MAX_CNCT, so a row without a -1 terminator
  // is never read past its end.
  for (int i = 0; i < MAX_CNCT; i++) {
    int8_t current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }
    int current_var_index_ext = current_var_index * vp->ls;

    for (int j = 0; j < vp->ls; j++) {
      int i_bit = current_var_index_ext + j;
      // Messages of nodes beyond the high-rate region live in the extension slot.
      int i_bit_tmp = (current_var_index_ext <= vp->hrrN) ? i_bit : vp->hrrN + j;

      vp->soft_bits[i_bit] = this_check_to_var[i_bit_tmp] + this_var_to_check[i_bit_tmp];
    }
  }

  return 0;
}
/*!
 * Takes hard decisions on the first \a liftK soft bits: a bit is 1 when the corresponding
 * soft bit is negative, 0 otherwise.
 *
 * \param[in]  p       Pointer to the decoder registers.
 * \param[out] message Output bits (\a liftK entries are written).
 * \param[in]  liftK   Number of bits to extract.
 * \return 0 on success, -1 if \a p is NULL.
 */
int extract_ldpc_message_f(void* p, uint8_t* message, uint16_t liftK)
{
  struct ldpc_regs* regs = p;
  if (regs == NULL) {
    return -1;
  }

  for (int i_bit = 0; i_bit < liftK; i_bit++) {
    message[i_bit] = (regs->soft_bits[i_bit] < 0) ? 1 : 0;
  }
  return 0;
}

@ -0,0 +1,364 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_s.c
* \brief Definition of the LDPC decoder inner functions working
* with 16-bit integer-valued LLRs.
*
* Even if the inner representation is based on 16 bits, check-to-variable and
* variable-to-check messages are actually represented with 15 bits, the
* remaining bit is used to represent infinity.
*
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <stdint.h>
#include <stdlib.h>
#include <strings.h>
#include "ldpc_dec_all.h"
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/utils/vector.h"
#define F2I 100 /*!< \brief Used for float to int conversion---float f is stored as (int)(f*F2I). */
/*!
 * \brief Maximum message magnitude (\f$2^{14} - 1 = 16383\f$).
 * Messages use a 15-bit quantization (sign plus fourteen magnitude bits). Soft bits use
 * the remaining bit to denote infinity.
 * NOTE(review): unlike its 8-bit counterpart, this constant has external linkage ---
 * confirm whether other translation units rely on it before making it static.
 */
const int16_t infinity15 = (1U << 14U) - 1;
/*!
* \brief Inner registers for the LDPC decoder that works with 16-bit integer-valued LLRs.
*/
struct ldpc_regs_s {
int16_t* soft_bits; /*!< \brief A-posteriori log-likelihood ratios (liftN entries). */
int16_t* check_to_var; /*!< \brief Check-to-variable messages: bgM slices of (hrrN + ls) entries, one per layer. */
int16_t* var_to_check; /*!< \brief Variable-to-check messages of the layer being processed (hrrN + ls entries). */
int16_t (*min_v2c)[2]; /*!< \brief Helper register (ls pairs): [0] smallest and [1] second-smallest
                            variable-to-check magnitude seen by each lifted check node. */
int* min_v_index; /*!< \brief Helper register (ls entries): position of the message attaining min_v2c[.][0]. */
int* prod_v2c; /*!< \brief Helper register (ls entries): product (+1/-1) of the signs of the
                    variable-to-check messages of each lifted check node. */
uint16_t liftN; /*!< \brief Total number of variable nodes (after lifting). */
uint16_t hrrN; /*!< \brief Number of variable nodes in the high-rate region (after lifting). */
uint8_t bgM; /*!< \brief Number of check nodes (before lifting). */
uint16_t ls; /*!< \brief Lifting size. */
int scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm,
                       stored as (int)(factor * F2I). */
};
/*!
 * Carries out the actual update of the variable-to-check messages. It basically
 * consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
 * \f$\lvert x[i] \rvert = 2^{15}-1 \f$ (our representation of infinity) to
 * ensure it is properly propagated: in that case \f$ z[i] = \pm (2^{15}-1) \f$ and
 * neither the subtraction nor the clipping is applied. Otherwise, the subtraction
 * is saturated between \f$- clip\f$ and \f$+ clip\f$.
 * \param[in]  x    Minuend: array we subtract from (in practice, the soft bits).
 * \param[in]  y    Subtrahend: array to be subtracted (in practice, the
 *                  check-to-variable messages).
 * \param[out] z    Resulting difference array (in practice, the updated
 *                  variable-to-check messages).
 * \param[in]  clip The saturation value.
 * \param[in]  len  The length of the vectors.
 */
static void inner_var_to_check_s(const int16_t* x, const int16_t* y, int16_t* z, uint16_t clip, uint32_t len);
/*!
 * Allocates the registers of the 16-bit LDPC decoder.
 *
 * \param[in] bgN          Number of variable nodes of the base graph.
 * \param[in] bgM          Number of check nodes of the base graph.
 * \param[in] ls           Lifting size.
 * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm
 *                         (stored internally as (int)(scaling_fctr * F2I)).
 * \return A pointer to the newly created registers, or NULL if any allocation fails
 *         (in which case everything already allocated is released).
 */
void* create_ldpc_dec_s(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
{
  // Dimensions derived from the base graph and the lifting size.
  uint8_t  bgK   = bgN - bgM;
  uint16_t liftN = bgN * ls;
  uint16_t hrrN  = (bgK + 4) * ls;

  struct ldpc_regs_s* regs = malloc(sizeof(struct ldpc_regs_s));
  if (regs == NULL) {
    return NULL;
  }

  // All buffers start as NULL so that one cleanup path can release whatever was obtained.
  regs->soft_bits    = NULL;
  regs->check_to_var = NULL;
  regs->var_to_check = NULL;
  regs->min_v2c      = NULL;
  regs->min_v_index  = NULL;
  regs->prod_v2c     = NULL;

  regs->soft_bits = malloc(liftN * sizeof(int16_t));
  if (regs->soft_bits == NULL) {
    goto alloc_failed;
  }
  // Check-to-variable messages are kept for every layer...
  regs->check_to_var = malloc((hrrN + ls) * bgM * sizeof(int16_t));
  if (regs->check_to_var == NULL) {
    goto alloc_failed;
  }
  // ...whereas variable-to-check messages are kept for a single layer only.
  regs->var_to_check = malloc((hrrN + ls) * sizeof(int16_t));
  if (regs->var_to_check == NULL) {
    goto alloc_failed;
  }
  regs->min_v2c = malloc(ls * sizeof(int16_t[2]));
  if (regs->min_v2c == NULL) {
    goto alloc_failed;
  }
  regs->min_v_index = srslte_vec_i32_malloc(ls);
  if (regs->min_v_index == NULL) {
    goto alloc_failed;
  }
  regs->prod_v2c = srslte_vec_i32_malloc(ls);
  if (regs->prod_v2c == NULL) {
    goto alloc_failed;
  }

  regs->scaling_fctr = (int)(scaling_fctr * F2I);
  regs->ls           = ls;
  regs->hrrN         = hrrN;
  regs->liftN        = liftN;
  regs->bgM          = bgM;
  return regs;

alloc_failed:
  // free(NULL) is a no-op, hence buffers that were never allocated are skipped safely.
  free(regs->prod_v2c);
  free(regs->min_v_index);
  free(regs->min_v2c);
  free(regs->var_to_check);
  free(regs->check_to_var);
  free(regs->soft_bits);
  free(regs);
  return NULL;
}
/*!
 * Releases the registers of the 16-bit LDPC decoder.
 * \param[in] p Pointer to the registers to destroy (a NULL pointer is ignored).
 */
void delete_ldpc_dec_s(void* p)
{
  struct ldpc_regs_s* regs = p;
  if (regs == NULL) {
    return;
  }

  // Release every inner buffer before the container itself.
  free(regs->soft_bits);
  free(regs->check_to_var);
  free(regs->var_to_check);
  free(regs->min_v2c);
  free(regs->min_v_index);
  free(regs->prod_v2c);
  free(regs);
}
/*!
 * Loads a new set of LLRs into the decoder and clears all messages.
 *
 * The first 2 * ls soft bits are set to zero; the remaining liftN - 2 * ls are copied
 * from the input.
 *
 * \param[in,out] p    Pointer to the decoder registers.
 * \param[in]     llrs Input LLRs (liftN - 2 * ls values are read).
 * \param[in]     ls   Lifting size.
 * \return 0 on success, -1 if \a p is NULL.
 */
int init_ldpc_dec_s(void* p, const int16_t* llrs, uint16_t ls)
{
  if (p == NULL) {
    return -1;
  }

  struct ldpc_regs_s* regs      = p;
  const int           n_skipped = 2 * ls;

  // Leading entries carry no channel information.
  bzero(regs->soft_bits, n_skipped * sizeof(int16_t));

  for (int i_in = 0; i_in + n_skipped < regs->liftN; i_in++) {
    regs->soft_bits[i_in + n_skipped] = llrs[i_in];
  }

  bzero(regs->check_to_var, (regs->hrrN + regs->ls) * regs->bgM * sizeof(int16_t));
  bzero(regs->var_to_check, (regs->hrrN + regs->ls) * sizeof(int16_t));
  return 0;
}
/*!
 * Updates the variable-to-check messages of one layer (16-bit LLRs).
 *
 * \param[in,out] p       Pointer to the decoder registers.
 * \param[in]     i_layer Index of the layer whose messages are updated.
 * \return 0 on success, -1 if \a p is NULL.
 */
int update_ldpc_var_to_check_s(void* p, int i_layer)
{
  if (p == NULL) {
    return -1;
  }
  struct ldpc_regs_s* regs = p;

  // Check-to-variable messages store one (hrrN + ls)-long slice per layer.
  int16_t* layer_c2v = regs->check_to_var + i_layer * (regs->hrrN + regs->ls);

  // High-rate region: always updated.
  inner_var_to_check_s(regs->soft_bits, layer_c2v, regs->var_to_check, infinity15, regs->hrrN);

  if (i_layer < 4) {
    return 0;
  }

  // Extension region: from the fifth layer on, each layer owns one extra block of ls
  // soft bits located after the high-rate region.
  inner_var_to_check_s(regs->soft_bits + regs->hrrN + (i_layer - 4) * regs->ls,
                       layer_c2v + regs->hrrN,
                       regs->var_to_check + regs->hrrN,
                       infinity15,
                       regs->ls);
  return 0;
}
/*!
 * Updates the check-to-variable messages of one layer with the normalized min-sum rule
 * (16-bit LLRs).
 *
 * A first pass over the variable nodes connected to the layer computes, for each of the
 * ls lifted check nodes, the smallest and second-smallest magnitude of the incoming
 * variable-to-check messages, the position of the smallest one and the product of all
 * signs. A second pass writes the outgoing messages: the magnitude is the smallest
 * incoming magnitude among the *other* variable nodes, scaled by scaling_fctr / F2I, and
 * the sign is the product of the signs of the other incoming messages.
 *
 * \param[in,out] p                 Pointer to the decoder registers.
 * \param[in]     i_layer           Index of the layer to update.
 * \param[in]     this_pcm          Shifts of this layer, indexed by base-graph variable node.
 * \param[in]     these_var_indices Pointer to the list of base-graph variable nodes connected
 *                                  to this layer; the list ends at the first -1 or after
 *                                  MAX_CNCT entries, whichever comes first.
 * \return 0 on success, -1 if \a p is NULL.
 */
int update_ldpc_check_to_var_s(void*           p,
                               int             i_layer,
                               const uint16_t* this_pcm,
                               const int8_t (*these_var_indices)[MAX_CNCT])
{
  struct ldpc_regs_s* vp = p;
  if (p == NULL) {
    return -1;
  }

  int i = 0;
  int j = 0;

  // Reset the helper registers. min_v_index is reset to an impossible position so that it
  // is never read uninitialized (or stale from a previous layer) when no incoming message
  // is strictly smaller than INT16_MAX.
  for (i = 0; i < vp->ls; i++) {
    vp->prod_v2c[i]    = 1;
    vp->min_v_index[i] = -1;
    for (j = 0; j < 2; j++) {
      vp->min_v2c[i][j] = INT16_MAX;
    }
  }

  uint16_t shift             = 0;
  int      index             = 0;
  int16_t  this_v2c          = 0;
  int      is_min            = 0;
  int      i_v2c             = 0;
  int      i_v2c_base        = 0;
  int8_t   current_var_index = 0;

  // First pass: gather minima, position of the minimum and sign product.
  // The entry is read only after checking i < MAX_CNCT, so a row without a -1 terminator
  // is never read past its end.
  for (i = 0; i < MAX_CNCT; i++) {
    current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }
    shift      = this_pcm[current_var_index];
    i_v2c_base = current_var_index * vp->ls;
    // Variable nodes beyond the high-rate region share the single extension slot.
    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;

    for (j = 0; j < vp->ls; j++) {
      // Lifted check node reached by the j-th copy of the variable node.
      index    = (j + vp->ls - shift) % vp->ls;
      i_v2c    = i_v2c_base + j;
      this_v2c = abs(vp->var_to_check[i_v2c]);
      is_min   = this_v2c < vp->min_v2c[index][0];
      // The second minimum must be updated before the first one is overwritten.
      vp->min_v2c[index][1] =
          (this_v2c >= vp->min_v2c[index][1]) ? vp->min_v2c[index][1] : (is_min ? vp->min_v2c[index][0] : this_v2c);
      vp->min_v2c[index][0]  = is_min ? this_v2c : vp->min_v2c[index][0];
      vp->min_v_index[index] = is_min ? i_v2c : vp->min_v_index[index];
      vp->prod_v2c[index] *= (vp->var_to_check[i_v2c] >= 0) ? 1 : -1;
    }
  }

  int16_t* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);

  // Second pass: write the outgoing messages.
  for (i = 0; i < MAX_CNCT; i++) {
    current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }
    shift      = this_pcm[current_var_index];
    i_v2c_base = current_var_index * vp->ls;
    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;

    for (j = 0; j < vp->ls; j++) {
      index = (j + vp->ls - shift) % vp->ls;
      i_v2c = i_v2c_base + j;
      // The node that provided the minimum receives the second minimum instead.
      this_check_to_var[i_v2c] = (i_v2c != vp->min_v_index[index]) ? vp->min_v2c[index][0] : vp->min_v2c[index][1];
      this_check_to_var[i_v2c] = this_check_to_var[i_v2c] * vp->scaling_fctr / F2I;
      // Multiplying the overall sign product by the own sign removes the own contribution.
      this_check_to_var[i_v2c] *= vp->prod_v2c[index] * ((vp->var_to_check[i_v2c] >= 0) ? 1 : -1);
    }
  }

  return 0;
}
/*!
 * Updates the soft bits of the variable nodes connected to one layer (16-bit LLRs): each
 * soft bit becomes the sum of the check-to-variable and the variable-to-check message
 * exchanged with this layer. Sums whose magnitude exceeds infinity15 are replaced by
 * +/-INT16_MAX.
 *
 * \param[in,out] p                 Pointer to the decoder registers.
 * \param[in]     i_layer           Index of the layer.
 * \param[in]     these_var_indices Pointer to the list of base-graph variable nodes connected
 *                                  to this layer; the list ends at the first -1 or after
 *                                  MAX_CNCT entries, whichever comes first.
 * \return 0 on success, -1 if \a p is NULL.
 */
int update_ldpc_soft_bits_s(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT])
{
  struct ldpc_regs_s* vp = p;
  if (p == NULL) {
    return -1;
  }

  const int16_t* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);
  const int16_t* this_var_to_check = vp->var_to_check;

  // The entry is read only after checking i < MAX_CNCT, so a row without a -1 terminator
  // is never read past its end.
  for (int i = 0; i < MAX_CNCT; i++) {
    int8_t current_var_index = (*these_var_indices)[i];
    if (current_var_index == -1) {
      break;
    }
    int current_var_index_ext = current_var_index * vp->ls;

    for (int j = 0; j < vp->ls; j++) {
      int i_bit = current_var_index_ext + j;
      // Messages of nodes beyond the high-rate region live in the extension slot.
      int i_bit_tmp = (current_var_index_ext <= vp->hrrN) ? i_bit : vp->hrrN + j;

      long tmp = (long)this_check_to_var[i_bit_tmp] + this_var_to_check[i_bit_tmp];
      if (tmp > infinity15) {
        tmp = INT16_MAX;
      }
      if (tmp < -infinity15) {
        tmp = -INT16_MAX;
      }
      vp->soft_bits[i_bit] = (int16_t)tmp;
    }
  }

  return 0;
}
/*!
 * Takes hard decisions on the first \a liftK soft bits: a bit is 1 when the corresponding
 * soft bit is negative, 0 otherwise.
 *
 * \param[in]  p       Pointer to the decoder registers.
 * \param[out] message Output bits (\a liftK entries are written).
 * \param[in]  liftK   Number of bits to extract.
 * \return 0 on success, -1 if \a p is NULL.
 */
int extract_ldpc_message_s(void* p, uint8_t* message, uint16_t liftK)
{
  struct ldpc_regs_s* regs = p;
  if (regs == NULL) {
    return -1;
  }

  for (int i_bit = 0; i_bit < liftK; i_bit++) {
    message[i_bit] = (regs->soft_bits[i_bit] < 0) ? 1 : 0;
  }
  return 0;
}
/*
 * Computes z = x - y element by element. Entries of x equal to (or beyond) +/-32767 are
 * treated as infinite and copied to z as +/-32767; every other difference is saturated
 * to the interval [-clip, +clip].
 */
void inner_var_to_check_s(const int16_t* x, const int16_t* y, int16_t* z, const uint16_t clip, const uint32_t len)
{
  const long infinity16 = INT16_MAX; // Max positive value in 16-bit representation
  const long upper      = clip;
  const long lower      = -upper;

  for (uint32_t k = 0; k < len; k++) {
    const long minuend = x[k];
    long       result  = 0;

    if (minuend >= infinity16) {
      // Plus infinity is propagated as is.
      result = infinity16;
    } else if (minuend <= -infinity16) {
      // Minus infinity is propagated as is.
      result = -infinity16;
    } else {
      result = minuend - y[k];
      if (result > upper) {
        result = upper;
      } else if (result < lower) {
        result = lower;
      }
    }
    z[k] = (int16_t)result;
  }
}

@ -0,0 +1,785 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_decoder.c
* \brief Definition of the LDPC decoder.
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <stdint.h>
#include "../utils_avx2.h"
#include "ldpc_dec_all.h"
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
#include "srslte/phy/utils/debug.h"
#include "srslte/phy/utils/vector.h"
#define MAX_ITERATIONS 10 /*!< \brief Iterations of the BP algorithm (the decoders below always run all of them). */
/*!
 * Carries out the actual destruction of the memory allocated to the decoder, float-LLR case.
 *
 * Every released pointer is reset to NULL: init_f() invokes this function when the
 * creation of the inner registers fails, and the same decoder object may be freed again
 * later on. Without the reset, that second call would free the same buffers twice.
 *
 * \param[in,out] o Pointer to the decoder (a srslte_ldpc_decoder_t); NULL is ignored.
 */
static void free_dec_f(void* o)
{
  srslte_ldpc_decoder_t* q = o;
  if (q == NULL) {
    return;
  }

  // free(NULL) is a no-op: no need to test the pointers first.
  free(q->var_indices);
  q->var_indices = NULL;

  free(q->pcm);
  q->pcm = NULL;

  delete_ldpc_dec_f(q->ptr);
  q->ptr = NULL;
}
/*!
 * Carries out the decoding with real-valued LLRs.
 *
 * The rate-matched codeword length is first brought into the supported range and rounded
 * up to a multiple of the lifting size; then MAX_ITERATIONS iterations are run over the
 * resulting layers and the message bits are extracted.
 *
 * \param[in]  o              Pointer to the decoder (a srslte_ldpc_decoder_t).
 * \param[in]  llrs           Input LLRs.
 * \param[out] message        Decoded message bits (liftK entries).
 * \param[in]  cdwd_rm_length Length of the rate-matched codeword.
 * \return Always 0.
 */
static int decode_f(void* o, const float* llrs, uint8_t* message, uint32_t cdwd_rm_length)
{
  srslte_ldpc_decoder_t* q = o;

  // The two leading variable nodes are systematically punctured by the encoder, hence the
  // codeword cannot be longer than liftN - 2 * ls...
  const uint32_t longest = q->liftN - 2 * q->ls;
  // ...and at least bgK + 4 variable nodes (two of them punctured) are needed to cover the
  // high-rate region.
  const uint32_t shortest = (q->bgK + 2) * q->ls;

  if (cdwd_rm_length > longest) {
    cdwd_rm_length = longest;
  }
  // Too-short lengths are extended rather than rejected.
  if (cdwd_rm_length < shortest) {
    cdwd_rm_length = shortest;
  }
  // Lengths that are not a multiple of the lifting size are rounded up rather than rejected.
  if (cdwd_rm_length % q->ls != 0) {
    cdwd_rm_length = (cdwd_rm_length / q->ls + 1) * q->ls;
  }

  init_ldpc_dec_f(q->ptr, llrs, q->ls);

  // When computing the number of layers, recall that the standard always removes the
  // first two variable nodes from the final codeword.
  uint8_t n_layers = cdwd_rm_length / q->ls - q->bgK + 2;

  for (int i_iteration = 0; i_iteration < MAX_ITERATIONS; i_iteration++) {
    for (int i_layer = 0; i_layer < n_layers; i_layer++) {
      uint16_t* layer_pcm                    = q->pcm + i_layer * q->bgN;
      int8_t(*layer_var_indices)[MAX_CNCT]   = q->var_indices + i_layer;

      update_ldpc_var_to_check_f(q->ptr, i_layer);
      update_ldpc_check_to_var_f(q->ptr, i_layer, layer_pcm, layer_var_indices);
      update_ldpc_soft_bits_f(q->ptr, i_layer, layer_var_indices);
    }
  }

  extract_ldpc_message_f(q->ptr, message, q->liftK);
  return 0;
}
/*!
 * Initializes the decoder to work with real valued LLRs.
 * \param[in,out] q Pointer to the decoder.
 * \return 0 on success, -1 if the inner registers cannot be created.
 */
static int init_f(srslte_ldpc_decoder_t* q)
{
  // The destructor is registered first, since it is also used on the failure path below.
  q->free = free_dec_f;

  q->ptr = create_ldpc_dec_f(q->bgN, q->bgM, q->ls, q->scaling_fctr);
  if (q->ptr != NULL) {
    q->decode_f = decode_f;
    return 0;
  }

  ERROR("Create_ldpc_dec failed\n");
  free_dec_f(q);
  return -1;
}
/*!
 * Carries out the actual destruction of the memory allocated to the decoder, 16-bit-LLR case.
 *
 * Every released pointer is reset to NULL: init_s() invokes this function when the
 * creation of the inner registers fails, and the same decoder object may be freed again
 * later on. Without the reset, that second call would free the same buffers twice.
 *
 * \param[in,out] o Pointer to the decoder (a srslte_ldpc_decoder_t); NULL is ignored.
 */
static void free_dec_s(void* o)
{
  srslte_ldpc_decoder_t* q = o;
  if (q == NULL) {
    return;
  }

  // free(NULL) is a no-op: no need to test the pointers first.
  free(q->var_indices);
  q->var_indices = NULL;

  free(q->pcm);
  q->pcm = NULL;

  delete_ldpc_dec_s(q->ptr);
  q->ptr = NULL;
}
/*!
 * Carries out the decoding with 16-bit integer-valued LLRs.
 *
 * The rate-matched codeword length is first brought into the supported range and rounded
 * up to a multiple of the lifting size; then MAX_ITERATIONS iterations are run over the
 * resulting layers and the message bits are extracted.
 *
 * \param[in]  o              Pointer to the decoder (a srslte_ldpc_decoder_t).
 * \param[in]  llrs           Input LLRs.
 * \param[out] message        Decoded message bits (liftK entries).
 * \param[in]  cdwd_rm_length Length of the rate-matched codeword.
 * \return Always 0.
 */
static int decode_s(void* o, const int16_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
{
  srslte_ldpc_decoder_t* q = o;

  // The two leading variable nodes are systematically punctured by the encoder, hence the
  // codeword cannot be longer than liftN - 2 * ls...
  const uint32_t longest = q->liftN - 2 * q->ls;
  // ...and at least bgK + 4 variable nodes (two of them punctured) are needed to cover the
  // high-rate region.
  const uint32_t shortest = (q->bgK + 2) * q->ls;

  if (cdwd_rm_length > longest) {
    cdwd_rm_length = longest;
  }
  // Too-short lengths are extended rather than rejected.
  if (cdwd_rm_length < shortest) {
    cdwd_rm_length = shortest;
  }
  // Lengths that are not a multiple of the lifting size are rounded up rather than rejected.
  if (cdwd_rm_length % q->ls != 0) {
    cdwd_rm_length = (cdwd_rm_length / q->ls + 1) * q->ls;
  }

  init_ldpc_dec_s(q->ptr, llrs, q->ls);

  // When computing the number of layers, recall that the standard always removes the
  // first two variable nodes from the final codeword.
  uint8_t n_layers = cdwd_rm_length / q->ls - q->bgK + 2;

  for (int i_iteration = 0; i_iteration < MAX_ITERATIONS; i_iteration++) {
    for (int i_layer = 0; i_layer < n_layers; i_layer++) {
      uint16_t* layer_pcm                    = q->pcm + i_layer * q->bgN;
      int8_t(*layer_var_indices)[MAX_CNCT]   = q->var_indices + i_layer;

      update_ldpc_var_to_check_s(q->ptr, i_layer);
      update_ldpc_check_to_var_s(q->ptr, i_layer, layer_pcm, layer_var_indices);
      update_ldpc_soft_bits_s(q->ptr, i_layer, layer_var_indices);
    }
  }

  extract_ldpc_message_s(q->ptr, message, q->liftK);
  return 0;
}
/*!
 * Initializes the decoder to work with 16-bit integer-valued LLRs.
 * \param[in,out] q Pointer to the decoder.
 * \return 0 on success, -1 if the inner registers cannot be created.
 */
static int init_s(srslte_ldpc_decoder_t* q)
{
  // The destructor is registered first, since it is also used on the failure path below.
  q->free = free_dec_s;

  q->ptr = create_ldpc_dec_s(q->bgN, q->bgM, q->ls, q->scaling_fctr);
  if (q->ptr != NULL) {
    q->decode_s = decode_s;
    return 0;
  }

  ERROR("Create_ldpc_dec failed\n");
  free_dec_s(q);
  return -1;
}
/*!
 * Carries out the actual destruction of the memory allocated to the decoder, 8-bit-LLR case.
 *
 * Every released pointer is reset to NULL: init_c() invokes this function when the
 * creation of the inner registers fails, and the same decoder object may be freed again
 * later on. Without the reset, that second call would free the same buffers twice.
 *
 * \param[in,out] o Pointer to the decoder (a srslte_ldpc_decoder_t); NULL is ignored.
 */
static void free_dec_c(void* o)
{
  srslte_ldpc_decoder_t* q = o;
  if (q == NULL) {
    return;
  }

  // free(NULL) is a no-op: no need to test the pointers first.
  free(q->var_indices);
  q->var_indices = NULL;

  free(q->pcm);
  q->pcm = NULL;

  delete_ldpc_dec_c(q->ptr);
  q->ptr = NULL;
}
/*!
 * Carries out the decoding with 8-bit integer-valued LLRs.
 *
 * The rate-matched codeword length is first brought into the supported range and rounded
 * up to a multiple of the lifting size; then MAX_ITERATIONS iterations are run over the
 * resulting layers and the message bits are extracted.
 *
 * \param[in]  o              Pointer to the decoder (a srslte_ldpc_decoder_t).
 * \param[in]  llrs           Input LLRs.
 * \param[out] message        Decoded message bits (liftK entries).
 * \param[in]  cdwd_rm_length Length of the rate-matched codeword.
 * \return Always 0.
 */
static int decode_c(void* o, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
{
  srslte_ldpc_decoder_t* q = o;

  // The two leading variable nodes are systematically punctured by the encoder, hence the
  // codeword cannot be longer than liftN - 2 * ls...
  const uint32_t longest = q->liftN - 2 * q->ls;
  // ...and at least bgK + 4 variable nodes (two of them punctured) are needed to cover the
  // high-rate region.
  const uint32_t shortest = (q->bgK + 2) * q->ls;

  if (cdwd_rm_length > longest) {
    cdwd_rm_length = longest;
  }
  // Too-short lengths are extended rather than rejected.
  if (cdwd_rm_length < shortest) {
    cdwd_rm_length = shortest;
  }
  // Lengths that are not a multiple of the lifting size are rounded up rather than rejected.
  if (cdwd_rm_length % q->ls != 0) {
    cdwd_rm_length = (cdwd_rm_length / q->ls + 1) * q->ls;
  }

  init_ldpc_dec_c(q->ptr, llrs, q->ls);

  // When computing the number of layers, recall that the standard always removes the
  // first two variable nodes from the final codeword.
  uint8_t n_layers = cdwd_rm_length / q->ls - q->bgK + 2;

  for (int i_iteration = 0; i_iteration < MAX_ITERATIONS; i_iteration++) {
    for (int i_layer = 0; i_layer < n_layers; i_layer++) {
      uint16_t* layer_pcm                    = q->pcm + i_layer * q->bgN;
      int8_t(*layer_var_indices)[MAX_CNCT]   = q->var_indices + i_layer;

      update_ldpc_var_to_check_c(q->ptr, i_layer);
      update_ldpc_check_to_var_c(q->ptr, i_layer, layer_pcm, layer_var_indices);
      update_ldpc_soft_bits_c(q->ptr, i_layer, layer_var_indices);
    }
  }

  extract_ldpc_message_c(q->ptr, message, q->liftK);
  return 0;
}
/*!
 * Sets up the decoder for 8-bit integer-valued LLRs.
 * The 8-bit destructor is registered first, then the inner registers are created and,
 * if that succeeds, the 8-bit decoding routine is installed.
 * \param[in,out] q A pointer to the decoder being initialized.
 * \return 0 on success, -1 if the inner registers could not be created.
 */
static int init_c(srslte_ldpc_decoder_t* q)
{
  q->free = free_dec_c;
  q->ptr  = create_ldpc_dec_c(q->bgN, q->bgM, q->ls, q->scaling_fctr);

  if (q->ptr != NULL) {
    q->decode_c = decode_c;
    return 0;
  }

  // Creation failed: report it and release the buffers already owned by the decoder.
  ERROR("Create_ldpc_dec failed\n");
  free_dec_c(q);
  return -1;
}
/*!
 * Carries out the actual destruction of the memory allocated to the decoder, 8-bit-LLR flooded case.
 *
 * Every released pointer is reset to NULL. init_c_flood() invokes this function when the creation
 * of the inner registers fails but leaves it registered in q->free, so the destructor may run a
 * second time through srslte_ldpc_decoder_free(): resetting the pointers prevents a double free.
 * \param[in,out] o A pointer to the decoder (a srslte_ldpc_decoder_t) to clean up.
 */
static void free_dec_c_flood(void* o)
{
  srslte_ldpc_decoder_t* q = o;

  // free(NULL) is a no-op, so no guards are needed.
  free(q->var_indices);
  q->var_indices = NULL;
  free(q->pcm);
  q->pcm = NULL;

  delete_ldpc_dec_c_flood(q->ptr);
  q->ptr = NULL;
}
/*!
 * Decodes a codeword of 8-bit integer-valued LLRs with flooded scheduling: in each iteration all
 * variable-to-check updates are done first, then all check-to-variable updates and finally the
 * soft bits are refreshed in a single call.
 * \param[in]  o              A pointer to the decoder (a srslte_ldpc_decoder_t).
 * \param[in]  llrs           The input LLRs.
 * \param[out] message        The decoded message.
 * \param[in]  cdwd_rm_length The rate-matched codeword length. It is clipped to the valid range and
 *                            rounded up to a multiple of the lifting size before use.
 * \return Always 0.
 */
static int decode_c_flood(void* o, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
{
  srslte_ldpc_decoder_t* dec = o;

  // Upper limit: the lifted codeword without the two variable nodes punctured by the encoder.
  if (cdwd_rm_length > dec->liftN - 2 * dec->ls) {
    cdwd_rm_length = dec->liftN - 2 * dec->ls;
  }

  // Lower limit: covering the high-rate region takes bgK + 4 variable nodes, 2 of which are
  // systematically punctured by the encoder.
  if (cdwd_rm_length < (dec->bgK + 2) * dec->ls) {
    cdwd_rm_length = (dec->bgK + 2) * dec->ls;
  }

  // Only whole nodes are processed: round up to the next multiple of the lifting size.
  if (cdwd_rm_length % dec->ls != 0) {
    cdwd_rm_length = (cdwd_rm_length / dec->ls + 1) * dec->ls;
  }

  init_ldpc_dec_c_flood(dec->ptr, llrs, dec->ls);

  // The standard always removes the first two variable nodes from the final codeword, hence the "+ 2".
  const uint8_t n_layers = cdwd_rm_length / dec->ls - dec->bgK + 2;

  // The flooded schedule runs twice as many iterations as the layer-by-layer decoders.
  for (int iter = 0; iter < 2 * MAX_ITERATIONS; iter++) {
    for (int layer = 0; layer < n_layers; layer++) {
      update_ldpc_var_to_check_c_flood(dec->ptr, layer);
    }

    for (int layer = 0; layer < n_layers; layer++) {
      // Row of the compact PCM and list of connected variable nodes for this layer.
      uint16_t* pcm_row           = dec->pcm + layer * dec->bgN;
      int8_t(*row_vars)[MAX_CNCT] = dec->var_indices + layer;

      update_ldpc_check_to_var_c_flood(dec->ptr, layer, pcm_row, row_vars);
    }

    update_ldpc_soft_bits_c_flood(dec->ptr, dec->var_indices);
  }

  extract_ldpc_message_c_flood(dec->ptr, message, dec->liftK);
  return 0;
}
/*!
 * Sets up the decoder for 8-bit integer-valued LLRs with flooded scheduling.
 * The matching destructor is registered first, then the inner registers are created and,
 * if that succeeds, the flooded 8-bit decoding routine is installed.
 * \param[in,out] q A pointer to the decoder being initialized.
 * \return 0 on success, -1 if the inner registers could not be created.
 */
static int init_c_flood(srslte_ldpc_decoder_t* q)
{
  q->free = free_dec_c_flood;
  q->ptr  = create_ldpc_dec_c_flood(q->bgN, q->bgM, q->ls, q->scaling_fctr);

  if (q->ptr != NULL) {
    q->decode_c = decode_c_flood;
    return 0;
  }

  // Creation failed: report it and release the buffers already owned by the decoder.
  ERROR("Create_ldpc_dec failed\n");
  free_dec_c_flood(q);
  return -1;
}
#ifdef LV_HAVE_AVX2
/*!
 * Carries out the actual destruction of the memory allocated to the decoder, 8-bit-LLR case
 * (AVX2 implementation).
 *
 * Every released pointer is reset to NULL. init_c_avx2() invokes this function when the creation
 * of the inner registers fails but leaves it registered in q->free, so the destructor may run a
 * second time through srslte_ldpc_decoder_free(): resetting the pointers prevents a double free.
 * \param[in,out] o A pointer to the decoder (a srslte_ldpc_decoder_t) to clean up.
 */
static void free_dec_c_avx2(void* o)
{
  srslte_ldpc_decoder_t* q = o;

  // free(NULL) is a no-op, so no guards are needed.
  free(q->var_indices);
  q->var_indices = NULL;
  free(q->pcm);
  q->pcm = NULL;

  delete_ldpc_dec_c_avx2(q->ptr);
  q->ptr = NULL;
}
/*!
 * Decodes a codeword of 8-bit integer-valued LLRs (AVX2 implementation), processing the layers
 * one after the other.
 * \param[in]  o              A pointer to the decoder (a srslte_ldpc_decoder_t).
 * \param[in]  llrs           The input LLRs.
 * \param[out] message        The decoded message.
 * \param[in]  cdwd_rm_length The rate-matched codeword length. It is clipped to the valid range and
 *                            rounded up to a multiple of the lifting size before use.
 * \return Always 0.
 */
static int decode_c_avx2(void* o, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
{
  srslte_ldpc_decoder_t* dec = o;

  // Upper limit: the lifted codeword without the two variable nodes punctured by the encoder.
  if (cdwd_rm_length > dec->liftN - 2 * dec->ls) {
    cdwd_rm_length = dec->liftN - 2 * dec->ls;
  }

  // Lower limit: covering the high-rate region takes bgK + 4 variable nodes, 2 of which are
  // systematically punctured by the encoder.
  if (cdwd_rm_length < (dec->bgK + 2) * dec->ls) {
    cdwd_rm_length = (dec->bgK + 2) * dec->ls;
  }

  // Only whole nodes are processed: round up to the next multiple of the lifting size.
  if (cdwd_rm_length % dec->ls != 0) {
    cdwd_rm_length = (cdwd_rm_length / dec->ls + 1) * dec->ls;
  }

  init_ldpc_dec_c_avx2(dec->ptr, llrs, dec->ls);

  // The standard always removes the first two variable nodes from the final codeword, hence the "+ 2".
  const uint8_t n_layers = cdwd_rm_length / dec->ls - dec->bgK + 2;

  for (int iter = 0; iter < MAX_ITERATIONS; iter++) {
    for (int layer = 0; layer < n_layers; layer++) {
      // Row of the compact PCM and list of connected variable nodes for this layer.
      uint16_t* pcm_row           = dec->pcm + layer * dec->bgN;
      int8_t(*row_vars)[MAX_CNCT] = dec->var_indices + layer;

      update_ldpc_var_to_check_c_avx2(dec->ptr, layer);
      update_ldpc_check_to_var_c_avx2(dec->ptr, layer, pcm_row, row_vars);
      update_ldpc_soft_bits_c_avx2(dec->ptr, layer, row_vars);
    }
  }

  extract_ldpc_message_c_avx2(dec->ptr, message, dec->liftK);
  return 0;
}
/*!
 * Sets up the decoder for 8-bit integer-valued LLRs (AVX2 implementation).
 * The matching destructor is registered first, then the inner registers are created and,
 * if that succeeds, the AVX2 8-bit decoding routine is installed.
 * \param[in,out] q A pointer to the decoder being initialized.
 * \return 0 on success, -1 if the inner registers could not be created.
 */
static int init_c_avx2(srslte_ldpc_decoder_t* q)
{
  q->free = free_dec_c_avx2;
  q->ptr  = create_ldpc_dec_c_avx2(q->bgN, q->bgM, q->ls, q->scaling_fctr);

  if (q->ptr != NULL) {
    q->decode_c = decode_c_avx2;
    return 0;
  }

  // Creation failed: report it and release the buffers already owned by the decoder.
  ERROR("Create_ldpc_dec failed\n");
  free_dec_c_avx2(q);
  return -1;
}
/*!
 * Carries out the actual destruction of the memory allocated to the decoder, 8-bit-LLR case
 * (AVX2 implementation, large lifting size).
 *
 * Every released pointer is reset to NULL. init_c_avx2long() invokes this function when the
 * creation of the inner registers fails but leaves it registered in q->free, so the destructor may
 * run a second time through srslte_ldpc_decoder_free(): resetting the pointers prevents a double
 * free.
 * \param[in,out] o A pointer to the decoder (a srslte_ldpc_decoder_t) to clean up.
 */
static void free_dec_c_avx2long(void* o)
{
  srslte_ldpc_decoder_t* q = o;

  // free(NULL) is a no-op, so no guards are needed.
  free(q->var_indices);
  q->var_indices = NULL;
  free(q->pcm);
  q->pcm = NULL;

  delete_ldpc_dec_c_avx2long(q->ptr);
  q->ptr = NULL;
}
/*!
 * Decodes a codeword of 8-bit integer-valued LLRs (AVX2 implementation, large lifting size),
 * processing the layers one after the other.
 * \param[in]  o              A pointer to the decoder (a srslte_ldpc_decoder_t).
 * \param[in]  llrs           The input LLRs.
 * \param[out] message        The decoded message.
 * \param[in]  cdwd_rm_length The rate-matched codeword length. It is clipped to the valid range and
 *                            rounded up to a multiple of the lifting size before use.
 * \return Always 0.
 */
static int decode_c_avx2long(void* o, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
{
  srslte_ldpc_decoder_t* dec = o;

  // Upper limit: the lifted codeword without the two variable nodes punctured by the encoder.
  if (cdwd_rm_length > dec->liftN - 2 * dec->ls) {
    cdwd_rm_length = dec->liftN - 2 * dec->ls;
  }

  // Lower limit: covering the high-rate region takes bgK + 4 variable nodes, 2 of which are
  // systematically punctured by the encoder.
  if (cdwd_rm_length < (dec->bgK + 2) * dec->ls) {
    cdwd_rm_length = (dec->bgK + 2) * dec->ls;
  }

  // Only whole nodes are processed: round up to the next multiple of the lifting size.
  if (cdwd_rm_length % dec->ls != 0) {
    cdwd_rm_length = (cdwd_rm_length / dec->ls + 1) * dec->ls;
  }

  init_ldpc_dec_c_avx2long(dec->ptr, llrs, dec->ls);

  // The standard always removes the first two variable nodes from the final codeword, hence the "+ 2".
  const uint8_t n_layers = cdwd_rm_length / dec->ls - dec->bgK + 2;

  for (int iter = 0; iter < MAX_ITERATIONS; iter++) {
    for (int layer = 0; layer < n_layers; layer++) {
      // Row of the compact PCM and list of connected variable nodes for this layer.
      uint16_t* pcm_row           = dec->pcm + layer * dec->bgN;
      int8_t(*row_vars)[MAX_CNCT] = dec->var_indices + layer;

      update_ldpc_var_to_check_c_avx2long(dec->ptr, layer);
      update_ldpc_check_to_var_c_avx2long(dec->ptr, layer, pcm_row, row_vars);
      update_ldpc_soft_bits_c_avx2long(dec->ptr, layer, row_vars);
    }
  }

  extract_ldpc_message_c_avx2long(dec->ptr, message, dec->liftK);
  return 0;
}
/*!
 * Sets up the decoder for 8-bit integer-valued LLRs (AVX2 implementation, large lifting size).
 * The matching destructor is registered first, then the inner registers are created and,
 * if that succeeds, the corresponding decoding routine is installed.
 * \param[in,out] q A pointer to the decoder being initialized.
 * \return 0 on success, -1 if the inner registers could not be created.
 */
static int init_c_avx2long(srslte_ldpc_decoder_t* q)
{
  q->free = free_dec_c_avx2long;
  q->ptr  = create_ldpc_dec_c_avx2long(q->bgN, q->bgM, q->ls, q->scaling_fctr);

  if (q->ptr != NULL) {
    q->decode_c = decode_c_avx2long;
    return 0;
  }

  // Creation failed: report it and release the buffers already owned by the decoder.
  ERROR("Create_ldpc_dec failed\n");
  free_dec_c_avx2long(q);
  return -1;
}
/*!
 * Carries out the actual destruction of the memory allocated to the decoder, 8-bit-LLR case
 * (AVX2 implementation, flooded scheduling).
 *
 * Every released pointer is reset to NULL. init_c_avx2_flood() invokes this function when the
 * creation of the inner registers fails but leaves it registered in q->free, so the destructor may
 * run a second time through srslte_ldpc_decoder_free(): resetting the pointers prevents a double
 * free.
 * \param[in,out] o A pointer to the decoder (a srslte_ldpc_decoder_t) to clean up.
 */
static void free_dec_c_avx2_flood(void* o)
{
  srslte_ldpc_decoder_t* q = o;

  // free(NULL) is a no-op, so no guards are needed.
  free(q->var_indices);
  q->var_indices = NULL;
  free(q->pcm);
  q->pcm = NULL;

  delete_ldpc_dec_c_avx2_flood(q->ptr);
  q->ptr = NULL;
}
/*!
 * Decodes a codeword of 8-bit integer-valued LLRs (AVX2 implementation) with flooded scheduling:
 * in each iteration all variable-to-check updates are done first, then all check-to-variable
 * updates and finally the soft bits are refreshed in a single call.
 * \param[in]  o              A pointer to the decoder (a srslte_ldpc_decoder_t).
 * \param[in]  llrs           The input LLRs.
 * \param[out] message        The decoded message.
 * \param[in]  cdwd_rm_length The rate-matched codeword length. It is clipped to the valid range and
 *                            rounded up to a multiple of the lifting size before use.
 * \return Always 0.
 */
static int decode_c_avx2_flood(void* o, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
{
  srslte_ldpc_decoder_t* dec = o;

  // Upper limit: the lifted codeword without the two variable nodes punctured by the encoder.
  if (cdwd_rm_length > dec->liftN - 2 * dec->ls) {
    cdwd_rm_length = dec->liftN - 2 * dec->ls;
  }

  // Lower limit: covering the high-rate region takes bgK + 4 variable nodes, 2 of which are
  // systematically punctured by the encoder.
  if (cdwd_rm_length < (dec->bgK + 2) * dec->ls) {
    cdwd_rm_length = (dec->bgK + 2) * dec->ls;
  }

  // Only whole nodes are processed: round up to the next multiple of the lifting size.
  if (cdwd_rm_length % dec->ls != 0) {
    cdwd_rm_length = (cdwd_rm_length / dec->ls + 1) * dec->ls;
  }

  init_ldpc_dec_c_avx2_flood(dec->ptr, llrs, dec->ls);

  // The standard always removes the first two variable nodes from the final codeword, hence the "+ 2".
  const uint8_t n_layers = cdwd_rm_length / dec->ls - dec->bgK + 2;

  // The flooded schedule runs twice as many iterations as the layer-by-layer decoders.
  for (int iter = 0; iter < 2 * MAX_ITERATIONS; iter++) {
    for (int layer = 0; layer < n_layers; layer++) {
      update_ldpc_var_to_check_c_avx2_flood(dec->ptr, layer);
    }

    for (int layer = 0; layer < n_layers; layer++) {
      // Row of the compact PCM and list of connected variable nodes for this layer.
      uint16_t* pcm_row           = dec->pcm + layer * dec->bgN;
      int8_t(*row_vars)[MAX_CNCT] = dec->var_indices + layer;

      update_ldpc_check_to_var_c_avx2_flood(dec->ptr, layer, pcm_row, row_vars);
    }

    update_ldpc_soft_bits_c_avx2_flood(dec->ptr, dec->var_indices);
  }

  extract_ldpc_message_c_avx2_flood(dec->ptr, message, dec->liftK);
  return 0;
}
/*!
 * Sets up the decoder for 8-bit integer-valued LLRs (AVX2 implementation, flooded scheduling).
 * The matching destructor is registered first, then the inner registers are created and,
 * if that succeeds, the corresponding decoding routine is installed.
 * \param[in,out] q A pointer to the decoder being initialized.
 * \return 0 on success, -1 if the inner registers could not be created.
 */
static int init_c_avx2_flood(srslte_ldpc_decoder_t* q)
{
  q->free = free_dec_c_avx2_flood;
  q->ptr  = create_ldpc_dec_c_avx2_flood(q->bgN, q->bgM, q->ls, q->scaling_fctr);

  if (q->ptr != NULL) {
    q->decode_c = decode_c_avx2_flood;
    return 0;
  }

  // Creation failed: report it and release the buffers already owned by the decoder.
  ERROR("Create_ldpc_dec failed\n");
  free_dec_c_avx2_flood(q);
  return -1;
}
/*!
 * Carries out the actual destruction of the memory allocated to the decoder, 8-bit-LLR case
 * (flooded scheduling, AVX2 implementation, large lifting size).
 *
 * Every released pointer is reset to NULL. This function stays registered in q->free even if an
 * initializer has already run a destructor after a failed creation of the inner registers, so it
 * may be reached with already-released buffers through srslte_ldpc_decoder_free(): resetting the
 * pointers prevents a double free.
 * \param[in,out] o A pointer to the decoder (a srslte_ldpc_decoder_t) to clean up.
 */
static void free_dec_c_avx2long_flood(void* o)
{
  srslte_ldpc_decoder_t* q = o;

  // free(NULL) is a no-op, so no guards are needed.
  free(q->var_indices);
  q->var_indices = NULL;
  free(q->pcm);
  q->pcm = NULL;

  delete_ldpc_dec_c_avx2long_flood(q->ptr);
  q->ptr = NULL;
}
/*!
 * Decodes a codeword of 8-bit integer-valued LLRs (AVX2 implementation, large lifting size) with
 * flooded scheduling: in each iteration all variable-to-check updates are done first, then all
 * check-to-variable updates and finally the soft bits are refreshed in a single call.
 * \param[in]  o              A pointer to the decoder (a srslte_ldpc_decoder_t).
 * \param[in]  llrs           The input LLRs.
 * \param[out] message        The decoded message.
 * \param[in]  cdwd_rm_length The rate-matched codeword length. It is clipped to the valid range and
 *                            rounded up to a multiple of the lifting size before use.
 * \return Always 0.
 */
static int decode_c_avx2long_flood(void* o, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
{
  srslte_ldpc_decoder_t* dec = o;

  // Upper limit: the lifted codeword without the two variable nodes punctured by the encoder.
  if (cdwd_rm_length > dec->liftN - 2 * dec->ls) {
    cdwd_rm_length = dec->liftN - 2 * dec->ls;
  }

  // Lower limit: covering the high-rate region takes bgK + 4 variable nodes, 2 of which are
  // systematically punctured by the encoder.
  if (cdwd_rm_length < (dec->bgK + 2) * dec->ls) {
    cdwd_rm_length = (dec->bgK + 2) * dec->ls;
  }

  // Only whole nodes are processed: round up to the next multiple of the lifting size.
  if (cdwd_rm_length % dec->ls != 0) {
    cdwd_rm_length = (cdwd_rm_length / dec->ls + 1) * dec->ls;
  }

  init_ldpc_dec_c_avx2long_flood(dec->ptr, llrs, dec->ls);

  // The standard always removes the first two variable nodes from the final codeword, hence the "+ 2".
  const uint8_t n_layers = cdwd_rm_length / dec->ls - dec->bgK + 2;

  // The flooded schedule runs twice as many iterations as the layer-by-layer decoders.
  for (int iter = 0; iter < 2 * MAX_ITERATIONS; iter++) {
    for (int layer = 0; layer < n_layers; layer++) {
      update_ldpc_var_to_check_c_avx2long_flood(dec->ptr, layer);
    }

    for (int layer = 0; layer < n_layers; layer++) {
      // Row of the compact PCM and list of connected variable nodes for this layer.
      uint16_t* pcm_row           = dec->pcm + layer * dec->bgN;
      int8_t(*row_vars)[MAX_CNCT] = dec->var_indices + layer;

      update_ldpc_check_to_var_c_avx2long_flood(dec->ptr, layer, pcm_row, row_vars);
    }

    update_ldpc_soft_bits_c_avx2long_flood(dec->ptr, dec->var_indices);
  }

  extract_ldpc_message_c_avx2long_flood(dec->ptr, message, dec->liftK);
  return 0;
}
/*!
 * Initializes the decoder to work with 8-bit integer-valued LLRs
 * (flooded scheduling, AVX2 implementation, large lifting size).
 * \param[in,out] q A pointer to the decoder being initialized.
 * \return 0 on success, -1 if the inner registers could not be created.
 */
static int init_c_avx2long_flood(srslte_ldpc_decoder_t* q)
{
  q->free = free_dec_c_avx2long_flood;
  if ((q->ptr = create_ldpc_dec_c_avx2long_flood(q->bgN, q->bgM, q->ls, q->scaling_fctr)) == NULL) {
    ERROR("Create_ldpc_dec failed\n");
    // Use the destructor that matches this decoder flavour (the one registered in q->free),
    // not the one of the non-flooded large-lifting-size decoder.
    free_dec_c_avx2long_flood(q);
    return -1;
  }
  q->decode_c = decode_c_avx2long_flood;
  return 0;
}
#endif // LV_HAVE_AVX2
/*!
 * Initializes an LDPC decoder for the given implementation, base graph and lifting size.
 *
 * The function validates its arguments, fills in the base-graph and lifted dimensions, allocates
 * and builds the compact parity-check matrix and finally hands over to the type-specific
 * initializer. On every failure detected here, the buffers allocated by this function are
 * released and the corresponding pointers are reset to NULL.
 * \param[out] q            A pointer to the decoder to initialize.
 * \param[in]  type         The decoder implementation to use.
 * \param[in]  bg           The base graph (BG1 or BG2).
 * \param[in]  ls           The lifting size.
 * \param[in]  scaling_fctr The scaling factor of the min-sum algorithm, larger than 0 and not larger than 1.
 * \return 0 on success, -1 otherwise.
 */
int srslte_ldpc_decoder_init(srslte_ldpc_decoder_t*     q,
                             srslte_ldpc_decoder_type_t type,
                             srslte_basegraph_t         bg,
                             uint16_t                   ls,
                             float                      scaling_fctr)
{
  int ls_index = get_ls_index(ls);

  if (ls_index == VOID_LIFTSIZE) {
    ERROR("Invalid lifting size %d\n", ls);
    return -1;
  }

  switch (bg) {
    case BG1:
      q->bgN = BG1Nfull;
      q->bgM = BG1M;
      break;
    case BG2:
      q->bgN = BG2Nfull;
      q->bgM = BG2M;
      break;
    default:
      ERROR("Base Graph BG%d does not exist\n", bg + 1);
      return -1;
  }

  // Validate the scaling factor before allocating anything. This is not an errno-style failure,
  // hence ERROR() rather than perror().
  if ((scaling_fctr <= 0) || (scaling_fctr > 1)) {
    ERROR("The scaling factor of the min-sum algorithm should be larger than 0 and not larger than 1.\n");
    return -1;
  }

  q->bg           = bg;
  q->bgK          = q->bgN - q->bgM;
  q->ls           = ls;
  q->liftK        = ls * q->bgK;
  q->liftM        = ls * q->bgM;
  q->liftN        = ls * q->bgN;
  q->scaling_fctr = scaling_fctr;

  q->var_indices = NULL;
  q->pcm         = srslte_vec_malloc(q->bgM * q->bgN * sizeof(uint16_t));
  if (!q->pcm) {
    perror("malloc");
    return -1;
  }

  q->var_indices = srslte_vec_malloc(q->bgM * sizeof(int8_t[MAX_CNCT]));
  if (!q->var_indices) {
    perror("malloc");
    goto clean_exit;
  }

  if (create_compact_pcm(q->pcm, q->var_indices, q->bg, q->ls) != 0) {
    perror("Create PCM");
    goto clean_exit;
  }

  // From here on, the type-specific initializers take care of the buffers if they fail.
  switch (type) {
    case SRSLTE_LDPC_DECODER_F:
      return init_f(q);
    case SRSLTE_LDPC_DECODER_S:
      return init_s(q);
    case SRSLTE_LDPC_DECODER_C:
      return init_c(q);
    case SRSLTE_LDPC_DECODER_C_FLOOD:
      return init_c_flood(q);
#ifdef LV_HAVE_AVX2
    case SRSLTE_LDPC_DECODER_C_AVX2:
      if (ls <= SRSLTE_AVX2_B_SIZE) {
        return init_c_avx2(q);
      } else {
        return init_c_avx2long(q);
      }
    case SRSLTE_LDPC_DECODER_C_AVX2_FLOOD:
      if (ls <= SRSLTE_AVX2_B_SIZE) {
        return init_c_avx2_flood(q);
      } else {
        return init_c_avx2long_flood(q);
      }
#endif // LV_HAVE_AVX2
    default:
      // No initializer (and hence no destructor) was selected: the buffers must be released here.
      ERROR("Unknown decoder.\n");
      break;
  }

clean_exit:
  free(q->var_indices);
  q->var_indices = NULL;
  free(q->pcm);
  q->pcm = NULL;
  return -1;
}
/*!
 * Releases a decoder: runs the registered type-specific destructor, if there is one, and then
 * clears the whole decoder structure.
 * \param[in,out] q A pointer to the decoder to release.
 */
void srslte_ldpc_decoder_free(srslte_ldpc_decoder_t* q)
{
  if (q->free != NULL) {
    q->free(q);
  }

  // Wipe every field, including the destructor pointer itself.
  bzero(q, sizeof(*q));
}
/*!
 * Decodes floating-point LLRs by forwarding the call to the routine stored in q->decode_f.
 * \param[in]  q              A pointer to the decoder.
 * \param[in]  llrs           The input LLRs.
 * \param[out] message        The decoded message.
 * \param[in]  cdwd_rm_length The rate-matched codeword length.
 * \return The value returned by the installed decoding routine.
 */
int srslte_ldpc_decoder_decode_f(srslte_ldpc_decoder_t* q, const float* llrs, uint8_t* message, uint32_t cdwd_rm_length)
{
  const int status = q->decode_f(q, llrs, message, cdwd_rm_length);
  return status;
}
/*!
 * Decodes 16-bit integer-valued LLRs by forwarding the call to the routine stored in q->decode_s.
 * \param[in]  q              A pointer to the decoder.
 * \param[in]  llrs           The input LLRs.
 * \param[out] message        The decoded message.
 * \param[in]  cdwd_rm_length The rate-matched codeword length.
 * \return The value returned by the installed decoding routine.
 */
int srslte_ldpc_decoder_decode_s(srslte_ldpc_decoder_t* q,
                                 const int16_t*         llrs,
                                 uint8_t*               message,
                                 uint32_t               cdwd_rm_length)
{
  const int status = q->decode_s(q, llrs, message, cdwd_rm_length);
  return status;
}
/*!
 * Decodes 8-bit integer-valued LLRs by forwarding the call to the routine stored in q->decode_c.
 * \param[in]  q              A pointer to the decoder.
 * \param[in]  llrs           The input LLRs.
 * \param[out] message        The decoded message.
 * \param[in]  cdwd_rm_length The rate-matched codeword length.
 * \return The value returned by the installed decoding routine.
 */
int srslte_ldpc_decoder_decode_c(srslte_ldpc_decoder_t* q,
                                 const int8_t*          llrs,
                                 uint8_t*               message,
                                 uint32_t               cdwd_rm_length)
{
  const int status = q->decode_c(q, llrs, message, cdwd_rm_length);
  return status;
}

@ -0,0 +1,211 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_enc_all.h
* \brief Declaration of the LDPC encoder inner functions.
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef SRSLTE_LDPCENC_ALL_H
#define SRSLTE_LDPCENC_ALL_H
#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
/*! Computes the product between the first (K - 2) columns of the PCM and the systematic bits.
* \param[in,out] q A pointer to an encoder.
* \param[in] input The message to encode.
*/
void preprocess_systematic_bits(srslte_ldpc_encoder_t* q, const uint8_t* input);
/*! Computes the high-rate parity bits for BG1 and ls_index in {0, 1, 2, 3, 4, 5, 7}.
* \param[in] o A pointer to an encoder.
* \param[out] output The resulting codeword.
*/
void encode_high_rate_case1(void* o, uint8_t* output);
/*! Computes the high-rate parity bits for BG1 and ls_index in {6}.
* \param[in] o A pointer to an encoder.
* \param[out] output The resulting codeword.
*/
void encode_high_rate_case2(void* o, uint8_t* output);
/*! Computes the high-rate parity bits for BG2 and ls_index in {0, 1, 2, 4, 5, 6}.
* \param[in] o A pointer to an encoder.
* \param[out] output The resulting codeword.
*/
void encode_high_rate_case3(void* o, uint8_t* output);
/*! Computes the high-rate parity bits for BG2 and ls_index in {3, 7}.
* \param[in] o A pointer to an encoder.
* \param[out] output The resulting codeword.
*/
void encode_high_rate_case4(void* o, uint8_t* output);
/*! Computes the extended-region parity bits.
* \param[in] q A pointer to an encoder.
* \param[out] output The resulting codeword.
* \param[in] n_layers The number of layers to process (when doing rate matching not all
* layers are needed).
*/
void encode_ext_region(srslte_ldpc_encoder_t* q, uint8_t* output, uint8_t n_layers);
/*!
* Creates the inner registers required by the optimized LDPC encoder (LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] q A pointer to an encoder.
* \return A pointer to the newly created structure of registers.
*/
void* create_ldpc_enc_avx2(srslte_ldpc_encoder_t* q);
/*!
* Deletes the inner registers of an optimized LDPC encoder (LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p A pointer to the register structure.
*/
void delete_ldpc_enc_avx2(void* p);
/*!
* Loads the message in the optimized encoder registers (LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p The register structure.
* \param[in] input The message to encode.
* \param[in] msg_len Number of variable nodes in one message.
* \param[in] cdwd_len Number of variable nodes in one codeword.
* \param[in] ls The lifting size.
* \return Error code: 0 if correct, -1 otherwise.
*/
int load_avx2(void* p, const uint8_t* input, uint8_t msg_len, uint8_t cdwd_len, uint16_t ls);
/*! Extracts the final codeword from the optimized encoder registers (LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p The register structure.
* \param[out] output The output codeword.
* \param[in] cdwd_len The number of variable nodes (after rate-matching, if enabled).
* \param[in] ls The lifting size.
* \return Error code: 0 if correct, -1 otherwise.
*/
int return_codeword_avx2(void* p, uint8_t* output, uint8_t cdwd_len, uint16_t ls);
/*! Computes the product between the first (K - 2) columns of the PCM and the
* systematic bits (SIMD-optimized version, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] q A pointer to an encoder.
*/
void preprocess_systematic_bits_avx2(srslte_ldpc_encoder_t* q);
/*! Computes the high-rate parity bits for BG1 and ls_index in {0, 1, 2, 3, 4, 5, 7}
* (SIMD-optimized version, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] o A pointer to an encoder.
*/
void encode_high_rate_case1_avx2(void* o);
/*! Computes the high-rate parity bits for BG1 and ls_index in {6} (SIMD-optimized version, LS <= \ref
* SRSLTE_AVX2_B_SIZE).
* \param[in,out] o A pointer to an encoder.
*/
void encode_high_rate_case2_avx2(void* o);
/*! Computes the high-rate parity bits for BG2 and ls_index in {0, 1, 2, 4, 5, 6} (SIMD-optimized version, LS <= \ref
* SRSLTE_AVX2_B_SIZE).
* \param[in,out] o A pointer to an encoder.
*/
void encode_high_rate_case3_avx2(void* o);
/*! Computes the high-rate parity bits for BG2 and ls_index in {3, 7} (SIMD-optimized version, LS <= \ref
* SRSLTE_AVX2_B_SIZE).
* \param[in,out] o A pointer to an encoder.
*/
void encode_high_rate_case4_avx2(void* o);
/*! Computes the extended-region parity bits (SIMD-optimized version, LS <= \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] q A pointer to an encoder.
* \param[in] n_layers The number of layers to process (when doing rate matching not all
* layers are needed).
*/
void encode_ext_region_avx2(srslte_ldpc_encoder_t* q, uint8_t n_layers);
/*!
* Creates the inner registers required by the optimized LDPC encoder (for LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] q A pointer to an encoder.
* \return A pointer to the newly created structure of registers.
*/
void* create_ldpc_enc_avx2long(srslte_ldpc_encoder_t* q);
/*!
* Deletes the inner registers of an optimized LDPC encoder (LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p A pointer to the register structure.
*/
void delete_ldpc_enc_avx2long(void* p);
/*!
* Loads the message in the optimized encoder registers (LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p The register structure.
* \param[in] input The message to encode.
* \param[in] msg_len Number of variable nodes in one message.
* \param[in] cdwd_len Number of variable nodes in one codeword.
* \param[in] ls The lifting size.
* \return Error code: 0 if correct, -1 otherwise.
*/
int load_avx2long(void* p, const uint8_t* input, uint8_t msg_len, uint8_t cdwd_len, uint16_t ls);
/*! Extracts the final codeword from the optimized encoder registers (LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in] p The register structure.
* \param[out] output The output codeword.
* \param[in] cdwd_len The number of variable nodes (after rate-matching, if enabled).
* \param[in] ls The lifting size.
* \return Error code: 0 if correct, -1 otherwise.
*/
int return_codeword_avx2long(void* p, uint8_t* output, uint8_t cdwd_len, uint16_t ls);
/*! Computes the product between the first (K - 2) columns of the PCM and the
* systematic bits (SIMD-optimized version, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] q A pointer to an encoder.
*/
void preprocess_systematic_bits_avx2long(srslte_ldpc_encoder_t* q);
/*! Computes the high-rate parity bits for BG1 and ls_index in {0, 1, 2, 3, 4, 5, 7}
* (SIMD-optimized version, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] o A pointer to an encoder.
*/
void encode_high_rate_case1_avx2long(void* o);
/*! Computes the high-rate parity bits for BG1 and ls_index in {6} (SIMD-optimized version, LS > \ref
* SRSLTE_AVX2_B_SIZE).
* \param[in,out] o A pointer to an encoder.
*/
void encode_high_rate_case2_avx2long(void* o);
/*! Computes the high-rate parity bits for BG2 and ls_index in {0, 1, 2, 4, 5, 6} (SIMD-optimized version, LS > \ref
* SRSLTE_AVX2_B_SIZE).
* \param[in,out] o A pointer to an encoder.
*/
void encode_high_rate_case3_avx2long(void* o);
/*! Computes the high-rate parity bits for BG2 and ls_index in {3, 7} (SIMD-optimized version, LS > \ref
* SRSLTE_AVX2_B_SIZE).
* \param[in,out] o A pointer to an encoder.
*/
void encode_high_rate_case4_avx2long(void* o);
/*! Computes the extended-region parity bits (SIMD-optimized version, LS > \ref SRSLTE_AVX2_B_SIZE).
* \param[in,out] q A pointer to an encoder.
* \param[in] n_layers The number of layers to process (when doing rate matching not all
* layers are needed).
*/
void encode_ext_region_avx2long(srslte_ldpc_encoder_t* q, uint8_t n_layers);
#endif // SRSLTE_LDPCENC_ALL_H

@ -0,0 +1,442 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_enc_avx2.c
* \brief Definition of the LDPC encoder inner functions (AVX2 version, small lifting size).
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <stdint.h>
#include "../utils_avx2.h"
#include "ldpc_enc_all.h"
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
#include "srslte/phy/utils/debug.h"
#include "srslte/phy/utils/vector.h"
#ifdef LV_HAVE_AVX2
#include <immintrin.h>
#include "ldpc_avx2_consts.h"
/*!
 * \brief Represents a node of the base factor graph.
 *
 * The two members overlay the same storage: the lifted nodes can be accessed one byte at a
 * time through \b c, or all together as a single 256-bit vector through \b v.
 */
typedef union bg_node_t {
uint8_t c[SRSLTE_AVX2_B_SIZE]; /*!< Each base node may contain up to \ref SRSLTE_AVX2_B_SIZE lifted nodes (one byte each). */
__m256i v; /*!< All the lifted nodes of the current base node as a 256-bit line. */
} bg_node_t;
/*!
 * \brief Inner registers for the optimized LDPC encoder.
 *
 * Both arrays are allocated by create_ldpc_enc_avx2() and released by delete_ldpc_enc_avx2().
 */
struct ldpc_enc_avx2 {
bg_node_t* codeword; /*!< \brief Contains the entire codeword, before puncturing (one entry per base-graph variable node). */
__m256i* aux; /*!< \brief Auxiliary register (one entry per base-graph check node). */
};
/*!
* Rotate the content of an __m256i vector (first input) towards the left by
* the number of chars specified by the second input (i.e., the \b imm * 8 least
* significant bits become the \b imm * 8 most significant bits).
* \param[in] a Vector to circularly shift.
* \param[in] imm The shift order in chars.
* \return The shifted vector.
*/
static __m256i _mm256_rotatelli_si256(__m256i a, int imm);
/*!
* Rotate the content of an __m256i vector (first input) towards the right by
* the number of chars specified by the second input (i.e., the \b imm * 8 most
* significant bits become the \b imm * 8 least significant bits).
* \param[in] a Vector to circularly shift.
* \param[in] imm The shift order in chars.
* \return The shifted vector.
*/
static __m256i _mm256_rotaterli_si256(__m256i a, int imm);
/*!
* Rotate the contents of a node towards the left by \b imm chars, that is the
* \b imm * 8 most significant bits become the least significant ones.
* \param[in] a The node to rotate.
* \param[in] imm The order of the rotation in number of chars.
* \param[in] ls The size of the node (lifting size).
* \return The rotated node.
*/
static __m256i rotate_node_left(__m256i a, int imm, uint16_t ls);
/*!
* Rotate the contents of a node towards the right by \b imm chars, that is the
* \b imm * 8 most significant bits become the least significant ones.
* \param[in] a The node to rotate.
* \param[in] imm The order of the rotation in number of chars.
* \param[in] ls The size of the node (lifting size).
* \return The rotated node.
*/
static __m256i rotate_node_right(__m256i a, int imm, uint16_t ls);
/*!
 * Allocates the register structure of the optimized encoder together with its two arrays:
 * one codeword node per base-graph variable node and one auxiliary vector per check node.
 * Anything already allocated is released if a later allocation fails.
 * \param[in] q A pointer to an encoder (only bgN and bgM are read).
 * \return A pointer to the new structure, or NULL if any allocation failed.
 */
void* create_ldpc_enc_avx2(srslte_ldpc_encoder_t* q)
{
  struct ldpc_enc_avx2* regs = malloc(sizeof(*regs));
  if (regs == NULL) {
    return NULL;
  }

  regs->codeword = srslte_vec_malloc(q->bgN * sizeof(bg_node_t));
  if (regs->codeword == NULL) {
    free(regs);
    return NULL;
  }

  regs->aux = srslte_vec_malloc(q->bgM * sizeof(__m256i));
  if (regs->aux == NULL) {
    free(regs->codeword);
    free(regs);
    return NULL;
  }

  return regs;
}
/*!
 * Releases the register structure of the optimized encoder and both of its arrays.
 * \param[in] p A pointer to the register structure; NULL is accepted and ignored.
 */
void delete_ldpc_enc_avx2(void* p)
{
  struct ldpc_enc_avx2* regs = p;

  if (regs == NULL) {
    return;
  }

  free(regs->aux);
  free(regs->codeword);
  free(regs);
}
/*!
 * Loads the message into the codeword registers of the optimized encoder.
 *
 * Each of the first \b msg_len nodes receives \b ls input bytes and is zero-padded up to
 * \ref SRSLTE_AVX2_B_SIZE bytes; the remaining nodes, up to \b cdwd_len, are cleared.
 * \param[in,out] p        The register structure.
 * \param[in]     input    The message to encode (msg_len * ls bytes are read).
 * \param[in]     msg_len  Number of nodes in one message.
 * \param[in]     cdwd_len Number of nodes in one codeword (not smaller than msg_len).
 * \param[in]     ls       The lifting size (not larger than \ref SRSLTE_AVX2_B_SIZE).
 * \return Error code: 0 if correct, -1 if \b p is NULL or the sizes are inconsistent.
 */
int load_avx2(void* p, const uint8_t* input, const uint8_t msg_len, const uint8_t cdwd_len, const uint16_t ls)
{
  struct ldpc_enc_avx2* vp = p;

  if (p == NULL) {
    return -1;
  }

  // A node stores at most SRSLTE_AVX2_B_SIZE bytes and the message cannot exceed the codeword:
  // anything else would write past a node or turn the padding lengths below negative.
  if ((ls > SRSLTE_AVX2_B_SIZE) || (msg_len > cdwd_len)) {
    return -1;
  }

  for (int i = 0; i < msg_len; i++) {
    int k = 0;
    for (; k < ls; k++) {
      vp->codeword[i].c[k] = input[i * ls + k];
    }
    // zero-pad the unused tail of the node
    bzero(&(vp->codeword[i].c[k]), (SRSLTE_AVX2_B_SIZE - k) * sizeof(uint8_t));
  }

  // clear the nodes that will hold the parity bits
  bzero(vp->codeword + msg_len, (cdwd_len - msg_len) * sizeof(__m256i));

  return 0;
}
/*!
 * Copies the codeword out of the optimized encoder registers, skipping the first two nodes.
 * For every remaining node, the first \b ls bytes are written contiguously to the output.
 * \param[in]  p        The register structure.
 * \param[out] output   The output codeword ((cdwd_len - 2) * ls bytes are written).
 * \param[in]  cdwd_len The number of nodes, including the two skipped ones.
 * \param[in]  ls       The lifting size.
 * \return Error code: 0 if correct, -1 if \b p is NULL.
 */
int return_codeword_avx2(void* p, uint8_t* output, const uint8_t cdwd_len, const uint16_t ls)
{
  struct ldpc_enc_avx2* regs = p;

  if (p == NULL) {
    return -1;
  }

  // Node 2 is the first one that is copied to the output.
  for (int node = 2; node < cdwd_len; node++) {
    uint8_t* dst = output + (node - 2) * ls;
    for (int bit = 0; bit < ls; bit++) {
      dst[bit] = regs->codeword[node].c[bit];
    }
  }

  return 0;
}
/*!
 * Computes the extended-region parity nodes, i.e. those of layers 4 to n_layers - 1.
 * Each one starts from the systematic contribution already stored in the auxiliary register and
 * adds (XOR) the rotated versions of the four high-rate parity nodes connected to the layer.
 * \param[in,out] q        A pointer to an encoder.
 * \param[in]     n_layers The number of layers to process.
 */
void encode_ext_region_avx2(srslte_ldpc_encoder_t* q, uint8_t n_layers)
{
  struct ldpc_enc_avx2* regs = q->ptr;

  // In case of puncturing or IR-HARQ, one could focus on specific check nodes instead of
  // processing all of them.
  for (int layer = 4; layer < n_layers; layer++) {
    // PCM entries linking this layer to the four high-rate parity nodes
    const uint16_t* hr_shifts = q->pcm + q->bgK + layer * q->bgN;

    // the systematic part has already been computed
    __m256i parity = regs->aux[layer];

    for (int k = 0; k < 4; k++) {
      const uint16_t shift = hr_shifts[k];
      if (shift != NO_CNCT) {
        const __m256i rotated = rotate_node_right(regs->codeword[q->bgK + k].v, shift, q->ls);
        parity                = _mm256_xor_si256(parity, rotated);
      }
    }

    regs->codeword[q->bgK + layer].v = parity;
  }
}
/*!
 * Accumulates in the auxiliary register, for every check node, the XOR of the rotated
 * systematic nodes connected to it.
 * \param[in,out] q A pointer to an encoder whose codeword registers already hold the message.
 */
void preprocess_systematic_bits_avx2(srslte_ldpc_encoder_t* q)
{
  struct ldpc_enc_avx2* regs = q->ptr;

  const int n_vars   = q->bgN;
  const int n_sys    = q->bgK;
  const int n_checks = q->bgM;
  const int lift     = q->ls;

  bzero(regs->aux, n_checks * sizeof(__m256i));

  // For each systematic node...
  for (int var = 0; var < n_sys; var++) {
    const __m256i chunk = regs->codeword[var].v;

    // ...visit all check nodes.
    // NB: if looking for performance, this loop can be restricted to the high-rate region of the
    // PCM (checks 0 to 3) and to the check nodes that result in a transmitted coded bit after
    // puncturing or IR-HARQ (see Deliverable D1 Section 3.4).
    for (int check = 0; check < n_checks; check++) {
      const uint16_t shift = q->pcm[var + check * n_vars];

      // nothing to add when the variable node and the check node are not connected
      if (shift != NO_CNCT) {
        __m256i rotated = rotate_node_right(chunk, shift, lift);
        // mask with one_epi8 (presumably keeps bit 0 of every byte - confirm in ldpc_avx2_consts.h)
        rotated          = _mm256_and_si256(rotated, one_epi8);
        regs->aux[check] = _mm256_xor_si256(regs->aux[check], rotated);
      }
    }
  }
}
void encode_high_rate_case1_avx2(void* o)
{
srslte_ldpc_encoder_t* q = o;
struct ldpc_enc_avx2* vp = q->ptr;
int ls = q->ls;
int skip0 = q->bgK;
int skip1 = q->bgK + 1;
int skip2 = q->bgK + 2;
int skip3 = q->bgK + 3;
// first chunk of parity bits
vp->codeword[skip0].v = _mm256_xor_si256(vp->aux[0], vp->aux[1]);
vp->codeword[skip0].v = _mm256_xor_si256(vp->codeword[skip0].v, vp->aux[2]);
vp->codeword[skip0].v = _mm256_xor_si256(vp->codeword[skip0].v, vp->aux[3]);
__m256i tmp_epi8 = rotate_node_right(vp->codeword[skip0].v, 1, ls);
// second chunk of parity bits
vp->codeword[skip1].v = _mm256_xor_si256(vp->aux[0], tmp_epi8);
// fourth chunk of parity bits
vp->codeword[skip3].v = _mm256_xor_si256(vp->aux[3], tmp_epi8);
// third chunk of parity bits
vp->codeword[skip2].v = _mm256_xor_si256(vp->aux[2], vp->codeword[skip3].v);
}
void encode_high_rate_case2_avx2(void* o)
{
srslte_ldpc_encoder_t* q = o;
struct ldpc_enc_avx2* vp = q->ptr;
int ls = q->ls;
int skip0 = q->bgK;
int skip1 = q->bgK + 1;
int skip2 = q->bgK + 2;
int skip3 = q->bgK + 3;
// first chunk of parity bits
__m256i tmp_epi8 = _mm256_xor_si256(vp->aux[0], vp->aux[1]);
tmp_epi8 = _mm256_xor_si256(tmp_epi8, vp->aux[2]);
tmp_epi8 = _mm256_xor_si256(tmp_epi8, vp->aux[3]);
vp->codeword[skip0].v = rotate_node_left(tmp_epi8, 105 % ls, ls);
// second chunk of parity bits
vp->codeword[skip1].v = _mm256_xor_si256(vp->aux[0], vp->codeword[skip0].v);
// fourth chunk of parity bits
vp->codeword[skip3].v = _mm256_xor_si256(vp->aux[3], vp->codeword[skip0].v);
// third chunk of parity bits
vp->codeword[skip2].v = _mm256_xor_si256(vp->aux[2], vp->codeword[skip3].v);
}
void encode_high_rate_case3_avx2(void* o)
{
srslte_ldpc_encoder_t* q = o;
struct ldpc_enc_avx2* vp = q->ptr;
int ls = q->ls;
int skip0 = q->bgK;
int skip1 = q->bgK + 1;
int skip2 = q->bgK + 2;
int skip3 = q->bgK + 3;
// first chunk of parity bits
__m256i tmp_epi8 = _mm256_xor_si256(vp->aux[0], vp->aux[1]);
tmp_epi8 = _mm256_xor_si256(tmp_epi8, vp->aux[2]);
tmp_epi8 = _mm256_xor_si256(tmp_epi8, vp->aux[3]);
vp->codeword[skip0].v = rotate_node_left(tmp_epi8, 1, ls);
// second chunk of parity bits
vp->codeword[skip1].v = _mm256_xor_si256(vp->aux[0], vp->codeword[skip0].v);
// third chunk of parity bits
vp->codeword[skip2].v = _mm256_xor_si256(vp->aux[1], vp->codeword[skip1].v);
// fourth chunk of parity bits
vp->codeword[skip3].v = _mm256_xor_si256(vp->aux[3], vp->codeword[skip0].v);
}
void encode_high_rate_case4_avx2(void* o)
{
srslte_ldpc_encoder_t* q = o;
struct ldpc_enc_avx2* vp = q->ptr;
int ls = q->ls;
int skip0 = q->bgK;
int skip1 = q->bgK + 1;
int skip2 = q->bgK + 2;
int skip3 = q->bgK + 3;
// first chunk of parity bits
vp->codeword[skip0].v = _mm256_xor_si256(vp->aux[0], vp->aux[1]);
vp->codeword[skip0].v = _mm256_xor_si256(vp->codeword[skip0].v, vp->aux[2]);
vp->codeword[skip0].v = _mm256_xor_si256(vp->codeword[skip0].v, vp->aux[3]);
__m256i tmp_epi8 = rotate_node_right(vp->codeword[skip0].v, 1, ls);
// second chunk of parity bits
vp->codeword[skip1].v = _mm256_xor_si256(vp->aux[0], tmp_epi8);
// third chunk of parity bits
vp->codeword[skip2].v = _mm256_xor_si256(vp->aux[1], vp->codeword[skip1].v);
// fourth chunk of parity bits
vp->codeword[skip3].v = _mm256_xor_si256(vp->aux[3], tmp_epi8);
}
/*!
 * Rotates the bytes of a 256-bit register towards the left (i.e., towards the
 * most significant positions) by \b imm bytes, wrapping around the register.
 * \param[in] a   The register to rotate.
 * \param[in] imm The rotation order in bytes. It must be between 0 and 31, since
 *                imm / 8 is used to index a four-entry table.
 * \return The rotated register.
 */
static __m256i _mm256_rotatelli_si256(__m256i a, int imm)
{
  // a rotated left by 0, 1, 2 and 3 whole 64-bit blocks (the permutation selector
  // must be a compile-time constant, hence all four versions are precomputed)
  const __m256i by_blocks[4] = {
      a,                                // blocks 0 - 1 - 2 - 3
      _mm256_permute4x64_epi64(a, 147), // 3 - 0 - 1 - 2
      _mm256_permute4x64_epi64(a, 78),  // 2 - 3 - 0 - 1
      _mm256_permute4x64_epi64(a, 57),  // 1 - 2 - 3 - 0
  };

  // split the rotation into whole blocks plus a remainder of 0..7 bytes
  const int n_blocks = imm / 8;
  const int n_bytes  = imm % 8;
  // block rotation that provides the bytes carried over across block boundaries
  const int carry_idx = (n_blocks + 1) % 4;

  // shift left each block by the remaining bytes...
  const __m256i shifted = _mm256_slli_epi64(by_blocks[n_blocks], n_bytes * 8);
  // ...and recover the bytes pushed out of each block from the next block rotation
  // (when n_bytes is 0 the shift count is 64 and this term is all zeros)
  const __m256i carried = _mm256_srli_epi64(by_blocks[carry_idx], (8 - n_bytes) * 8);

  return _mm256_xor_si256(shifted, carried);
}
/*!
 * Rotates the bytes of a 256-bit register towards the right (i.e., towards the
 * least significant positions) by \b imm bytes, wrapping around the register.
 * \param[in] a   The register to rotate.
 * \param[in] imm The rotation order in bytes. It must be between 0 and 31, since
 *                imm / 8 is used to index a four-entry table.
 * \return The rotated register.
 */
static __m256i _mm256_rotaterli_si256(__m256i a, int imm)
{
  // a rotated right by 0, 1, 2 and 3 whole 64-bit blocks (the permutation selector
  // must be a compile-time constant, hence all four versions are precomputed)
  const __m256i by_blocks[4] = {
      a,                                // blocks 0 - 1 - 2 - 3
      _mm256_permute4x64_epi64(a, 57),  // 1 - 2 - 3 - 0
      _mm256_permute4x64_epi64(a, 78),  // 2 - 3 - 0 - 1
      _mm256_permute4x64_epi64(a, 147), // 3 - 0 - 1 - 2
  };

  // split the rotation into whole blocks plus a remainder of 0..7 bytes
  const int n_blocks = imm / 8;
  const int n_bytes  = imm % 8;
  // block rotation that provides the bytes carried over across block boundaries
  const int carry_idx = (n_blocks + 1) % 4;

  // shift right each block by the remaining bytes...
  const __m256i shifted = _mm256_srli_epi64(by_blocks[n_blocks], n_bytes * 8);
  // ...and recover the bytes pushed out of each block from the next block rotation
  // (when n_bytes is 0 the shift count is 64 and this term is all zeros)
  const __m256i carried = _mm256_slli_epi64(by_blocks[carry_idx], (8 - n_bytes) * 8);

  return _mm256_xor_si256(shifted, carried);
}
/*!
 * Circularly rotates a node of \b ls bytes, stored in the least significant part
 * of a 256-bit register, towards the left by \b imm bytes.
 * \param[in] a   The node to rotate.
 * \param[in] imm The rotation order in bytes.
 * \param[in] ls  The size of the node (lifting size).
 * \return The rotated node.
 */
static __m256i rotate_node_left(__m256i a, int imm, uint16_t ls)
{
  // a null rotation leaves the node untouched
  if (imm == 0) {
    return a;
  }

  // bytes that move up by imm positions
  const __m256i moved_up = _mm256_rotatelli_si256(a, imm);
  if (ls == SRSLTE_AVX2_B_SIZE) {
    // the node fills the whole register: rotating the register is enough
    return moved_up;
  }

  // bytes that cross the upper end of the node and re-enter from the bottom
  const __m256i wrapped = _mm256_rotaterli_si256(a, ls - imm);

  // Keep each contribution only where it is meaningful, then merge.
  // NOTE(review): the masks are defined outside this function; presumably
  // mask_most_epi8[n] keeps the bytes from position n upwards and
  // mask_least_epi8[n] the n lowest ones - confirm against their definition.
  const __m256i upper = _mm256_and_si256(moved_up, mask_most_epi8[imm]);
  const __m256i lower = _mm256_and_si256(wrapped, mask_least_epi8[imm]);
  return _mm256_xor_si256(upper, lower);
}
/*!
 * Circularly rotates a node of \b ls bytes, stored in the least significant part
 * of a 256-bit register, towards the right by \b imm bytes.
 * \param[in] a   The node to rotate.
 * \param[in] imm The rotation order in bytes.
 * \param[in] ls  The size of the node (lifting size).
 * \return The rotated node.
 */
static __m256i rotate_node_right(__m256i a, int imm, uint16_t ls)
{
  // a null rotation leaves the node untouched
  if (imm == 0) {
    return a;
  }

  // bytes that move down by imm positions
  const __m256i moved_down = _mm256_rotaterli_si256(a, imm);
  if (ls == SRSLTE_AVX2_B_SIZE) {
    // the node fills the whole register: rotating the register is enough
    return moved_down;
  }

  // bytes that cross the lower end of the node and re-enter from the top
  const __m256i wrapped = _mm256_rotatelli_si256(a, ls - imm);

  // Keep each contribution only where it is meaningful, then merge.
  // NOTE(review): the masks are defined outside this function; presumably
  // mask_least_epi8[n] keeps the n lowest bytes and mask_most_epi8[n] the
  // bytes from position n upwards - confirm against their definition.
  const int     n_kept = ls - imm;
  const __m256i lower  = _mm256_and_si256(moved_down, mask_least_epi8[n_kept]);
  const __m256i upper  = _mm256_and_si256(wrapped, mask_most_epi8[n_kept]);
  return _mm256_xor_si256(lower, upper);
}
#endif // LV_HAVE_AVX2

@ -0,0 +1,403 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_enc_avx2long.c
* \brief Definition of the LDPC encoder inner functions (AVX2 version, large lifting size).
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <stdint.h>
#include "../utils_avx2.h"
#include "ldpc_enc_all.h"
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
#include "srslte/phy/utils/debug.h"
#include "srslte/phy/utils/vector.h"
#ifdef LV_HAVE_AVX2
#include <immintrin.h>
#include "ldpc_avx2_consts.h"
/*!
 * \brief Represents one 256-bit portion (subnode) of a node of the base factor graph.
 *
 * The same storage can be accessed byte by byte (field \b c) or as a whole AVX2
 * register (field \b v). In this file a base-graph node spans \b n_subnodes
 * consecutive elements of this type (see ldpc_enc_avx2long).
 */
typedef union bg_node_t {
uint8_t c[SRSLTE_AVX2_B_SIZE]; /*!< Byte-wise view: each subnode may contain up to \ref SRSLTE_AVX2_B_SIZE lifted nodes. */
__m256i v; /*!< All the lifted nodes of the current subnode as a 256-bit line. */
} bg_node_t;
/*!
 * \brief Inner registers for the optimized LDPC encoder.
 *
 * The three buffers are allocated by create_ldpc_enc_avx2long() and released by
 * delete_ldpc_enc_avx2long().
 */
struct ldpc_enc_avx2long {
bg_node_t* codeword; /*!< \brief Contains the entire codeword, before puncturing (bgN * n_subnodes elements). */
__m256i* aux; /*!< \brief Auxiliary register (bgM * n_subnodes elements). */
__m256i* rotated_node; /*!< \brief To store rotated versions of the nodes (n_subnodes + 1 elements). */
uint8_t n_subnodes; /*!< \brief Number of subnodes, i.e. of 256-bit registers used to store each node. */
};
/*!
 * Rotate the contents of a node towards the right by \b shift chars, that is the
 * \b shift least significant chars become the most significant ones: char \b j of
 * the output is char (\b j + \b shift) mod \b ls of the input.
 * \param[in] in_256i The node to rotate.
 * \param[out] out The rotated node.
 * \param[in] shift The order of the rotation in number of chars.
 * \param[in] ls The size of the node (lifting size).
 * \param[in] n_subnodes The number of subnodes in each node.
 */
static void rotate_node_right(const __m256i* in_256i, __m256i* out, uint16_t shift, uint16_t ls, int8_t n_subnodes);
/*!
 * Allocates the inner registers of the AVX2 encoder for large lifting sizes.
 * \param[in] q A pointer to the encoder; its fields ls, bgN and bgM fix the buffer sizes.
 * \return A pointer to the new ldpc_enc_avx2long structure, or NULL if any
 *         allocation fails (in which case nothing is left allocated).
 */
void* create_ldpc_enc_avx2long(srslte_ldpc_encoder_t* q)
{
  struct ldpc_enc_avx2long* vp = malloc(sizeof(struct ldpc_enc_avx2long));
  if (vp == NULL) {
    return NULL;
  }

  // number of 256-bit registers needed to store one node: ceil(ls / SRSLTE_AVX2_B_SIZE)
  int partial_subnode = (q->ls % SRSLTE_AVX2_B_SIZE) > 0;
  vp->n_subnodes      = q->ls / SRSLTE_AVX2_B_SIZE + partial_subnode;

  vp->codeword = srslte_vec_malloc(q->bgN * vp->n_subnodes * sizeof(bg_node_t));
  if (vp->codeword == NULL) {
    goto fail_codeword;
  }

  vp->aux = srslte_vec_malloc(q->bgM * vp->n_subnodes * sizeof(__m256i));
  if (vp->aux == NULL) {
    goto fail_aux;
  }

  // One extra block is required: when ls is a multiple of SRSLTE_AVX2_B_SIZE and
  // the shift is 0, rotate_node_right() copies n_subnodes whole blocks and then
  // still writes its "boundary" block at index n_subnodes.
  vp->rotated_node = srslte_vec_malloc((vp->n_subnodes + 1) * sizeof(__m256i));
  if (vp->rotated_node == NULL) {
    goto fail_rotated;
  }

  return vp;

  // release, in reverse order, whatever was allocated before the failure
fail_rotated:
  free(vp->aux);
fail_aux:
  free(vp->codeword);
fail_codeword:
  free(vp);
  return NULL;
}
/*!
 * Releases the inner registers created by create_ldpc_enc_avx2long().
 * \param[in] p A pointer to the ldpc_enc_avx2long structure (may be NULL, in which
 *              case nothing is done).
 */
void delete_ldpc_enc_avx2long(void* p)
{
  struct ldpc_enc_avx2long* vp = p;

  if (vp == NULL) {
    return;
  }

  // buffers first, then the structure that holds them
  free(vp->rotated_node);
  free(vp->aux);
  free(vp->codeword);
  free(vp);
}
int load_avx2long(void* p, const uint8_t* input, const uint8_t msg_len, const uint8_t cdwd_len, const uint16_t ls)
{
struct ldpc_enc_avx2long* vp = p;
if (p == NULL) {
return -1;
}
int k = 0;
int j = 0;
int i = 0;
for (; i < msg_len; i++) {
for (j = 0; j < vp->n_subnodes - 1; j++) {
for (k = 0; k < SRSLTE_AVX2_B_SIZE; k++) {
vp->codeword[i * vp->n_subnodes + j].c[k] = input[i * ls + j * SRSLTE_AVX2_B_SIZE + k];
}
}
// j is now equal to (vp->n_subnodes - 1)
for (k = 0; k < ls - j * SRSLTE_AVX2_B_SIZE; k++) {
vp->codeword[i * vp->n_subnodes + j].c[k] = input[i * ls + j * SRSLTE_AVX2_B_SIZE + k];
}
bzero(&(vp->codeword[i * vp->n_subnodes + j].c[k]), (SRSLTE_AVX2_B_SIZE - k) * sizeof(uint8_t));
}
bzero(vp->codeword + i * vp->n_subnodes, (cdwd_len - msg_len) * vp->n_subnodes * sizeof(__m256i));
return 0;
}
int return_codeword_avx2long(void* p, uint8_t* output, const uint8_t cdwd_len, const uint16_t ls)
{
struct ldpc_enc_avx2long* vp = p;
if (p == NULL) {
return -1;
}
int k = 0;
int j = 0;
for (int i = 0; i < cdwd_len - 2; i++) {
for (j = 0; j < vp->n_subnodes - 1; j++) {
for (k = 0; k < SRSLTE_AVX2_B_SIZE; k++) {
output[i * ls + j * SRSLTE_AVX2_B_SIZE + k] = vp->codeword[(i + 2) * vp->n_subnodes + j].c[k];
}
}
// j is now equal to vp->n_subndes-1
for (k = 0; k < ls - j * SRSLTE_AVX2_B_SIZE; k++) {
output[i * ls + j * SRSLTE_AVX2_B_SIZE + k] = vp->codeword[(i + 2) * vp->n_subnodes + j].c[k];
}
}
return 0;
}
/*!
 * Encodes the extended region of the codeword (AVX2 version, large lifting size):
 * check nodes from 4 up to n_layers - 1.
 * \param[in,out] q        A pointer to the encoder.
 * \param[in]     n_layers Number of check-node layers to process (the first four
 *                         belong to the high-rate region and are skipped here).
 */
void encode_ext_region_avx2long(srslte_ldpc_encoder_t* q, uint8_t n_layers)
{
  struct ldpc_enc_avx2long* vp    = q->ptr;
  const int                 n_sub = vp->n_subnodes;

  // In case of puncturing or IR-HARQ, one could process only specific check nodes
  // instead of all of them.
  for (int layer = 4; layer < n_layers; layer++) {
    bg_node_t*     parity     = vp->codeword + (q->bgK + layer) * n_sub;
    const __m256i* systematic = vp->aux + layer * n_sub;

    // start from the contribution of the systematic part, already computed
    for (int j = 0; j < n_sub; j++) {
      parity[j].v = systematic[j];
    }

    // PCM entries connecting this check node to the four high-rate parity nodes
    const uint16_t* shifts = q->pcm + q->bgK + layer * q->bgN;

    // add the contribution of the high-rate region, with the proper circular shifts
    for (int k = 0; k < 4; k++) {
      if (shifts[k] == NO_CNCT) {
        // check node and variable node are not connected
        continue;
      }
      rotate_node_right(
          &(vp->codeword[(q->bgK + k) * n_sub].v), vp->rotated_node, shifts[k], q->ls, vp->n_subnodes);
      for (int j = 0; j < n_sub; j++) {
        parity[j].v = _mm256_xor_si256(parity[j].v, vp->rotated_node[j]);
      }
    }
  }
}
/*!
 * Computes, for every check node, the contribution of the systematic nodes and
 * stores it in the auxiliary registers (AVX2 version, large lifting size).
 * \param[in,out] q A pointer to the encoder.
 */
void preprocess_systematic_bits_avx2long(srslte_ldpc_encoder_t* q)
{
  struct ldpc_enc_avx2long* vp = q->ptr;

  const int n_var = q->bgN;
  const int n_sys = q->bgK;
  const int n_chk = q->bgM;
  const int n_sub = vp->n_subnodes;

  // all the accumulators start from zero
  bzero(vp->aux, n_chk * vp->n_subnodes * sizeof(__m256i));

  // the message is split into n_sys chunks (nodes) of ls bytes each
  for (int k = 0; k < n_sys; k++) {
    const __m256i* chunk = &(vp->codeword[k * n_sub].v);

    // NB: if looking for performance, this loop may be restricted to the high-rate
    // region of the PCM (m = 0, 1, 2, 3) and to the check nodes that result in a
    // transmitted coded bit after puncturing or IR-HARQ (see Deliverable D1 Section 3.4).
    for (int m = 0; m < n_chk; m++) {
      // PCM entry for the current chunk and the current check node
      const uint16_t shift = q->pcm[k + m * n_var];
      if (shift == NO_CNCT) {
        // check node and variable node are not connected
        continue;
      }

      rotate_node_right(chunk, vp->rotated_node, shift, q->ls, vp->n_subnodes);

      // accumulate the shifted chunk, keeping only the least significant bit of each byte
      __m256i* acc = vp->aux + m * n_sub;
      for (int j = 0; j < n_sub; j++) {
        acc[j] = _mm256_xor_si256(acc[j], _mm256_and_si256(vp->rotated_node[j], one_epi8));
      }
    }
  }
}
void encode_high_rate_case1_avx2long(void* o)
{
srslte_ldpc_encoder_t* q = o;
struct ldpc_enc_avx2long* vp = q->ptr;
int ls = q->ls;
int j = 0;
int skip0 = q->bgK * vp->n_subnodes;
int skip1 = (q->bgK + 1) * vp->n_subnodes;
int skip2 = (q->bgK + 2) * vp->n_subnodes;
int skip3 = (q->bgK + 3) * vp->n_subnodes;
// first chunk of parity bits
for (j = 0; j < vp->n_subnodes; j++) {
vp->codeword[skip0 + j].v = _mm256_xor_si256(vp->aux[j], vp->aux[vp->n_subnodes + j]);
vp->codeword[skip0 + j].v = _mm256_xor_si256(vp->codeword[skip0 + j].v, vp->aux[2 * vp->n_subnodes + j]);
vp->codeword[skip0 + j].v = _mm256_xor_si256(vp->codeword[skip0 + j].v, vp->aux[3 * vp->n_subnodes + j]);
}
rotate_node_right(&(vp->codeword[skip0].v), vp->rotated_node, 1, ls, vp->n_subnodes);
for (j = 0; j < vp->n_subnodes; j++) {
// second chunk of parity bits
vp->codeword[skip1 + j].v = _mm256_xor_si256(vp->aux[j], vp->rotated_node[j]);
// fourth chunk of parity bits
vp->codeword[skip3 + j].v = _mm256_xor_si256(vp->aux[3 * vp->n_subnodes + j], vp->rotated_node[j]);
// third chunk of parity bits
vp->codeword[skip2 + j].v = _mm256_xor_si256(vp->aux[2 * vp->n_subnodes + j], vp->codeword[skip3 + j].v);
}
}
void encode_high_rate_case2_avx2long(void* o)
{
srslte_ldpc_encoder_t* q = o;
struct ldpc_enc_avx2long* vp = q->ptr;
int ls = q->ls;
int j = 0;
int skip0 = q->bgK * vp->n_subnodes;
int skip1 = (q->bgK + 1) * vp->n_subnodes;
int skip2 = (q->bgK + 2) * vp->n_subnodes;
int skip3 = (q->bgK + 3) * vp->n_subnodes;
// first chunk of parity bits
for (j = 0; j < vp->n_subnodes; j++) {
vp->rotated_node[j] = _mm256_xor_si256(vp->aux[j], vp->aux[vp->n_subnodes + j]);
vp->rotated_node[j] = _mm256_xor_si256(vp->rotated_node[j], vp->aux[2 * vp->n_subnodes + j]);
vp->rotated_node[j] = _mm256_xor_si256(vp->rotated_node[j], vp->aux[3 * vp->n_subnodes + j]);
}
rotate_node_right(vp->rotated_node, &(vp->codeword[skip0].v), ls - 105 % ls, ls, vp->n_subnodes);
for (j = 0; j < vp->n_subnodes; j++) {
// second chunk of parity bits
vp->codeword[skip1 + j].v = _mm256_xor_si256(vp->aux[j], vp->codeword[skip0 + j].v);
// fourth chunk of parity bits
vp->codeword[skip3 + j].v = _mm256_xor_si256(vp->aux[3 * vp->n_subnodes + j], vp->codeword[skip0 + j].v);
// third chunk of parity bits
vp->codeword[skip2 + j].v = _mm256_xor_si256(vp->aux[2 * vp->n_subnodes + j], vp->codeword[skip3 + j].v);
}
}
void encode_high_rate_case3_avx2long(void* o)
{
srslte_ldpc_encoder_t* q = o;
struct ldpc_enc_avx2long* vp = q->ptr;
int ls = q->ls;
int j = 0;
int skip0 = q->bgK * vp->n_subnodes;
int skip1 = (q->bgK + 1) * vp->n_subnodes;
int skip2 = (q->bgK + 2) * vp->n_subnodes;
int skip3 = (q->bgK + 3) * vp->n_subnodes;
// first chunk of parity bits
for (j = 0; j < vp->n_subnodes; j++) {
vp->rotated_node[j] = _mm256_xor_si256(vp->aux[j], vp->aux[vp->n_subnodes + j]);
vp->rotated_node[j] = _mm256_xor_si256(vp->rotated_node[j], vp->aux[2 * vp->n_subnodes + j]);
vp->rotated_node[j] = _mm256_xor_si256(vp->rotated_node[j], vp->aux[3 * vp->n_subnodes + j]);
}
rotate_node_right(vp->rotated_node, &(vp->codeword[skip0].v), ls - 1, ls, vp->n_subnodes);
for (j = 0; j < vp->n_subnodes; j++) {
// second chunk of parity bits
vp->codeword[skip1 + j].v = _mm256_xor_si256(vp->aux[j], vp->codeword[skip0 + j].v);
// third chunk of parity bits
vp->codeword[skip2 + j].v = _mm256_xor_si256(vp->aux[vp->n_subnodes + j], vp->codeword[skip1 + j].v);
// fourth chunk of parity bits
vp->codeword[skip3 + j].v = _mm256_xor_si256(vp->aux[3 * vp->n_subnodes + j], vp->codeword[skip0 + j].v);
}
}
void encode_high_rate_case4_avx2long(void* o)
{
srslte_ldpc_encoder_t* q = o;
struct ldpc_enc_avx2long* vp = q->ptr;
int ls = q->ls;
int j = 0;
int skip0 = q->bgK * vp->n_subnodes;
int skip1 = (q->bgK + 1) * vp->n_subnodes;
int skip2 = (q->bgK + 2) * vp->n_subnodes;
int skip3 = (q->bgK + 3) * vp->n_subnodes;
// first chunk of parity bits
for (j = 0; j < vp->n_subnodes; j++) {
vp->codeword[skip0 + j].v = _mm256_xor_si256(vp->aux[j], vp->aux[vp->n_subnodes + j]);
vp->codeword[skip0 + j].v = _mm256_xor_si256(vp->codeword[skip0 + j].v, vp->aux[2 * vp->n_subnodes + j]);
vp->codeword[skip0 + j].v = _mm256_xor_si256(vp->codeword[skip0 + j].v, vp->aux[3 * vp->n_subnodes + j]);
}
rotate_node_right(&(vp->codeword[skip0].v), vp->rotated_node, 1, ls, vp->n_subnodes);
for (j = 0; j < vp->n_subnodes; j++) {
// second chunk of parity bits
vp->codeword[skip1 + j].v = _mm256_xor_si256(vp->aux[j], vp->rotated_node[j]);
// third chunk of parity bits
vp->codeword[skip2 + j].v = _mm256_xor_si256(vp->aux[vp->n_subnodes + j], vp->codeword[skip1 + j].v);
// fourth chunk of parity bits
vp->codeword[skip3 + j].v = _mm256_xor_si256(vp->aux[3 * vp->n_subnodes + j], vp->rotated_node[j]);
}
}
/*!
 * Circular right rotation of a node stored as n_subnodes consecutive 256-bit
 * blocks: byte j of the output is byte (j + shift) mod ls of the input.
 *
 * The output is built from unaligned loads of the input taken at byte offsets:
 * - n_type1 blocks that lie entirely before the wrap-around point,
 * - one boundary block that mixes the last bytes of the node with its first ones,
 * - n_type2 blocks that lie entirely after the wrap-around point.
 *
 * NOTE(review): the boundary block is always written, at index n_type1. When
 * shift is 0 and ls is a multiple of SRSLTE_AVX2_B_SIZE, n_type1 equals
 * n_subnodes, so \b out must provide room for n_subnodes + 1 blocks (as the
 * rotated_node buffer does).
 * NOTE(review): the loads at (in - gap) and at (in + shift + i * SRSLTE_AVX2_B_SIZE)
 * may touch bytes before the start or past the end of the node; those bytes look
 * meant to be discarded by the blend or to fall in the padding - confirm that the
 * memory around \b in is always readable.
 */
static void rotate_node_right(const __m256i* in_256i, __m256i* out, uint16_t shift, uint16_t ls, int8_t n_subnodes)
{
// byte-wise view of the input, to address it at arbitrary byte offsets
const int8_t* in = (const int8_t*)in_256i;
// number of output blocks made only of input bytes shift, shift + 1, ... (no wrap-around)
int16_t n_type1 = (ls - shift) / SRSLTE_AVX2_B_SIZE - (ls == SRSLTE_AVX2_B_SIZE);
// number of output blocks that follow the boundary block
int16_t n_type2 = n_subnodes - n_type1 - 1 - (ls == SRSLTE_AVX2_B_SIZE);
// number of bytes of the boundary block that still come from the end of the node
int16_t gap = (ls - shift) % SRSLTE_AVX2_B_SIZE;
int16_t i = 0;
for (; i < n_type1; i++) {
out[i] = _mm256_loadu_si256((const __m256i*)(in + shift + i * SRSLTE_AVX2_B_SIZE));
}
// boundary block: tmp1 provides the last bytes of the node, tmp2 (loaded gap bytes
// before the start of the node) provides its first bytes, already at the right position
__m256i tmp1 = _mm256_loadu_si256((const __m256i*)(in + shift + i * SRSLTE_AVX2_B_SIZE));
__m256i tmp2 = _mm256_loadu_si256((const __m256i*)(in - gap));
// tmp2 is taken where the mask is set, tmp1 elsewhere
// (presumably mask_most_epi8[gap] selects the bytes from position gap upwards - TODO confirm)
out[i] = _mm256_blendv_epi8(tmp1, tmp2, mask_most_epi8[gap]);
// remaining blocks: the input from its beginning, displaced by gap bytes
for (i = 1; i <= n_type2; i++) {
out[n_type1 + i] = _mm256_loadu_si256((const __m256i*)(in - gap + i * SRSLTE_AVX2_B_SIZE));
}
}
#endif // LV_HAVE_AVX2

@ -0,0 +1,221 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_enc_c.c
* \brief Definition of the LDPC encoder inner functions (not optimized).
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <stdint.h>
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
#include "srslte/phy/utils/debug.h"
/*!
 * Encodes the extended region of the codeword (non-optimized version): check
 * nodes from 4 up to n_layers - 1.
 * \param[in]     q        A pointer to the encoder.
 * \param[in,out] output   The codeword without its first two nodes; the systematic
 *                         and high-rate parts are read, the extended part is written.
 * \param[in]     n_layers Number of check-node layers to process (the first four
 *                         belong to the high-rate region and are skipped here).
 */
void encode_ext_region(srslte_ldpc_encoder_t* q, uint8_t* output, uint8_t n_layers)
{
  uint8_t(*aux)[q->ls] = q->ptr;

  const int ls = q->ls;
  // position, in the output, of the first high-rate parity node (two nodes are punctured)
  const int first_parity = q->bgK - 2;

  // In case of puncturing or IR-HARQ, one could process only specific check nodes
  // instead of all of them.
  for (int layer = 4; layer < n_layers; layer++) {
    uint8_t* parity = output + (q->bgK + layer - 2) * ls;
    // PCM entries connecting this check node to the four high-rate parity nodes
    const uint16_t* shifts = q->pcm + q->bgK + layer * q->bgN;

    for (int i = 0; i < ls; i++) {
      // the contribution of the systematic part has already been computed
      uint8_t bit = aux[layer][i];

      // add the contribution of the high-rate region, with the proper circular shifts
      for (int k = 0; k < 4; k++) {
        if (shifts[k] != NO_CNCT) {
          bit ^= output[(first_parity + k) * ls + ((i + shifts[k]) % ls)];
        }
      }
      parity[i] = bit;
    }
  }
}
/*!
 * Computes, for every check node, the contribution of the systematic bits and
 * stores it in the auxiliary array (non-optimized version).
 * \param[in,out] q     A pointer to the encoder; q->ptr is used as a bgM x ls array.
 * \param[in]     input The message, bgK chunks of ls bytes each.
 */
void preprocess_systematic_bits(srslte_ldpc_encoder_t* q, const uint8_t* input)
{
  uint8_t(*aux)[q->ls] = q->ptr;

  const int n_var = q->bgN;
  const int n_sys = q->bgK;
  const int n_chk = q->bgM;
  const int ls    = q->ls;

  // all the accumulators start from zero
  bzero(aux, n_chk * ls * sizeof(uint8_t));

  // the message is split into n_sys chunks of ls bytes each
  for (int k = 0; k < n_sys; k++) {
    const uint8_t* chunk = input + k * ls;

    // NB: if looking for performance, this loop may be restricted to the high-rate
    // region of the PCM (m = 0, 1, 2, 3) and to the check nodes that result in a
    // transmitted coded bit after puncturing or IR-HARQ (see Deliverable D1 Section 3.4).
    for (int m = 0; m < n_chk; m++) {
      // PCM entry for the current chunk and the current check node
      const uint16_t shift = q->pcm[k + m * n_var];
      if (shift == NO_CNCT) {
        // not connected: nothing to accumulate
        continue;
      }

      // accumulate a circularly shifted version of the chunk
      for (int i = 0; i < ls; i++) {
        // mask with 1 to remove the filler bit flag
        aux[m][i] ^= 1U & chunk[(i + shift) % ls];
      }
    }
  }
}
/*!
 * High-rate region encoding, case 1 (non-optimized version).
 *
 * Writes the four chunks of parity bits that follow the systematic part of the
 * output, starting from the auxiliary rows aux[0], ..., aux[3].
 * \param[in]  q_     A pointer to the encoder (srslte_ldpc_encoder_t).
 * \param[out] output The codeword without its first two nodes.
 */
void encode_high_rate_case1(void* q_, uint8_t* output)
{
  srslte_ldpc_encoder_t* q = (srslte_ldpc_encoder_t*)q_;

  uint8_t(*aux)[q->ls] = q->ptr;

  const int ls = q->ls;

  // the four chunks of parity bits (the first two nodes are punctured from the output)
  uint8_t* par0 = output + (q->bgK - 2) * ls;
  uint8_t* par1 = output + (q->bgK - 1) * ls;
  uint8_t* par2 = output + q->bgK * ls;
  uint8_t* par3 = output + (q->bgK + 1) * ls;

  // first chunk of parity bits: XOR of the four auxiliary rows
  for (int k = 0; k < ls; k++) {
    uint8_t bit = aux[0][k] ^ aux[1][k];
    bit ^= aux[2][k];
    bit ^= aux[3][k];
    par0[k] = bit;
  }

  // the other chunks depend on the first one, circularly shifted by one position
  for (int k = 0; k < ls; k++) {
    const int next = (k + 1) % ls;
    // second chunk of parity bits
    par1[k] = aux[0][k] ^ par0[next];
    // fourth chunk of parity bits (needed to compute the third one)
    par3[k] = aux[3][k] ^ par0[next];
    // third chunk of parity bits
    par2[k] = aux[2][k] ^ par3[k];
  }
}
/*!
 * High-rate region encoding, case 2 (non-optimized version).
 *
 * Writes the four chunks of parity bits that follow the systematic part of the
 * output, starting from the auxiliary rows aux[0], ..., aux[3].
 * \param[in]  q      A pointer to the encoder.
 * \param[out] output The codeword without its first two nodes.
 */
void encode_high_rate_case2(srslte_ldpc_encoder_t* q, uint8_t* output)
{
  uint8_t(*aux)[q->ls] = q->ptr;

  const int ls = q->ls;

  // the four chunks of parity bits (the first two nodes are punctured from the output)
  uint8_t* par0 = output + (q->bgK - 2) * ls;
  uint8_t* par1 = output + (q->bgK - 1) * ls;
  uint8_t* par2 = output + q->bgK * ls;
  uint8_t* par3 = output + (q->bgK + 1) * ls;

  for (int k = 0; k < ls; k++) {
    // source position: k - 105, wrapped into the range 0, ..., ls - 1
    const int src = ((k - 105) % ls + ls) % ls;

    // first chunk of parity bits
    uint8_t bit = aux[0][src] ^ aux[1][src];
    bit ^= aux[2][src];
    bit ^= aux[3][src];
    par0[k] = bit;

    // second chunk of parity bits
    par1[k] = aux[0][k] ^ par0[k];
    // fourth chunk of parity bits (needed to compute the third one)
    par3[k] = aux[3][k] ^ par0[k];
    // third chunk of parity bits
    par2[k] = aux[2][k] ^ par3[k];
  }
}
/*!
 * High-rate region encoding, case 3 (non-optimized version).
 *
 * Writes the four chunks of parity bits that follow the systematic part of the
 * output, starting from the auxiliary rows aux[0], ..., aux[3].
 * \param[in]  q      A pointer to the encoder.
 * \param[out] output The codeword without its first two nodes.
 */
void encode_high_rate_case3(srslte_ldpc_encoder_t* q, uint8_t* output)
{
  uint8_t(*aux)[q->ls] = q->ptr;

  const int ls = q->ls;

  // the four chunks of parity bits (the first two nodes are punctured from the output)
  uint8_t* par0 = output + (q->bgK - 2) * ls;
  uint8_t* par1 = output + (q->bgK - 1) * ls;
  uint8_t* par2 = output + q->bgK * ls;
  uint8_t* par3 = output + (q->bgK + 1) * ls;

  for (int k = 0; k < ls; k++) {
    // source position: k - 1, wrapped into the range 0, ..., ls - 1
    const int src = (k + ls - 1) % ls;

    // first chunk of parity bits
    uint8_t bit = aux[0][src] ^ aux[1][src];
    bit ^= aux[2][src];
    bit ^= aux[3][src];
    par0[k] = bit;

    // second chunk of parity bits
    par1[k] = aux[0][k] ^ par0[k];
    // third chunk of parity bits (chained on the second one)
    par2[k] = aux[1][k] ^ par1[k];
    // fourth chunk of parity bits
    par3[k] = aux[3][k] ^ par0[k];
  }
}
/*!
 * High-rate region encoding, case 4 (non-optimized version).
 *
 * Writes the four chunks of parity bits that follow the systematic part of the
 * output, starting from the auxiliary rows aux[0], ..., aux[3].
 * \param[in]  q      A pointer to the encoder.
 * \param[out] output The codeword without its first two nodes.
 */
void encode_high_rate_case4(srslte_ldpc_encoder_t* q, uint8_t* output)
{
  uint8_t(*aux)[q->ls] = q->ptr;

  const int ls = q->ls;

  // the four chunks of parity bits (the first two nodes are punctured from the output)
  uint8_t* par0 = output + (q->bgK - 2) * ls;
  uint8_t* par1 = output + (q->bgK - 1) * ls;
  uint8_t* par2 = output + q->bgK * ls;
  uint8_t* par3 = output + (q->bgK + 1) * ls;

  // first chunk of parity bits: XOR of the four auxiliary rows
  for (int k = 0; k < ls; k++) {
    uint8_t bit = aux[0][k] ^ aux[1][k];
    bit ^= aux[2][k];
    bit ^= aux[3][k];
    par0[k] = bit;
  }

  // the other chunks depend on the first one, circularly shifted by one position
  for (int k = 0; k < ls; k++) {
    const int next = (k + 1) % ls;
    // second chunk of parity bits
    par1[k] = aux[0][k] ^ par0[next];
    // third chunk of parity bits (chained on the second one)
    par2[k] = aux[1][k] ^ par1[k];
    // fourth chunk of parity bits
    par3[k] = aux[3][k] ^ par0[next];
  }
}

@ -0,0 +1,390 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_encoder.c
* \brief Definition of the LDPC encoder.
* \author David Gregoratti (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <stdint.h>
#include "../utils_avx2.h"
#include "ldpc_enc_all.h"
#include "srslte/phy/fec/ldpc/base_graph.h"
#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
#include "srslte/phy/utils/debug.h"
#include "srslte/phy/utils/vector.h"
/*!
 * Carries out the actual destruction of the memory allocated to the encoder.
 *
 * Both pointers are reset after being released, so that calling this function
 * again on the same encoder (e.g., init_c() failure followed by
 * srslte_ldpc_encoder_free()) does not free the same memory twice.
 * \param[in,out] o A pointer to the encoder (srslte_ldpc_encoder_t).
 */
static void free_enc_c(void* o)
{
  srslte_ldpc_encoder_t* q = o;

  // free(NULL) is a no-op, hence no guard is needed
  free(q->pcm);
  q->pcm = NULL;

  free(q->ptr);
  q->ptr = NULL;
}
/*!
 * Carries out the actual encoding with a non-optimized encoder.
 * \param[in]  o              A pointer to the encoder (srslte_ldpc_encoder_t).
 * \param[in]  input          The message to encode.
 * \param[out] output         The resulting codeword (first two nodes punctured).
 * \param[in]  input_length   The number of symbols in the message.
 * \param[in]  cdwd_rm_length The requested codeword length; it is adjusted to a
 *                            valid multiple of the lifting size.
 * \return 0 on success, -1 if the message length does not match the encoder.
 */
static int encode_c(void* o, const uint8_t* input, uint8_t* output, uint32_t input_length, uint32_t cdwd_rm_length)
{
  srslte_ldpc_encoder_t* q = o;

  if (input_length / q->bgK != q->ls) {
    perror("Dimension mismatch.\n");
    return -1;
  }

  // The codeword cannot be longer than the full codeword without the two nodes
  // that are systematically punctured by the encoder...
  const uint32_t longest = q->liftN - 2 * q->ls;
  // ...and must cover the high-rate region (q->bgK + 4 nodes, 2 of them punctured).
  const uint32_t shortest = (q->bgK + 2) * q->ls;

  if (cdwd_rm_length > longest) {
    cdwd_rm_length = longest;
  }
  if (cdwd_rm_length < shortest) {
    cdwd_rm_length = shortest;
  }
  // round up to the next multiple of the lifting size
  const uint32_t excess = cdwd_rm_length % q->ls;
  if (excess) {
    cdwd_rm_length += q->ls - excess;
  }

  // systematic bits: the message without its first two nodes
  const uint8_t* kept_msg   = input + 2 * q->ls;
  const int      n_kept_msg = (q->bgK - 2) * q->ls;
  for (int k = 0; k < n_kept_msg; k++) {
    output[k] = kept_msg[k];
  }

  preprocess_systematic_bits(q, input);

  q->encode_high_rate(q, output);

  // When computing the number of layers, we need to recall that the standard always removes
  // the first two variable nodes from the final codeword.
  uint8_t n_layers = cdwd_rm_length / q->ls - q->bgK + 2;

  encode_ext_region(q, output, n_layers);

  return 0;
}
/*!
 * Initializes a non-optimized encoder: selects the high-rate encoding function
 * that matches base graph and lifting size, and allocates the auxiliary memory.
 * \param[in,out] q A pointer to the encoder.
 * \return 0 on success, -1 otherwise.
 */
static int init_c(srslte_ldpc_encoder_t* q)
{
  int ls_index = get_ls_index(q->ls);

  if (ls_index == VOID_LIFTSIZE) {
    ERROR("Invalid lifting size %d\n", q->ls);
    return -1;
  }

  switch (q->bg) {
    case BG1:
      if (ls_index == 6) {
        q->encode_high_rate = encode_high_rate_case2;
      } else {
        q->encode_high_rate = encode_high_rate_case1;
      }
      break;
    case BG2:
      if (ls_index == 3 || ls_index == 7) {
        q->encode_high_rate = encode_high_rate_case4;
      } else {
        q->encode_high_rate = encode_high_rate_case3;
      }
      break;
    default:
      ERROR("Invalid lifting size %d and/or Base Graph %d\n", q->ls, q->bg + 1);
      return -1;
  }

  q->free = free_enc_c;

  // auxiliary memory: one row of ls bytes per check node
  q->ptr = srslte_vec_malloc(q->bgM * q->ls * sizeof(uint8_t));
  if (q->ptr == NULL) {
    perror("malloc");
    free_enc_c(q);
    return -1;
  }

  q->encode = encode_c;

  return 0;
}
#ifdef LV_HAVE_AVX2
/*!
 * Carries out the actual destruction of the memory allocated to the encoder.
 *
 * Both pointers are reset after being released, so that calling this function
 * again on the same encoder (e.g., init_avx2() failure followed by
 * srslte_ldpc_encoder_free()) does not free the same memory twice.
 * \param[in,out] o A pointer to the encoder (srslte_ldpc_encoder_t).
 */
static void free_enc_avx2(void* o)
{
  srslte_ldpc_encoder_t* q = o;

  // free(NULL) is a no-op, hence no guard is needed
  free(q->pcm);
  q->pcm = NULL;

  if (q->ptr) {
    delete_ldpc_enc_avx2(q->ptr);
    q->ptr = NULL;
  }
}
/*!
 * Carries out the actual encoding with an optimized encoder.
 * \param[in]  o              A pointer to the encoder (srslte_ldpc_encoder_t).
 * \param[in]  input          The message to encode.
 * \param[out] output         The resulting codeword (first two nodes punctured).
 * \param[in]  input_length   The number of symbols in the message.
 * \param[in]  cdwd_rm_length The requested codeword length; it is adjusted to a
 *                            valid multiple of the lifting size.
 * \return 0 on success, -1 if the message length does not match the encoder.
 */
static int encode_avx2(void* o, const uint8_t* input, uint8_t* output, uint32_t input_length, uint32_t cdwd_rm_length)
{
  srslte_ldpc_encoder_t* q = o;

  if (input_length / q->bgK != q->ls) {
    perror("Dimension mismatch.\n");
    return -1;
  }

  // The codeword cannot be longer than the full codeword without the two nodes
  // that are systematically punctured by the encoder...
  const uint32_t longest = q->liftN - 2 * q->ls;
  // ...and must cover the high-rate region (q->bgK + 4 nodes, 2 of them punctured).
  const uint32_t shortest = (q->bgK + 2) * q->ls;

  if (cdwd_rm_length > longest) {
    cdwd_rm_length = longest;
  }
  if (cdwd_rm_length < shortest) {
    cdwd_rm_length = shortest;
  }
  // round up to the next multiple of the lifting size
  const uint32_t excess = cdwd_rm_length % q->ls;
  if (excess) {
    cdwd_rm_length += q->ls - excess;
  }

  // move the message into the inner registers
  load_avx2(q->ptr, input, q->bgK, q->bgN, q->ls);

  preprocess_systematic_bits_avx2(q);

  q->encode_high_rate_avx2(q);

  // When computing the number of layers, we need to recall that the standard always removes
  // the first two variable nodes from the final codeword.
  uint8_t n_layers = cdwd_rm_length / q->ls - q->bgK + 2;

  encode_ext_region_avx2(q, n_layers);

  // copy the encoded nodes from the inner registers to the output
  return_codeword_avx2(q->ptr, output, n_layers + q->bgK, q->ls);

  return 0;
}
/*!
 * Initializes an optimized encoder: selects the high-rate encoding function that
 * matches base graph and lifting size, and creates the inner registers.
 * \param[in,out] q A pointer to the encoder.
 * \return 0 on success, -1 otherwise.
 */
static int init_avx2(srslte_ldpc_encoder_t* q)
{
  int ls_index = get_ls_index(q->ls);

  if (ls_index == VOID_LIFTSIZE) {
    ERROR("Invalid lifting size %d\n", q->ls);
    return -1;
  }

  switch (q->bg) {
    case BG1:
      if (ls_index == 6) {
        q->encode_high_rate_avx2 = encode_high_rate_case2_avx2;
      } else {
        q->encode_high_rate_avx2 = encode_high_rate_case1_avx2;
      }
      break;
    case BG2:
      if (ls_index == 3 || ls_index == 7) {
        q->encode_high_rate_avx2 = encode_high_rate_case4_avx2;
      } else {
        q->encode_high_rate_avx2 = encode_high_rate_case3_avx2;
      }
      break;
    default:
      ERROR("Invalid lifting size %d and/or Base Graph %d\n", q->ls, q->bg + 1);
      return -1;
  }

  q->free = free_enc_avx2;

  q->ptr = create_ldpc_enc_avx2(q);
  if (q->ptr == NULL) {
    perror("Create_ldpc_enc\n");
    free_enc_avx2(q);
    return -1;
  }

  q->encode = encode_avx2;

  return 0;
}
/*!
 * Carries out the actual destruction of the memory allocated to the encoder.
 *
 * Both pointers are reset after being released, so that calling this function
 * again on the same encoder (e.g., init_avx2long() failure followed by
 * srslte_ldpc_encoder_free()) does not free the same memory twice.
 * \param[in,out] o A pointer to the encoder (srslte_ldpc_encoder_t).
 */
static void free_enc_avx2long(void* o)
{
  srslte_ldpc_encoder_t* q = o;

  // free(NULL) is a no-op, hence no guard is needed
  free(q->pcm);
  q->pcm = NULL;

  if (q->ptr) {
    delete_ldpc_enc_avx2long(q->ptr);
    q->ptr = NULL;
  }
}
/*!
 * Carries out the actual encoding with an optimized encoder (large lifting size).
 * \param[in]  o              A pointer to the encoder (srslte_ldpc_encoder_t).
 * \param[in]  input          The message to encode.
 * \param[out] output         The resulting codeword (first two nodes punctured).
 * \param[in]  input_length   The number of symbols in the message.
 * \param[in]  cdwd_rm_length The requested codeword length; it is adjusted to a
 *                            valid multiple of the lifting size.
 * \return 0 on success, -1 if the message length does not match the encoder.
 */
static int
encode_avx2long(void* o, const uint8_t* input, uint8_t* output, uint32_t input_length, uint32_t cdwd_rm_length)
{
  srslte_ldpc_encoder_t* q = o;

  if (input_length / q->bgK != q->ls) {
    perror("Dimension mismatch.\n");
    return -1;
  }

  // The codeword cannot be longer than the full codeword without the two nodes
  // that are systematically punctured by the encoder...
  const uint32_t longest = q->liftN - 2 * q->ls;
  // ...and must cover the high-rate region (q->bgK + 4 nodes, 2 of them punctured).
  const uint32_t shortest = (q->bgK + 2) * q->ls;

  if (cdwd_rm_length > longest) {
    cdwd_rm_length = longest;
  }
  if (cdwd_rm_length < shortest) {
    cdwd_rm_length = shortest;
  }
  // round up to the next multiple of the lifting size
  const uint32_t excess = cdwd_rm_length % q->ls;
  if (excess) {
    cdwd_rm_length += q->ls - excess;
  }

  // move the message into the inner registers
  load_avx2long(q->ptr, input, q->bgK, q->bgN, q->ls);

  preprocess_systematic_bits_avx2long(q);

  q->encode_high_rate_avx2(q);

  // When computing the number of layers, we need to recall that the standard always removes
  // the first two variable nodes from the final codeword.
  uint8_t n_layers = cdwd_rm_length / q->ls - q->bgK + 2;

  encode_ext_region_avx2long(q, n_layers);

  // copy the encoded nodes from the inner registers to the output
  return_codeword_avx2long(q->ptr, output, n_layers + q->bgK, q->ls);

  return 0;
}
/*!
 * Initializes an optimized encoder (large lifting size): selects the high-rate
 * encoding function that matches base graph and lifting size, and creates the
 * inner registers.
 * \param[in,out] q A pointer to the encoder.
 * \return 0 on success, -1 otherwise.
 */
static int init_avx2long(srslte_ldpc_encoder_t* q)
{
  int ls_index = get_ls_index(q->ls);

  if (ls_index == VOID_LIFTSIZE) {
    ERROR("Invalid lifting size %d\n", q->ls);
    return -1;
  }

  switch (q->bg) {
    case BG1:
      if (ls_index == 6) {
        q->encode_high_rate_avx2 = encode_high_rate_case2_avx2long;
      } else {
        q->encode_high_rate_avx2 = encode_high_rate_case1_avx2long;
      }
      break;
    case BG2:
      if (ls_index == 3 || ls_index == 7) {
        q->encode_high_rate_avx2 = encode_high_rate_case4_avx2long;
      } else {
        q->encode_high_rate_avx2 = encode_high_rate_case3_avx2long;
      }
      break;
    default:
      ERROR("Invalid lifting size %d and/or Base Graph %d\n", q->ls, q->bg + 1);
      return -1;
  }

  q->free = free_enc_avx2long;

  q->ptr = create_ldpc_enc_avx2long(q);
  if (q->ptr == NULL) {
    perror("Create_ldpc_enc\n");
    free_enc_avx2long(q);
    return -1;
  }

  q->encode = encode_avx2long;

  return 0;
}
#endif
/*!
 * Initializes an LDPC encoder.
 *
 * Fills in the base-graph and lifted-graph dimensions, allocates and builds the
 * compact parity-check matrix and then hands over to the type-specific
 * initializer.
 *
 * \param[out] q    Encoder instance to initialize.
 * \param[in]  type Encoder implementation to use.
 * \param[in]  bg   Base graph (BG1 or BG2).
 * \param[in]  ls   Lifting size.
 * \return 0 on success, -1 otherwise (unknown base graph, allocation failure,
 *         failure building the parity-check matrix, unsupported encoder type or
 *         failure of the type-specific initializer).
 */
int srslte_ldpc_encoder_init(srslte_ldpc_encoder_t*     q,
                             srslte_ldpc_encoder_type_t type,
                             srslte_basegraph_t         bg,
                             uint16_t                   ls)
{
  switch (bg) {
    case BG1:
      q->bgN = BG1Nfull;
      q->bgM = BG1M;
      break;
    case BG2:
      q->bgN = BG2Nfull;
      q->bgM = BG2M;
      break;
    default:
      ERROR("Base Graph BG%d does not exist\n", bg + 1);
      return -1;
  }
  q->bg  = bg;
  q->bgK = q->bgN - q->bgM;

  q->ls    = ls;
  q->liftK = ls * q->bgK;
  q->liftM = ls * q->bgM;
  q->liftN = ls * q->bgN;

  q->pcm = srslte_vec_malloc(q->bgM * q->bgN * sizeof(uint16_t));
  if (!q->pcm) {
    perror("malloc");
    return -1;
  }
  if (create_compact_pcm(q->pcm, NULL, q->bg, q->ls) != 0) {
    perror("Create PCM");
    // No free callback has been registered yet: release the matrix here to avoid a leak.
    free(q->pcm);
    q->pcm = NULL;
    return -1;
  }

  switch (type) {
    case SRSLTE_LDPC_ENCODER_C:
      return init_c(q);
#ifdef LV_HAVE_AVX2
    case SRSLTE_LDPC_ENCODER_AVX2:
      if (ls <= SRSLTE_AVX2_B_SIZE) {
        return init_avx2(q);
      } else {
        return init_avx2long(q);
      }
#endif // LV_HAVE_AVX2
    default:
      // Unsupported type: no type-specific initializer ran, so the matrix must be released here.
      free(q->pcm);
      q->pcm = NULL;
      return -1;
  }
}
/*!
 * Releases the resources of an encoder through its registered free callback
 * (if any) and clears the whole encoder structure.
 *
 * \param[in,out] q Encoder instance to destroy.
 */
void srslte_ldpc_encoder_free(srslte_ldpc_encoder_t* q)
{
  if (q->free != NULL) {
    q->free(q);
  }

  // wipe every field, callbacks included
  bzero(q, sizeof(*q));
}
/*!
 * Encodes a message by dispatching to the encode callback stored in the encoder
 * (the callback is registered by the type-specific initializer).
 *
 * \param[in]  q              Encoder instance.
 * \param[in]  input          Message to encode.
 * \param[out] output         Resulting codeword.
 * \param[in]  input_length   Length of the message.
 * \param[in]  cdwd_rm_length Requested codeword length.
 * \return The value returned by the encode callback.
 */
int srslte_ldpc_encoder_encode(srslte_ldpc_encoder_t* q,
const uint8_t* input,
uint8_t* output,
uint32_t input_length,
uint32_t cdwd_rm_length)
{
// NOTE(review): q->encode is not checked against NULL; the encoder is assumed to be initialized.
return q->encode(q, input, output, input_length, cdwd_rm_length);
}

@ -0,0 +1,696 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_rm.c
* \brief Definition of the LDPC Rate Matcher and Rate Dematcher (float-valued, int16_t and int8_t)
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include "srslte/phy/fec/ldpc/ldpc_common.h" //FILLER_BIT definition
#include "srslte/phy/fec/ldpc/ldpc_rm.h"
#include "srslte/phy/utils/vector.h"
#include "srslte/phy/utils/debug.h"
//#define debug
/*!
 * \brief Look-up table: base k0 indices
 *
 * Indexed as BASEK0[rv][bg]: each row corresponds to a redundancy version and
 * contains the base indices for the two base graphs. The selected entry is
 * scaled by the lifting size to obtain the starting position k0.
 */
static const uint32_t BASEK0[4][2] = {{0, 0}, {17, 13}, {33, 25}, {56, 43}};
/*!
 * \brief Look-up table: base codeword lengths N/LS, indexed by base graph
 *
 */
static const uint32_t BASEN[2] = {66, 50};
/*!
 * \brief Look-up table: base codeblock lengths K/LS, indexed by base graph
 *
 */
static const uint32_t BASEK[2] = {22, 10};
/*!
 * \brief Look-up table: Returns the modulation order associated to a mod_type_t
 *
 */
static const uint32_t MODORD[5] = {1, 2, 4, 6, 8};
/*!
 * \brief Look-up table: Maximum number of coded bits available for transmission in a
 * transport block
 *
 * If there is only one user in the system using the maximum bandwidth, then
 * MAXE is not smaller than nRB * nDS * nSC_RB * modOrd * nLayers, where
 * - nLayers is the number of layers (<= 4),
 * - modOrd is the modulation order (<= 8),
 * - nSC_RB is the number of subcarriers per resource block (RB) (<= 12),
 * - nDS is the number of symbols used to transmit data in an RB (13), and
 * - nRB is the maximum number of resource blocks in the system. This depends on the
 *   channel bandwidth and on the subcarrier spacing and, according to
 *   Table 5.3.2-1 (Maximum transmission bandwidth configuration NRB: FR1),
 *   it is not larger than 273.
 *
 * NOTE(review): the original comment associated 273 RBs with "subcarrier spacing 10
 * and bandwidth 100MHz"; this looks like a typo for 30 kHz -- confirm against the table.
 */
static const uint32_t MAXE = 273 * 13 * 12 * 8 * 4;
/*!
 * \brief Describes a rate matcher.
 *
 * An instance is allocated by srslte_ldpc_rm_tx_init() and released by
 * srslte_ldpc_rm_tx_free().
 */
struct pRM_tx {
uint8_t* tmp_rm_codeword; /*!< \brief Pointer to a temporary buffer (MAXE entries) between bit selection and interleaver. */
};
/*!
 * \brief Describes a rate dematcher (float version).
 *
 * An instance is allocated by srslte_ldpc_rm_rx_init_f() and released by
 * srslte_ldpc_rm_rx_free_f().
 */
struct pRM_rx_f {
float* tmp_rm_symbol; /*!< \brief Pointer to a temporary buffer (MAXE entries) between deinterleaver and bit selection. */
uint32_t* indices; /*!< \brief Pointer to a temporary buffer (MAXE entries) with the indices for bit selection. */
};
/*!
 * \brief Describes a rate dematcher (short version).
 *
 * An instance is allocated by srslte_ldpc_rm_rx_init_s() and released by
 * srslte_ldpc_rm_rx_free_s().
 */
struct pRM_rx_s {
int16_t* tmp_rm_symbol; /*!< \brief Pointer to a temporary buffer (MAXE entries) between deinterleaver and bit selection. */
uint32_t* indices; /*!< \brief Pointer to a temporary buffer (MAXE entries) with the indices for bit selection. */
};
/*!
 * \brief Describes a rate dematcher (char version).
 *
 * An instance is allocated by srslte_ldpc_rm_rx_init_c() and released by
 * srslte_ldpc_rm_rx_free_c().
 */
struct pRM_rx_c {
int8_t* tmp_rm_symbol; /*!< \brief Pointer to a temporary buffer (MAXE entries) between deinterleaver and bit selection. */
uint32_t* indices; /*!< \brief Pointer to a temporary buffer (MAXE entries) with the indices for bit selection. */
};
/*!
 * Initialize rate-matching parameters.
 *
 * All the values used to index the look-up tables (\a bg, \a rv, \a mod_type)
 * are validated before use, and parameters that would produce an empty circular
 * buffer (\a ls or \a Nref equal to zero) are rejected, since the bit-selection
 * routines compute positions modulo the buffer length.
 *
 * \param[out] p        Rate matcher/dematcher whose parameters are set.
 * \param[in]  E        Length of the rate-matched codeword. It must not exceed MAXE and must
 *                      be a multiple of the modulation order.
 * \param[in]  F        Number of filler bits.
 * \param[in]  bg       Base graph index (0 for BG1, 1 for BG2).
 * \param[in]  ls       Lifting size.
 * \param[in]  rv       Redundancy version (0 to 3).
 * \param[in]  mod_type Modulation type, used to look up the modulation order.
 * \param[in]  Nref     Size of the limited buffer: the circular buffer length is min(N, Nref).
 * \return 0 on success, -1 if any parameter is invalid.
 */
static int init_rm(srslte_ldpc_rm_t* p,
                   const uint32_t    E,
                   const uint32_t    F,
                   const uint32_t    bg,
                   const uint32_t    ls,
                   const uint8_t     rv,
                   const mod_type_t  mod_type,
                   const uint32_t    Nref)
{
  if (p == NULL) {
    return -1;
  }

  // validate the look-up table indices before reading the tables
  if (bg >= sizeof(BASEN) / sizeof(BASEN[0])) {
    ERROR("Wrong base graph index %d. It must be either 0 (BG1) or 1 (BG2)\n", bg);
    return -1;
  }
  if (rv >= sizeof(BASEK0) / sizeof(BASEK0[0])) {
    ERROR("Wrong redundancy version (rv) = %d. It must be smaller than 4\n", rv);
    return -1;
  }
  if ((uint32_t)mod_type >= sizeof(MODORD) / sizeof(MODORD[0])) {
    ERROR("Wrong modulation type = %d\n", (int)mod_type);
    return -1;
  }

  // an empty circular buffer would cause a modulo-by-zero during bit selection
  if (ls == 0 || Nref == 0) {
    ERROR("Wrong lifting size (%d) and/or limited buffer size (%d). Both must be positive\n", ls, Nref);
    return -1;
  }

  uint32_t basek0    = BASEK0[rv][bg];
  uint32_t mod_order = MODORD[mod_type];
  uint32_t N         = ls * BASEN[bg];
  uint32_t K         = ls * BASEK[bg];

  // check E is not larger than MAXE
  if (E > MAXE) {
    ERROR("Wrong RM codeword length (E) = %d. It must be smaller than %d for base graph %d\n", E, MAXE, bg + 1);
    return -1;
  }

  // check E is a multiple of mod_order
  if ((E % mod_order) != 0) {
    ERROR("Wrong RM codeword length (E) = %d. It must be a multiple of modulation order = %d\n", E, mod_order);
    return -1;
  }

  // Set parameters
  p->N         = N;
  p->E         = E;
  p->K         = K;
  p->F         = F;
  p->ls        = ls;
  p->mod_order = mod_order;
  p->bg        = bg;

  // Determine Ncb and k0
  if (N <= Nref) {
    p->Ncb = N;
    p->k0  = ls * basek0;
  } else {
    // limited buffer: the starting position is scaled down accordingly
    p->Ncb = Nref;
    p->k0  = ls * ((basek0 * Nref) / N);
  }

  return 0;
}
/*!
 * Bit selection for the rate-matching block.
 *
 * Reads the input as a circular buffer of length \a Ncb, starting from position
 * \a k0, and copies bits to the output until \a out_len bits have been written.
 * Filler bits are skipped.
 *
 * \param[in]  input   Codeword (circular buffer).
 * \param[out] output  Selected bits.
 * \param[in]  out_len Number of bits to select.
 * \param[in]  k0      Starting position in the circular buffer.
 * \param[in]  Ncb     Length of the circular buffer.
 */
static void bit_selection_rm_tx(const uint8_t* input,
                                uint8_t*       output,
                                const uint32_t out_len,
                                const uint32_t k0,
                                const uint32_t Ncb)
{
  uint32_t n_selected = 0;

  for (uint32_t offset = 0; n_selected < out_len; offset++) {
    const uint32_t pos = (k0 + offset) % Ncb;
    const uint8_t  bit = input[pos];
    if (bit != FILLER_BIT) {
      output[n_selected] = bit;
      n_selected++;
    }
  }
}
/*!
 * Undoes bit selection for the rate-dematching block (float).
 *
 * The output has the codeword length N. Filler bits, i.e. positions in
 * [ini_exclude, end_exclude), are set to INFINITY (to indicate a very reliable
 * 0 bit); all the positions that did not receive any symbol are set to 0
 * (completely unknown bit). Symbols mapped to the same position (repetition)
 * are added together.
 *
 * \param[in]  input       Received soft bits.
 * \param[in]  in_len      Number of received soft bits (E).
 * \param[out] output      Rate-dematched soft bits.
 * \param[in]  out_len     Length of the output (N).
 * \param[out] indices     Working buffer with at least \a in_len entries: on return it
 *                         contains the output position of each input symbol.
 * \param[in]  ini_exclude First filler-bit position.
 * \param[in]  end_exclude Position after the last filler bit.
 * \param[in]  k0          Starting position in the circular buffer.
 * \param[in]  Ncb         Length of the circular buffer.
 */
static void bit_selection_rm_rx(const float*   input,
                                const uint32_t in_len,
                                float*         output,
                                const uint32_t out_len,
                                uint32_t*      indices,
                                const uint32_t ini_exclude,
                                const uint32_t end_exclude,
                                const uint32_t k0,
                                const uint32_t Ncb)
{
  // Walk the circular buffer and record where each received symbol belongs,
  // skipping the filler-bit positions.
  uint32_t n_mapped = 0;
  for (uint32_t offset = 0; n_mapped < in_len; offset++) {
    const uint32_t pos = (k0 + offset) % Ncb;
    if (pos < ini_exclude || pos >= end_exclude) {
      indices[n_mapped] = pos;
      n_mapped++;
    }
  }

  // Positions that receive no symbol stay at zero.
  bzero(output, out_len * sizeof(float));

  // Filler bits are known.
  for (uint32_t pos = ini_exclude; pos < end_exclude; pos++) {
    output[pos] = INFINITY;
  }

  // Combine the soft bits (repeated symbols add up).
  for (uint32_t n = 0; n < in_len; n++) {
    output[indices[n]] += input[n];
  }
}
/*!
 * Undoes bit selection for the rate-dematching block (int16_t).
 *
 * The output has the codeword length N. Filler bits, i.e. positions in
 * [ini_exclude, end_exclude), are set to the maximum 16-bit value (to indicate a
 * very reliable 0 bit); all the positions that did not receive any symbol are
 * set to 0 (completely unknown bit). Symbols mapped to the same position
 * (repetition) are added together, saturating at +/-(2^14 - 1).
 *
 * \param[in]  input       Received soft bits.
 * \param[in]  in_len      Number of received soft bits (E).
 * \param[out] output      Rate-dematched soft bits.
 * \param[in]  out_len     Length of the output (N).
 * \param[out] indices     Working buffer with at least \a in_len entries: on return it
 *                         contains the output position of each input symbol.
 * \param[in]  ini_exclude First filler-bit position.
 * \param[in]  end_exclude Position after the last filler bit.
 * \param[in]  k0          Starting position in the circular buffer.
 * \param[in]  Ncb         Length of the circular buffer.
 */
static void bit_selection_rm_rx_s(const int16_t* input,
                                  const uint32_t in_len,
                                  int16_t*       output,
                                  const uint32_t out_len,
                                  uint32_t*      indices,
                                  const uint32_t ini_exclude,
                                  const uint32_t end_exclude,
                                  const uint32_t k0,
                                  const uint32_t Ncb)
{
  // Value reserved for filler bits: max positive value in 16-bit representation.
  const int16_t filler_value = (1 << 15) - 1;
  // Soft bits use a 15-bit quantization; the remaining bit is what allows denoting infinity.
  const long soft_limit = (1 << 14) - 1;

  // Walk the circular buffer and record where each received symbol belongs,
  // skipping the filler-bit positions.
  uint32_t n_mapped = 0;
  for (uint32_t offset = 0; n_mapped < in_len; offset++) {
    const uint32_t pos = (k0 + offset) % Ncb;
    if (pos < ini_exclude || pos >= end_exclude) {
      indices[n_mapped] = pos;
      n_mapped++;
    }
  }

  // Positions that receive no symbol stay at zero.
  bzero(output, out_len * sizeof(int16_t));

  // Filler bits are known.
  for (uint32_t pos = ini_exclude; pos < end_exclude; pos++) {
    output[pos] = filler_value;
  }

  // Combine the soft bits (repeated symbols add up), saturating the result so that
  // only filler bits can take the reserved value.
  for (uint32_t n = 0; n < in_len; n++) {
    int16_t* target = &output[indices[n]];
    long     sum    = (long)*target + input[n];
    sum             = (sum > soft_limit) ? soft_limit : sum;
    sum             = (sum < -soft_limit) ? -soft_limit : sum;
    *target         = (int16_t)sum;
  }
}
/*!
 * Undoes bit selection for the rate-dematching block (int8_t).
 *
 * The output has the codeword length N. Filler bits, i.e. positions in
 * [ini_exclude, end_exclude), are set to the maximum 8-bit value (to indicate a
 * very reliable 0 bit); all the positions that did not receive any symbol are
 * set to 0 (completely unknown bit). Symbols mapped to the same position
 * (repetition) are added together, saturating at +/-(2^6 - 1).
 *
 * \param[in]  input       Received soft bits.
 * \param[in]  in_len      Number of received soft bits (E).
 * \param[out] output      Rate-dematched soft bits.
 * \param[in]  out_len     Length of the output (N).
 * \param[out] indices     Working buffer with at least \a in_len entries: on return it
 *                         contains the output position of each input symbol.
 * \param[in]  ini_exclude First filler-bit position.
 * \param[in]  end_exclude Position after the last filler bit.
 * \param[in]  k0          Starting position in the circular buffer.
 * \param[in]  Ncb         Length of the circular buffer.
 */
static void bit_selection_rm_rx_c(const int8_t*  input,
                                  const uint32_t in_len,
                                  int8_t*        output,
                                  const uint32_t out_len,
                                  uint32_t*      indices,
                                  const uint32_t ini_exclude,
                                  const uint32_t end_exclude,
                                  const uint32_t k0,
                                  const uint32_t Ncb)
{
  // Value reserved for filler bits: max positive value in 8-bit representation.
  const int8_t filler_value = (1 << 7) - 1;
  // Soft bits use a 7-bit quantization; the remaining bit is what allows denoting infinity.
  const long soft_limit = (1 << 6) - 1;

  // Walk the circular buffer and record where each received symbol belongs,
  // skipping the filler-bit positions.
  uint32_t n_mapped = 0;
  for (uint32_t offset = 0; n_mapped < in_len; offset++) {
    const uint32_t pos = (k0 + offset) % Ncb;
    if (pos < ini_exclude || pos >= end_exclude) {
      indices[n_mapped] = pos;
      n_mapped++;
    }
  }

  // Positions that receive no symbol stay at zero.
  bzero(output, out_len * sizeof(int8_t));

  // Filler bits are known.
  for (uint32_t pos = ini_exclude; pos < end_exclude; pos++) {
    output[pos] = filler_value;
  }

  // Combine the soft bits (repeated symbols add up), saturating the result so that
  // only filler bits can take the reserved value.
  for (uint32_t n = 0; n < in_len; n++) {
    int8_t* target = &output[indices[n]];
    long    sum    = (long)*target + input[n];
    sum            = (sum > soft_limit) ? soft_limit : sum;
    sum            = (sum < -soft_limit) ? -soft_limit : sum;
    *target        = (int8_t)sum;
  }
}
/*!
 * Bit interleaver.
 *
 * The input is seen as a matrix with mod_order rows and in_out_len / mod_order
 * columns stored row by row; the output is the same matrix read column by column.
 *
 * \param[in]  input      Bits to interleave.
 * \param[out] output     Interleaved bits.
 * \param[in]  in_out_len Number of bits.
 * \param[in]  mod_order  Modulation order (number of rows).
 */
static void
bit_interleaver_rm_tx(const uint8_t* input, uint8_t* output, const uint32_t in_out_len, const uint32_t mod_order)
{
  const uint32_t n_rows = mod_order;
  const uint32_t n_cols = in_out_len / n_rows;

  // the output is filled sequentially
  uint32_t i_out = 0;
  for (uint32_t col = 0; col < n_cols; col++) {
    for (uint32_t row = 0; row < n_rows; row++) {
      output[i_out] = input[row * n_cols + col];
      i_out++;
    }
  }
}
/*!
 * Bit deinterleaver (float).
 *
 * The output is seen as a matrix with mod_order rows and in_out_len / mod_order
 * columns stored row by row; the input provides its entries column by column.
 *
 * \param[in]  input      Soft bits to deinterleave.
 * \param[out] output     Deinterleaved soft bits.
 * \param[in]  in_out_len Number of soft bits.
 * \param[in]  mod_order  Modulation order (number of rows).
 */
static void
bit_interleaver_rm_rx(const float* input, float* output, const uint32_t in_out_len, const uint32_t mod_order)
{
  const uint32_t n_rows = mod_order;
  const uint32_t n_cols = in_out_len / n_rows;

  // the input is consumed sequentially
  uint32_t i_in = 0;
  for (uint32_t col = 0; col < n_cols; col++) {
    for (uint32_t row = 0; row < n_rows; row++) {
      output[row * n_cols + col] = input[i_in];
      i_in++;
    }
  }
}
/*!
 * Bit deinterleaver (short).
 *
 * The output is seen as a matrix with mod_order rows and in_out_len / mod_order
 * columns stored row by row; the input provides its entries column by column.
 *
 * \param[in]  input      Soft bits to deinterleave.
 * \param[out] output     Deinterleaved soft bits.
 * \param[in]  in_out_len Number of soft bits.
 * \param[in]  mod_order  Modulation order (number of rows).
 */
static void
bit_interleaver_rm_rx_s(const int16_t* input, int16_t* output, const uint32_t in_out_len, const uint32_t mod_order)
{
  const uint32_t n_rows = mod_order;
  const uint32_t n_cols = in_out_len / n_rows;

  // the input is consumed sequentially
  uint32_t i_in = 0;
  for (uint32_t col = 0; col < n_cols; col++) {
    for (uint32_t row = 0; row < n_rows; row++) {
      output[row * n_cols + col] = input[i_in];
      i_in++;
    }
  }
}
/*!
 * Bit deinterleaver (char).
 *
 * The output is seen as a matrix with mod_order rows and in_out_len / mod_order
 * columns stored row by row; the input provides its entries column by column.
 *
 * \param[in]  input      Soft bits to deinterleave.
 * \param[out] output     Deinterleaved soft bits.
 * \param[in]  in_out_len Number of soft bits.
 * \param[in]  mod_order  Modulation order (number of rows).
 */
static void
bit_interleaver_rm_rx_c(const int8_t* input, int8_t* output, const uint32_t in_out_len, const uint32_t mod_order)
{
  const uint32_t n_rows = mod_order;
  const uint32_t n_cols = in_out_len / n_rows;

  // the input is consumed sequentially
  uint32_t i_in = 0;
  for (uint32_t col = 0; col < n_cols; col++) {
    for (uint32_t row = 0; row < n_rows; row++) {
      output[row * n_cols + col] = input[i_in];
      i_in++;
    }
  }
}
/*!
 * Initializes a rate matcher: allocates the internal instance and its temporary
 * buffer (MAXE entries).
 *
 * The instance is attached to \a p only when it has been completely built. On
 * failure, p->ptr is set to NULL instead of being left pointing to released memory.
 *
 * \param[out] p Rate matcher to initialize.
 * \return 0 on success, -1 if \a p is NULL or if memory cannot be allocated.
 */
int srslte_ldpc_rm_tx_init(srslte_ldpc_rm_t* p)
{
  if (p == NULL) {
    return -1;
  }

  p->ptr = NULL;

  // allocate memory to the rate-matcher instance
  struct pRM_tx* pp = malloc(sizeof(*pp));
  if (pp == NULL) {
    return -1;
  }

  // allocate memory to the rm_codeword after bit selection.
  if ((pp->tmp_rm_codeword = srslte_vec_u8_malloc(MAXE)) == NULL) {
    free(pp);
    return -1;
  }

  p->ptr = pp;

  return 0;
}
/*!
 * Initializes a rate dematcher (float version): allocates the internal instance
 * and its two temporary buffers (MAXE entries each).
 *
 * The instance is attached to \a p only when it has been completely built. On
 * failure, p->ptr is set to NULL instead of being left pointing to released memory.
 *
 * \param[out] p Rate dematcher to initialize.
 * \return 0 on success, -1 if \a p is NULL or if memory cannot be allocated.
 */
int srslte_ldpc_rm_rx_init_f(srslte_ldpc_rm_t* p)
{
  if (p == NULL) {
    return -1;
  }

  p->ptr = NULL;

  // allocate memory to the rate-dematcher instance
  struct pRM_rx_f* pp = malloc(sizeof(*pp));
  if (pp == NULL) {
    return -1;
  }

  // allocate memory to the temporary buffer
  if ((pp->tmp_rm_symbol = srslte_vec_f_malloc(MAXE)) == NULL) {
    free(pp);
    return -1;
  }

  // allocate memory to the bit-selection indices
  if ((pp->indices = srslte_vec_u32_malloc(MAXE)) == NULL) {
    free(pp->tmp_rm_symbol);
    free(pp);
    return -1;
  }

  p->ptr = pp;

  return 0;
}
/*!
 * Initializes a rate dematcher (short version): allocates the internal instance
 * and its two temporary buffers (MAXE entries each).
 *
 * The instance is attached to \a p only when it has been completely built. On
 * failure, p->ptr is set to NULL instead of being left pointing to released memory.
 *
 * \param[out] p Rate dematcher to initialize.
 * \return 0 on success, -1 if \a p is NULL or if memory cannot be allocated.
 */
int srslte_ldpc_rm_rx_init_s(srslte_ldpc_rm_t* p)
{
  if (p == NULL) {
    return -1;
  }

  p->ptr = NULL;

  // allocate memory to the rate-dematcher instance
  struct pRM_rx_s* pp = malloc(sizeof(*pp));
  if (pp == NULL) {
    return -1;
  }

  // allocate memory to the temporary buffer
  if ((pp->tmp_rm_symbol = srslte_vec_i16_malloc(MAXE)) == NULL) {
    free(pp);
    return -1;
  }

  // allocate memory to the bit-selection indices
  if ((pp->indices = srslte_vec_u32_malloc(MAXE)) == NULL) {
    free(pp->tmp_rm_symbol);
    free(pp);
    return -1;
  }

  p->ptr = pp;

  return 0;
}
/*!
 * Initializes a rate dematcher (char version): allocates the internal instance
 * and its two temporary buffers (MAXE entries each).
 *
 * The instance is attached to \a p only when it has been completely built. On
 * failure, p->ptr is set to NULL instead of being left pointing to released memory.
 *
 * \param[out] p Rate dematcher to initialize.
 * \return 0 on success, -1 if \a p is NULL or if memory cannot be allocated.
 */
int srslte_ldpc_rm_rx_init_c(srslte_ldpc_rm_t* p)
{
  if (p == NULL) {
    return -1;
  }

  p->ptr = NULL;

  // allocate memory to the rate-dematcher instance
  struct pRM_rx_c* pp = malloc(sizeof(*pp));
  if (pp == NULL) {
    return -1;
  }

  // allocate memory to the temporary buffer
  if ((pp->tmp_rm_symbol = srslte_vec_i8_malloc(MAXE)) == NULL) {
    free(pp);
    return -1;
  }

  // allocate memory to the bit-selection indices
  if ((pp->indices = srslte_vec_u32_malloc(MAXE)) == NULL) {
    free(pp->tmp_rm_symbol);
    free(pp);
    return -1;
  }

  p->ptr = pp;

  return 0;
}
/*!
 * Releases the memory allocated by srslte_ldpc_rm_tx_init().
 *
 * It is safe to call this function with a NULL rate matcher, with a rate matcher
 * whose internal instance is NULL, or more than once: the internal pointer is
 * reset after the memory is released.
 *
 * \param[in,out] q Rate matcher to destroy.
 */
void srslte_ldpc_rm_tx_free(srslte_ldpc_rm_t* q)
{
  if (q == NULL || q->ptr == NULL) {
    return;
  }

  struct pRM_tx* qq = q->ptr;
  free(qq->tmp_rm_codeword);
  free(qq);
  q->ptr = NULL;
}
/*!
 * Releases the memory allocated by srslte_ldpc_rm_rx_init_f().
 *
 * It is safe to call this function with a NULL rate dematcher, with a rate
 * dematcher whose internal instance is NULL, or more than once: the internal
 * pointer is reset after the memory is released.
 *
 * \param[in,out] q Rate dematcher to destroy.
 */
void srslte_ldpc_rm_rx_free_f(srslte_ldpc_rm_t* q)
{
  if (q == NULL || q->ptr == NULL) {
    return;
  }

  struct pRM_rx_f* qq = q->ptr;
  free(qq->tmp_rm_symbol);
  free(qq->indices);
  free(qq);
  q->ptr = NULL;
}
/*!
 * Releases the memory allocated by srslte_ldpc_rm_rx_init_s().
 *
 * It is safe to call this function with a NULL rate dematcher, with a rate
 * dematcher whose internal instance is NULL, or more than once: the internal
 * pointer is reset after the memory is released.
 *
 * \param[in,out] q Rate dematcher to destroy.
 */
void srslte_ldpc_rm_rx_free_s(srslte_ldpc_rm_t* q)
{
  if (q == NULL || q->ptr == NULL) {
    return;
  }

  struct pRM_rx_s* qq = q->ptr;
  free(qq->tmp_rm_symbol);
  free(qq->indices);
  free(qq);
  q->ptr = NULL;
}
/*!
 * Releases the memory allocated by srslte_ldpc_rm_rx_init_c().
 *
 * It is safe to call this function with a NULL rate dematcher, with a rate
 * dematcher whose internal instance is NULL, or more than once: the internal
 * pointer is reset after the memory is released.
 *
 * \param[in,out] q Rate dematcher to destroy.
 */
void srslte_ldpc_rm_rx_free_c(srslte_ldpc_rm_t* q)
{
  if (q == NULL || q->ptr == NULL) {
    return;
  }

  struct pRM_rx_c* qq = q->ptr;
  free(qq->tmp_rm_symbol);
  free(qq->indices);
  free(qq);
  q->ptr = NULL;
}
/*!
 * Carries out the rate matching of a codeword: bit selection followed by bit
 * interleaving (the interleaver is skipped when the modulation order is 1).
 *
 * The process is terminated if the rate-matching parameters are not valid.
 *
 * \param[in,out] q        Rate matcher (see srslte_ldpc_rm_tx_init()).
 * \param[in]     input    Codeword to rate-match.
 * \param[out]    output   Rate-matched codeword (E bits).
 * \param[in]     E        Length of the rate-matched codeword.
 * \param[in]     bg       Base graph.
 * \param[in]     ls       Lifting size.
 * \param[in]     rv       Redundancy version.
 * \param[in]     mod_type Modulation type.
 * \param[in]     Nref     Size of the limited buffer.
 * \return 0 on success.
 */
int srslte_ldpc_rm_tx(srslte_ldpc_rm_t*        q,
                      const uint8_t*           input,
                      uint8_t*                 output,
                      const uint32_t           E,
                      const srslte_basegraph_t bg,
                      const uint32_t           ls,
                      const uint8_t            rv,
                      const mod_type_t         mod_type,
                      const uint32_t           Nref)
{
  // set up the parameters; the number of filler bits is not needed here
  if (init_rm(q, E, 0, bg, ls, rv, mod_type, Nref) != 0) {
    perror("rate matcher init");
    exit(-1);
  }

  struct pRM_tx* pp = q->ptr;

  // Without interleaver, the selected bits go straight to the output; otherwise
  // they go through the temporary buffer.
  const int needs_interleaver = (q->mod_order != 1);
  uint8_t*  selected          = needs_interleaver ? pp->tmp_rm_codeword : output;

  bit_selection_rm_tx(input, selected, q->E, q->k0, q->Ncb);

  if (needs_interleaver) {
    bit_interleaver_rm_tx(selected, output, q->E, q->mod_order);
  }

  return 0;
}
/*!
 * Carries out the rate dematching of a sequence of soft bits (float version):
 * bit deinterleaving (skipped when the modulation order is 1) followed by the
 * inverse of the bit selection.
 *
 * The process is terminated if the rate-matching parameters are not valid.
 *
 * \param[in,out] q        Rate dematcher (see srslte_ldpc_rm_rx_init_f()).
 * \param[in]     input    Received soft bits (E values).
 * \param[out]    output   Rate-dematched soft bits (N values).
 * \param[in]     E        Length of the rate-matched codeword.
 * \param[in]     F        Number of filler bits.
 * \param[in]     bg       Base graph.
 * \param[in]     ls       Lifting size.
 * \param[in]     rv       Redundancy version.
 * \param[in]     mod_type Modulation type.
 * \param[in]     Nref     Size of the limited buffer.
 * \return 0 on success.
 */
int srslte_ldpc_rm_rx_f(srslte_ldpc_rm_t*        q,
                        const float*             input,
                        float*                   output,
                        const uint32_t           E,
                        const uint32_t           F,
                        const srslte_basegraph_t bg,
                        const uint32_t           ls,
                        const uint8_t            rv,
                        const mod_type_t         mod_type,
                        const uint32_t           Nref)
{
  if (init_rm(q, E, F, bg, ls, rv, mod_type, Nref) != 0) {
    perror("rate matcher init");
    exit(-1);
  }

  struct pRM_rx_f* pp = q->ptr;

  // the filler bits occupy the last F positions before K - 2 * ls
  uint32_t end_exclude = q->K - 2 * q->ls;
  uint32_t ini_exclude = end_exclude - q->F;

  // Without interleaver, the received symbols are used directly; otherwise they
  // are first deinterleaved into the temporary buffer.
  const float* symbols = input;
  if (q->mod_order != 1) {
    bit_interleaver_rm_rx(input, pp->tmp_rm_symbol, q->E, q->mod_order);
    symbols = pp->tmp_rm_symbol;
  }

  bit_selection_rm_rx(symbols, q->E, output, q->N, pp->indices, ini_exclude, end_exclude, q->k0, q->Ncb);

  return 0;
}
/*!
 * Carries out the rate dematching of a sequence of soft bits (short version):
 * bit deinterleaving (skipped when the modulation order is 1) followed by the
 * inverse of the bit selection.
 *
 * The process is terminated if the rate-matching parameters are not valid.
 *
 * \param[in,out] q        Rate dematcher (see srslte_ldpc_rm_rx_init_s()).
 * \param[in]     input    Received soft bits (E values).
 * \param[out]    output   Rate-dematched soft bits (N values).
 * \param[in]     E        Length of the rate-matched codeword.
 * \param[in]     F        Number of filler bits.
 * \param[in]     bg       Base graph.
 * \param[in]     ls       Lifting size.
 * \param[in]     rv       Redundancy version.
 * \param[in]     mod_type Modulation type.
 * \param[in]     Nref     Size of the limited buffer.
 * \return 0 on success.
 */
int srslte_ldpc_rm_rx_s(srslte_ldpc_rm_t*        q,
                        const int16_t*           input,
                        int16_t*                 output,
                        const uint32_t           E,
                        const uint32_t           F,
                        const srslte_basegraph_t bg,
                        const uint32_t           ls,
                        const uint8_t            rv,
                        const mod_type_t         mod_type,
                        const uint32_t           Nref)
{
  if (init_rm(q, E, F, bg, ls, rv, mod_type, Nref) != 0) {
    perror("rate matcher init");
    exit(-1);
  }

  // The instance was created by srslte_ldpc_rm_rx_init_s(): access it through the
  // matching structure type, so that the buffer already has the right type and no
  // pointer cast is needed.
  struct pRM_rx_s* pp            = q->ptr;
  int16_t*         tmp_rm_symbol = pp->tmp_rm_symbol;
  uint32_t*        indices       = pp->indices;

  // the filler bits occupy the last F positions before K - 2 * ls
  uint32_t end_exclude = q->K - 2 * q->ls;
  uint32_t ini_exclude = end_exclude - q->F;

  if (q->mod_order == 1) { // interleaver can be skipped
    bit_selection_rm_rx_s(input, q->E, output, q->N, indices, ini_exclude, end_exclude, q->k0, q->Ncb);
  } else {
    bit_interleaver_rm_rx_s(input, tmp_rm_symbol, q->E, q->mod_order);
    bit_selection_rm_rx_s(tmp_rm_symbol, q->E, output, q->N, indices, ini_exclude, end_exclude, q->k0, q->Ncb);
  }

  return 0;
}
/*!
 * Carries out the rate dematching of a sequence of soft bits (char version):
 * bit deinterleaving (skipped when the modulation order is 1) followed by the
 * inverse of the bit selection.
 *
 * The process is terminated if the rate-matching parameters are not valid.
 *
 * \param[in,out] q        Rate dematcher (see srslte_ldpc_rm_rx_init_c()).
 * \param[in]     input    Received soft bits (E values).
 * \param[out]    output   Rate-dematched soft bits (N values).
 * \param[in]     E        Length of the rate-matched codeword.
 * \param[in]     F        Number of filler bits.
 * \param[in]     bg       Base graph.
 * \param[in]     ls       Lifting size.
 * \param[in]     rv       Redundancy version.
 * \param[in]     mod_type Modulation type.
 * \param[in]     Nref     Size of the limited buffer.
 * \return 0 on success.
 */
int srslte_ldpc_rm_rx_c(srslte_ldpc_rm_t*        q,
                        const int8_t*            input,
                        int8_t*                  output,
                        const uint32_t           E,
                        const uint32_t           F,
                        const srslte_basegraph_t bg,
                        const uint32_t           ls,
                        const uint8_t            rv,
                        const mod_type_t         mod_type,
                        const uint32_t           Nref)
{
  if (init_rm(q, E, F, bg, ls, rv, mod_type, Nref) != 0) {
    perror("rate matcher init");
    exit(-1);
  }

  struct pRM_rx_c* pp = q->ptr;

  // the filler bits occupy the last F positions before K - 2 * ls
  uint32_t end_exclude = q->K - 2 * q->ls;
  uint32_t ini_exclude = end_exclude - q->F;

  // Without interleaver, the received symbols are used directly; otherwise they
  // are first deinterleaved into the temporary buffer.
  const int8_t* symbols = input;
  if (q->mod_order != 1) {
    bit_interleaver_rm_rx_c(input, pp->tmp_rm_symbol, q->E, q->mod_order);
    symbols = pp->tmp_rm_symbol;
  }

  bit_selection_rm_rx_c(symbols, q->E, output, q->N, pp->indices, ini_exclude, end_exclude, q->k0, q->Ncb);

  return 0;
}

@ -0,0 +1,193 @@
#
# Copyright 2013-2020 Software Radio Systems Limited
#
# This file is part of srsLTE
#
# srsLTE is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
#
# srsLTE is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# A copy of the GNU Affero General Public License can be found in
# the LICENSE file in the top-level directory of this distribution
# and at http://www.gnu.org/licenses/.
#
# Test executables that are always built. All of them link against srslte_phy.
add_executable(ldpc_enc_test ldpc_enc_test.c)
target_link_libraries(ldpc_enc_test srslte_phy)
add_executable(ldpc_dec_test ldpc_dec_test.c)
target_link_libraries(ldpc_dec_test srslte_phy)
add_executable(ldpc_dec_s_test ldpc_dec_s_test.c)
target_link_libraries(ldpc_dec_s_test srslte_phy)
add_executable(ldpc_dec_c_test ldpc_dec_c_test.c)
target_link_libraries(ldpc_dec_c_test srslte_phy)
add_executable(ldpc_chain_test ldpc_chain_test.c)
target_link_libraries(ldpc_chain_test srslte_phy)
add_executable(ldpc_rm_test ldpc_rm_test.c)
target_link_libraries(ldpc_rm_test srslte_phy)
add_executable(ldpc_rm_chain_test ldpc_rm_chain_test.c)
target_link_libraries(ldpc_rm_chain_test srslte_phy)
# The binaries are placed in tests/ldpc, which is also the working directory of the tests.
set_target_properties(ldpc_enc_test ldpc_dec_test ldpc_dec_s_test ldpc_dec_c_test ldpc_chain_test ldpc_rm_test ldpc_rm_chain_test
PROPERTIES
RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/tests/ldpc"
)
# AVX2 test executables, built only when SIMD is not disabled.
if(NOT DISABLE_SIMD)
add_executable(ldpc_enc_avx2_test ldpc_enc_avx2_test.c)
target_link_libraries(ldpc_enc_avx2_test srslte_phy)
add_executable(ldpc_dec_avx2_test ldpc_dec_avx2_test.c)
target_link_libraries(ldpc_dec_avx2_test srslte_phy)
set_target_properties(ldpc_dec_avx2_test ldpc_enc_avx2_test
PROPERTIES
RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/tests/ldpc"
)
endif(NOT DISABLE_SIMD)
# Copy the reference files (examplesBG1.dat and examplesBG2.dat) next to the test binaries.
add_custom_command(
OUTPUT ${PROJECT_SOURCE_DIR}/tests/ldpc/examplesBG1.dat
${PROJECT_SOURCE_DIR}/tests/ldpc/examplesBG2.dat
COMMAND cp examplesBG?.dat ${PROJECT_SOURCE_DIR}/tests/ldpc
DEPENDS examplesBG1.dat examplesBG2.dat
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMENT "Copying LDPC test reference files"
VERBATIM
)
add_custom_target(ldpc_test_files
DEPENDS ${PROJECT_SOURCE_DIR}/tests/ldpc/examplesBG1.dat
${PROJECT_SOURCE_DIR}/tests/ldpc/examplesBG2.dat
)
# Building these tests triggers the copy of the reference files.
add_dependencies(ldpc_dec_test ldpc_test_files)
add_dependencies(ldpc_enc_test ldpc_test_files)
add_dependencies(ldpc_rm_test ldpc_test_files)
### Test LDPC libs
# Registers one test per lifting size passed as argument. The name prefix and the
# command are taken from the variables test_name and test_command, which must be
# set by the caller before invoking the function.
function(ldpc_unit_tests)
  foreach(lifting_size IN LISTS ARGN)
    add_test(
      NAME ${test_name}-LS${lifting_size}
      COMMAND ${test_command} -l${lifting_size}
      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/ldpc
    )
  endforeach()
endfunction()
# Lifting sizes under test, one row per set.
set(lifting_sizes
2 4 8 16 32 64 128 256
3 6 12 24 48 96 192 384
5 10 20 40 80 160 320
7 14 28 56 112 224
9 18 36 72 144 288
11 22 44 88 176 352
13 26 52 104 208
15 30 60 120 240
)
# Encoder tests: both base graphs, all lifting sizes.
set(test_name LDPC-ENC-BG1)
set(test_command ldpc_enc_test -b1)
ldpc_unit_tests(${lifting_sizes})
set(test_name LDPC-ENC-BG2)
set(test_command ldpc_enc_test -b2)
ldpc_unit_tests(${lifting_sizes})
# Decoder tests: both base graphs, all lifting sizes.
set(test_name LDPC-DEC-BG1)
set(test_command ldpc_dec_test -b1)
ldpc_unit_tests(${lifting_sizes})
set(test_name LDPC-DEC-BG2)
set(test_command ldpc_dec_test -b2)
ldpc_unit_tests(${lifting_sizes})
# End-to-end test, run with its default parameters.
add_test(NAME LDPC-chain COMMAND ldpc_chain_test)
### Test LDPC Rate Matching UNIT tests
# NOTE(review): mod_order is not referenced anywhere in the visible part of this file.
set(mod_order
1 2 4 6 8
)
# Registers the rate-matching unit tests for each lifting size passed as argument.
#
# For every combination of lifting size, base graph, redundancy version and
# modulation, three rate-matched lengths E are tested (approximately N, N/2 and
# 2N, each one rounded so that it is a multiple of the modulation order), with
# two buffer sizes each (N and N/2). The name prefix and the command are taken
# from the variables test_name and test_command, which must be set by the caller.
function(ldpc_rm_unit_tests)
  set(listMod 0 1 2 3 4)
  set(listModord 1 2 4 6 8)
  set(listrv 0 1 2 3)
  set(listbg 1 2)
  set(listbaseN 66 50)

  # foreach(... RANGE stop) includes the stop value: use "length - 1" as the last index
  list(LENGTH listMod modlen)
  list(LENGTH listrv rvlen)
  list(LENGTH listbg bglen)
  math(EXPR modlen "${modlen} - 1")
  math(EXPR rvlen "${rvlen} - 1")
  math(EXPR bglen "${bglen} - 1")

  foreach(i IN LISTS ARGN)
    foreach(numbg RANGE ${bglen}) #bg
      list(GET listbg ${numbg} bgval)
      list(GET listbaseN ${numbg} baseNval)
      math(EXPR N "${i} * ${baseNval}")
      math(EXPR halfN "${N} / 2")

      foreach(numrv RANGE ${rvlen}) #rv
        list(GET listrv ${numrv} rvval)

        foreach(nummod RANGE ${modlen})
          list(GET listMod ${nummod} Modval)
          list(GET listModord ${nummod} Ordval)

          # base rate: largest multiple of the modulation order not exceeding N
          math(EXPR E_base "${N} - (${N} % ${Ordval})")
          # twice the rate: half of the largest multiple of 2 * Ordval not exceeding N
          math(EXPR E_double "${Ordval} * (${N} - (${N} % (2 * ${Ordval}))) / (2 * ${Ordval})")
          # half the rate: largest multiple of the modulation order not exceeding 2 * N
          math(EXPR E_half "2 * ${N} - (2 * ${N} % ${Ordval})")

          foreach(E IN ITEMS ${E_base} ${E_double} ${E_half})
            # full-size buffer first, then half-size buffer
            foreach(M IN ITEMS ${N} ${halfN})
              add_test(
                NAME ${test_name}-b${bgval}-l${i}-e${E}-f10-m${Modval}-r${rvval}-M${M}
                COMMAND ${test_command} -b${bgval} -l${i} -e${E} -f10 -m${Modval} -r${rvval} -M${M}
                WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/ldpc
              )
            endforeach()
          endforeach()
        endforeach()
      endforeach()
    endforeach()
  endforeach()
endfunction()
# Rate-matching unit tests for all lifting sizes. The base graph is selected inside
# ldpc_rm_unit_tests() through the -b option, not through test_command.
set(test_name LDPC-RM-BG1)
set(test_command ldpc_rm_test)
ldpc_rm_unit_tests(${lifting_sizes})
# End-to-end rate-matching test, run with its default parameters.
add_test(NAME LDPC-RM-chain COMMAND ldpc_rm_chain_test)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@ -0,0 +1,526 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_chain_test.c
* \brief End-to-end test for LDPC encoder and decoder.
*
* A batch of example messages is randomly generated, encoded, 2-PAM modulated,
* sent over an AWGN channel and, finally, decoded by all three types of
* decoder. Transmitted and received messages are compared to estimate the WER.
* Multiple batches are simulated if the number of errors is not significant
* enough.
*
* A fixed number of filler bits (F) equal to encoder.bgK - 5 is added to the message.
* If the test is called with -e0 (no rate matching), rm_length is set to finalN - F,
* so that after rate dematching (which includes the filler bits) the input to the decoder has length N.
*
* Basic rate-matching simulations can be carried out by setting the codeword
* length to a value smaller than the base one.
*
*
* Synopsis: **ldpc_chain_test [options]**
*
* Options:
* - **-b \<number\>** Base Graph (1 or 2. Default 1).
* - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
* - **-e \<number\>** Codeword length after rate matching (set to 0 [default] for full rate).
* - **-s \<number\>** SNR in dB (Default 0 dB).
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "srslte/phy/channel/ch_awgn.h"
#include "srslte/phy/fec/ldpc/ldpc_common.h"
#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
#include "srslte/phy/utils/debug.h"
#include "srslte/phy/utils/random.h"
#include "srslte/phy/utils/vector.h"
srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2). Set with option -b. */
int lift_size = 2; /*!< \brief Lifting Size. Set with option -l. */
int rm_length = 0; /*!< \brief Codeword length after rate matching (0 means no rate matching). Set with option -e. */
int finalK; /*!< \brief Number of uncoded bits (message length). */
int finalN; /*!< \brief Number of coded bits (codeword length). */
float snr = 0; /*!< \brief Signal-to-Noise Ratio [dB]. Set with option -s. */
#define BATCH_SIZE 100 /*!< \brief Number of codewords in a batch. */
#define MAX_N_BATCH 10000 /*!< \brief Max number of simulated batches. */
#define REQ_ERRORS 100 /*!< \brief Minimum number of errors for a significant simulation. */
#define MS_SF 0.75f /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
/*!
 * \brief Prints test help when a wrong parameter is passed as input.
 *
 * The default values shown are the current values of the corresponding global
 * variables.
 *
 * \param[in] prog Name of the program, as it should appear in the usage line.
 */
void usage(char* prog)
{
printf("Usage: %s [-bX] [-lX] [-eX] [-sX]\n", prog);
// base_graph is stored zero-based: add 1 to show the base graph number
printf("\t-b Base Graph [(1 or 2) Default %d]\n", base_graph + 1);
printf("\t-l Lifting Size [Default %d]\n", lift_size);
printf("\t-e Word length after rate matching [Default %d (no rate matching, only filler-bits are extracted)]\n",
rm_length);
printf("\t-s SNR [dB, Default %.1f dB]\n", snr);
}
/*!
 * \brief Parses the input line.
 *
 * Recognized options are -b (base graph, 1 or 2), -l (lifting size), -e
 * (codeword length after rate matching) and -s (SNR in dB). Any other option
 * prints the usage message and terminates the program.
 *
 * \param[in] argc Number of command-line arguments.
 * \param[in] argv Command-line arguments.
 */
void parse_args(int argc, char** argv)
{
  for (;;) {
    int opt = getopt(argc, argv, "b:l:e:s:");
    if (opt == -1) {
      break;
    }

    if (opt == 'b') {
      // the base graph is stored zero-based
      base_graph = (int)strtol(optarg, NULL, 10) - 1;
    } else if (opt == 'l') {
      lift_size = (int)strtol(optarg, NULL, 10);
    } else if (opt == 'e') {
      rm_length = (int)strtol(optarg, NULL, 10);
    } else if (opt == 's') {
      snr = strtod(optarg, NULL);
    } else {
      usage(argv[0]);
      exit(-1);
    }
  }
}
/*!
* \brief Prints decoder statistics.
*/
void print_decoder(char* title, int n_batches, int n_errors, double elapsed_time);
/*!
* \brief Main test function.
*/
int main(int argc, char** argv)
{
uint8_t* messages_true = NULL;
uint8_t* messages_sim_f = NULL;
uint8_t* messages_sim_s = NULL;
uint8_t* messages_sim_c = NULL;
uint8_t* messages_sim_c_flood = NULL;
uint8_t* messages_sim_avx = NULL;
uint8_t* messages_sim_avx_flood = NULL;
uint8_t* codewords = NULL;
float* symbols_rm = NULL;
float* symbols = NULL;
int16_t* symbols_s = NULL;
int8_t* symbols_c = NULL;
int i = 0;
int j = 0;
parse_args(argc, argv);
// create an LDPC encoder
srslte_ldpc_encoder_t encoder;
#ifdef LV_HAVE_AVX2
if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_AVX2, base_graph, lift_size) != 0) {
perror("encoder init");
exit(-1);
}
#else // no AVX2
if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_C, base_graph, lift_size) != 0) {
perror("encoder init");
exit(-1);
}
#endif // LV_HAVE_AVX2
// create an LDPC decoder (float)
srslte_ldpc_decoder_t decoder_f;
if (srslte_ldpc_decoder_init(&decoder_f, SRSLTE_LDPC_DECODER_F, base_graph, lift_size, MS_SF) != 0) {
perror("decoder init");
exit(-1);
}
// create an LDPC decoder (16 bit)
srslte_ldpc_decoder_t decoder_s;
if (srslte_ldpc_decoder_init(&decoder_s, SRSLTE_LDPC_DECODER_S, base_graph, lift_size, MS_SF) != 0) {
perror("decoder init");
exit(-1);
}
// create an LDPC decoder (8 bit)
srslte_ldpc_decoder_t decoder_c;
if (srslte_ldpc_decoder_init(&decoder_c, SRSLTE_LDPC_DECODER_C, base_graph, lift_size, MS_SF) != 0) {
perror("decoder init");
exit(-1);
}
// create an LDPC decoder (8 bit, flooded)
srslte_ldpc_decoder_t decoder_c_flood;
if (srslte_ldpc_decoder_init(&decoder_c_flood, SRSLTE_LDPC_DECODER_C_FLOOD, base_graph, lift_size, MS_SF) != 0) {
perror("decoder init");
exit(-1);
}
#ifdef LV_HAVE_AVX2
// create an LDPC decoder (8 bit, AVX2 version)
srslte_ldpc_decoder_t decoder_avx;
if (srslte_ldpc_decoder_init(&decoder_avx, SRSLTE_LDPC_DECODER_C_AVX2, base_graph, lift_size, MS_SF) != 0) {
perror("decoder init");
exit(-1);
}
// create an LDPC decoder (8 bit, flooded scheduling, AVX2 version)
srslte_ldpc_decoder_t decoder_avx_flood;
if (srslte_ldpc_decoder_init(&decoder_avx_flood, SRSLTE_LDPC_DECODER_C_AVX2_FLOOD, base_graph, lift_size, MS_SF) !=
0) {
perror("decoder init");
exit(-1);
}
#endif // LV_HAVE_AVX2
// create a random generator
srslte_random_t random_gen = srslte_random_init(0);
uint32_t F = encoder.bgK - 5; // This value is arbitrary
if (rm_length == 0) {
rm_length = finalN - F;
}
printf("Test LDPC chain:\n");
printf(" Base Graph -> BG%d\n", encoder.bg + 1);
printf(" Lifting Size -> %d\n", encoder.ls);
printf(" Protograph -> M = %d, N = %d, K = %d\n", encoder.bgM, encoder.bgN, encoder.bgK);
printf(" Lifted graph -> M = %d, N = %d, K = %d\n", encoder.liftM, encoder.liftN, encoder.liftK);
printf(" Base code rate -> K/(N-2) = %d/%d = 1/%d\n",
encoder.liftK,
encoder.liftN - 2 * lift_size,
encoder.bg == BG1 ? 3 : 5);
printf("\n Codeword length after rate matching -> E = %d\n", rm_length);
printf(" Final code rate -> (K-F)/E = (%d - %d)/%d = %.3f\n",
encoder.liftK,
F,
rm_length,
1.0 * (encoder.liftK - F) / rm_length);
printf("\n Signal-to-Noise Ratio -> %.2f dB\n", snr);
finalK = encoder.liftK;
finalN = encoder.liftN - 2 * lift_size;
messages_true = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
messages_sim_f = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
messages_sim_s = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
messages_sim_c = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
messages_sim_c_flood = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
messages_sim_avx = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
messages_sim_avx_flood = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
codewords = malloc(finalN * BATCH_SIZE * sizeof(uint8_t));
symbols_rm = malloc((rm_length + F) * BATCH_SIZE * sizeof(float));
symbols = malloc(finalN * BATCH_SIZE * sizeof(float));
symbols_s = malloc(finalN * BATCH_SIZE * sizeof(int16_t));
symbols_c = malloc(finalN * BATCH_SIZE * sizeof(int8_t));
if (!messages_true || !messages_sim_f || !messages_sim_s || !messages_sim_c || //
!messages_sim_avx || !messages_sim_c_flood || !messages_sim_avx_flood || //
!codewords || !symbols || !symbols_s || !symbols_c) {
perror("malloc");
exit(-1);
}
int i_bit = 0;
int i_batch = 0;
struct timeval t[3];
double elapsed_time_enc = 0;
double elapsed_time_dec_f = 0;
double elapsed_time_dec_s = 0;
double elapsed_time_dec_c = 0;
double elapsed_time_dec_c_flood = 0;
double elapsed_time_dec_avx = 0;
double elapsed_time_dec_avx_flood = 0;
int n_error_words_f = 0;
int n_error_words_s = 0;
int n_error_words_c = 0;
int n_error_words_c_flood = 0;
int n_error_words_avx = 0;
int n_error_words_avx_flood = 0;
float noise_std_dev = srslte_convert_dB_to_amplitude(-snr);
int16_t inf15 = (1U << 14U) - 1;
float gain_s = inf15 * noise_std_dev / 20 / (1 / noise_std_dev + 2);
int8_t inf7 = (1U << 6U) - 1;
float gain_c = inf7 * noise_std_dev / 8 / (1 / noise_std_dev + 2);
printf("\nBatch:\n ");
while (((n_error_words_f < REQ_ERRORS) || (n_error_words_s < REQ_ERRORS) || (n_error_words_c < REQ_ERRORS)) &&
(i_batch < MAX_N_BATCH)) {
i_batch++;
if (!(i_batch % 10)) {
printf("%8d", i_batch);
if (!(i_batch % 90)) {
printf("\n ");
}
}
/* generate data_tx */
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK - F; j++) {
messages_true[i * finalK + j] = srslte_random_uniform_int_dist(random_gen, 0, 1);
}
for (; j < finalK; j++) {
messages_true[i * finalK + j] = FILLER_BIT;
}
}
// compute the number of symbols that we need to encode/decode: closest multiple of
// the lifting size that is larger than rm_length
// Extra F bits are added since filler-bits are not part of the rm_length
int n_useful_symbols =
(rm_length + F) % lift_size ? ((rm_length + F) / lift_size + 1) * lift_size : (rm_length + F);
printf("n_useful_symbols = %d\n", n_useful_symbols);
// Encode messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_encoder_encode(
&encoder, messages_true + j * finalK, codewords + j * finalN, finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_enc += t[0].tv_sec + 1e-6 * t[0].tv_usec;
// Modulate codewords and match rate (puncturing)
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < rm_length + F; j++) {
symbols_rm[i * (rm_length + F) + j] =
(codewords[i * finalN + j] == FILLER_BIT) ? INFINITY : 1 - 2 * codewords[i * finalN + j];
}
}
// Apply AWGN
srslte_ch_awgn_f(symbols_rm, symbols_rm, noise_std_dev, BATCH_SIZE * (rm_length + F));
// Convert symbols into LLRs
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < rm_length + F; j++) { //+F becouse we have alredy considered fillerbits when modulating.
symbols[i * finalN + j] = symbols_rm[i * (rm_length + F) + j] * 2 / (noise_std_dev * noise_std_dev);
}
// the rest of symbols are undetermined, set LLR to 0
for (; j < finalN; j++) {
symbols[i * finalN + j] = 0;
}
}
//////// Floating point
// Recover messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_decoder_decode_f(&decoder_f, symbols + j * finalN, messages_sim_f + j * finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_dec_f += t[0].tv_sec + 1e-6 * t[0].tv_usec;
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK; j++) {
i_bit = i * finalK + j;
if (messages_sim_f[i_bit] != (1U & messages_true[i_bit])) {
n_error_words_f++;
break;
}
}
}
//////// Fixed point - 16 bit
// Quantize LLRs with 16 bits
srslte_vec_quant_fs(symbols, symbols_s, gain_s, 0, inf15, BATCH_SIZE * finalN);
// Recover messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_decoder_decode_s(&decoder_s, symbols_s + j * finalN, messages_sim_s + j * finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_dec_s += t[0].tv_sec + 1e-6 * t[0].tv_usec;
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK; j++) {
i_bit = i * finalK + j;
if (messages_sim_s[i_bit] != (1U & messages_true[i_bit])) {
n_error_words_s++;
break;
}
}
}
//////// Fixed point - 8 bit
// Quantize LLRs with 8 bits
srslte_vec_quant_fc(symbols, symbols_c, gain_c, 0, inf7, BATCH_SIZE * finalN);
// Recover messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_decoder_decode_c(&decoder_c, symbols_c + j * finalN, messages_sim_c + j * finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_dec_c += t[0].tv_sec + 1e-6 * t[0].tv_usec;
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK; j++) {
i_bit = i * finalK + j;
if (messages_sim_c[i_bit] != (1U & messages_true[i_bit])) {
n_error_words_c++;
break;
}
}
}
//////// Fixed point - 8 bit, flooded scheduling
// Recover messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_decoder_decode_c(
&decoder_c_flood, symbols_c + j * finalN, messages_sim_c_flood + j * finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_dec_c_flood += t[0].tv_sec + 1e-6 * t[0].tv_usec;
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK; j++) {
i_bit = i * finalK + j;
if (messages_sim_c_flood[i_bit] != (1U & messages_true[i_bit])) {
n_error_words_c_flood++;
break;
}
}
}
#ifdef LV_HAVE_AVX2
//////// Fixed point - 8 bit - AVX2 version
// Recover messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_decoder_decode_c(
&decoder_avx, symbols_c + j * finalN, messages_sim_avx + j * finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_dec_avx += t[0].tv_sec + 1e-6 * t[0].tv_usec;
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK; j++) {
i_bit = i * finalK + j;
if (messages_sim_avx[i_bit] != (1U & messages_true[i_bit])) {
n_error_words_avx++;
break;
}
}
}
//////// Fixed point - 8 bit, flooded scheduling - AVX2 version
// Recover messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_decoder_decode_c(
&decoder_avx_flood, symbols_c + j * finalN, messages_sim_avx_flood + j * finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_dec_avx_flood += t[0].tv_sec + 1e-6 * t[0].tv_usec;
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK; j++) {
i_bit = i * finalK + j;
if (messages_sim_avx_flood[i_bit] != (1U & messages_true[i_bit])) {
n_error_words_avx_flood++;
break;
}
}
}
#endif // LV_HAVE_AVX2
}
printf("\nEstimated throughput encoder:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
i_batch * BATCH_SIZE / elapsed_time_enc,
i_batch * BATCH_SIZE * finalK / elapsed_time_enc,
i_batch * BATCH_SIZE * finalN / elapsed_time_enc);
print_decoder("FLOATING POINT", i_batch, n_error_words_f, elapsed_time_dec_f);
print_decoder("FIXED POINT (16 bits)", i_batch, n_error_words_s, elapsed_time_dec_s);
print_decoder("FIXED POINT (8 bits)", i_batch, n_error_words_c, elapsed_time_dec_c);
print_decoder("FIXED POINT (8 bits, flooded scheduling)", i_batch, n_error_words_c_flood, elapsed_time_dec_c_flood);
#ifdef LV_HAVE_AVX2
print_decoder("FIXED POINT (8 bits - AVX2)", i_batch, n_error_words_avx, elapsed_time_dec_avx);
print_decoder(
"FIXED POINT (8 bits, flooded scheduling - AVX2)", i_batch, n_error_words_avx_flood, elapsed_time_dec_avx_flood);
#endif // LV_HAVE_AVX2
if (n_error_words_s > 10 * n_error_words_f) {
perror("16-bit performance too low!");
exit(-1);
}
if (n_error_words_c > 10 * n_error_words_f) {
perror("8-bit performance too low!");
exit(-1);
}
printf("\nTest completed successfully!\n\n");
free(symbols_c);
free(symbols_s);
free(symbols);
free(codewords);
free(messages_sim_avx);
free(messages_sim_c_flood);
free(messages_sim_c);
free(messages_sim_s);
free(messages_sim_f);
free(messages_true);
srslte_random_free(random_gen);
#ifdef LV_HAVE_AVX2
srslte_ldpc_decoder_free(&decoder_avx);
#endif // LV_HAVE_AVX2
srslte_ldpc_decoder_free(&decoder_c_flood);
srslte_ldpc_decoder_free(&decoder_c);
srslte_ldpc_decoder_free(&decoder_s);
srslte_ldpc_decoder_free(&decoder_f);
srslte_ldpc_encoder_free(&encoder);
}
/*!
 * \brief Prints the word error rate and the throughput figures of one decoder.
 *
 * \param[in] title        Name of the decoder, printed as section header.
 * \param[in] n_batches    Number of batches that have been processed (BATCH_SIZE codewords each).
 * \param[in] n_errors     Number of codewords that were decoded wrongly.
 * \param[in] elapsed_time Accumulated decoding time used as throughput denominator
 *                         (summed from tv_sec and 1e-6 * tv_usec by the caller).
 */
void print_decoder(char* title, int n_batches, int n_errors, double elapsed_time)
{
  // Count the words in double precision: the products n_batches * BATCH_SIZE * finalK
  // and n_batches * BATCH_SIZE * finalN can exceed INT_MAX when evaluated as int.
  const double n_words = (double)n_batches * BATCH_SIZE;

  printf("\n**** %s ****", title);
  printf("\nEstimated word error rate:\n %e (%d errors)\n", (double)n_errors / n_batches / BATCH_SIZE, n_errors);
  printf("Estimated throughput decoder:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
         n_words / elapsed_time,
         n_words * finalK / elapsed_time,
         n_words * finalN / elapsed_time);
}

@ -0,0 +1,236 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_avx2_test.c
* \brief Unit test for the LDPC decoder working with 8-bit integer-valued LLRs (AVX2 implementation).
*
* It decodes a batch of example codewords and compares the resulting messages
* with the expected ones. Reference messages and codewords are provided in
* files **examplesBG1.dat** and **examplesBG2.dat**.
*
* Synopsis: **ldpc_dec_avx2_test [options]**
*
* Options:
* - **-b \<number\>** Base Graph (1 or 2. Default 1).
* - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
* - **-x \<number\>** Message scheduling (0 for layered, 1 for flooded. Default 0).
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "srslte/phy/fec/ldpc/ldpc_common.h"
#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
#include "srslte/phy/utils/debug.h"
srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2). */
int lift_size = 2; /*!< \brief Lifting Size. */
int finalK; /*!< \brief Number of uncoded bits (message length). */
int finalN; /*!< \brief Number of coded bits (codeword length). */
int scheduling = 0; /*!< \brief Message scheduling (0 for layered, 1 for flooded). */
#define NOF_MESSAGES 10 /*!< \brief Number of codewords in the test. */
/*!
 * \brief Prints test help when a wrong parameter is passed as input.
 *
 * \param[in] prog Program name shown in the usage line.
 */
void usage(char* prog)
{
  printf("Usage: %s [-bX] [-lX] [-xX]\n", prog);
  printf("\t-b Base Graph [(1 or 2) Default %d]\n", base_graph + 1);
  printf("\t-l Lifting Size [Default %d]\n", lift_size);
  // scheduling is an integer flag: %d prints its value, whereas %c would emit a raw NUL byte for 0
  printf("\t-x Scheduling [(0 layered or 1 flooded) Default %d]\n", scheduling);
}
/*!
 * \brief Parses the input line.
 *
 * Recognized options are -b (base graph, stored minus one), -l (lifting size)
 * and -x (scheduling). Any other option prints the usage text and terminates
 * the program with exit code -1.
 *
 * \param[in] argc Number of command-line arguments.
 * \param[in] argv Command-line arguments.
 */
void parse_args(int argc, char** argv)
{
  for (;;) {
    const int flag = getopt(argc, argv, "b:l:x:");
    if (flag == -1) {
      break; // no more options
    }

    if (flag == 'b') {
      base_graph = (int)strtol(optarg, NULL, 10) - 1;
    } else if (flag == 'l') {
      lift_size = (int)strtol(optarg, NULL, 10);
    } else if (flag == 'x') {
      scheduling = (int)strtol(optarg, NULL, 10);
    } else {
      usage(argv[0]);
      exit(-1);
    }
  }
}
/*!
 * \brief Reports a malformed or truncated example file and terminates the program.
 */
static void get_examples_fail(void)
{
  printf("Something went wrong while reading example file.\n");
  exit(-1);
}

/*!
 * \brief Reads the example file.
 *
 * Looks for the header line "ls<lift_size>msgs", reads NOF_MESSAGES messages of
 * finalK characters each, checks that the following line is "ls<lift_size>cwds"
 * and then reads NOF_MESSAGES codewords of finalN characters each. A '-'
 * character is stored as FILLER_BIT, any other character \c c as <tt>c - '0'</tt>.
 *
 * The program terminates with exit code -1 if the file ends before all the data
 * has been read or if the codeword header is not found where expected.
 *
 * \param[out] messages  Buffer for NOF_MESSAGES * finalK message entries.
 * \param[out] codewords Buffer for NOF_MESSAGES * finalN codeword entries.
 * \param[in]  ex_file   Example file, already open for reading.
 */
void get_examples(uint8_t* messages, //
                  uint8_t* codewords,
                  FILE*    ex_file)
{
  char mstr[15]; // message string
  char cstr[15]; // codeword string
  char tmp[15];
  int  rc = 0;
  int  i  = 0;
  int  j  = 0;

  // snprintf never writes past the end of the buffers, whatever the value of lift_size
  snprintf(mstr, sizeof(mstr), "ls%dmsgs", lift_size);
  snprintf(cstr, sizeof(cstr), "ls%dcwds", lift_size);

  // look for the header of the message section
  do {
    do {
      rc = fgetc(ex_file);
    } while ((rc != 'l') && (rc != EOF));
    if (rc == EOF) {
      get_examples_fail(); // header not in the file: stop instead of looping forever
    }
    tmp[0] = 'l';
    tmp[1] = '\0'; // keeps tmp a valid string when nothing follows the 'l' on this line
    // the field width leaves room for the leading 'l' and the terminating '\0'
    if (fscanf(ex_file, "%13[^\n]", tmp + 1) == EOF) {
      get_examples_fail();
    }
    // discard the rest of the line, newline included
    do {
      rc = fgetc(ex_file);
    } while ((rc != '\n') && (rc != EOF));
  } while (strcmp(tmp, mstr) != 0);

  // read messages
  for (j = 0; j < NOF_MESSAGES; j++) {
    for (i = 0; i < finalK; i++) {
      rc = fgetc(ex_file);
      if (rc == EOF) {
        get_examples_fail();
      }
      messages[j * finalK + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
    }
    fgetc(ex_file); // discard newline
  }

  // the codeword header must follow the messages
  tmp[0] = '\0';
  if (fscanf(ex_file, "%14[^\n]", tmp) == EOF) {
    get_examples_fail();
  }
  if (strcmp(tmp, cstr) != 0) {
    get_examples_fail();
  }
  fgetc(ex_file); // discard newline

  // read codewords
  for (j = 0; j < NOF_MESSAGES; j++) {
    for (i = 0; i < finalN; i++) {
      rc = fgetc(ex_file);
      if (rc == EOF) {
        get_examples_fail();
      }
      codewords[j * finalN + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
    }
    fgetc(ex_file); // discard newline
  }
}
/*!
 * \brief Main test function.
 *
 * Creates an 8-bit AVX2 LDPC decoder (layered if \c scheduling is 0, flooded
 * otherwise), loads the reference messages and codewords from the example
 * file, turns every codeword entry into an LLR (-2 for a 1, +2 for any other
 * value), decodes all codewords and compares the least significant bit of each
 * decoded entry with the reference message. Any failure terminates the program
 * with exit code -1.
 */
int main(int argc, char** argv)
{
  parse_args(argc, argv);

  // decoder flavour selected through the scheduling option
  srslte_ldpc_decoder_type_t dectype = SRSLTE_LDPC_DECODER_C_AVX2_FLOOD;
  if (scheduling == 0) {
    dectype = SRSLTE_LDPC_DECODER_C_AVX2;
  }

  // create an LDPC decoder
  srslte_ldpc_decoder_t decoder;
  if (srslte_ldpc_decoder_init(&decoder, dectype, base_graph, lift_size, 1) != 0) {
    perror("decoder init");
    exit(-1);
  }

  printf("Test LDPC decoder:\n");
  printf(" Base Graph -> BG%d\n", decoder.bg + 1);
  printf(" Lifting Size -> %d\n", decoder.ls);
  printf(" Protograph -> M = %d, N = %d, K = %d\n", decoder.bgM, decoder.bgN, decoder.bgK);
  printf(" Lifted graph -> M = %d, N = %d, K = %d\n", decoder.liftM, decoder.liftN, decoder.liftK);
  printf(" Final code rate -> K/(N-2) = %d/%d = 1/%d\n",
         decoder.liftK,
         decoder.liftN - 2 * lift_size,
         decoder.bg == BG1 ? 3 : 5);
  printf(" Scheduling: %s\n", scheduling ? "flooded" : "layered");

  // message and codeword lengths used by the rest of the test
  finalK = decoder.liftK;
  finalN = decoder.liftN - 2 * lift_size;

  uint8_t* messages_true = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
  uint8_t* messages_sim  = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
  uint8_t* codewords     = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
  int8_t*  symbols       = malloc(finalN * NOF_MESSAGES * sizeof(int8_t));
  if (messages_true == NULL || messages_sim == NULL || codewords == NULL || symbols == NULL) {
    perror("malloc");
    exit(-1);
  }

  char file_name[1000];
  sprintf(file_name, "examplesBG%d.dat", base_graph + 1);
  printf("\nReading example file %s...\n", file_name);

  FILE* ex_file = fopen(file_name, "re");
  if (!ex_file) {
    perror("fopen");
    exit(-1);
  }
  get_examples(messages_true, codewords, ex_file);
  fclose(ex_file);

  // map codeword entries to LLRs: 1 -> -2, anything else -> +2
  for (int k = 0; k < NOF_MESSAGES * finalN; k++) {
    if (codewords[k] == 1) {
      symbols[k] = -2;
    } else {
      symbols[k] = 2;
    }
  }

  printf("\nDecoding test messages...\n");
  struct timeval t[3];
  gettimeofday(&t[1], NULL);
  for (int w = 0; w < NOF_MESSAGES; w++) {
    printf(" codeword %d\n", w);
    srslte_ldpc_decoder_decode_c(&decoder, &symbols[w * finalN], &messages_sim[w * finalK], finalN);
  }
  gettimeofday(&t[2], NULL);
  get_time_interval(t); // presumably leaves t[2] - t[1] in t[0], which is read below
  double elapsed_time = t[0].tv_sec + 1e-6 * t[0].tv_usec;
  printf("Elapsed time: %e s\n", elapsed_time);

  printf("\nVerifing results...\n");
  for (int k = 0; k < NOF_MESSAGES * finalK; k++) {
    // only the least significant bit of each entry is compared
    if (((messages_sim[k] ^ messages_true[k]) & 1U) != 0) {
      perror("wrong!!");
      exit(-1);
    }
  }

  printf("Estimated throughput:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
         NOF_MESSAGES / elapsed_time,
         NOF_MESSAGES * finalK / elapsed_time,
         NOF_MESSAGES * finalN / elapsed_time);
  printf("\nTest completed successfully!\n\n");

  free(symbols);
  free(codewords);
  free(messages_sim);
  free(messages_true);
  srslte_ldpc_decoder_free(&decoder);
  return 0;
}

@ -0,0 +1,235 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_c_test.c
* \brief Unit test for the LDPC decoder working with 8-bit integer-valued LLRs.
*
* It decodes a batch of example codewords and compares the resulting messages
* with the expected ones. Reference messages and codewords are provided in
* files **examplesBG1.dat** and **examplesBG2.dat**.
*
* Synopsis: **ldpc_dec_c_test [options]**
*
* Options:
* - **-b \<number\>** Base Graph (1 or 2. Default 1).
* - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
* - **-x \<number\>** Message scheduling (0 for layered, 1 for flooded. Default 0).
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "srslte/phy/fec/ldpc/ldpc_common.h"
#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
#include "srslte/phy/utils/debug.h"
srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2). */
int lift_size = 2; /*!< \brief Lifting Size. */
int finalK; /*!< \brief Number of uncoded bits (message length). */
int finalN; /*!< \brief Number of coded bits (codeword length). */
int scheduling = 0; /*!< \brief Message scheduling (0 for layered, 1 for flooded). */
#define NOF_MESSAGES 10 /*!< \brief Number of codewords in the test. */
/*!
 * \brief Prints test help when a wrong parameter is passed as input.
 *
 * \param[in] prog Program name shown in the usage line.
 */
void usage(char* prog)
{
  printf("Usage: %s [-bX] [-lX] [-xX]\n", prog);
  printf("\t-b Base Graph [(1 or 2) Default %d]\n", base_graph + 1);
  printf("\t-l Lifting Size [Default %d]\n", lift_size);
  // scheduling is an integer flag: %d prints its value, whereas %c would emit a raw NUL byte for 0
  printf("\t-x Scheduling [(0 layered or 1 flooded) Default %d]\n", scheduling);
}
/*!
 * \brief Parses the input line.
 *
 * Recognized options are -b (base graph, stored minus one), -l (lifting size)
 * and -x (scheduling). Any other option prints the usage text and terminates
 * the program with exit code -1.
 *
 * \param[in] argc Number of command-line arguments.
 * \param[in] argv Command-line arguments.
 */
void parse_args(int argc, char** argv)
{
  for (;;) {
    const int flag = getopt(argc, argv, "b:l:x:");
    if (flag == -1) {
      break; // no more options
    }

    if (flag == 'b') {
      base_graph = (int)strtol(optarg, NULL, 10) - 1;
    } else if (flag == 'l') {
      lift_size = (int)strtol(optarg, NULL, 10);
    } else if (flag == 'x') {
      scheduling = (int)strtol(optarg, NULL, 10);
    } else {
      usage(argv[0]);
      exit(-1);
    }
  }
}
/*!
 * \brief Reports a malformed or truncated example file and terminates the program.
 */
static void get_examples_fail(void)
{
  printf("Something went wrong while reading example file.\n");
  exit(-1);
}

/*!
 * \brief Reads the example file.
 *
 * Looks for the header line "ls<lift_size>msgs", reads NOF_MESSAGES messages of
 * finalK characters each, checks that the following line is "ls<lift_size>cwds"
 * and then reads NOF_MESSAGES codewords of finalN characters each. A '-'
 * character is stored as FILLER_BIT, any other character \c c as <tt>c - '0'</tt>.
 *
 * The program terminates with exit code -1 if the file ends before all the data
 * has been read or if the codeword header is not found where expected.
 *
 * \param[out] messages  Buffer for NOF_MESSAGES * finalK message entries.
 * \param[out] codewords Buffer for NOF_MESSAGES * finalN codeword entries.
 * \param[in]  ex_file   Example file, already open for reading.
 */
void get_examples(uint8_t* messages, //
                  uint8_t* codewords,
                  FILE*    ex_file)
{
  char mstr[15]; // message string
  char cstr[15]; // codeword string
  char tmp[15];
  int  rc = 0;
  int  i  = 0;
  int  j  = 0;

  // snprintf never writes past the end of the buffers, whatever the value of lift_size
  snprintf(mstr, sizeof(mstr), "ls%dmsgs", lift_size);
  snprintf(cstr, sizeof(cstr), "ls%dcwds", lift_size);

  // look for the header of the message section
  do {
    do {
      rc = fgetc(ex_file);
    } while ((rc != 'l') && (rc != EOF));
    if (rc == EOF) {
      get_examples_fail(); // header not in the file: stop instead of looping forever
    }
    tmp[0] = 'l';
    tmp[1] = '\0'; // keeps tmp a valid string when nothing follows the 'l' on this line
    // the field width leaves room for the leading 'l' and the terminating '\0'
    if (fscanf(ex_file, "%13[^\n]", tmp + 1) == EOF) {
      get_examples_fail();
    }
    // discard the rest of the line, newline included
    do {
      rc = fgetc(ex_file);
    } while ((rc != '\n') && (rc != EOF));
  } while (strcmp(tmp, mstr) != 0);

  // read messages
  for (j = 0; j < NOF_MESSAGES; j++) {
    for (i = 0; i < finalK; i++) {
      rc = fgetc(ex_file);
      if (rc == EOF) {
        get_examples_fail();
      }
      messages[j * finalK + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
    }
    fgetc(ex_file); // discard newline
  }

  // the codeword header must follow the messages
  tmp[0] = '\0';
  if (fscanf(ex_file, "%14[^\n]", tmp) == EOF) {
    get_examples_fail();
  }
  if (strcmp(tmp, cstr) != 0) {
    get_examples_fail();
  }
  fgetc(ex_file); // discard newline

  // read codewords
  for (j = 0; j < NOF_MESSAGES; j++) {
    for (i = 0; i < finalN; i++) {
      rc = fgetc(ex_file);
      if (rc == EOF) {
        get_examples_fail();
      }
      codewords[j * finalN + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
    }
    fgetc(ex_file); // discard newline
  }
}
/*!
 * \brief Main test function.
 *
 * Creates an 8-bit LDPC decoder (layered if \c scheduling is 0, flooded
 * otherwise), loads the reference messages and codewords from the example
 * file, turns every codeword entry into an LLR (-2 for a 1, +2 for any other
 * value), decodes all codewords and compares the least significant bit of each
 * decoded entry with the reference message. Any failure terminates the program
 * with exit code -1.
 */
int main(int argc, char** argv)
{
  parse_args(argc, argv);

  // decoder flavour selected through the scheduling option
  srslte_ldpc_decoder_type_t dectype = SRSLTE_LDPC_DECODER_C_FLOOD;
  if (scheduling == 0) {
    dectype = SRSLTE_LDPC_DECODER_C;
  }

  // create an LDPC decoder
  srslte_ldpc_decoder_t decoder;
  if (srslte_ldpc_decoder_init(&decoder, dectype, base_graph, lift_size, 1) != 0) {
    perror("decoder init");
    exit(-1);
  }

  printf("Test LDPC decoder:\n");
  printf(" Base Graph -> BG%d\n", decoder.bg + 1);
  printf(" Lifting Size -> %d\n", decoder.ls);
  printf(" Protograph -> M = %d, N = %d, K = %d\n", decoder.bgM, decoder.bgN, decoder.bgK);
  printf(" Lifted graph -> M = %d, N = %d, K = %d\n", decoder.liftM, decoder.liftN, decoder.liftK);
  printf(" Final code rate -> K/(N-2) = %d/%d = 1/%d\n",
         decoder.liftK,
         decoder.liftN - 2 * lift_size,
         decoder.bg == BG1 ? 3 : 5);
  printf(" Scheduling: %s\n", scheduling ? "flooded" : "layered");

  // message and codeword lengths used by the rest of the test
  finalK = decoder.liftK;
  finalN = decoder.liftN - 2 * lift_size;

  uint8_t* messages_true = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
  uint8_t* messages_sim  = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
  uint8_t* codewords     = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
  int8_t*  symbols       = malloc(finalN * NOF_MESSAGES * sizeof(int8_t));
  if (messages_true == NULL || messages_sim == NULL || codewords == NULL || symbols == NULL) {
    perror("malloc");
    exit(-1);
  }

  char file_name[1000];
  sprintf(file_name, "examplesBG%d.dat", base_graph + 1);
  printf("\nReading example file %s...\n", file_name);

  FILE* ex_file = fopen(file_name, "re");
  if (!ex_file) {
    perror("fopen");
    exit(-1);
  }
  get_examples(messages_true, codewords, ex_file);
  fclose(ex_file);

  // map codeword entries to LLRs: 1 -> -2, anything else -> +2
  for (int k = 0; k < NOF_MESSAGES * finalN; k++) {
    if (codewords[k] == 1) {
      symbols[k] = -2;
    } else {
      symbols[k] = 2;
    }
  }

  printf("\nDecoding test messages...\n");
  struct timeval t[3];
  gettimeofday(&t[1], NULL);
  for (int w = 0; w < NOF_MESSAGES; w++) {
    printf(" codeword %d\n", w);
    srslte_ldpc_decoder_decode_c(&decoder, &symbols[w * finalN], &messages_sim[w * finalK], finalN);
  }
  gettimeofday(&t[2], NULL);
  get_time_interval(t); // presumably leaves t[2] - t[1] in t[0], which is read below
  double elapsed_time = t[0].tv_sec + 1e-6 * t[0].tv_usec;
  printf("Elapsed time: %e s\n", elapsed_time);

  printf("\nVerifing results...\n");
  for (int k = 0; k < NOF_MESSAGES * finalK; k++) {
    // only the least significant bit of each entry is compared
    if (((messages_sim[k] ^ messages_true[k]) & 1U) != 0) {
      perror("wrong!!");
      exit(-1);
    }
  }

  printf("Estimated throughput:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
         NOF_MESSAGES / elapsed_time,
         NOF_MESSAGES * finalK / elapsed_time,
         NOF_MESSAGES * finalN / elapsed_time);
  printf("\nTest completed successfully!\n\n");

  free(symbols);
  free(codewords);
  free(messages_sim);
  free(messages_true);
  srslte_ldpc_decoder_free(&decoder);
  return 0;
}

@ -0,0 +1,227 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_s_test.c
* \brief Unit test for the LDPC decoder working with 16-bit integer-valued LLRs.
*
* It decodes a batch of example codewords and compares the resulting messages
* with the expected ones. Reference messages and codewords are provided in
* files **examplesBG1.dat** and **examplesBG2.dat**.
*
* Synopsis: **ldpc_dec_s_test [options]**
*
* Options:
* - **-b \<number\>** Base Graph (1 or 2. Default 1).
* - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "srslte/phy/fec/ldpc/ldpc_common.h"
#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
#include "srslte/phy/utils/debug.h"
srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2). */
int lift_size = 2; /*!< \brief Lifting Size. */
int finalK; /*!< \brief Number of uncoded bits (message length). */
int finalN; /*!< \brief Number of coded bits (codeword length). */
#define NOF_MESSAGES 10 /*!< \brief Number of codewords in the test. */
/*!
 * \brief Prints the command-line help, including the current default values.
 *
 * Called when an unknown option is passed as input.
 *
 * \param[in] prog Program name shown in the usage line.
 */
void usage(char* prog)
{
  // one call, three lines of output
  printf("Usage: %s [-bX] [-lX]\n"
         "\t-b Base Graph [(1 or 2) Default %d]\n"
         "\t-l Lifting Size [Default %d]\n",
         prog,
         base_graph + 1,
         lift_size);
}
/*!
 * \brief Parses the input line.
 *
 * Recognized options are -b (base graph, stored minus one) and -l (lifting
 * size). Any other option prints the usage text and terminates the program
 * with exit code -1.
 *
 * \param[in] argc Number of command-line arguments.
 * \param[in] argv Command-line arguments.
 */
void parse_args(int argc, char** argv)
{
  for (;;) {
    const int flag = getopt(argc, argv, "b:l:");
    if (flag == -1) {
      break; // no more options
    }

    if (flag == 'b') {
      base_graph = (int)strtol(optarg, NULL, 10) - 1;
    } else if (flag == 'l') {
      lift_size = (int)strtol(optarg, NULL, 10);
    } else {
      usage(argv[0]);
      exit(-1);
    }
  }
}
/*!
 * \brief Reports a malformed or truncated example file and terminates the program.
 */
static void get_examples_fail(void)
{
  printf("Something went wrong while reading example file.\n");
  exit(-1);
}

/*!
 * \brief Reads the example file.
 *
 * Looks for the header line "ls<lift_size>msgs", reads NOF_MESSAGES messages of
 * finalK characters each, checks that the following line is "ls<lift_size>cwds"
 * and then reads NOF_MESSAGES codewords of finalN characters each. A '-'
 * character is stored as FILLER_BIT, any other character \c c as <tt>c - '0'</tt>.
 *
 * The program terminates with exit code -1 if the file ends before all the data
 * has been read or if the codeword header is not found where expected.
 *
 * \param[out] messages  Buffer for NOF_MESSAGES * finalK message entries.
 * \param[out] codewords Buffer for NOF_MESSAGES * finalN codeword entries.
 * \param[in]  ex_file   Example file, already open for reading.
 */
void get_examples(uint8_t* messages, //
                  uint8_t* codewords,
                  FILE*    ex_file)
{
  char mstr[15]; // message string
  char cstr[15]; // codeword string
  char tmp[15];
  int  rc = 0;
  int  i  = 0;
  int  j  = 0;

  // snprintf never writes past the end of the buffers, whatever the value of lift_size
  snprintf(mstr, sizeof(mstr), "ls%dmsgs", lift_size);
  snprintf(cstr, sizeof(cstr), "ls%dcwds", lift_size);

  // look for the header of the message section
  do {
    do {
      rc = fgetc(ex_file);
    } while ((rc != 'l') && (rc != EOF));
    if (rc == EOF) {
      get_examples_fail(); // header not in the file: stop instead of looping forever
    }
    tmp[0] = 'l';
    tmp[1] = '\0'; // keeps tmp a valid string when nothing follows the 'l' on this line
    // the field width leaves room for the leading 'l' and the terminating '\0'
    if (fscanf(ex_file, "%13[^\n]", tmp + 1) == EOF) {
      get_examples_fail();
    }
    // discard the rest of the line, newline included
    do {
      rc = fgetc(ex_file);
    } while ((rc != '\n') && (rc != EOF));
  } while (strcmp(tmp, mstr) != 0);

  // read messages
  for (j = 0; j < NOF_MESSAGES; j++) {
    for (i = 0; i < finalK; i++) {
      rc = fgetc(ex_file);
      if (rc == EOF) {
        get_examples_fail();
      }
      messages[j * finalK + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
    }
    fgetc(ex_file); // discard newline
  }

  // the codeword header must follow the messages
  tmp[0] = '\0';
  if (fscanf(ex_file, "%14[^\n]", tmp) == EOF) {
    get_examples_fail();
  }
  if (strcmp(tmp, cstr) != 0) {
    get_examples_fail();
  }
  fgetc(ex_file); // discard newline

  // read codewords
  for (j = 0; j < NOF_MESSAGES; j++) {
    for (i = 0; i < finalN; i++) {
      rc = fgetc(ex_file);
      if (rc == EOF) {
        get_examples_fail();
      }
      codewords[j * finalN + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
    }
    fgetc(ex_file); // discard newline
  }
}
/*!
 * \brief Main test function.
 *
 * Creates a 16-bit LDPC decoder, loads the reference messages and codewords
 * from the example file, turns every codeword entry into an LLR (-50 for a 1,
 * +50 for any other value), decodes all codewords and compares the least
 * significant bit of each decoded entry with the reference message. Any
 * failure terminates the program with exit code -1.
 */
int main(int argc, char** argv)
{
  parse_args(argc, argv);

  // create an LDPC decoder
  srslte_ldpc_decoder_t decoder;
  if (srslte_ldpc_decoder_init(&decoder, SRSLTE_LDPC_DECODER_S, base_graph, lift_size, 1) != 0) {
    perror("decoder init");
    exit(-1);
  }

  printf("Test LDPC decoder:\n");
  printf(" Base Graph -> BG%d\n", decoder.bg + 1);
  printf(" Lifting Size -> %d\n", decoder.ls);
  printf(" Protograph -> M = %d, N = %d, K = %d\n", decoder.bgM, decoder.bgN, decoder.bgK);
  printf(" Lifted graph -> M = %d, N = %d, K = %d\n", decoder.liftM, decoder.liftN, decoder.liftK);
  printf(" Final code rate -> K/(N-2) = %d/%d = 1/%d\n",
         decoder.liftK,
         decoder.liftN - 2 * lift_size,
         decoder.bg == BG1 ? 3 : 5);

  // message and codeword lengths used by the rest of the test
  finalK = decoder.liftK;
  finalN = decoder.liftN - 2 * lift_size;

  uint8_t* messages_true = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
  uint8_t* messages_sim  = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
  uint8_t* codewords     = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
  int16_t* symbols       = malloc(finalN * NOF_MESSAGES * sizeof(int16_t));
  if (messages_true == NULL || messages_sim == NULL || codewords == NULL || symbols == NULL) {
    perror("malloc");
    exit(-1);
  }

  char file_name[1000];
  sprintf(file_name, "examplesBG%d.dat", base_graph + 1);
  printf("\nReading example file %s...\n", file_name);

  FILE* ex_file = fopen(file_name, "re");
  if (!ex_file) {
    perror("fopen");
    exit(-1);
  }
  get_examples(messages_true, codewords, ex_file);
  fclose(ex_file);

  // map codeword entries to LLRs: 1 -> -50, anything else -> +50
  for (int k = 0; k < NOF_MESSAGES * finalN; k++) {
    if (codewords[k] == 1) {
      symbols[k] = -50;
    } else {
      symbols[k] = 50;
    }
  }

  printf("\nDecoding test messages...\n");
  struct timeval t[3];
  gettimeofday(&t[1], NULL);
  for (int w = 0; w < NOF_MESSAGES; w++) {
    printf(" codeword %d\n", w);
    srslte_ldpc_decoder_decode_s(&decoder, &symbols[w * finalN], &messages_sim[w * finalK], finalN);
  }
  gettimeofday(&t[2], NULL);
  get_time_interval(t); // presumably leaves t[2] - t[1] in t[0], which is read below
  double elapsed_time = t[0].tv_sec + 1e-6 * t[0].tv_usec;
  printf("Elapsed time: %e s\n", elapsed_time);

  printf("\nVerifing results...\n");
  for (int k = 0; k < NOF_MESSAGES * finalK; k++) {
    // only the least significant bit of each entry is compared
    if (((messages_sim[k] ^ messages_true[k]) & 1U) != 0) {
      perror("wrong!!");
      exit(-1);
    }
  }

  printf("Estimated throughput:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
         NOF_MESSAGES / elapsed_time,
         NOF_MESSAGES * finalK / elapsed_time,
         NOF_MESSAGES * finalN / elapsed_time);
  printf("\nTest completed successfully!\n\n");

  free(symbols);
  free(codewords);
  free(messages_sim);
  free(messages_true);
  srslte_ldpc_decoder_free(&decoder);
  return 0;
}

@ -0,0 +1,227 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_dec_test.c
* \brief Unit test for the LDPC decoder working with float-valued LLRs.
*
* It decodes a batch of example codewords and compares the resulting messages
* with the expected ones. Reference messages and codewords are provided in
* files **examplesBG1.dat** and **examplesBG2.dat**.
*
* Synopsis: **ldpc_dec_test [options]**
*
* Options:
* - **-b \<number\>** Base Graph (1 or 2. Default 1).
* - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "srslte/phy/fec/ldpc/ldpc_common.h"
#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
#include "srslte/phy/utils/debug.h"
srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2). */
int lift_size = 2; /*!< \brief Lifting Size. */
int finalK; /*!< \brief Number of uncoded bits (message length). */
int finalN; /*!< \brief Number of coded bits (codeword length). */
#define NOF_MESSAGES 10 /*!< \brief Number of codewords in the test. */
/*!
 * \brief Prints the command-line help, including the current default values.
 *
 * Called when an unknown option is passed as input.
 *
 * \param[in] prog Program name shown in the usage line.
 */
void usage(char* prog)
{
  // one call, three lines of output
  printf("Usage: %s [-bX] [-lX]\n"
         "\t-b Base Graph [(1 or 2) Default %d]\n"
         "\t-l Lifting Size [Default %d]\n",
         prog,
         base_graph + 1,
         lift_size);
}
/*!
 * \brief Parses the input line.
 *
 * Recognized options are -b (base graph, stored minus one) and -l (lifting
 * size). Any other option prints the usage text and terminates the program
 * with exit code -1.
 *
 * \param[in] argc Number of command-line arguments.
 * \param[in] argv Command-line arguments.
 */
void parse_args(int argc, char** argv)
{
  for (;;) {
    const int flag = getopt(argc, argv, "b:l:");
    if (flag == -1) {
      break; // no more options
    }

    if (flag == 'b') {
      base_graph = (int)strtol(optarg, NULL, 10) - 1;
    } else if (flag == 'l') {
      lift_size = (int)strtol(optarg, NULL, 10);
    } else {
      usage(argv[0]);
      exit(-1);
    }
  }
}
/*!
 * \brief Reports a malformed or truncated example file and terminates the program.
 */
static void get_examples_fail(void)
{
  printf("Something went wrong while reading example file.\n");
  exit(-1);
}

/*!
 * \brief Reads the example file.
 *
 * Looks for the header line "ls<lift_size>msgs", reads NOF_MESSAGES messages of
 * finalK characters each, checks that the following line is "ls<lift_size>cwds"
 * and then reads NOF_MESSAGES codewords of finalN characters each. A '-'
 * character is stored as FILLER_BIT, any other character \c c as <tt>c - '0'</tt>.
 *
 * The program terminates with exit code -1 if the file ends before all the data
 * has been read or if the codeword header is not found where expected.
 *
 * \param[out] messages  Buffer for NOF_MESSAGES * finalK message entries.
 * \param[out] codewords Buffer for NOF_MESSAGES * finalN codeword entries.
 * \param[in]  ex_file   Example file, already open for reading.
 */
void get_examples(uint8_t* messages, //
                  uint8_t* codewords,
                  FILE*    ex_file)
{
  char mstr[15]; // message string
  char cstr[15]; // codeword string
  char tmp[15];
  int  rc = 0;
  int  i  = 0;
  int  j  = 0;

  // snprintf never writes past the end of the buffers, whatever the value of lift_size
  snprintf(mstr, sizeof(mstr), "ls%dmsgs", lift_size);
  snprintf(cstr, sizeof(cstr), "ls%dcwds", lift_size);

  // look for the header of the message section
  do {
    do {
      rc = fgetc(ex_file);
    } while ((rc != 'l') && (rc != EOF));
    if (rc == EOF) {
      get_examples_fail(); // header not in the file: stop instead of looping forever
    }
    tmp[0] = 'l';
    tmp[1] = '\0'; // keeps tmp a valid string when nothing follows the 'l' on this line
    // the field width leaves room for the leading 'l' and the terminating '\0'
    if (fscanf(ex_file, "%13[^\n]", tmp + 1) == EOF) {
      get_examples_fail();
    }
    // discard the rest of the line, newline included
    do {
      rc = fgetc(ex_file);
    } while ((rc != '\n') && (rc != EOF));
  } while (strcmp(tmp, mstr) != 0);

  // read messages
  for (j = 0; j < NOF_MESSAGES; j++) {
    for (i = 0; i < finalK; i++) {
      rc = fgetc(ex_file);
      if (rc == EOF) {
        get_examples_fail();
      }
      messages[j * finalK + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
    }
    fgetc(ex_file); // discard newline
  }

  // the codeword header must follow the messages
  tmp[0] = '\0';
  if (fscanf(ex_file, "%14[^\n]", tmp) == EOF) {
    get_examples_fail();
  }
  if (strcmp(tmp, cstr) != 0) {
    get_examples_fail();
  }
  fgetc(ex_file); // discard newline

  // read codewords
  for (j = 0; j < NOF_MESSAGES; j++) {
    for (i = 0; i < finalN; i++) {
      rc = fgetc(ex_file);
      if (rc == EOF) {
        get_examples_fail();
      }
      codewords[j * finalN + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
    }
    fgetc(ex_file); // discard newline
  }
}
/*!
 * \brief Main test function.
 *
 * Steps:
 *  -# parse the command line and create a float-LLR LDPC decoder;
 *  -# load NOF_MESSAGES reference messages and codewords from
 *     examplesBG<b>.dat (b = base graph number);
 *  -# map every codeword symbol to a strong LLR (symbol equal to 1 -> -50, any
 *     other value, filler bits included, -> +50);
 *  -# decode all codewords and compare the least significant bit of each
 *     decoded symbol with the reference message.
 *
 * \return 0 if all messages are recovered; the program exits with -1 on any failure.
 */
int main(int argc, char** argv)
{
  uint8_t* messages_true = NULL;
  uint8_t* messages_sim  = NULL;
  uint8_t* codewords     = NULL;
  float*   symbols       = NULL;
  int      i             = 0;
  int      j             = 0;
  FILE*    ex_file       = NULL;
  char     file_name[1000];

  parse_args(argc, argv);

  // create an LDPC decoder
  srslte_ldpc_decoder_t decoder;
  if (srslte_ldpc_decoder_init(&decoder, SRSLTE_LDPC_DECODER_F, base_graph, lift_size, 1) != 0) {
    perror("decoder init");
    exit(-1);
  }

  printf("Test LDPC decoder:\n");
  printf(" Base Graph -> BG%d\n", decoder.bg + 1);
  printf(" Lifting Size -> %d\n", decoder.ls);
  printf(" Protograph -> M = %d, N = %d, K = %d\n", decoder.bgM, decoder.bgN, decoder.bgK);
  printf(" Lifted graph -> M = %d, N = %d, K = %d\n", decoder.liftM, decoder.liftN, decoder.liftK);
  printf(" Final code rate -> K/(N-2) = %d/%d = 1/%d\n",
         decoder.liftK,
         decoder.liftN - 2 * lift_size,
         decoder.bg == BG1 ? 3 : 5);

  // the first two lifted variable nodes are not part of the stored codeword
  finalK = decoder.liftK;
  finalN = decoder.liftN - 2 * lift_size;

  messages_true = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
  messages_sim  = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
  codewords     = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
  symbols       = malloc(finalN * NOF_MESSAGES * sizeof(float));
  if (!messages_true || !messages_sim || !codewords || !symbols) {
    perror("malloc");
    exit(-1);
  }

  snprintf(file_name, sizeof(file_name), "examplesBG%d.dat", base_graph + 1);
  printf("\nReading example file %s...\n", file_name);
  ex_file = fopen(file_name, "re");
  if (ex_file == NULL) {
    perror("fopen");
    exit(-1);
  }
  get_examples(messages_true, codewords, ex_file);
  fclose(ex_file);

  // noiseless, high-confidence LLRs: bit 1 -> negative, everything else -> positive
  for (i = 0; i < NOF_MESSAGES * finalN; i++) {
    symbols[i] = codewords[i] == 1 ? -50 : 50;
  }

  printf("\nDecoding test messages...\n");
  struct timeval t[3];
  gettimeofday(&t[1], NULL);
  for (j = 0; j < NOF_MESSAGES; j++) {
    printf(" codeword %d\n", j);
    srslte_ldpc_decoder_decode_f(&decoder, symbols + j * finalN, messages_sim + j * finalK, finalN);
  }
  gettimeofday(&t[2], NULL);
  get_time_interval(t); // t[0] is read below as the interval between t[1] and t[2]
  double elapsed_time = t[0].tv_sec + 1e-6 * t[0].tv_usec;
  printf("Elapsed time: %e s\n", elapsed_time);

  printf("\nVerifing results...\n");
  for (i = 0; i < NOF_MESSAGES * finalK; i++) {
    // only the least significant bit is compared
    if ((1U & messages_sim[i]) != (1U & messages_true[i])) {
      // not an errno-related failure: perror() would append a misleading system message
      fprintf(stderr, "wrong!! (decoded message symbol %d differs from the reference)\n", i);
      exit(-1);
    }
  }

  printf("Estimated throughput:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
         NOF_MESSAGES / elapsed_time,
         NOF_MESSAGES * finalK / elapsed_time,
         NOF_MESSAGES * finalN / elapsed_time);

  printf("\nTest completed successfully!\n\n");

  free(symbols);
  free(codewords);
  free(messages_sim);
  free(messages_true);
  srslte_ldpc_decoder_free(&decoder);

  return 0;
}

@ -0,0 +1,226 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_enc_avx2_test.c
* \brief Unit test for the LDPC encoder (SIMD-optimized version).
*
* It encodes a batch of example messages and compares the resulting codewords
* with the expected ones. Reference messages and codewords are provided in
* files **examplesBG1.dat** and **examplesBG2.dat**.
*
* Synopsis: **ldpc_enc_avx2_test [options]**
*
* Options:
* - **-b \<number\>** Base Graph (1 or 2. Default 1).
* - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "srslte/phy/fec/ldpc/ldpc_common.h"
#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
#include "srslte/phy/utils/debug.h"
srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2); option -b stores (value - 1) here. */
int lift_size = 2; /*!< \brief Lifting Size; overwritten by option -l. */
int finalK; /*!< \brief Number of uncoded bits (message length); set in main() from the encoder's liftK. */
int finalN; /*!< \brief Number of coded bits (codeword length); set in main() to liftN - 2 * lift_size. */
#define NOF_MESSAGES 10 /*!< \brief Number of codewords in the test (rows read from each section of the example file). */
#define NOF_REPS 1000 /*!< \brief Number of times each message is encoded (for computing throughput). */
/*!
 * \brief Shows the command-line synopsis of the test.
 *
 * The values printed as "Default" are the current contents of the globals
 * \c base_graph (shown 1-based) and \c lift_size.
 *
 * \param[in] prog Program name inserted in the synopsis line.
 */
void usage(char* prog)
{
  const int bg_number  = base_graph + 1; // the enum is 0-based, the option is 1-based
  const int ls_default = lift_size;

  printf("Usage: %s [-bX] [-lX]\n", prog);
  printf("\t-b Base Graph [(1 or 2) Default %d]\n", bg_number);
  printf("\t-l Lifting Size [Default %d]\n", ls_default);
}
/*!
 * \brief Reads the command-line options into the test globals.
 *
 * Option -b sets \c base_graph (given value minus one), option -l sets
 * \c lift_size. Any other option prints the usage text and terminates the
 * program with exit(-1).
 *
 * \param[in] argc Number of command-line arguments.
 * \param[in] argv Command-line arguments.
 */
void parse_args(int argc, char** argv)
{
  for (;;) {
    const int flag = getopt(argc, argv, "b:l:");
    if (flag == -1) {
      return; // no more options
    }
    if (flag == 'b') {
      base_graph = (int)strtol(optarg, NULL, 10) - 1;
    } else if (flag == 'l') {
      lift_size = (int)strtol(optarg, NULL, 10);
    } else {
      usage(argv[0]);
      exit(-1);
    }
  }
}
/*!
 * \brief Reports a malformed/truncated example file and terminates the test.
 */
static void examples_read_error(void)
{
  printf("Something went wrong while reading example file.\n");
  exit(-1);
}

/*!
 * \brief Reads NOF_MESSAGES rows of \p row_length one-character symbols.
 *
 * Character '-' is stored as FILLER_BIT, any other character c as (c - '0').
 * The character following each row (the newline) is discarded.
 */
static void read_example_rows(uint8_t* rows, int row_length, FILE* ex_file)
{
  for (int j = 0; j < NOF_MESSAGES; j++) {
    for (int i = 0; i < row_length; i++) {
      int rc = fgetc(ex_file);
      if (rc == EOF) {
        examples_read_error(); // truncated file: do not store EOF as data
      }
      rows[j * row_length + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
    }
    fgetc(ex_file); // discard newline
  }
}

/*!
 * \brief Reads the example file.
 *
 * Looks for the header line "ls<lift_size>msgs", reads NOF_MESSAGES messages of
 * \c finalK symbols each, checks that the following line is the header
 * "ls<lift_size>cwds" and finally reads NOF_MESSAGES codewords of \c finalN
 * symbols each.
 *
 * The program is terminated with exit(-1) if the file ends before all data has
 * been read or if an expected header is missing.
 *
 * \param[out] messages  Buffer for NOF_MESSAGES * finalK message symbols.
 * \param[out] codewords Buffer for NOF_MESSAGES * finalN codeword symbols.
 * \param[in]  ex_file   Example file, already open for reading.
 */
void get_examples(uint8_t* messages, //
                  uint8_t* codewords,
                  FILE* ex_file)
{
  char mstr[15]; // message header string
  char cstr[15]; // codeword header string
  char tmp[15];

  int mlen = snprintf(mstr, sizeof(mstr), "ls%dmsgs", lift_size);
  int clen = snprintf(cstr, sizeof(cstr), "ls%dcwds", lift_size);
  if (mlen < 0 || (size_t)mlen >= sizeof(mstr) || clen < 0 || (size_t)clen >= sizeof(cstr)) {
    examples_read_error(); // lifting size does not fit in a header string
  }

  // look for the message header of the requested lifting size
  do {
    int rc = 0;
    do {
      rc = fgetc(ex_file);
      if (rc == EOF) {
        examples_read_error(); // header not present: avoid spinning forever on EOF
      }
    } while (rc != 'l');
    tmp[0] = 'l';
    tmp[1] = '\0'; // keeps tmp a valid string when the rest of the line is empty
    // at most 13 more characters fit in tmp after the leading 'l'
    if (fscanf(ex_file, "%13[^\n]", tmp + 1) == EOF) {
      examples_read_error();
    }
    fgetc(ex_file); // discard newline
  } while (strcmp(tmp, mstr) != 0);

  // read messages
  read_example_rows(messages, finalK, ex_file);

  // the codeword header must follow immediately
  tmp[0] = '\0';
  if (fscanf(ex_file, "%14[^\n]", tmp) != 1 || strcmp(tmp, cstr) != 0) {
    examples_read_error();
  }
  fgetc(ex_file); // discard newline

  // read codewords
  read_example_rows(codewords, finalN, ex_file);
}
/*!
 * \brief Main test function.
 *
 * Steps:
 *  -# parse the command line and create an LDPC encoder of type
 *     SRSLTE_LDPC_ENCODER_AVX2;
 *  -# load NOF_MESSAGES reference messages and codewords from
 *     examplesBG<b>.dat (b = base graph number);
 *  -# encode every message NOF_REPS times, accumulating the elapsed time;
 *  -# compare the generated codewords with the reference ones.
 *
 * \return 0 if all codewords match; the program exits with -1 on any failure.
 */
int main(int argc, char** argv)
{
  uint8_t* messages       = NULL;
  uint8_t* codewords_true = NULL;
  uint8_t* codewords_sim  = NULL;
  int      i              = 0;
  int      j              = 0;
  int      l              = 0;
  FILE*    ex_file        = NULL;
  char     file_name[1000];

  parse_args(argc, argv);

  // create an LDPC encoder
  srslte_ldpc_encoder_t encoder;
  if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_AVX2, base_graph, lift_size) != 0) {
    perror("encoder init");
    exit(-1);
  }

  printf("Test LDPC encoder:\n");
  printf(" Base Graph -> BG%d\n", encoder.bg + 1);
  printf(" Lifting Size -> %d\n", encoder.ls);
  printf(" Protograph -> M = %d, N = %d, K = %d\n", encoder.bgM, encoder.bgN, encoder.bgK);
  printf(" Lifted graph -> M = %d, N = %d, K = %d\n", encoder.liftM, encoder.liftN, encoder.liftK);
  printf(" Final code rate -> K/(N-2) = %d/%d = 1/%d\n",
         encoder.liftK,
         encoder.liftN - 2 * lift_size,
         encoder.bg == BG1 ? 3 : 5);

  // the first two lifted variable nodes are not part of the stored codeword
  finalK = encoder.liftK;
  finalN = encoder.liftN - 2 * lift_size;

  messages       = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
  codewords_true = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
  codewords_sim  = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
  if (!messages || !codewords_true || !codewords_sim) {
    perror("malloc");
    exit(-1);
  }

  snprintf(file_name, sizeof(file_name), "examplesBG%d.dat", base_graph + 1);
  printf("\nReading example file %s...\n", file_name);
  ex_file = fopen(file_name, "re");
  if (ex_file == NULL) {
    perror("fopen");
    exit(-1);
  }
  get_examples(messages, codewords_true, ex_file);
  fclose(ex_file);

  printf("\nEncoding test messages...\n");
  struct timeval t[3];
  double         elapsed_time = 0;
  for (j = 0; j < NOF_MESSAGES; j++) {
    printf(" codeword %d\n", j);
    gettimeofday(&t[1], NULL);
    // the same message is encoded repeatedly only to obtain a measurable duration
    for (l = 0; l < NOF_REPS; l++) {
      srslte_ldpc_encoder_encode(&encoder, messages + j * finalK, codewords_sim + j * finalN, finalK, finalN);
    }
    gettimeofday(&t[2], NULL);
    get_time_interval(t); // t[0] is read below as the interval between t[1] and t[2]
    elapsed_time += t[0].tv_sec + 1e-6 * t[0].tv_usec;
  }
  printf("Elapsed time: %e s\n", elapsed_time / NOF_REPS);

  printf("\nVerifing results...\n");
  for (i = 0; i < NOF_MESSAGES * finalN; i++) {
    if (codewords_sim[i] != codewords_true[i]) {
      // not an errno-related failure: perror() would append a misleading system message
      fprintf(stderr, "wrong!! (encoded symbol %d differs from the reference)\n", i);
      exit(-1);
    }
  }

  printf("Estimated throughput:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
         NOF_MESSAGES / (elapsed_time / NOF_REPS),
         NOF_MESSAGES * finalK / (elapsed_time / NOF_REPS),
         NOF_MESSAGES * finalN / (elapsed_time / NOF_REPS));

  printf("\nTest completed successfully!\n\n");

  free(codewords_sim);
  free(codewords_true);
  free(messages);
  srslte_ldpc_encoder_free(&encoder);

  return 0;
}

@ -0,0 +1,226 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_enc_test.c
* \brief Unit test for the LDPC encoder.
*
* It encodes a batch of example messages and compares the resulting codewords
* with the expected ones. Reference messages and codewords are provided in
* files **examplesBG1.dat** and **examplesBG2.dat**.
*
* Synopsis: **ldpc_enc_test [options]**
*
* Options:
* - **-b \<number\>** Base Graph (1 or 2. Default 1).
* - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "srslte/phy/fec/ldpc/ldpc_common.h"
#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
#include "srslte/phy/utils/debug.h"
srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2); option -b stores (value - 1) here. */
int lift_size = 2; /*!< \brief Lifting Size; overwritten by option -l. */
int finalK; /*!< \brief Number of uncoded bits (message length); set in main() from the encoder's liftK. */
int finalN; /*!< \brief Number of coded bits (codeword length); set in main() to liftN - 2 * lift_size. */
#define NOF_MESSAGES 10 /*!< \brief Number of codewords in the test (rows read from each section of the example file). */
#define NOF_REPS 1000 /*!< \brief Number of times each message is encoded (for computing throughput). */
/*!
 * \brief Shows the command-line synopsis of the test.
 *
 * The values printed as "Default" are the current contents of the globals
 * \c base_graph (shown 1-based) and \c lift_size.
 *
 * \param[in] prog Program name inserted in the synopsis line.
 */
void usage(char* prog)
{
  const int bg_number  = base_graph + 1; // the enum is 0-based, the option is 1-based
  const int ls_default = lift_size;

  printf("Usage: %s [-bX] [-lX]\n", prog);
  printf("\t-b Base Graph [(1 or 2) Default %d]\n", bg_number);
  printf("\t-l Lifting Size [Default %d]\n", ls_default);
}
/*!
 * \brief Reads the command-line options into the test globals.
 *
 * Option -b sets \c base_graph (given value minus one), option -l sets
 * \c lift_size. Any other option prints the usage text and terminates the
 * program with exit(-1).
 *
 * \param[in] argc Number of command-line arguments.
 * \param[in] argv Command-line arguments.
 */
void parse_args(int argc, char** argv)
{
  for (;;) {
    const int flag = getopt(argc, argv, "b:l:");
    if (flag == -1) {
      return; // no more options
    }
    if (flag == 'b') {
      base_graph = (int)strtol(optarg, NULL, 10) - 1;
    } else if (flag == 'l') {
      lift_size = (int)strtol(optarg, NULL, 10);
    } else {
      usage(argv[0]);
      exit(-1);
    }
  }
}
/*!
 * \brief Reports a malformed/truncated example file and terminates the test.
 */
static void examples_read_error(void)
{
  printf("Something went wrong while reading example file.\n");
  exit(-1);
}

/*!
 * \brief Reads NOF_MESSAGES rows of \p row_length one-character symbols.
 *
 * Character '-' is stored as FILLER_BIT, any other character c as (c - '0').
 * The character following each row (the newline) is discarded.
 */
static void read_example_rows(uint8_t* rows, int row_length, FILE* ex_file)
{
  for (int j = 0; j < NOF_MESSAGES; j++) {
    for (int i = 0; i < row_length; i++) {
      int rc = fgetc(ex_file);
      if (rc == EOF) {
        examples_read_error(); // truncated file: do not store EOF as data
      }
      rows[j * row_length + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
    }
    fgetc(ex_file); // discard newline
  }
}

/*!
 * \brief Reads the example file.
 *
 * Looks for the header line "ls<lift_size>msgs", reads NOF_MESSAGES messages of
 * \c finalK symbols each, checks that the following line is the header
 * "ls<lift_size>cwds" and finally reads NOF_MESSAGES codewords of \c finalN
 * symbols each.
 *
 * The program is terminated with exit(-1) if the file ends before all data has
 * been read or if an expected header is missing.
 *
 * \param[out] messages  Buffer for NOF_MESSAGES * finalK message symbols.
 * \param[out] codewords Buffer for NOF_MESSAGES * finalN codeword symbols.
 * \param[in]  ex_file   Example file, already open for reading.
 */
void get_examples(uint8_t* messages, //
                  uint8_t* codewords,
                  FILE* ex_file)
{
  char mstr[15]; // message header string
  char cstr[15]; // codeword header string
  char tmp[15];

  int mlen = snprintf(mstr, sizeof(mstr), "ls%dmsgs", lift_size);
  int clen = snprintf(cstr, sizeof(cstr), "ls%dcwds", lift_size);
  if (mlen < 0 || (size_t)mlen >= sizeof(mstr) || clen < 0 || (size_t)clen >= sizeof(cstr)) {
    examples_read_error(); // lifting size does not fit in a header string
  }

  // look for the message header of the requested lifting size
  do {
    int rc = 0;
    do {
      rc = fgetc(ex_file);
      if (rc == EOF) {
        examples_read_error(); // header not present: avoid spinning forever on EOF
      }
    } while (rc != 'l');
    tmp[0] = 'l';
    tmp[1] = '\0'; // keeps tmp a valid string when the rest of the line is empty
    // at most 13 more characters fit in tmp after the leading 'l'
    if (fscanf(ex_file, "%13[^\n]", tmp + 1) == EOF) {
      examples_read_error();
    }
    fgetc(ex_file); // discard newline
  } while (strcmp(tmp, mstr) != 0);

  // read messages
  read_example_rows(messages, finalK, ex_file);

  // the codeword header must follow immediately
  tmp[0] = '\0';
  if (fscanf(ex_file, "%14[^\n]", tmp) != 1 || strcmp(tmp, cstr) != 0) {
    examples_read_error();
  }
  fgetc(ex_file); // discard newline

  // read codewords
  read_example_rows(codewords, finalN, ex_file);
}
/*!
 * \brief Main test function.
 *
 * Steps:
 *  -# parse the command line and create an LDPC encoder of type
 *     SRSLTE_LDPC_ENCODER_C;
 *  -# load NOF_MESSAGES reference messages and codewords from
 *     examplesBG<b>.dat (b = base graph number);
 *  -# encode every message NOF_REPS times, accumulating the elapsed time;
 *  -# compare the generated codewords with the reference ones.
 *
 * \return 0 if all codewords match; the program exits with -1 on any failure.
 */
int main(int argc, char** argv)
{
  uint8_t* messages       = NULL;
  uint8_t* codewords_true = NULL;
  uint8_t* codewords_sim  = NULL;
  int      i              = 0;
  int      j              = 0;
  int      l              = 0;
  FILE*    ex_file        = NULL;
  char     file_name[1000];

  parse_args(argc, argv);

  // create an LDPC encoder
  srslte_ldpc_encoder_t encoder;
  if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_C, base_graph, lift_size) != 0) {
    perror("encoder init");
    exit(-1);
  }

  printf("Test LDPC encoder:\n");
  printf(" Base Graph -> BG%d\n", encoder.bg + 1);
  printf(" Lifting Size -> %d\n", encoder.ls);
  printf(" Protograph -> M = %d, N = %d, K = %d\n", encoder.bgM, encoder.bgN, encoder.bgK);
  printf(" Lifted graph -> M = %d, N = %d, K = %d\n", encoder.liftM, encoder.liftN, encoder.liftK);
  printf(" Final code rate -> K/(N-2) = %d/%d = 1/%d\n",
         encoder.liftK,
         encoder.liftN - 2 * lift_size,
         encoder.bg == BG1 ? 3 : 5);

  // the first two lifted variable nodes are not part of the stored codeword
  finalK = encoder.liftK;
  finalN = encoder.liftN - 2 * lift_size;

  messages       = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
  codewords_true = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
  codewords_sim  = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
  if (!messages || !codewords_true || !codewords_sim) {
    perror("malloc");
    exit(-1);
  }

  snprintf(file_name, sizeof(file_name), "examplesBG%d.dat", base_graph + 1);
  printf("\nReading example file %s...\n", file_name);
  ex_file = fopen(file_name, "re");
  if (ex_file == NULL) {
    perror("fopen");
    exit(-1);
  }
  get_examples(messages, codewords_true, ex_file);
  fclose(ex_file);

  printf("\nEncoding test messages...\n");
  struct timeval t[3];
  double         elapsed_time = 0;
  for (j = 0; j < NOF_MESSAGES; j++) {
    printf(" codeword %d\n", j);
    gettimeofday(&t[1], NULL);
    // the same message is encoded repeatedly only to obtain a measurable duration
    for (l = 0; l < NOF_REPS; l++) {
      srslte_ldpc_encoder_encode(&encoder, messages + j * finalK, codewords_sim + j * finalN, finalK, finalN);
    }
    gettimeofday(&t[2], NULL);
    get_time_interval(t); // t[0] is read below as the interval between t[1] and t[2]
    elapsed_time += t[0].tv_sec + 1e-6 * t[0].tv_usec;
  }
  printf("Elapsed time: %e s\n", elapsed_time / NOF_REPS);

  printf("\nVerifing results...\n");
  for (i = 0; i < NOF_MESSAGES * finalN; i++) {
    if (codewords_sim[i] != codewords_true[i]) {
      // not an errno-related failure: perror() would append a misleading system message
      fprintf(stderr, "wrong!! (encoded symbol %d differs from the reference)\n", i);
      exit(-1);
    }
  }

  printf("Estimated throughput:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
         NOF_MESSAGES / (elapsed_time / NOF_REPS),
         NOF_MESSAGES * finalK / (elapsed_time / NOF_REPS),
         NOF_MESSAGES * finalN / (elapsed_time / NOF_REPS));

  printf("\nTest completed successfully!\n\n");

  free(codewords_sim);
  free(codewords_true);
  free(messages);
  srslte_ldpc_encoder_free(&encoder);

  return 0;
}

@ -0,0 +1,646 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_rm_chain_test.c
* \brief End-to-end test for LDPC encoder, rate-matcher, rate-dematcher and decoder.
*
* A batch of example messages is randomly generated, encoded, rate-matched, 2-PAM modulated,
* sent over an AWGN channel and, finally, rate-dematched and decoded by all three types of
* decoder. Transmitted and received messages are compared to estimate the WER.
* Multiple batches are simulated if the number of errors is not significant
* enough.
*
*
* Synopsis: **ldpc_rm_chain_test [options]**
*
* Options:
* - **-b \<number\>** Base Graph (1 or 2. Default 1).
* - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
* - **-e \<number\>** Codeword length after rate matching (set to 0 [default] for full rate).
* - **-f \<number\>** Number of filler bits (Default 17).
* - **-r \<number\>** Redundancy version {0-3}.
* - **-m \<number\>** Modulation type BPSK = 0, QPSK =1, QAM16 = 2, QAM64 = 3, QAM256 = 4.
* - **-M \<number\>** Limited buffer size.
* - **-s \<number\>** SNR in dB (Default 0 dB).
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "srslte/phy/channel/ch_awgn.h"
#include "srslte/phy/fec/ldpc/ldpc_common.h"
#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
#include "srslte/phy/fec/ldpc/ldpc_rm.h"
#include "srslte/phy/utils/debug.h"
#include "srslte/phy/utils/random.h"
#include "srslte/phy/utils/vector.h"
srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2); option -b stores (value - 1) here. */
uint32_t lift_size = 2; /*!< \brief Lifting Size (option -l). */
uint32_t rm_length = 0; /*!< \brief Codeword length after rate matching (option -e); 0 is replaced by finalN - F in main(). */
uint32_t F = 22 - 5; /*!< \brief Number of filler bits in each CBS (option -f). */
uint32_t E = 14000; /*!< \brief Rate-matched Codeword size. Not settable from the command line; main() passes E + F as the number of useful symbols to encoder and decoders. */
uint8_t rv = 0; /*!< \brief Redundancy version {0-3} (option -r). */
mod_type_t mod_type = BPSK; /*!< \brief Modulation type: BPSK, QPSK, QAM16, QAM64, QAM256 = 4 (option -m). */
uint32_t Nref = 0; /*!< \brief Limited buffer size (option -M); 0 is replaced by finalN in main(). */
float snr = 0; /*!< \brief Signal-to-Noise Ratio [dB] (option -s). */
int finalK = 0; /*!< \brief Number of uncoded bits (message length, including punctured and filler bits). */
int finalN = 0; /*!< \brief Number of coded bits (codeword length). */
#define BATCH_SIZE 100 /*!< \brief Number of codewords in a batch. */
#define MAX_N_BATCH 10000 /*!< \brief Max number of simulated batches. */
#define REQ_ERRORS 100 /*!< \brief Minimum number of errors for a significant simulation. */
#define MS_SF 0.75f /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
/*!
 * \brief Prints test help when wrong parameter is passed as input.
 *
 * The values shown as "Default" are the current contents of the corresponding
 * global variables.
 *
 * \param[in] prog Program name inserted in the synopsis line.
 */
void usage(char* prog)
{
  // the synopsis lists every option accepted by parse_args(), -s included
  printf("Usage: %s [-bX] [-lX] [-eX] [-fX] [-rX] [-mX] [-MX] [-sX]\n", prog);
  printf("\t-b Base Graph [(1 or 2) Default %d]\n", base_graph + 1);
  // lift_size, rm_length, F and Nref are uint32_t: print them as unsigned
  printf("\t-l Lifting Size [Default %u]\n", lift_size);
  printf("\t-e Word length after rate matching [Default %u (no rate matching i.e. E = N - F)]\n", rm_length);
  printf("\t-f Filler bits size (F) [Default %u]\n", F);
  printf("\t-r Redundancy version (rv) [Default %d]\n", rv);
  printf("\t-m Modulation_type BPSK=0, QPSK=1, 16QAM=2, 64QAM=3, 256QAM = 4 [Default %d]\n", (int)mod_type);
  printf("\t-M Limited buffer size (Nref) [Default = %u (normal buffer Nref = N)]\n", Nref);
  printf("\t-s SNR [dB, Default %.1f dB]\n", snr);
}
/*!
 * \brief Reads the command-line options into the test globals.
 *
 * Recognized options: -b (base graph, stored as value - 1), -l (lifting size),
 * -e (rate-matched length), -f (filler bits), -r (redundancy version),
 * -m (modulation type), -M (limited buffer size) and -s (SNR). Any other
 * option prints the usage text and terminates the program with exit(-1).
 *
 * \param[in] argc Number of command-line arguments.
 * \param[in] argv Command-line arguments.
 */
void parse_args(int argc, char** argv)
{
  for (;;) {
    const int flag = getopt(argc, argv, "b:l:e:f:r:m:M:s:");
    if (flag == -1) {
      return; // no more options
    }
    if (flag == 'b') {
      base_graph = (int)strtol(optarg, NULL, 10) - 1;
    } else if (flag == 'l') {
      lift_size = (int)strtol(optarg, NULL, 10);
    } else if (flag == 'e') {
      rm_length = (int)strtol(optarg, NULL, 10);
    } else if (flag == 'f') {
      F = (uint32_t)strtol(optarg, NULL, 10);
    } else if (flag == 'r') {
      rv = (uint8_t)strtol(optarg, NULL, 10);
    } else if (flag == 'm') {
      mod_type = (mod_type_t)strtol(optarg, NULL, 10);
    } else if (flag == 'M') {
      Nref = (uint32_t)strtol(optarg, NULL, 10);
    } else if (flag == 's') {
      snr = strtod(optarg, NULL);
    } else {
      usage(argv[0]);
      exit(-1);
    }
  }
}
/*!
 * \brief Prints decoder statistics.
 *
 * \param[in] title        Decoder name, printed in the section banner.
 * \param[in] n_batches    Number of simulated batches (of BATCH_SIZE codewords each).
 * \param[in] n_errors     Number of erroneously decoded words; the word error rate
 *                         is reported as n_errors / n_batches / BATCH_SIZE.
 * \param[in] elapsed_time Accumulated decoding time (main() passes a value in seconds).
 */
void print_decoder(char* title, int n_batches, int n_errors, double elapsed_time);
/*!
* \brief Main test function.
*/
int main(int argc, char** argv)
{
uint8_t* messages_true = NULL;
uint8_t* messages_sim_f = NULL;
uint8_t* messages_sim_s = NULL;
uint8_t* messages_sim_c = NULL;
uint8_t* messages_sim_c_flood = NULL;
uint8_t* messages_sim_avx = NULL;
uint8_t* messages_sim_avx_flood = NULL;
uint8_t* codewords = NULL;
uint8_t* rm_codewords = NULL;
float* rm_symbols = NULL;
int16_t* rm_symbols_s = NULL;
int8_t* rm_symbols_c = NULL;
float* symbols = NULL; // unrm_symbols
int16_t* symbols_s = NULL; // unrm_symbols
int8_t* symbols_c = NULL; // unrm_symbols
int i = 0;
int j = 0;
parse_args(argc, argv);
// create an LDPC encoder
srslte_ldpc_encoder_t encoder;
#ifdef LV_HAVE_AVX2
if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_AVX2, base_graph, lift_size) != 0) {
perror("encoder init");
exit(-1);
}
#else // no AVX2
if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_C, base_graph, lift_size) != 0) {
perror("encoder init");
exit(-1);
}
#endif // LV_HAVE_AVX2
// create a LDPC rate DeMatcher
finalK = encoder.liftK;
finalN = encoder.liftN - 2 * lift_size;
if (rm_length == 0) {
rm_length = finalN - F;
}
if (Nref == 0) {
Nref = finalN;
}
// create a LDPC rate Matcher
srslte_ldpc_rm_t rm_tx;
if (srslte_ldpc_rm_tx_init(&rm_tx) != 0) {
perror("rate matcher init");
exit(-1);
}
// create a LDPC rate DeMatcher
srslte_ldpc_rm_t rm_rx;
if (srslte_ldpc_rm_rx_init_f(&rm_rx) != 0) {
perror("rate dematcher init");
exit(-1);
}
// create a LDPC rate DeMatcher (int16_t)
srslte_ldpc_rm_t rm_rx_s;
if (srslte_ldpc_rm_rx_init_s(&rm_rx_s) != 0) {
perror("rate dematcher init (int16_t)");
exit(-1);
}
// create a LDPC rate DeMatcher (int8_t)
srslte_ldpc_rm_t rm_rx_c;
if (srslte_ldpc_rm_rx_init_c(&rm_rx_c) != 0) {
perror("rate dematcher init (int8_t)");
exit(-1);
}
// create an LDPC decoder (float)
srslte_ldpc_decoder_t decoder_f;
if (srslte_ldpc_decoder_init(&decoder_f, SRSLTE_LDPC_DECODER_F, base_graph, lift_size, MS_SF) != 0) {
perror("decoder init");
exit(-1);
}
// create an LDPC decoder (16 bit)
srslte_ldpc_decoder_t decoder_s;
if (srslte_ldpc_decoder_init(&decoder_s, SRSLTE_LDPC_DECODER_S, base_graph, lift_size, MS_SF) != 0) {
perror("decoder init (int16_t)");
exit(-1);
}
// create an LDPC decoder (8 bit)
srslte_ldpc_decoder_t decoder_c;
if (srslte_ldpc_decoder_init(&decoder_c, SRSLTE_LDPC_DECODER_C, base_graph, lift_size, MS_SF) != 0) {
perror("decoder init (int8_t)");
exit(-1);
}
// create an LDPC decoder (8 bit, flooded)
srslte_ldpc_decoder_t decoder_c_flood;
if (srslte_ldpc_decoder_init(&decoder_c_flood, SRSLTE_LDPC_DECODER_C_FLOOD, base_graph, lift_size, MS_SF) != 0) {
perror("decoder init");
exit(-1);
}
#ifdef LV_HAVE_AVX2
// create an LDPC decoder (8 bit, AVX2 version)
srslte_ldpc_decoder_t decoder_avx;
if (srslte_ldpc_decoder_init(&decoder_avx, SRSLTE_LDPC_DECODER_C_AVX2, base_graph, lift_size, MS_SF) != 0) {
perror("decoder init");
exit(-1);
}
// create an LDPC decoder (8 bit, flooded scheduling, AVX2 version)
srslte_ldpc_decoder_t decoder_avx_flood;
if (srslte_ldpc_decoder_init(&decoder_avx_flood, SRSLTE_LDPC_DECODER_C_AVX2_FLOOD, base_graph, lift_size, MS_SF) !=
0) {
perror("decoder init");
exit(-1);
}
#endif // LV_HAVE_AVX2
// create a random generator
srslte_random_t random_gen = srslte_random_init(0);
printf("Test LDPC chain:\n");
printf(" Base Graph -> BG%d\n", encoder.bg + 1);
printf(" Lifting Size -> %d\n", encoder.ls);
printf(" Protograph -> M = %d, N = %d, K = %d\n", encoder.bgM, encoder.bgN, encoder.bgK);
printf(" Lifted graph -> M = %d, N = %d, K = %d\n", encoder.liftM, encoder.liftN, encoder.liftK);
printf(" Base code rate -> K/(N-2) = %d/%d = 1/%d\n",
encoder.liftK,
encoder.liftN - 2 * lift_size,
encoder.bg == BG1 ? 3 : 5);
printf("\n");
printf(" Codeblock length -> K = %d\n", finalK);
printf(" Codeword length -> N = %d\n", finalN);
printf(" Rate matched codeword length -> E = %d\n", rm_length);
printf(" Number of filler bits -> F = %d\n", F);
printf(" Redundancy version -> rv = %d\n", rv);
printf(" Final code rate -> (K-F)/E = (%d - %d)/%d = %.3f\n",
encoder.liftK,
F,
rm_length,
1.0 * (encoder.liftK - F) / rm_length);
printf("\n Signal-to-Noise Ratio -> %.2f dB\n", snr);
messages_true = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
messages_sim_f = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
messages_sim_s = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
messages_sim_c = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
messages_sim_c_flood = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
messages_sim_avx = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
messages_sim_avx_flood = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
codewords = malloc(finalN * BATCH_SIZE * sizeof(uint8_t));
rm_codewords = malloc(rm_length * BATCH_SIZE * sizeof(uint8_t));
rm_symbols = malloc(rm_length * BATCH_SIZE * sizeof(float));
rm_symbols_s = malloc(rm_length * BATCH_SIZE * sizeof(uint16_t));
rm_symbols_c = malloc(rm_length * BATCH_SIZE * sizeof(uint8_t));
symbols = malloc(finalN * BATCH_SIZE * sizeof(float));
symbols_s = malloc(finalN * BATCH_SIZE * sizeof(int16_t));
symbols_c = malloc(finalN * BATCH_SIZE * sizeof(int8_t));
if (!messages_true || !messages_sim_f || !messages_sim_s || !messages_sim_c || //
!messages_sim_avx || !messages_sim_c_flood || !messages_sim_avx_flood || //
!codewords || !rm_codewords || !rm_symbols || !rm_symbols_s || !rm_symbols_c || !symbols || !symbols_s ||
!symbols_c) {
perror("malloc");
exit(-1);
}
int i_bit = 0;
int i_batch = 0;
struct timeval t[3];
double elapsed_time_enc = 0;
double elapsed_time_dec_f = 0;
double elapsed_time_dec_s = 0;
double elapsed_time_dec_c = 0;
double elapsed_time_dec_c_flood = 0;
double elapsed_time_dec_avx = 0;
double elapsed_time_dec_avx_flood = 0;
int n_error_words_f = 0;
int n_error_words_s = 0;
int n_error_words_c = 0;
int n_error_words_c_flood = 0;
int n_error_words_avx = 0;
int n_error_words_avx_flood = 0;
float noise_std_dev = srslte_convert_dB_to_amplitude(-snr);
int16_t inf15 = (1U << 14U) - 1;
float gain_s = inf15 * noise_std_dev / 20 / (1 / noise_std_dev + 2);
int8_t inf7 = (1U << 6U) - 1;
float gain_c = inf7 * noise_std_dev / 8 / (1 / noise_std_dev + 2);
printf("\nBatch:\n ");
while (((n_error_words_f < REQ_ERRORS) || (n_error_words_s < REQ_ERRORS) || (n_error_words_c < REQ_ERRORS)) &&
(i_batch < MAX_N_BATCH)) {
i_batch++;
if (!(i_batch % 10)) {
printf("%8d", i_batch);
if (!(i_batch % 90)) {
printf("\n ");
}
}
/* generate data_tx */
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK - F; j++) {
messages_true[i * finalK + j] = srslte_random_uniform_int_dist(random_gen, 0, 1);
}
for (; j < finalK; j++) {
messages_true[i * finalK + j] = FILLER_BIT;
}
}
// lDPC Encoding
// compute the number of symbols that we need to encode/decode: at least (E + F) if E+F < N,
unsigned int n_useful_symbols = (E + F);
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_encoder_encode(
&encoder, messages_true + j * finalK, codewords + j * finalN, finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_enc += t[0].tv_sec + 1e-6 * t[0].tv_usec;
// rate matching
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_rm_tx(&rm_tx,
codewords + j * finalN,
rm_codewords + j * rm_length,
rm_length,
base_graph,
lift_size,
rv,
mod_type,
Nref);
}
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < rm_length; j++) {
rm_symbols[i * rm_length + j] = 1 - 2 * rm_codewords[i * rm_length + j];
}
}
// Apply AWGN
srslte_ch_awgn_f(rm_symbols, rm_symbols, noise_std_dev, BATCH_SIZE * rm_length);
// Convert symbols into LLRs
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < rm_length; j++) {
rm_symbols[i * rm_length + j] = rm_symbols[i * rm_length + j] * 2 / (noise_std_dev * noise_std_dev);
}
}
for (i = 0; i < BATCH_SIZE; i++) {
if (srslte_ldpc_rm_rx_f(&rm_rx,
rm_symbols + i * rm_length,
symbols + i * finalN,
rm_length,
F,
base_graph,
lift_size,
rv,
mod_type,
Nref)) {
exit(-1);
}
}
//////// Floating point
// Recover messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_decoder_decode_f(&decoder_f, symbols + j * finalN, messages_sim_f + j * finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_dec_f += t[0].tv_sec + 1e-6 * t[0].tv_usec;
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK; j++) {
i_bit = i * finalK + j;
if (messages_sim_f[i_bit] != (1U & messages_true[i_bit])) {
n_error_words_f++;
break;
}
}
}
//////// Fixed point - 16 bit
// Quantize LLRs with 16 bits
srslte_vec_quant_fs(rm_symbols, rm_symbols_s, gain_s, 0, inf15, BATCH_SIZE * rm_length);
// Rate dematcher
for (i = 0; i < BATCH_SIZE; i++) {
if (srslte_ldpc_rm_rx_s(&rm_rx_s,
rm_symbols_s + i * rm_length,
symbols_s + i * finalN,
rm_length,
F,
base_graph,
lift_size,
rv,
mod_type,
Nref)) {
exit(-1);
}
}
// Recover messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_decoder_decode_s(&decoder_s, symbols_s + j * finalN, messages_sim_s + j * finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_dec_s += t[0].tv_sec + 1e-6 * t[0].tv_usec;
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK; j++) {
i_bit = i * finalK + j;
if (messages_sim_s[i_bit] != (1U & messages_true[i_bit])) {
n_error_words_s++;
break;
}
}
}
//////// Fixed point - 8 bit
// Quantize LLRs with 8 bits
srslte_vec_quant_fc(rm_symbols, rm_symbols_c, gain_c, 0, inf7, BATCH_SIZE * rm_length);
// Rate dematcher
for (i = 0; i < BATCH_SIZE; i++) {
if (srslte_ldpc_rm_rx_c(&rm_rx_c,
rm_symbols_c + i * rm_length,
symbols_c + i * finalN,
rm_length,
F,
base_graph,
lift_size,
rv,
mod_type,
Nref)) {
exit(-1);
}
}
// Recover messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_decoder_decode_c(&decoder_c, symbols_c + j * finalN, messages_sim_c + j * finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_dec_c += t[0].tv_sec + 1e-6 * t[0].tv_usec;
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK; j++) {
i_bit = i * finalK + j;
if (messages_sim_c[i_bit] != (1U & messages_true[i_bit])) {
n_error_words_c++;
break;
}
}
}
//////// Fixed point - 8 bit, flooded scheduling
// Recover messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_decoder_decode_c(
&decoder_c_flood, symbols_c + j * finalN, messages_sim_c_flood + j * finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_dec_c_flood += t[0].tv_sec + 1e-6 * t[0].tv_usec;
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK; j++) {
i_bit = i * finalK + j;
if (messages_sim_c_flood[i_bit] != (1U & messages_true[i_bit])) {
n_error_words_c_flood++;
break;
}
}
}
#ifdef LV_HAVE_AVX2
//////// Fixed point - 8 bit - AVX2 version
// Recover messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_decoder_decode_c(
&decoder_avx, symbols_c + j * finalN, messages_sim_avx + j * finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_dec_avx += t[0].tv_sec + 1e-6 * t[0].tv_usec;
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK; j++) {
i_bit = i * finalK + j;
if (messages_sim_avx[i_bit] != (1U & messages_true[i_bit])) {
n_error_words_avx++;
break;
}
}
}
//////// Fixed point - 8 bit, flooded scheduling - AVX2 version
// Recover messages
gettimeofday(&t[1], NULL);
for (j = 0; j < BATCH_SIZE; j++) {
srslte_ldpc_decoder_decode_c(
&decoder_avx_flood, symbols_c + j * finalN, messages_sim_avx_flood + j * finalK, n_useful_symbols);
}
gettimeofday(&t[2], NULL);
get_time_interval(t);
elapsed_time_dec_avx_flood += t[0].tv_sec + 1e-6 * t[0].tv_usec;
for (i = 0; i < BATCH_SIZE; i++) {
for (j = 0; j < finalK; j++) {
i_bit = i * finalK + j;
if (messages_sim_avx_flood[i_bit] != (1U & messages_true[i_bit])) {
n_error_words_avx_flood++;
break;
}
}
}
#endif // LV_HAVE_AVX2
}
printf("\nEstimated throughput encoder:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
i_batch * BATCH_SIZE / elapsed_time_enc,
i_batch * BATCH_SIZE * finalK / elapsed_time_enc,
i_batch * BATCH_SIZE * finalN / elapsed_time_enc);
print_decoder("FLOATING POINT", i_batch, n_error_words_f, elapsed_time_dec_f);
print_decoder("FIXED POINT (16 bits)", i_batch, n_error_words_s, elapsed_time_dec_s);
print_decoder("FIXED POINT (8 bits)", i_batch, n_error_words_c, elapsed_time_dec_c);
print_decoder("FIXED POINT (8 bits, flooded scheduling)", i_batch, n_error_words_c_flood, elapsed_time_dec_c_flood);
#ifdef LV_HAVE_AVX2
print_decoder("FIXED POINT (8 bits - AVX2)", i_batch, n_error_words_avx, elapsed_time_dec_avx);
print_decoder(
"FIXED POINT (8 bits, flooded scheduling - AVX2)", i_batch, n_error_words_avx_flood, elapsed_time_dec_avx_flood);
#endif // LV_HAVE_AVX2
if (n_error_words_s > 10 * n_error_words_f) {
perror("16-bit performance too low!");
exit(-1);
}
if (n_error_words_c > 10 * n_error_words_f) {
perror("8-bit performance too low!");
exit(-1);
}
printf("\nTest completed successfully!\n\n");
free(symbols);
free(symbols_s);
free(symbols_c);
free(rm_symbols);
free(rm_symbols_s);
free(rm_symbols_c);
free(rm_codewords);
free(codewords);
free(messages_sim_avx);
free(messages_sim_c_flood);
free(messages_sim_c);
free(messages_sim_s);
free(messages_sim_f);
free(messages_true);
srslte_random_free(random_gen);
#ifdef LV_HAVE_AVX2
srslte_ldpc_decoder_free(&decoder_avx);
#endif // LV_HAVE_AVX2
srslte_ldpc_decoder_free(&decoder_c_flood);
srslte_ldpc_decoder_free(&decoder_c);
srslte_ldpc_decoder_free(&decoder_s);
srslte_ldpc_decoder_free(&decoder_f);
srslte_ldpc_encoder_free(&encoder);
srslte_ldpc_rm_tx_free(&rm_tx);
srslte_ldpc_rm_rx_free_f(&rm_rx);
srslte_ldpc_rm_rx_free_s(&rm_rx_s);
srslte_ldpc_rm_rx_free_c(&rm_rx_c);
}
/*!
 * \brief Prints the estimated word error rate and throughput of one decoder.
 *
 * The word and bit totals are computed in double precision: for long simulations the
 * integer products (batches x BATCH_SIZE x finalN) may not fit in an int.
 *
 * \param[in] title        Label identifying the decoder in the report.
 * \param[in] n_batches    Number of simulated batches (BATCH_SIZE words each).
 * \param[in] n_errors     Number of words decoded with at least one wrong bit.
 * \param[in] elapsed_time Accumulated decoding time, in seconds.
 */
void print_decoder(char* title, int n_batches, int n_errors, double elapsed_time)
{
  // Total number of decoded words, kept as a double to avoid integer overflow below.
  const double n_words = (double)n_batches * BATCH_SIZE;

  printf("\n**** %s ****", title);
  printf("\nEstimated word error rate:\n %e (%d errors)\n", (double)n_errors / n_batches / BATCH_SIZE, n_errors);
  printf("Estimated throughput decoder:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
         n_words / elapsed_time,
         n_words * finalK / elapsed_time,
         n_words * finalN / elapsed_time);
}

@ -0,0 +1,355 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file ldpc_rm_test.c
* \brief Unit test for the LDPC RateMatcher and RateDematcher.
*
* A batch of example messages is randomly generated, encoded, rate-matched, 2-PAM modulated,
* and, finally, rate-dematched and decoded by all three types of
* rate dematchers (float, int16_t, int8_t).
* The rate-dematched codeword is compared against the transmitted codeword
*
* Synopsis: **ldpc_rm_test [options]**
*
* Options:
* - **-b \<number\>** Base Graph (1 or 2. Default 2).
* - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 208).
* - **-e \<number\>** Codeword length after rate matching (set to 0 [default] for full rate).
* - **-f \<number\>** Number of filler bits (Default 10).
* - **-r \<number\>** Redundancy version {0-3} (Default 0).
* - **-m \<number\>** Modulation type BPSK = 0, QPSK =1, QAM16 = 2, QAM64 = 3, QAM256 = 4 (Default QPSK).
* - **-M \<number\>** Limited buffer size (set to 0 [default] for a normal buffer, i.e. Nref = N).
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "srslte/phy/fec/ldpc/ldpc_common.h"
#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
#include "srslte/phy/fec/ldpc/ldpc_rm.h"
#include "srslte/phy/utils/debug.h"
#include "srslte/phy/utils/random.h"
srslte_basegraph_t base_graph = BG2; /*!< \brief Base Graph (BG1 or BG2). */
uint32_t lift_size = 208; /*!< \brief Lifting Size. */
uint32_t C = 2; /*!< \brief Number of code block segments (CBS). */
uint32_t F = 10; /*!< \brief Number of filler bits in each CBS. */
uint32_t E = 0; /*!< \brief Rate-matched codeword size (E = 0: no rate matching, main() then uses E = N - F). */
uint8_t rv = 0; /*!< \brief Redundancy version {0-3}. */
mod_type_t mod_type = QPSK; /*!< \brief Modulation type: BPSK, QPSK, QAM16, QAM64, QAM256. */
uint32_t Nref = 0; /*!< \brief Limited buffer size (Nref = 0: normal buffer, main() then uses Nref = N). */
uint32_t N = 0; /*!< \brief Codeword size, set in main() to the lifted length minus 2 * lift_size bits. */
uint32_t K = 0; /*!< \brief Codeblock size, filler bits included (set in main() from the encoder). */
/*!
 * \brief Prints the command-line synopsis of the test.
 *
 * Next to each option the value currently stored in the corresponding global variable is
 * shown (i.e., the default one unless a previous option has already modified it).
 *
 * \param[in] prog Program name to display in the synopsis line.
 */
void usage(char* prog)
{
  printf("Usage: %s [-bX] [-lX] [-eX] [-fX] [-rX] [-mX] [-MX]\n"
         "\t-b Base Graph [(1 or 2) Default %d]\n"
         "\t-l Lifting Size [Default %d]\n"
         "\t-e Word length after rate matching [Default %d (no rate matching i.e. E = N - F)]\n"
         "\t-f Filler bits size (F) [Default %d]\n"
         "\t-r Redundancy version (rv) [Default %d]\n"
         "\t-m Modulation_type BPSK=0, QPSK=1, 16QAM=2, 64QAM=3, 256QAM = 4 [Default %d]\n"
         "\t-M Limited buffer size (Nref) [Default = %d (normal buffer Nref = N)]\n",
         prog,
         base_graph + 1,
         lift_size,
         E,
         F,
         rv,
         mod_type,
         Nref);
}
/*!
* \brief Parses the input line.
*/
void parse_args(int argc, char** argv)
{
int opt = 0;
while ((opt = getopt(argc, argv, "b:l:e:f:r:m:M:")) != -1) {
switch (opt) {
case 'b':
base_graph = (uint32_t)strtol(optarg, NULL, 10) - 1;
break;
case 'l':
lift_size = (uint32_t)strtol(optarg, NULL, 10);
break;
case 'e':
E = (uint32_t)strtol(optarg, NULL, 10);
break;
case 'f':
F = (uint32_t)strtol(optarg, NULL, 10);
break;
case 'r':
rv = (uint8_t)strtol(optarg, NULL, 10);
break;
case 'm':
mod_type = (mod_type_t)strtol(optarg, NULL, 10);
break;
case 'M':
Nref = (uint32_t)strtol(optarg, NULL, 10);
break;
default:
usage(argv[0]);
exit(-1);
}
}
}
/*!
 * \brief Main test function.
 *
 * Generates C random code block segments (with F filler bits each), encodes and rate-matches
 * them, maps the resulting bits to +1/-1 symbols and runs the float, int16_t and int8_t rate
 * dematchers. The float output is checked against the encoded codeword, the two fixed-point
 * outputs are checked against the float one.
 *
 * \return 0 if all checks pass for all segments; otherwise the code of the first failed check
 * (-1 float, -2 int16_t, -3 int8_t).
 */
int main(int argc, char** argv)
{
  uint8_t* codeblocks     = NULL; /* codeblocks including filler bits */
  uint8_t* codewords      = NULL;
  uint8_t* rm_codewords   = NULL;
  float*   rm_symbols     = NULL;
  int16_t* rm_symbols_s   = NULL;
  int8_t*  rm_symbols_c   = NULL;
  float*   unrm_symbols   = NULL;
  int16_t* unrm_symbols_s = NULL;
  int8_t*  unrm_symbols_c = NULL;
  uint32_t i              = 0;
  uint32_t r              = 0;
  int      error          = 0; /* outcome of the check currently running */
  int      ret            = 0; /* first failure found over all segments and checks */

  parse_args(argc, argv);

  srslte_random_t random_gen = srslte_random_init(0);

  // create an LDPC encoder
  srslte_ldpc_encoder_t encoder;
  if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_C, base_graph, lift_size) != 0) {
    perror("encoder init");
    exit(-1);
  }

  K = encoder.liftK;
  N = encoder.liftN - 2 * lift_size;

  // K - F and N - F are unsigned: more filler bits than codeblock bits would wrap around
  // and make the loops below run far beyond the allocated buffers.
  if (F > K) {
    fprintf(stderr, "Invalid number of filler bits (F = %u, K = %u)\n", (unsigned)F, (unsigned)K);
    exit(-1);
  }

  if (E == 0) {
    E = N - F;
  }
  if (Nref == 0) {
    Nref = N;
  }

  // create a LDPC rate Matcher
  srslte_ldpc_rm_t rm_tx;
  if (srslte_ldpc_rm_tx_init(&rm_tx) != 0) {
    perror("rate matcher init");
    exit(-1);
  }

  // create a LDPC rate DeMatcher
  srslte_ldpc_rm_t rm_rx;
  if (srslte_ldpc_rm_rx_init_f(&rm_rx) != 0) {
    perror("rate dematcher init");
    exit(-1);
  }

  // create a LDPC rate DeMatcher (int16_t)
  srslte_ldpc_rm_t rm_rx_s;
  if (srslte_ldpc_rm_rx_init_s(&rm_rx_s) != 0) {
    perror("rate dematcher init (int16_t)");
    exit(-1);
  }

  // create a LDPC rate DeMatcher (int8_t)
  srslte_ldpc_rm_t rm_rx_c;
  if (srslte_ldpc_rm_rx_init_c(&rm_rx_c) != 0) {
    perror("rate dematcher init (int8_t)");
    exit(-1);
  }

  printf("Test LDPC chain:\n");
  printf(" Base Graph -> BG%d\n", encoder.bg + 1);
  printf(" Lifting Size -> %d\n", encoder.ls);
  printf(" Protograph -> M = %d, N = %d, K = %d\n", encoder.bgM, encoder.bgN, encoder.bgK);
  printf(" Lifted graph -> M = %d, N = %d, K = %d\n", encoder.liftM, encoder.liftN, encoder.liftK);
  printf(" Base code rate -> K/(N-2) = %d/%d = 1/%d\n",
         encoder.liftK,
         encoder.liftN - 2 * lift_size,
         encoder.bg == BG1 ? 3 : 5);
  printf("\n");
  printf(" Codeblock length -> K = %d\n", K);
  printf(" Codeword length -> N = %d\n", N);
  printf(" Rate matched codeword length -> E = %d\n", E);
  printf(" Number of filler bits -> F = %d\n", F);
  printf(" Redundancy version -> rv = %d\n", rv);
  printf(" Final code rate -> (K-F)/E = (%d - %d)/%d = %.3f\n", encoder.liftK, F, E, 1.0 * (encoder.liftK - F) / E);
  printf("\n");

  codeblocks     = malloc(C * K * sizeof(uint8_t));
  codewords      = malloc(C * N * sizeof(uint8_t));
  rm_codewords   = malloc(C * E * sizeof(uint8_t));
  rm_symbols     = malloc(C * E * sizeof(float));
  rm_symbols_s   = malloc(C * E * sizeof(int16_t));
  rm_symbols_c   = malloc(C * E * sizeof(int8_t));
  unrm_symbols   = malloc(C * N * sizeof(float));
  unrm_symbols_s = malloc(C * N * sizeof(int16_t));
  unrm_symbols_c = malloc(C * N * sizeof(int8_t));
  if (!codeblocks || !codewords || !rm_codewords || !rm_symbols || !rm_symbols_s || !rm_symbols_c || !unrm_symbols ||
      !unrm_symbols_s || !unrm_symbols_c) {
    perror("malloc");
    exit(-1);
  }

  // Generate random bits
  for (r = 0; r < C; r++) {
    for (i = 0; i < K - F; i++) {
      codeblocks[r * K + i] = srslte_random_uniform_int_dist(random_gen, 0, 1);
    }
    for (; i < K; i++) { // add filler bits
      codeblocks[r * K + i] = FILLER_BIT;
    }
  }

  // LDPC Encoding
  // number of symbols requested from the encoder: E + F
  unsigned int n_useful_symbols = (E + F);

  // values used by the fixed-point dematchers to represent an infinite LLR
  const int16_t inf16 = (1U << 15U) - 1;
  const int8_t  inf8  = (1U << 7U) - 1;

  for (r = 0; r < C; r++) {
    // Encode messages
    if (srslte_ldpc_encoder_encode(&encoder, codeblocks + r * K, codewords + r * N, K, n_useful_symbols)) {
      exit(-1);
    }

    // LDPC rate matching
    if (srslte_ldpc_rm_tx(
            &rm_tx, codewords + r * N, rm_codewords + r * E, E, base_graph, lift_size, rv, mod_type, Nref)) {
      exit(-1);
    }

    // Modulate codewords: bit 0 -> +1, bit 1 -> -1
    for (i = 0; i < E; i++) {
      rm_symbols[r * E + i]   = rm_codewords[r * E + i] ? -1 : 1;
      rm_symbols_s[r * E + i] = rm_codewords[r * E + i] ? -1 : 1;
      rm_symbols_c[r * E + i] = rm_codewords[r * E + i] ? -1 : 1;
    }

    if (srslte_ldpc_rm_rx_f(
            &rm_rx, rm_symbols + r * E, unrm_symbols + r * N, E, F, base_graph, lift_size, rv, mod_type, Nref)) {
      exit(-1);
    }
    if (srslte_ldpc_rm_rx_s(
            &rm_rx_s, rm_symbols_s + r * E, unrm_symbols_s + r * N, E, F, base_graph, lift_size, rv, mod_type, Nref)) {
      exit(-1);
    }
    if (srslte_ldpc_rm_rx_c(
            &rm_rx_c, rm_symbols_c + r * E, unrm_symbols_c + r * N, E, F, base_graph, lift_size, rv, mod_type, Nref)) {
      exit(-1);
    }

    // Check self correctness for the float version
    error = 0;
    for (i = 0; i < N; i++) {
      const float   llr = unrm_symbols[i + r * N];
      const uint8_t bit = codewords[i + r * N];
      const int     ok  = ((llr == 0) && (bit != FILLER_BIT)) || ((llr == INFINITY) && (bit == FILLER_BIT)) ||
                     ((llr > 0) && (bit == 0)) || ((llr < 0) && bit);
      if (!ok) {
        error = -1;
        break;
      }
    }
    if (error < 0) {
      printf("Error in rate-matching block at code segment: %d\n unrm_symb[%d] = %2.1f\n codeword[%d] = %d\n",
             r,
             i,
             unrm_symbols[i + r * N],
             i,
             codewords[i + r * N]);
      if (ret == 0) {
        ret = error; // remember the first failure: error is reused by the following checks/segments
      }
    } else {
      printf(" No errors in rate-matching block\n");
    }

    // check the int16_t version against the float implementation
    error = 0;
    for (i = 0; i < N; i++) {
      const float   llr   = unrm_symbols[i + r * N];
      const int16_t llr_s = unrm_symbols_s[i + r * N];
      // an infinite LLR must be handled before the cast: converting INFINITY to an integer is undefined
      const int ok = (llr == INFINITY) ? (llr_s == inf16) : ((int16_t)llr == llr_s);
      if (!ok) {
        error = -2;
        break;
      }
    }
    if (error == -2) {
      printf(
          "Error in rate-matching block (int16_t) at code segment: %d\n unrm_symb[%d] = %2.1f\n unrm_symb_s[%d] = %d\n",
          r,
          i,
          unrm_symbols[i + r * N],
          i,
          unrm_symbols_s[i + r * N]);
      if (ret == 0) {
        ret = error;
      }
    } else {
      printf(" No errors in rate-matching block (int16_t)\n");
    }

    // check the int8_t version against the float implementation
    error = 0;
    for (i = 0; i < N; i++) {
      const float  llr   = unrm_symbols[i + r * N];
      const int8_t llr_c = unrm_symbols_c[i + r * N];
      const int    ok    = (llr == INFINITY) ? (llr_c == inf8) : ((int8_t)llr == llr_c);
      if (!ok) {
        error = -3;
        break;
      }
    }
    if (error == -3) {
      printf(
          "Error in rate-matching block (int8_t) at code segment: %d\n unrm_symb[%d] = %2.1f\n unrm_symb_c[%d] = %d\n",
          r,
          i,
          unrm_symbols[i + r * N],
          i,
          unrm_symbols_c[i + r * N]);
      if (ret == 0) {
        ret = error;
      }
    } else {
      printf(" No errors in rate-matching block: (int8_t)\n");
    }
  } // codeblocks r

  free(unrm_symbols);
  free(unrm_symbols_s);
  free(unrm_symbols_c);
  free(rm_symbols);
  free(rm_symbols_s);
  free(rm_symbols_c);
  free(rm_codewords);
  free(codewords);
  free(codeblocks);
  srslte_random_free(random_gen);
  srslte_ldpc_encoder_free(&encoder);
  srslte_ldpc_rm_tx_free(&rm_tx);
  srslte_ldpc_rm_rx_free_f(&rm_rx);
  srslte_ldpc_rm_rx_free_s(&rm_rx_s);
  srslte_ldpc_rm_rx_free_c(&rm_rx_c);

  return ret;
}

@ -0,0 +1,21 @@
#
# Project: 5GCoding-SRS
# Author: Jesus Gomez (CTTC)
# Copyright: Software Radio Systems Limited
#
# Append the polar encoder/decoder sources to FEC_SOURCES and publish the
# updated list in the parent directory scope.
set(FEC_SOURCES ${FEC_SOURCES}
polar/polar_encoder.c
polar/polar_encoder_pipelined.c
polar/polar_encoder_avx2.c
polar/polar_decoder.c
polar/polar_decoder_ssc_all.c
polar/polar_decoder_ssc_f.c
polar/polar_decoder_ssc_s.c
polar/polar_decoder_ssc_c.c
polar/polar_decoder_ssc_c_avx2.c
polar/polar_decoder_vector.c
polar/polar_decoder_vector_avx2.c
PARENT_SCOPE)
# Process the CMake configuration of the test subdirectory.
add_subdirectory(test)

@ -0,0 +1,233 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder.c
* \brief Definition of the polar decoder.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
* 5G uses a polar decoder with maximum sizes \f$2^n\f$ with \f$n = 5,...,10\f$.
*
*/
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include "polar_decoder_ssc_c.h"
#include "polar_decoder_ssc_c_avx2.h"
#include "polar_decoder_ssc_f.h"
#include "polar_decoder_ssc_s.h"
#include "srslte/phy/fec/polar/polar_decoder.h"
#include "srslte/phy/utils/debug.h"
/*!
 * SSC Polar decoder with float LLR inputs.
 * \param[in]  o       Pointer to the srslte_polar_decoder_t whose \a ptr field holds the SSC decoder instance.
 * \param[in]  symbols Input LLRs.
 * \param[out] data    Decoded bits.
 * \return 0 on success, -1 if the initialization or the decoding step reports an error.
 */
static int decode_ssc_f(void* o, const float* symbols, uint8_t* data)
{
  srslte_polar_decoder_t* q = o;

  // Do not hide failures of the inner decoder behind an unconditional success code.
  if (init_polar_decoder_ssc_f(q->ptr, symbols, data) != 0) {
    return -1;
  }
  if (polar_decoder_ssc_f(q->ptr, data) != 0) {
    return -1;
  }
  return 0;
}
/*!
 * SSC Polar decoder with int16_t LLR inputs.
 * \param[in]  o       Pointer to the srslte_polar_decoder_t whose \a ptr field holds the SSC decoder instance.
 * \param[in]  symbols Input LLRs.
 * \param[out] data    Decoded bits.
 * \return 0 on success, -1 if the initialization or the decoding step reports an error.
 */
static int decode_ssc_s(void* o, const int16_t* symbols, uint8_t* data)
{
  srslte_polar_decoder_t* q = o;

  // Do not hide failures of the inner decoder behind an unconditional success code.
  if (init_polar_decoder_ssc_s(q->ptr, symbols, data) != 0) {
    return -1;
  }
  if (polar_decoder_ssc_s(q->ptr, data) != 0) {
    return -1;
  }
  return 0;
}
/*!
 * SSC Polar decoder with int8_t LLR inputs.
 * \param[in]  o       Pointer to the srslte_polar_decoder_t whose \a ptr field holds the SSC decoder instance.
 * \param[in]  symbols Input LLRs.
 * \param[out] data    Decoded bits.
 * \return 0 on success, -1 if the initialization or the decoding step reports an error.
 */
static int decode_ssc_c(void* o, const int8_t* symbols, uint8_t* data)
{
  srslte_polar_decoder_t* q = o;

  // Both calls return -1 when the decoder instance is missing: propagate it to the caller.
  if (init_polar_decoder_ssc_c(q->ptr, symbols, data) != 0) {
    return -1;
  }
  if (polar_decoder_ssc_c(q->ptr, data) != 0) {
    return -1;
  }
  return 0;
}
#ifdef LV_HAVE_AVX2
/*!
 * SSC Polar decoder AVX2 with int8_t LLR inputs.
 * \param[in]  o       Pointer to the srslte_polar_decoder_t whose \a ptr field holds the SSC decoder instance.
 * \param[in]  symbols Input LLRs.
 * \param[out] data    Decoded bits.
 * \return 0 on success, -1 if the initialization or the decoding step reports an error.
 */
static int decode_ssc_c_avx2(void* o, const int8_t* symbols, uint8_t* data)
{
  srslte_polar_decoder_t* q = o;

  // Do not hide failures of the inner decoder behind an unconditional success code.
  if (init_polar_decoder_ssc_c_avx2(q->ptr, symbols, data) != 0) {
    return -1;
  }
  if (polar_decoder_ssc_c_avx2(q->ptr, data) != 0) {
    return -1;
  }
  return 0;
}
#endif // LV_HAVE_AVX2
/*! Destructor of a (float) SSC polar decoder. */
static void free_ssc_f(void* o)
{
srslte_polar_decoder_t* q = o;
delete_polar_decoder_ssc_f(q->ptr);
}
/*! Destructor of a (int16_t) SSC polar decoder. */
static void free_ssc_s(void* o)
{
srslte_polar_decoder_t* q = o;
delete_polar_decoder_ssc_s(q->ptr);
}
/*! Destructor of a (int8_t) SSC polar decoder. */
static void free_ssc_c(void* o)
{
srslte_polar_decoder_t* q = o;
delete_polar_decoder_ssc_c(q->ptr);
}
#ifdef LV_HAVE_AVX2
/*! Destructor of a (int8_t, avx2) SSC polar decoder. */
static void free_ssc_c_avx2(void* o)
{
srslte_polar_decoder_t* q = o;
delete_polar_decoder_ssc_c_avx2(q->ptr);
}
#endif
/*!
 * Sets up a polar decoder structure for the SSC polar decoder algorithm with float LLR inputs.
 * \return 0 on success, -1 if the SSC decoder instance cannot be created.
 */
static int init_ssc_f(srslte_polar_decoder_t* q, uint16_t* frozen_set, uint16_t code_size_log, uint16_t frozen_set_size)
{
  q->free     = free_ssc_f;
  q->decode_f = decode_ssc_f;
  q->ptr      = create_polar_decoder_ssc_f(frozen_set, code_size_log, frozen_set_size);

  if (q->ptr != NULL) {
    return 0;
  }

  ERROR("create_polar_decoder_ssc_f failed\n");
  free_ssc_f(q);
  return -1;
}
/*!
 * Sets up a polar decoder structure for the SSC polar decoder algorithm with int16_t LLR inputs.
 * \return 0 on success, -1 if the SSC decoder instance cannot be created.
 */
static int init_ssc_s(srslte_polar_decoder_t* q, uint16_t* frozen_set, uint16_t code_size_log, uint16_t frozen_set_size)
{
  q->free     = free_ssc_s;
  q->decode_s = decode_ssc_s;
  q->ptr      = create_polar_decoder_ssc_s(frozen_set, code_size_log, frozen_set_size);

  if (q->ptr != NULL) {
    return 0;
  }

  ERROR("create_polar_decoder_ssc_s failed\n");
  free_ssc_s(q);
  return -1;
}
/*!
 * Sets up a polar decoder structure for the SSC polar decoder algorithm with int8_t LLR inputs.
 * \return 0 on success, -1 if the SSC decoder instance cannot be created.
 */
static int init_ssc_c(srslte_polar_decoder_t* q, uint16_t* frozen_set, uint16_t code_size_log, uint16_t frozen_set_size)
{
  q->free     = free_ssc_c;
  q->decode_c = decode_ssc_c;
  q->ptr      = create_polar_decoder_ssc_c(frozen_set, code_size_log, frozen_set_size);

  if (q->ptr != NULL) {
    return 0;
  }

  ERROR("create_polar_decoder_ssc_c failed\n");
  free_ssc_c(q);
  return -1;
}
#ifdef LV_HAVE_AVX2
/*!
 * Initializes a polar decoder structure to use the SSC polar decoder algorithm with int8_t LLR inputs and AVX2
 * instructions.
 * \return 0 on success, -1 if the SSC decoder instance cannot be created.
 */
static int
init_ssc_c_avx2(srslte_polar_decoder_t* q, uint16_t* frozen_set, uint16_t code_size_log, uint16_t frozen_set_size)
{
  q->decode_c = decode_ssc_c_avx2;
  q->free     = free_ssc_c_avx2;
  if ((q->ptr = create_polar_decoder_ssc_c_avx2(frozen_set, code_size_log, frozen_set_size)) == NULL) {
    // report the function that actually failed (the message used to name the non-AVX2 variant)
    ERROR("create_polar_decoder_ssc_c_avx2 failed\n");
    free_ssc_c_avx2(q);
    return -1;
  }
  return 0;
}
#endif
/*!
 * Initializes a polar decoder structure with the SSC implementation selected by \a type.
 *
 * The structure is cleared first, so that the callbacks not provided by the selected
 * implementation (and the destructor, if \a type is not supported) are NULL instead of
 * holding indeterminate values.
 *
 * \param[out] q               Polar decoder structure to initialize.
 * \param[in]  type            Decoder implementation.
 * \param[in]  code_size_log   Base-2 logarithm of the code size.
 * \param[in]  frozen_set      Positions of the frozen bits.
 * \param[in]  frozen_set_size Number of frozen bits.
 * \return 0 on success, -1 if \a q is NULL, the type is not implemented or the decoder cannot be created.
 */
int srslte_polar_decoder_init(srslte_polar_decoder_t*     q,
                              srslte_polar_decoder_type_t type,
                              uint16_t                    code_size_log,
                              uint16_t*                   frozen_set,
                              uint16_t                    frozen_set_size)
{
  if (q == NULL) {
    return -1;
  }
  memset(q, 0, sizeof(srslte_polar_decoder_t));

  switch (type) {
    case SRSLTE_POLAR_DECODER_SSC_F:
      return init_ssc_f(q, frozen_set, code_size_log, frozen_set_size);
    case SRSLTE_POLAR_DECODER_SSC_S:
      return init_ssc_s(q, frozen_set, code_size_log, frozen_set_size);
    case SRSLTE_POLAR_DECODER_SSC_C:
      return init_ssc_c(q, frozen_set, code_size_log, frozen_set_size);
#ifdef LV_HAVE_AVX2
    case SRSLTE_POLAR_DECODER_SSC_C_AVX2:
      return init_ssc_c_avx2(q, frozen_set, code_size_log, frozen_set_size);
#endif
    default:
      ERROR("Decoder not implemented\n");
      return -1;
  }
}
/*!
 * Releases the resources of a polar decoder and clears the structure.
 * Calling it with a NULL pointer is a no-op.
 * \param[in, out] q Polar decoder structure to release.
 */
void srslte_polar_decoder_free(srslte_polar_decoder_t* q)
{
  if (q == NULL) {
    return;
  }
  if (q->free) {
    q->free(q);
  }
  // leave all callbacks NULL so that a later use of the structure can be detected
  memset(q, 0, sizeof(srslte_polar_decoder_t));
}
/*!
 * Decodes float LLRs with the decoding callback stored in \a q.
 * \param[in]  q            Polar decoder structure.
 * \param[in]  llr          Input LLRs.
 * \param[out] data_decoded Decoded bits.
 * \return The value returned by the decoding callback, or -1 if \a q is NULL or provides no
 * float decoding callback (e.g., released decoder).
 */
int srslte_polar_decoder_decode_f(srslte_polar_decoder_t* q, const float* llr, uint8_t* data_decoded)
{
  if (q == NULL || q->decode_f == NULL) {
    return -1;
  }
  return q->decode_f(q, llr, data_decoded);
}
/*!
 * Decodes int16_t LLRs with the decoding callback stored in \a q.
 * \param[in]  q            Polar decoder structure.
 * \param[in]  llr          Input LLRs.
 * \param[out] data_decoded Decoded bits.
 * \return The value returned by the decoding callback, or -1 if \a q is NULL or provides no
 * int16_t decoding callback (e.g., released decoder).
 */
int srslte_polar_decoder_decode_s(srslte_polar_decoder_t* q, const int16_t* llr, uint8_t* data_decoded)
{
  if (q == NULL || q->decode_s == NULL) {
    return -1;
  }
  return q->decode_s(q, llr, data_decoded);
}
/*!
 * Decodes int8_t LLRs with the decoding callback stored in \a q.
 * \param[in]  q            Polar decoder structure.
 * \param[in]  llr          Input LLRs.
 * \param[out] data_decoded Decoded bits.
 * \return The value returned by the decoding callback, or -1 if \a q is NULL or provides no
 * int8_t decoding callback (e.g., released decoder).
 */
int srslte_polar_decoder_decode_c(srslte_polar_decoder_t* q, const int8_t* llr, uint8_t* data_decoded)
{
  if (q == NULL || q->decode_c == NULL) {
    return -1;
  }
  return q->decode_c(q, llr, data_decoded);
}

@ -0,0 +1,102 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_ssc_all.c
* \brief Definition of the SSC polar decoder functions common to all implementations
*
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include "polar_decoder_ssc_all.h"
#include "../utils_avx2.h"
/*!
 * Computes the type of all nodes at all stages and stores it in \a param->node_type.
 *
 * At stage 0 a node is 0 (rate-0) if its index belongs to the frozen set and 3 (rate-1)
 * otherwise. At stage s > 0, node j combines nodes 2j and 2j+1 of the previous stage: it is
 * 0 (rate-0) if both are rate-0, 3 (rate-1) if both are rate-1 and 2 (rate-r) otherwise.
 *
 * \param[in]      frozen_set Positions of the frozen bits (\a param->frozen_set_size entries).
 * \param[in, out] param      Parameters of the decoder; \a node_type is filled in.
 * \return 0 on success, -1 if memory cannot be allocated or a frozen index is not smaller
 * than the code size.
 */
int init_node_type(const uint16_t* frozen_set, struct Params* param)
{
  const uint8_t  code_size_log = param->code_size_log;
  const uint16_t code_size     = param->code_stage_size[code_size_log];

  // A single buffer stores both flag vectors: "is not rate-0" first, "is rate-1" right after.
  // The buffer is only accessed element by element, a plain malloc is enough.
  uint8_t* is_not_rate_0 = malloc(2 * (size_t)code_size * sizeof(uint8_t));
  if (!is_not_rate_0) {
    perror("malloc");
    return -1;
  }
  uint8_t* is_rate_1 = is_not_rate_0 + code_size;

  // Stage 0: every bit is an information bit (rate-1) unless it is frozen (rate-0).
  memset(is_not_rate_0, 1, code_size);
  memset(is_rate_1, 1, code_size);
  for (uint16_t i = 0; i < param->frozen_set_size; i++) {
    if (frozen_set[i] >= code_size) {
      // invalid frozen position: writing it would fall outside the flag vectors
      free(is_not_rate_0);
      return -1;
    }
    is_not_rate_0[frozen_set[i]] = 0;
    is_rate_1[frozen_set[i]]     = 0;
  }

  for (uint16_t j = 0; j < code_size; j++) {
    param->node_type[0][j] = 3 * is_not_rate_0[j]; // 0 if rate-0; 3 if rate 1
  }

  // Upper stages: node j merges nodes 2j (even) and 2j+1 (odd) of the stage below.
  // The update is done in place: entry j is written only after entries 2j and 2j+1 are read.
  for (uint8_t s = 1; s < code_size_log + 1; s++) {
    const uint16_t n_nodes = param->code_stage_size[code_size_log - s];
    for (uint16_t j = 0; j < n_nodes; j++) {
      is_not_rate_0[j]       = is_not_rate_0[2 * j] | is_not_rate_0[2 * j + 1]; // bitor
      is_rate_1[j]           = is_rate_1[2 * j] & is_rate_1[2 * j + 1];         // bitand
      param->node_type[s][j] = 2 * is_not_rate_0[j] + is_rate_1[j]; // 0 if rate-0; 2 if rate-r; 3 if rate 1
    }
  }

  free(is_not_rate_0);
  return 0;
}

@ -0,0 +1,78 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_ssc_all.h
* \brief Declaration of the SSC polar decoder functions common to all implementations
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef POLAR_DECODER_SSC_ALL_H
#define POLAR_DECODER_SSC_ALL_H
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*!
* \brief Types of node in an SSC decoder.
*
* The values follow the encoding used by init_node_type() for stages above 0:
* 2 * (node is not rate-0) + (node is rate-1).
*/
typedef enum {
RATE_0 = 0, /*!< \brief See function rate_0_node(). */
RATE_R = 2, /*!< \brief See function rate_r_node(). */
RATE_1 = 3, /*!< \brief See function rate_1_node(). */
} node_rate;
/*!
* \brief Stores constants.
*/
struct Params {
uint8_t code_size_log; /*!< \brief \f$log_2\f$ of code size. */
uint16_t* code_stage_size; /*!< \brief Number of bits of the encoder input/output vector at a given stage. */
uint16_t frozen_set_size; /*!< \brief Number of frozen bits. */
uint8_t** node_type; /*!< \brief Node type, indexed as node_type[stage][node]: 3 (rate-1), 2 (rate-r), 0 (rate-0). */
};
/*!
* \brief Describes the state of a SSC polar decoder
*/
struct State {
uint8_t stage; /*!< \brief Current stage of the decoding algorithm (the 8-bit decoder starts at code_size_log + 1). */
bool flag_finished; /*!< \brief True if the last bit is decoded. False otherwise. */
uint16_t*
active_node_per_stage; /*!< \brief Indicates the active node in each stage of the algorithm at a given moment. */
};
/*!
* Computes the node types of all stages and stores them in the node_type field of struct Params.
* \param[in] frozen_set The position of the frozen bits in the codeword.
* \param[in, out] param A struct Params
* \return 0 on success, -1 if a memory allocation fails.
*/
int init_node_type(const uint16_t* frozen_set, struct Params* param);
#endif // POLAR_DECODER_SSC_ALL_H

@ -0,0 +1,422 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_ssc_c.c
* \brief Definition of the SSC polar decoder inner functions working with
* 8-bit integer-valued LLRs.
*
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
// IMPORTANT: polar_decoder_ssc_c.c is exactly the same as polar_decoder_ssc_f.c except for:
// (1) #include "polar_decoder_ssc_c.h"
// (2) the naming of the external functions, which finish with _c instead of _f
// (3) the initialization of the set of functions in create_polar_decoder_ssc_c
// pp->f = srslte_vec_function_f_ccc;
// pp->g = srslte_vec_function_g_bccc;
// pp->xor = srslte_vec_xor_bbb;
// pp->hard_bit = srslte_vec_hard_bit_cc;
#include "polar_decoder_ssc_c.h"
#include "../utils_avx2.h"
#include "polar_decoder_vector.h"
#include "srslte/phy/fec/polar/polar_encoder.h"
#include "srslte/phy/utils/vector.h"
/*!
* \brief Describes an SSC polar decoder (8-bit version).
*/
struct pSSC_c {
int8_t** llr0; /*!< \brief Pointers to the upper half of LLRs values at all stages. */
int8_t** llr1; /*!< \brief Pointers to the lower half of LLRs values at all stages. */
uint8_t* est_bit; /*!< \brief Pointer to the temporary estimated bits. */
struct Params* param; /*!< \brief Pointer to a Params structure. */
struct State* state; /*!< \brief Pointer to a State. */
srslte_polar_encoder_t* enc; /*!< \brief Pointer to a srslte_polar_encoder_t. */
void (*f)(const int8_t* x, const int8_t* y, int8_t* z, const uint16_t len); /*!< \brief Pointer to the function-f. */
void (*g)(const uint8_t* b,
const int8_t* x,
const int8_t* y,
int8_t* z,
const uint16_t len); /*!< \brief Pointer to the function-g. */
void (*xor)(const uint8_t* x,
const uint8_t* y,
uint8_t* z,
const uint32_t len); /*!< \brief Pointer to the xor function. */
void (*hard_bit)(const int8_t* x, uint8_t* z, const uint16_t len); /*!< \brief Pointer to the hard-bit function. */
};
/*!
* Switches between the different types of node (::RATE_1, ::RATE_0, ::RATE_R) for the SSC algorithm.
* Nodes in the decoding tree at stage \f$ s\f$ get the \f$2^s\f$ LLRs from the parent node and
* return the associated \f$2^s\f$ estimated bits.
*
*/
static void simplified_node(void* p, uint8_t* message);
/*!
* All decoded bits below a ::RATE_0 node are 0. The function updates the \a p->state->active_node_per_stage
* pointer to point to the next active node. It is assumed that message bits are initialized to 0.
*
*/
static void rate_0_node(void* p);
/*!
* ::RATE_1 nodes at stage \f$ s \f$ return the associated \f$2^s\f$ estimated bits by
* making a hard decision on them.
* ::RATE_1 nodes also update message bits vector.
*
*/
static void rate_1_node(void* p, uint8_t* message);
/*!
* ::RATE_R nodes at stage \f$ s \f$ return the associated \f$2^s\f$ decoded bit by calling
* the child nodes to the right and left of the decoding tree and then polar encoding (xor) their output.
* At stage \f$ s \f$, this function runs the function-f and function-g of the decoder (in this 8-bit version,
* srslte_vec_function_f_ccc() and srslte_vec_function_g_bccc())
* with vector size \f$2^{ s - 1}\f$ and updates \a llr0 and \a llr1 memory space for stage \f$(s - 1)\f$.
* This function also runs srslte_vec_xor_bbb() with vector size \f$2^{s-1}\f$ and
* updates \a estbits memory space for stage \f$(s + 1)\f$.
*
*/
static void rate_r_node(void* p, uint8_t* message);
/*!
 * Prepares an 8-bit SSC polar decoder for decoding a new codeword: clears the output and the
 * estimated bits, loads the input LLRs into the LLR buffers of the last stage and resets the
 * state of the decoding tree.
 *
 * \param[in, out] p            Pointer to the decoder instance (struct pSSC_c).
 * \param[in]      input_llr    Input LLRs, one per codeword bit.
 * \param[out]     data_decoded Decoded bits buffer (code size bytes), set to zero here.
 * \return 0 on success, -1 if any of the pointers is NULL.
 */
int init_polar_decoder_ssc_c(void* p, const int8_t* input_llr, uint8_t* data_decoded)
{
  struct pSSC_c* pp = p;

  if (p == NULL || input_llr == NULL || data_decoded == NULL) {
    return -1;
  }

  const uint8_t code_size_log = pp->param->code_size_log;
  // Sizes are kept unsigned: a signed 16-bit type cannot hold the largest stage size.
  const size_t code_size = pp->param->code_stage_size[code_size_log];
  // Each stage doubles the previous one, hence half the code size is the size of stage
  // code_size_log - 1 (computed here without indexing below stage 0).
  const size_t code_half_size = code_size / 2;

  // Initializes the data_decoded_vector to all zeros
  memset(data_decoded, 0, code_size);

  // Initialize est_bit vector to all zeros
  memset(pp->est_bit, 0, code_size);

  // Initializes LLR buffer for the last stage/level with the input LLRs values:
  // first half of the input in llr0, second half in llr1
  memcpy(pp->llr0[code_size_log], input_llr, code_half_size * sizeof(int8_t));
  memcpy(pp->llr1[code_size_log], input_llr + code_half_size, code_half_size * sizeof(int8_t));

  // Initializes the state of the decoding tree
  pp->state->stage = code_size_log + 1; // start from the only one node at the last stage + 1.
  for (uint16_t i = 0; i < code_size_log + 1; i++) {
    pp->state->active_node_per_stage[i] = 0;
  }
  pp->state->flag_finished = false;

  return 0;
}
/*!
 * Runs the SSC decoding algorithm on a decoder previously prepared with init_polar_decoder_ssc_c().
 * \param[in, out] p            Pointer to the decoder instance.
 * \param[out]     data_decoded Decoded bits.
 * \return 0 on success, -1 if \a p is NULL.
 */
int polar_decoder_ssc_c(void* p, uint8_t* data_decoded)
{
  if (p != NULL) {
    simplified_node(p, data_decoded);
    return 0;
  }
  return -1;
}
/*!
 * Releases all the memory owned by an 8-bit SSC polar decoder instance, including the
 * instance itself. Nothing is done if \a p is NULL.
 * \param[in] p Pointer to the decoder instance.
 */
void delete_polar_decoder_ssc_c(void* p)
{
  if (p == NULL) {
    return;
  }

  struct pSSC_c* dec = p;

  // LLR values buffer and the two tables of per-stage pointers
  free(dec->llr0[0]);
  free(dec->llr0);
  free(dec->llr1);

  // node types: buffer first, then the table of per-stage pointers
  free(dec->param->node_type[0]);
  free(dec->param->node_type);

  // estimated bits buffer
  free(dec->est_bit);

  // parameters and state
  free(dec->param->code_stage_size);
  free(dec->param);
  free(dec->state->active_node_per_stage);
  free(dec->state);

  // inner polar encoder
  srslte_polar_encoder_free(dec->enc);
  free(dec->enc);

  free(dec);
}
void* create_polar_decoder_ssc_c(uint16_t* frozen_set, const uint8_t code_size_log, const uint16_t frozen_set_size)
{
struct pSSC_c* pp = NULL; // pointer to the polar decoder instance
// allocate memory to the polar decoder instance
if ((pp = malloc(sizeof(struct pSSC_c))) == NULL) {
return NULL;
}
// set functions
pp->f = srslte_vec_function_f_ccc;
pp->g = srslte_vec_function_g_bccc;
pp->xor = srslte_vec_xor_bbb;
pp->hard_bit = srslte_vec_hard_bit_cc;
// encoder of maximum size
if ((pp->enc = malloc(sizeof(srslte_polar_encoder_t))) == NULL) {
free(pp);
return NULL;
}
srslte_polar_encoder_init(pp->enc, SRSLTE_POLAR_ENCODER_PIPELINED, code_size_log);
// algorithm constants/parameters
if ((pp->param = malloc(sizeof(struct Params))) == NULL) {
free(pp->enc);
free(pp);
return NULL;
}
if ((pp->param->code_stage_size = malloc((code_size_log + 1) * sizeof(uint16_t))) == NULL) {
free(pp->param);
free(pp->enc);
free(pp);
return NULL;
}
pp->param->code_stage_size[0] = 1;
for (uint8_t i = 1; i < code_size_log + 1; i++) {
pp->param->code_stage_size[i] = 2 * pp->param->code_stage_size[i - 1];
}
pp->param->code_size_log = code_size_log;
// state -- initialized in polar_decoder_ssc_init
if ((pp->state = malloc(sizeof(struct State))) == NULL) {
free(pp->param->code_stage_size);
free(pp->param);
free(pp->enc);
free(pp);
return NULL;
}
if ((pp->state->active_node_per_stage = malloc((code_size_log + 1) * sizeof(uint16_t))) == NULL) {
free(pp->state);
free(pp->param->code_stage_size);
free(pp->param);
free(pp->enc);
free(pp);
return NULL;
}
// allocates memory for estimated bits per stage
uint16_t est_bits_size = pp->param->code_stage_size[code_size_log];
pp->est_bit = aligned_alloc(SRSLTE_AVX2_B_SIZE, est_bits_size); // every 32 chars are aligned
// allocate memory for LLR pointers.
pp->llr0 = malloc((code_size_log + 1) * sizeof(int8_t*));
pp->llr1 = malloc((code_size_log + 1) * sizeof(int8_t*));
// There are LLR buffers for n = 0 to n = code_size_log. Each with size 2^n. Thus,
// the total memory needed is 2^(n+1)-1.
// Only the stages starting at multiples of SRSLTE_AVX2_B_SIZE are aligned.
// Let n_simd_llr be the exponent of the SIMD size in nummer of LLRs.
// i.e. in a SIMD instruction we can load 2^(n_simd_llr) LLR values
// then the memory for stages s >= n_simd_llr - 1 is aligned.
// but only the operations at stages s > n_simd_llr have all the inputs aligned.
uint8_t n_llr_all_stages = code_size_log + 1; // there are 2^(n_llr_all_stages) - 1 LLR values summing up all stages.
uint16_t llr_all_stages = 1U << n_llr_all_stages;
pp->llr0[0] = aligned_alloc(SRSLTE_AVX2_B_SIZE, llr_all_stages * sizeof(int8_t)); // 32*8=256
// allocate memory to the polar decoder instance
if (pp->llr0[0] == NULL) {
free(pp->est_bit);
free(pp->state);
free(pp->param->code_stage_size);
free(pp->param);
free(pp->enc);
free(pp);
return NULL;
}
// initialize all LLR pointers
pp->llr1[0] = pp->llr0[0] + 1;
for (uint8_t s = 1; s < code_size_log + 1; s++) {
pp->llr0[s] = pp->llr0[0] + pp->param->code_stage_size[s];
pp->llr1[s] = pp->llr0[0] + pp->param->code_stage_size[s] + pp->param->code_stage_size[s - 1];
}
// allocate memory for node type pointers, one per stage.
pp->param->frozen_set_size = frozen_set_size;
pp->param->node_type = malloc((code_size_log + 1) * sizeof(uint8_t*));
// allocate memory to node_type_ssc. Stage s has 2^(N-s) nodes s=0,...,N.
// Thus, same size as LLRs all stages.
pp->param->node_type[0] = aligned_alloc(SRSLTE_AVX2_B_SIZE, llr_all_stages * sizeof(uint8_t)); // 32*8=256
if (pp->param->node_type[0] == NULL) {
free(pp->param->node_type);
free(pp->est_bit);
free(pp->state);
free(pp->param->code_stage_size);
free(pp->param);
free(pp->enc);
free(pp);
return NULL;
}
// initialize all node type pointers. (stage 0 is the first, opposite to LLRs)
for (uint8_t s = 1; s < code_size_log + 1; s++) {
pp->param->node_type[s] = pp->param->node_type[s - 1] + pp->param->code_stage_size[code_size_log - s + 1];
}
init_node_type(frozen_set, pp->param);
return pp;
}
/*!
 * Moves one stage down the decoding tree, processes the active node of that stage
 * according to its type (::RATE_1, ::RATE_0 or ::RATE_R) and moves back up.
 * An unknown node type is reported on the standard output and terminates the program.
 *
 * \param[in, out] p       The decoder (a struct pSSC_c).
 * \param[out]     message Buffer receiving the decoded message bits.
 */
static void simplified_node(void* p, uint8_t* message)
{
  struct pSSC_c* dec = p;

  // descend to the child node
  dec->state->stage--;

  const uint8_t  level = dec->state->stage;
  const uint16_t node  = dec->state->active_node_per_stage[level];
  const int      type  = dec->param->node_type[level][node];

  if (type == RATE_1) {
    rate_1_node(dec, message);
  } else if (type == RATE_0) {
    rate_0_node(dec);
  } else if (type == RATE_R) {
    rate_r_node(dec, message);
  } else {
    printf("ERROR: wrong node type %d\n", type);
    exit(-1);
  }

  // climb back to the parent node
  dec->state->stage++;
}
/*!
 * Processes a ::RATE_0 node: no bit is estimated, the decoder only advances.
 * If the active bit is the last one of the codeword, the decoding is flagged as
 * finished; otherwise the active node of every stage from 0 up to the current one
 * is moved past this node.
 *
 * \param[in, out] p The decoder (a struct pSSC_c).
 */
static void rate_0_node(void* p)
{
  struct pSSC_c* dec = p;

  const uint8_t  level      = dec->state->stage;
  const uint16_t active_bit = dec->state->active_node_per_stage[0];
  const int16_t  n_bits     = dec->param->code_stage_size[dec->param->code_size_log];

  if (active_bit == n_bits - 1) {
    dec->state->flag_finished = true;
    return;
  }

  // a node at the current stage spans 2^(level - i) nodes of stage i
  for (uint8_t i = 0; i <= level; i++) {
    dec->state->active_node_per_stage[i] += dec->param->code_stage_size[level - i];
  }
}
/*!
 * Processes a ::RATE_1 node: the bits of the node are estimated by a hard decision
 * on its LLRs and the corresponding message bits are written to \a message (through
 * the polar encoder when the node is above stage 0, by a plain copy otherwise).
 * The active node of every stage from 0 up to the current one is then moved past
 * this node, and the decoding is flagged as finished once the whole codeword has
 * been covered.
 *
 * \param[in, out] p       The decoder (a struct pSSC_c).
 * \param[out]     message Buffer receiving the decoded message bits.
 */
static void rate_1_node(void* p, uint8_t* message)
{
  struct pSSC_c* dec = p;

  const uint8_t  level     = dec->state->stage;
  const uint16_t first_bit = dec->state->active_node_per_stage[0];
  const uint16_t n_bits    = dec->param->code_stage_size[dec->param->code_size_log];
  const uint16_t node_bits = dec->param->code_stage_size[level];
  uint8_t*       node_est  = dec->est_bit + first_bit;

  // hard decision on the LLRs of this node
  dec->hard_bit(dec->llr0[level], node_est, node_bits);

  if (level == 0) {
    message[first_bit] = node_est[0];
  } else {
    srslte_polar_encoder_encode(dec->enc, node_est, message + first_bit, level);
  }

  // a node at the current stage spans 2^(level - i) nodes of stage i
  for (uint8_t i = 0; i <= level; i++) {
    dec->state->active_node_per_stage[i] += dec->param->code_stage_size[level - i];
  }

  // was this the last bit?
  if (dec->state->active_node_per_stage[0] == n_bits) {
    dec->state->flag_finished = true;
  }
}
/*!
 * Processes a ::RATE_R node at stage \f$s\f$: function f produces the LLRs of the
 * first child, which is then decoded; function g combines the bits estimated by the
 * first child with the LLRs of this node to produce the LLRs of the second child,
 * which is decoded next; finally the estimated bits of both children are combined
 * (xor) into the first half of this node's estimated bits.
 * The function returns early if a child flags the decoding as finished.
 *
 * \param[in, out] p       The decoder (a struct pSSC_c).
 * \param[out]     message Buffer receiving the decoded message bits.
 */
static void rate_r_node(void* p, uint8_t* message)
{
  struct pSSC_c* dec = p;

  const uint8_t  level     = dec->state->stage;
  const uint16_t full_size = dec->param->code_stage_size[level];
  const uint16_t half_size = dec->param->code_stage_size[level - 1];

  // first child (left/up in the tree)
  dec->f(dec->llr0[level], dec->llr1[level], dec->llr0[level - 1], half_size);
  simplified_node(dec, message);
  if (dec->state->flag_finished == true) {
    return;
  }

  // second child (right/down in the tree): it needs the bits just estimated by the
  // first child, which end right before the currently active bit
  const int16_t first_child_offset = dec->state->active_node_per_stage[0] - half_size;
  dec->g(dec->est_bit + first_child_offset, dec->llr0[level], dec->llr1[level], dec->llr0[level - 1], half_size);
  simplified_node(dec, message);
  if (dec->state->flag_finished == true) {
    return;
  }

  // combine the estimated bits of both children
  const int16_t upper_offset = dec->state->active_node_per_stage[0] - full_size;
  const int16_t lower_offset = upper_offset + half_size;
  uint8_t*      upper_half   = dec->est_bit + upper_offset;
  uint8_t*      lower_half   = dec->est_bit + lower_offset;
  dec->xor(upper_half, lower_half, upper_half, half_size);

  // this node is done: the next node of this stage becomes the active one
  dec->state->active_node_per_stage[level] += 1;
}

@ -0,0 +1,77 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_ssc_c.h
* \brief Declaration of the SSC polar decoder inner functions working with
* 8-bit integer-valued LLRs.
* \author Jesus Gomez (CTTC) \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef POLAR_DECODER_SSC_C_H
#define POLAR_DECODER_SSC_C_H
#include "polar_decoder_ssc_all.h"
/*!
 * Creates an SSC polar decoder structure of type pSSC_c, and allocates memory for the decoding buffers.
 *
 * This function is the same as the one for the floating-point version.
 * Note, however, that it works with a different structure (different function pointers
 * pSSC_c::f, pSSC_c::g, pSSC_c::xor and pSSC_c::hard_bit).
 *
 * \param[in] frozen_set      The position of the frozen bits in the codeword.
 * \param[in] code_size_log   \f$log_2\f$ of the number of bits in the codeword.
 * \param[in] frozen_set_size Number of frozen bits.
 * \return A pointer to a pSSC_c structure if the function executes correctly, NULL otherwise.
 */
void* create_polar_decoder_ssc_c(uint16_t* frozen_set, uint8_t code_size_log, uint16_t frozen_set_size);
/*!
 * The (8-bit) polar decoder SSC "destructor": it frees all the resources allocated to the decoder.
 * A NULL pointer is ignored.
 *
 * \param[in, out] p A pointer to the dismantled decoder.
 */
void delete_polar_decoder_ssc_c(void* p);
/*!
 * Initializes an (8-bit) SSC polar decoder before processing a new codeword.
 *
 * \param[in, out] p            A void pointer to the decoder (a pSSC_c structure).
 * \param[in]      llr          LLRs for the new codeword.
 * \param[out]     data_decoded Pointer to the decoded message.
 * \return An integer: 0 if the function executes correctly, -1 otherwise.
 */
int init_polar_decoder_ssc_c(void* p, const int8_t* llr, uint8_t* data_decoded);
/*!
 * Decodes a data message from an 8-bit resolution codeword with the specified decoder. Note that
 * a pointer to the codeword LLRs is included in \a p and initialized by init_polar_decoder_ssc_c().
 *
 * \param[in]  p    A pointer to the desired decoder.
 * \param[out] data The decoded message.
 * \return An integer: 0 if the function executes correctly, -1 otherwise.
 */
int polar_decoder_ssc_c(void* p, uint8_t* data);
#endif // POLAR_DECODER_SSC_C_H

@ -0,0 +1,360 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_ssc_c_avx2.c
* \brief Definition of the SSC polar decoder inner functions working with
* 8-bit integer-valued LLRs and AVX2 instructions.
*
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include "polar_decoder_ssc_c_avx2.h"
#include "../utils_avx2.h"
#include "polar_decoder_vector_avx2.h"
#include "srslte/phy/fec/polar/polar_encoder.h"
#ifdef LV_HAVE_AVX2
/*!
 * \brief Describes the state of an AVX2 SSC polar decoder.
 *
 * Unlike the non-AVX2 decoders, which keep one active node per stage, this state
 * only tracks the current stage and a single position in the est_bit buffer.
 */
struct StateAVX2 {
uint8_t stage; /*!< \brief Current stage [0 - code_size_log] of the decoding algorithm. */
uint16_t bit_pos; /*!< \brief Position of the next bit to be estimated in the est_bit buffer. */
};
/*!
 * \brief Describes an SSC polar decoder (8-bit, AVX2 version).
 */
struct pSSC_c_avx2 {
int8_t** llr0; /*!< \brief Pointers to the upper half of LLRs values at all stages. */
int8_t** llr1; /*!< \brief Pointers to the lower half of LLRs values at all stages. */
uint8_t* est_bit; /*!< \brief Pointer to the temporary estimated bits. */
struct Params* param; /*!< \brief Pointer to a Params structure. */
struct StateAVX2* state; /*!< \brief Pointer to a StateAVX2 structure. */
srslte_polar_encoder_t* enc; /*!< \brief Pointer to a srslte_polar_encoder_t. */
void (*f)(const int8_t* x, const int8_t* y, int8_t* z, const uint16_t len); /*!< \brief Pointer to the function-f. */
void (*g)(const uint8_t* b,
const int8_t* x,
const int8_t* y,
int8_t* z,
const uint16_t len); /*!< \brief Pointer to the function-g. */
void (*xor)(const uint8_t* x,
const uint8_t* y,
uint8_t* z,
const uint16_t len); /*!< \brief Pointer to the xor function (combines the estimated bits of two child nodes). */
void (*hard_bit)(const int8_t* x, uint8_t* z, const uint16_t len); /*!< \brief Pointer to the hard-bit function. */
};
/*!
 * Returns the larger of two integers.
 *
 * \param[in] a First value.
 * \param[in] b Second value.
 * \return \a a if it is strictly greater than \a b, \a b otherwise.
 */
static int max(int a, int b)
{
  if (b >= a) {
    return b;
  }
  return a;
}
/*!
 * Switches between the different types of node (::RATE_1, ::RATE_0, ::RATE_R) for the SSC algorithm.
 * Nodes in the decoding tree at stage \f$ s\f$ get the \f$2^s\f$ LLRs from the parent node and
 * return the associated \f$2^s\f$ estimated bits.
 *
 * ::RATE_0 nodes estimate no bits: the function only advances \a p->state->bit_pos by \f$2^s\f$
 * positions. The estimated-bit buffer is set to zero when the decoder is initialized.
 *
 * ::RATE_1 nodes at stage \f$ s \f$ return the associated \f$2^s\f$ estimated bits by
 * making a hard decision on the LLRs, and then advance \a p->state->bit_pos by \f$2^s\f$ positions.
 *
 * ::RATE_R nodes at stage \f$ s \f$ return the associated \f$2^s\f$ estimated bits by calling
 * the child nodes to the left and right of the decoding tree and then combining (xor) their output.
 * At stage \f$ s \f$, this function runs the function-f and function-g of the decoder (pSSC_c_avx2::f and
 * pSSC_c_avx2::g) with vector size \f$2^{ s - 1}\f$, writing the LLRs of stage \f$(s - 1)\f$.
 * It also runs the xor function of the decoder (pSSC_c_avx2::xor) with vector size \f$2^{s-1}\f$
 * on the bits estimated by the two child nodes.
 *
 * An unknown node type is reported on the standard output and terminates the program.
 *
 * \param[in, out] p The decoder.
 */
static void simplified_node(struct pSSC_c_avx2* p);
/*!
 * Releases an (8-bit, AVX2) SSC polar decoder together with every buffer it owns.
 * A NULL pointer is accepted and ignored.
 *
 * \param[in, out] p The decoder (a struct pSSC_c_avx2) to be destroyed.
 */
void delete_polar_decoder_ssc_c_avx2(void* p)
{
  if (p == NULL) {
    return;
  }
  struct pSSC_c_avx2* dec = p;

  // LLRs: a single buffer anchored at llr0[0], plus the two per-stage pointer tables.
  free(dec->llr0[0]);
  free(dec->llr0);
  free(dec->llr1);

  // temporary estimated bits
  free(dec->est_bit);

  // decoding parameters: node types (single buffer anchored at node_type[0]) and stage sizes
  free(dec->param->node_type[0]);
  free(dec->param->node_type);
  free(dec->param->code_stage_size);
  free(dec->param);

  // decoding state
  free(dec->state);

  // auxiliary encoder: first its internal resources, then the structure itself
  srslte_polar_encoder_free(dec->enc);
  free(dec->enc);

  free(dec);
}
/*!
 * Creates an SSC polar decoder working with 8-bit integer LLRs and AVX2 kernels,
 * and allocates all of its decoding buffers.
 *
 * Every allocation is checked. If any of them fails, whatever has been allocated
 * so far is released and NULL is returned, so a partially built decoder is never
 * handed to the caller. A decoder returned by this function is released with
 * delete_polar_decoder_ssc_c_avx2().
 *
 * \param[in] frozen_set      The position of the frozen bits in the codeword.
 * \param[in] code_size_log   \f$log_2\f$ of the number of bits in the codeword.
 * \param[in] frozen_set_size Number of frozen bits.
 * \return A pointer to the new decoder (a struct pSSC_c_avx2), or NULL if some memory could not be allocated.
 */
void* create_polar_decoder_ssc_c_avx2(uint16_t* frozen_set, const uint8_t code_size_log, const uint16_t frozen_set_size)
{
  const size_t n_stages   = (size_t)code_size_log + 1;  // stages 0, ..., code_size_log
  const size_t code_size  = (size_t)1 << code_size_log; // number of bits in the codeword
  const size_t all_stages = 2 * code_size;              // room for the 2^(n+1) - 1 values of all stages
  const size_t align      = SRSLTE_AVX2_B_SIZE;         // alignment (in bytes) of the large buffers

  // Estimated bits: SRSLTE_AVX2_B_SIZE extra bytes so that the output of the
  // 256-bit instructions can always be stored.
  const size_t est_bit_size = code_size + SRSLTE_AVX2_B_SIZE;

  // LLRs: at least SRSLTE_AVX2_B_SIZE bytes are reserved for each stage (see the
  // pointer initialization below), so that there is space for the output of the
  // 32-byte vectorized functions. This only adds memory for the (at most five)
  // stages that are smaller than SRSLTE_AVX2_B_SIZE. A further SRSLTE_AVX2_B_SIZE
  // positions let the hard-bit function work on the last bits within allocated memory.
  const size_t padded_stages = (code_size_log >= 5) ? 5 : n_stages;
  const size_t llr_size      = all_stages + padded_stages * SRSLTE_AVX2_B_SIZE + SRSLTE_AVX2_B_SIZE;

  // aligned_alloc() requires the size to be a multiple of the alignment: round up.
  const size_t est_bit_bytes   = (est_bit_size + align - 1) / align * align;
  const size_t llr_bytes       = (llr_size * sizeof(int8_t) + align - 1) / align * align;
  const size_t node_type_bytes = (all_stages * sizeof(uint8_t) + align - 1) / align * align;

  int8_t*  llr_buffer       = NULL; // LLRs of all stages
  uint8_t* node_type_buffer = NULL; // node types of all stages, back to back

  // allocate memory to the polar decoder instance
  struct pSSC_c_avx2* pp = malloc(sizeof(struct pSSC_c_avx2));
  if (pp == NULL) {
    return NULL;
  }

  // Every owned pointer starts as NULL so that the single failure path at the end of
  // the function can release whatever exists at that point (free(NULL) is a no-op).
  pp->llr0    = NULL;
  pp->llr1    = NULL;
  pp->est_bit = NULL;
  pp->param   = NULL;
  pp->state   = NULL;
  pp->enc     = NULL;

  // set functions
  pp->f        = srslte_vec_function_f_ccc_avx2;
  pp->g        = srslte_vec_function_g_bccc_avx2;
  pp->xor      = srslte_vec_xor_bbb_avx2;
  pp->hard_bit = srslte_vec_hard_bit_cc_avx2;

  // encoder of maximum size
  pp->enc = malloc(sizeof(srslte_polar_encoder_t));
  if (pp->enc == NULL) {
    goto clean_exit;
  }
  // NOTE(review): the outcome of the encoder initialization is not inspected here;
  // confirm how srslte_polar_encoder_init() reports a failure and handle it.
  srslte_polar_encoder_init(pp->enc, SRSLTE_POLAR_ENCODER_AVX2, code_size_log);

  // algorithm constants/parameters
  pp->param = malloc(sizeof(struct Params));
  if (pp->param == NULL) {
    goto clean_exit;
  }
  pp->param->node_type       = NULL;
  pp->param->code_size_log   = code_size_log;
  pp->param->frozen_set_size = frozen_set_size;
  pp->param->code_stage_size = malloc(n_stages * sizeof(uint16_t));
  if (pp->param->code_stage_size == NULL) {
    goto clean_exit;
  }
  // stage s works with vectors of 2^s values
  pp->param->code_stage_size[0] = 1;
  for (size_t s = 1; s < n_stages; s++) {
    pp->param->code_stage_size[s] = 2 * pp->param->code_stage_size[s - 1];
  }

  // state -- its contents are initialized before each decoding
  pp->state = malloc(sizeof(struct StateAVX2));
  if (pp->state == NULL) {
    goto clean_exit;
  }

  // estimated bits
  pp->est_bit = aligned_alloc(align, est_bit_bytes);
  if (pp->est_bit == NULL) {
    goto clean_exit;
  }

  // LLR pointers (one pair per stage) and the LLR values they point to
  pp->llr0   = malloc(n_stages * sizeof(int8_t*));
  pp->llr1   = malloc(n_stages * sizeof(int8_t*));
  llr_buffer = aligned_alloc(align, llr_bytes);
  if (pp->llr0 == NULL || pp->llr1 == NULL || llr_buffer == NULL) {
    goto clean_exit;
  }

  // node type pointers (one per stage) and the node types they point to.
  // Stage s has 2^(N-s) nodes, s = 0, ..., N: same total size as the LLRs of all stages.
  pp->param->node_type = malloc(n_stages * sizeof(uint8_t*));
  node_type_buffer     = aligned_alloc(align, node_type_bytes);
  if (pp->param->node_type == NULL || node_type_buffer == NULL) {
    goto clean_exit;
  }

  // All allocations succeeded: from here on the function cannot fail.

  // Each stage starts at least SRSLTE_AVX2_B_SIZE positions after the previous one.
  // llr1 (second half of the stage) is not aligned at the lower stages: aligning it
  // would require moving the second half of the output of each function f and g,
  // which may cost more than what aligned memory gains.
  pp->llr0[0] = llr_buffer;
  pp->llr1[0] = llr_buffer + 1;
  for (size_t s = 1; s < n_stages; s++) {
    pp->llr0[s] = pp->llr0[s - 1] + max(SRSLTE_AVX2_B_SIZE, pp->param->code_stage_size[s - 1]);
    pp->llr1[s] = pp->llr0[s] + pp->param->code_stage_size[s - 1];
  }

  // Node types are stored starting from stage 0 (opposite to LLRs).
  pp->param->node_type[0] = node_type_buffer;
  for (size_t s = 1; s < n_stages; s++) {
    pp->param->node_type[s] = pp->param->node_type[s - 1] + pp->param->code_stage_size[n_stages - s];
  }

  init_node_type(frozen_set, pp->param);

  return pp;

clean_exit:
  free(node_type_buffer);
  free(llr_buffer);
  free(pp->llr1);
  free(pp->llr0);
  free(pp->est_bit);
  free(pp->state);
  if (pp->param != NULL) {
    free(pp->param->node_type);
    free(pp->param->code_stage_size);
    free(pp->param);
  }
  if (pp->enc != NULL) {
    // the encoder is initialized right after its allocation: release its internals too
    srslte_polar_encoder_free(pp->enc);
    free(pp->enc);
  }
  free(pp);
  return NULL;
}
/*!
 * Prepares an (8-bit, AVX2) SSC polar decoder for a new codeword: the output
 * message and the estimated-bit buffer are set to zero, the input LLRs are copied
 * into the LLR buffers of the top stage and the decoding state is rewound.
 *
 * \param[in, out] p            The decoder (a struct pSSC_c_avx2).
 * \param[in]      input_llr    LLRs of the new codeword (\f$2^{code\_size\_log}\f$ values).
 * \param[out]     data_decoded Buffer for the decoded message (\f$2^{code\_size\_log}\f$ bytes are cleared).
 * \return 0 on success, -1 if \a p is NULL.
 */
int init_polar_decoder_ssc_c_avx2(void* p, const int8_t* input_llr, uint8_t* data_decoded)
{
  if (p == NULL) {
    return -1;
  }
  struct pSSC_c_avx2* dec = p;

  const uint8_t top_stage = dec->param->code_size_log;
  const int16_t n_bits    = dec->param->code_stage_size[top_stage];
  const int16_t n_half    = dec->param->code_stage_size[top_stage - 1];

  // clear the decoded message
  memset(data_decoded, 0, n_bits);

  // clear the estimated bits, including the extra room for the 256-bit stores
  memset(dec->est_bit, 0, dec->param->code_stage_size[top_stage] + SRSLTE_AVX2_B_SIZE);

  // the first half of the input LLRs goes to llr0, the second half to llr1
  memcpy(dec->llr0[top_stage], input_llr, n_half * sizeof(int8_t));
  memcpy(dec->llr1[top_stage], input_llr + n_half, n_half * sizeof(int8_t));

  // start one stage above the single node of the top stage, at the first bit
  dec->state->stage   = top_stage + 1;
  dec->state->bit_pos = 0;

  return 0;
}
/*!
 * Decodes the codeword previously loaded with init_polar_decoder_ssc_c_avx2().
 * The decoding tree is traversed to estimate the coded bits, which are then passed
 * through the polar encoder to obtain the message written to \a data_decoded.
 *
 * \param[in, out] p            The decoder (a struct pSSC_c_avx2).
 * \param[out]     data_decoded The decoded message.
 * \return 0 on success, -1 if \a p is NULL.
 */
int polar_decoder_ssc_c_avx2(void* p, uint8_t* data_decoded)
{
  struct pSSC_c_avx2* dec = p;
  if (dec == NULL) {
    return -1;
  }

  // traverse the decoding tree: est_bit ends up holding the coded bits
  simplified_node(dec);

  // the message is obtained by encoding the estimated coded bits
  srslte_polar_encoder_encode(dec->enc, dec->est_bit, data_decoded, dec->param->code_size_log);

  // transform {0,-128} into {0, 1}
  srslte_vec_sign_to_bit_c_avx2(data_decoded, 1U << dec->param->code_size_log);

  return 0;
}
/*!
 * Moves one stage down the decoding tree, processes the node found there according
 * to its type and moves back up.
 *
 * - ::RATE_1: hard decision on the LLRs of the node; \a bit_pos advances by the node size.
 * - ::RATE_0: nothing is estimated; \a bit_pos advances by the node size.
 * - ::RATE_R: function f and the left child, function g and the right child, and
 *   finally the xor of the bits estimated by both children.
 *
 * An unknown node type is reported on the standard output and terminates the program.
 *
 * \param[in, out] p The decoder.
 */
static void simplified_node(struct pSSC_c_avx2* p)
{
  struct pSSC_c_avx2* pp = p;

  pp->state->stage--; // to child node.

  uint8_t  stage      = pp->state->stage;
  uint16_t bit_pos    = pp->state->bit_pos >> stage; // index of this node within its stage
  uint16_t stage_size = pp->param->code_stage_size[stage];

  switch (pp->param->node_type[stage][bit_pos]) {
    case RATE_1:
      pp->hard_bit(pp->llr0[stage], pp->est_bit + pp->state->bit_pos, stage_size);
      pp->state->bit_pos = pp->state->bit_pos + stage_size;
      break;
    case RATE_0:
      pp->state->bit_pos = pp->state->bit_pos + stage_size;
      break;
    case RATE_R: {
      // The size of stage - 1 is needed by this branch only. Reading it here, and not
      // before the switch, avoids accessing code_stage_size[-1] whenever a node at
      // stage 0 is visited.
      // NOTE(review): a RATE_R node has two children and is therefore expected at
      // stage >= 1 only -- confirm that init_node_type() never marks a stage-0 node as RATE_R.
      uint16_t stage_half_size = pp->param->code_stage_size[stage - 1];
      uint8_t* estbits0        = NULL;
      uint8_t* estbits1        = NULL;

      pp->f(pp->llr0[stage], pp->llr1[stage], pp->llr0[stage - 1], stage_half_size);

      // move to the child node to the left (up) of the tree.
      simplified_node(pp);

      // the bits estimated by the left child end right before bit_pos
      estbits0 = pp->est_bit + pp->state->bit_pos - stage_half_size;
      pp->g(estbits0, pp->llr0[stage], pp->llr1[stage], pp->llr0[stage - 1], stage_half_size);

      // move to the child node to the right (down) of the tree.
      simplified_node(pp);

      // combine the bits estimated by both children into the first half of this node
      estbits0 = pp->est_bit + pp->state->bit_pos - stage_size;
      estbits1 = pp->est_bit + pp->state->bit_pos - stage_size + stage_half_size;
      pp->xor(estbits0, estbits1, estbits0, stage_half_size);
      break;
    }
    default:
      printf("ERROR: wrong node type %d\n", pp->param->node_type[stage][bit_pos]);
      exit(-1);
      break;
  }

  pp->state->stage++; // to parent node.
}
#endif // LV_HAVE_AVX2

@ -0,0 +1,74 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_ssc_c_avx2.h
* \brief Declaration of the SSC polar decoder inner functions working with
* 8-bit integer-valued LLRs and AVX2 instructions
* \author Jesus Gomez (CTTC) \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef POLAR_DECODER_SSC_C_AVX2_H
#define POLAR_DECODER_SSC_C_AVX2_H
#include "polar_decoder_ssc_all.h"
/*!
 * Creates an SSC polar decoder structure of type pSSC_c_avx2, and allocates memory for the decoding buffers.
 *
 * \param[in] frozen_set      The position of the frozen bits in the codeword.
 * \param[in] code_size_log   \f$log_2\f$ of the number of bits in the codeword.
 * \param[in] frozen_set_size Number of frozen bits.
 * \return A pointer to a pSSC_c_avx2 structure if the function executes correctly, NULL otherwise.
 */
void* create_polar_decoder_ssc_c_avx2(uint16_t* frozen_set, uint8_t code_size_log, uint16_t frozen_set_size);
/*!
 * The (8-bit, avx2) polar decoder SSC "destructor": it frees all the resources allocated to the decoder.
 * A NULL pointer is ignored.
 *
 * \param[in, out] p A pointer to the dismantled decoder.
 */
void delete_polar_decoder_ssc_c_avx2(void* p);
/*!
 * Initializes an (8-bit, avx2) SSC polar decoder before processing a new codeword.
 * The buffer pointed to by \a data_decoded is set to zero.
 *
 * \param[in, out] p            A void pointer to the decoder (a pSSC_c_avx2 structure).
 * \param[in]      llr          LLRs for the new codeword.
 * \param[out]     data_decoded Pointer to the decoded message.
 * \return An integer: 0 if the function executes correctly, -1 if \a p is NULL.
 */
int init_polar_decoder_ssc_c_avx2(void* p, const int8_t* llr, uint8_t* data_decoded);
/*!
 * Decodes a data message from an 8-bit resolution codeword with the specified decoder. Note that
 * the codeword LLRs are copied into \a p by init_polar_decoder_ssc_c_avx2().
 *
 * \param[in]  p    A pointer to the desired decoder.
 * \param[out] data The decoded message.
 * \return An integer: 0 if the function executes correctly, -1 if \a p is NULL.
 */
int polar_decoder_ssc_c_avx2(void* p, uint8_t* data);
#endif // POLAR_DECODER_SSC_C_AVX2_H

@ -0,0 +1,416 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_ssc_f.c
* \brief Definition of the SSC polar decoder inner functions working with
* float-valued LLRs.
*
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include "polar_decoder_ssc_f.h"
#include "../utils_avx2.h"
#include "polar_decoder_vector.h"
#include "srslte/phy/fec/polar/polar_encoder.h"
#include "srslte/phy/utils/vector.h"
/*!
 * \brief Describes an SSC polar decoder (float version).
 */
struct pSSC_f {
float** llr0; /*!< \brief Pointers to the upper half of LLRs values at all stages. */
float** llr1; /*!< \brief Pointers to the lower half of LLRs values at all stages. */
uint8_t* est_bit; /*!< \brief Pointer to the temporary estimated bits. */
struct Params* param; /*!< \brief Pointer to a Params structure. */
struct State* state; /*!< \brief Pointer to a State. */
srslte_polar_encoder_t* enc; /*!< \brief Pointer to a srslte_polar_encoder_t. */
void (*f)(const float* x, const float* y, float* z, const uint16_t len); /*!< \brief Pointer to the function-f. */
void (*g)(const uint8_t* b,
const float* x,
const float* y,
float* z,
const uint16_t len); /*!< \brief Pointer to the function-g. */
void (*xor)(const uint8_t* x,
const uint8_t* y,
uint8_t* z,
const uint32_t len); /*!< \brief Pointer to the xor function (note the 32-bit length, unlike f, g and hard_bit). */
void (*hard_bit)(const float* x, uint8_t* z, const uint16_t len); /*!< \brief Pointer to the hard-bit function. */
};
/*!
 * Switches between the different types of node (::RATE_1, ::RATE_0, ::RATE_R) for the SSC algorithm.
 * Nodes in the decoding tree at stage \f$ s\f$ get the \f$2^s\f$ LLRs from the parent node and
 * return the associated \f$2^s\f$ estimated bits.
 *
 * An unknown node type is reported on the standard output and terminates the program.
 *
 * \param[in, out] p       The decoder.
 * \param[out]     message The decoded message bits.
 */
static void simplified_node(void* p, uint8_t* message);
/*!
 * All decoded bits below a ::RATE_0 node are 0. The function updates the \a p->state->active_node_per_stage
 * entries to point to the next active node, or sets \a p->state->flag_finished if the node holds the last
 * bit of the codeword. It is assumed that message bits are initialized to 0.
 *
 * \param[in, out] p The decoder.
 */
static void rate_0_node(void* p);
/*!
 * ::RATE_1 nodes at stage \f$ s \f$ return the associated \f$2^s\f$ estimated bits by
 * making a hard decision on them.
 * ::RATE_1 nodes also update the message bits vector.
 *
 * \param[in, out] p       The decoder.
 * \param[out]     message The decoded message bits.
 */
static void rate_1_node(void* p, uint8_t* message);
/*!
 * ::RATE_R nodes at stage \f$ s \f$ return the associated \f$2^s\f$ decoded bits by calling
 * the child nodes to the right and left of the decoding tree and then polar encoding (xor) their output.
 * At stage \f$ s \f$, this function runs function srslte_vec_function_f_fff() and srslte_vec_function_g_bfff()
 * with vector size \f$2^{ s - 1}\f$ and updates \a llr0 and \a llr1 memory space for stage \f$(s - 1)\f$.
 * This function also runs srslte_vec_xor_bbb() with vector size \f$2^{s-1}\f$ and
 * updates \a estbits memory space for stage \f$(s + 1)\f$.
 *
 * \param[in, out] p       The decoder.
 * \param[out]     message The decoded message bits.
 */
static void rate_r_node(void* p, uint8_t* message);
/*!
 * Prepares a (float) SSC polar decoder for a new codeword: the output message and
 * the estimated-bit buffer are set to zero, the input LLRs are copied into the LLR
 * buffers of the top stage and the decoding state is rewound.
 *
 * \param[in, out] p            The decoder (a struct pSSC_f).
 * \param[in]      input_llr    LLRs of the new codeword (\f$2^{code\_size\_log}\f$ values).
 * \param[out]     data_decoded Buffer for the decoded message (\f$2^{code\_size\_log}\f$ bytes are cleared).
 * \return 0 on success, -1 if \a p is NULL.
 */
int init_polar_decoder_ssc_f(void* p, const float* input_llr, uint8_t* data_decoded)
{
  if (p == NULL) {
    return -1;
  }
  struct pSSC_f* dec = p;

  const uint8_t top_stage = dec->param->code_size_log;
  const int16_t n_bits    = dec->param->code_stage_size[top_stage];
  const int16_t n_half    = dec->param->code_stage_size[top_stage - 1];

  // clear the decoded message and the estimated bits
  memset(data_decoded, 0, n_bits);
  memset(dec->est_bit, 0, n_bits);

  // the first half of the input LLRs goes to llr0, the second half to llr1
  float* upper = dec->llr0[top_stage];
  float* lower = dec->llr1[top_stage];
  for (uint16_t k = 0; k < n_half; k++) {
    upper[k] = input_llr[k];
    lower[k] = input_llr[k + n_half];
  }

  // start one stage above the single node of the top stage, with the first node
  // of every stage as the active one
  dec->state->stage = top_stage + 1;
  for (uint16_t s = 0; s < top_stage + 1; s++) {
    dec->state->active_node_per_stage[s] = 0;
  }
  dec->state->flag_finished = false;

  return 0;
}
/*!
 * Decodes the codeword previously loaded with init_polar_decoder_ssc_f() by
 * traversing the decoding tree from its root.
 *
 * \param[in, out] p            The decoder (a struct pSSC_f).
 * \param[out]     data_decoded The decoded message.
 * \return 0 on success, -1 if \a p is NULL.
 */
int polar_decoder_ssc_f(void* p, uint8_t* data_decoded)
{
  if (p != NULL) {
    simplified_node(p, data_decoded);
    return 0;
  }
  return -1;
}
/*!
 * Releases a (float) SSC polar decoder together with every buffer it owns.
 * A NULL pointer is accepted and ignored.
 *
 * \param[in, out] p The decoder (a struct pSSC_f) to be destroyed.
 */
void delete_polar_decoder_ssc_f(void* p)
{
  if (p == NULL) {
    return;
  }
  struct pSSC_f* dec = p;

  // LLRs: a single buffer anchored at llr0[0], plus the two per-stage pointer tables.
  free(dec->llr0[0]);
  free(dec->llr0);
  free(dec->llr1);

  // temporary estimated bits
  free(dec->est_bit);

  // decoding parameters: node types (single buffer anchored at node_type[0]) and stage sizes
  free(dec->param->node_type[0]);
  free(dec->param->node_type);
  free(dec->param->code_stage_size);
  free(dec->param);

  // decoding state
  free(dec->state->active_node_per_stage);
  free(dec->state);

  // auxiliary encoder: first its internal resources, then the structure itself
  srslte_polar_encoder_free(dec->enc);
  free(dec->enc);

  free(dec);
}
/*!
 * Creates an SSC polar decoder working with float LLRs and allocates all of its
 * decoding buffers.
 *
 * Every allocation is checked. If any of them fails, whatever has been allocated
 * so far is released and NULL is returned, so a partially built decoder is never
 * handed to the caller. A decoder returned by this function is released with
 * delete_polar_decoder_ssc_f().
 *
 * \param[in] frozen_set      The position of the frozen bits in the codeword.
 * \param[in] code_size_log   \f$log_2\f$ of the number of bits in the codeword.
 * \param[in] frozen_set_size Number of frozen bits.
 * \return A pointer to the new decoder (a struct pSSC_f), or NULL if some memory could not be allocated.
 */
void* create_polar_decoder_ssc_f(uint16_t* frozen_set, const uint8_t code_size_log, const uint16_t frozen_set_size)
{
  const size_t n_stages   = (size_t)code_size_log + 1;  // stages 0, ..., code_size_log
  const size_t code_size  = (size_t)1 << code_size_log; // number of bits in the codeword
  const size_t all_stages = 2 * code_size;              // room for the 2^(n+1) - 1 values of all stages
  const size_t align      = SRSLTE_AVX2_B_SIZE;         // alignment (in bytes) of the large buffers

  // aligned_alloc() requires the size to be a multiple of the alignment: round up.
  const size_t est_bit_bytes   = (code_size + align - 1) / align * align;
  const size_t llr_bytes       = (all_stages * sizeof(float) + align - 1) / align * align;
  const size_t node_type_bytes = (all_stages * sizeof(uint8_t) + align - 1) / align * align;

  float*   llr_buffer       = NULL; // LLRs of all stages, back to back
  uint8_t* node_type_buffer = NULL; // node types of all stages, back to back

  // allocate memory to the polar decoder instance
  struct pSSC_f* pp = malloc(sizeof(struct pSSC_f));
  if (pp == NULL) {
    return NULL;
  }

  // Every owned pointer starts as NULL so that the single failure path at the end of
  // the function can release whatever exists at that point (free(NULL) is a no-op).
  pp->llr0    = NULL;
  pp->llr1    = NULL;
  pp->est_bit = NULL;
  pp->param   = NULL;
  pp->state   = NULL;
  pp->enc     = NULL;

  // set functions
  pp->f        = srslte_vec_function_f_fff;
  pp->g        = srslte_vec_function_g_bfff;
  pp->xor      = srslte_vec_xor_bbb;
  pp->hard_bit = srslte_vec_hard_bit_fc;

  // encoder of maximum size
  pp->enc = malloc(sizeof(srslte_polar_encoder_t));
  if (pp->enc == NULL) {
    goto clean_exit;
  }
  // NOTE(review): the outcome of the encoder initialization is not inspected here;
  // confirm how srslte_polar_encoder_init() reports a failure and handle it.
  srslte_polar_encoder_init(pp->enc, SRSLTE_POLAR_ENCODER_PIPELINED, code_size_log);

  // algorithm constants/parameters
  pp->param = malloc(sizeof(struct Params));
  if (pp->param == NULL) {
    goto clean_exit;
  }
  pp->param->node_type       = NULL;
  pp->param->code_size_log   = code_size_log;
  pp->param->frozen_set_size = frozen_set_size;
  pp->param->code_stage_size = malloc(n_stages * sizeof(uint16_t));
  if (pp->param->code_stage_size == NULL) {
    goto clean_exit;
  }
  // stage s works with vectors of 2^s values
  pp->param->code_stage_size[0] = 1;
  for (size_t s = 1; s < n_stages; s++) {
    pp->param->code_stage_size[s] = 2 * pp->param->code_stage_size[s - 1];
  }

  // state -- its contents are initialized before each decoding
  pp->state = malloc(sizeof(struct State));
  if (pp->state == NULL) {
    goto clean_exit;
  }
  pp->state->active_node_per_stage = malloc(n_stages * sizeof(uint16_t));
  if (pp->state->active_node_per_stage == NULL) {
    goto clean_exit;
  }

  // estimated bits
  pp->est_bit = aligned_alloc(align, est_bit_bytes);
  if (pp->est_bit == NULL) {
    goto clean_exit;
  }

  // LLR pointers (one pair per stage) and the LLR values they point to
  pp->llr0   = malloc(n_stages * sizeof(float*));
  pp->llr1   = malloc(n_stages * sizeof(float*));
  llr_buffer = aligned_alloc(align, llr_bytes);
  if (pp->llr0 == NULL || pp->llr1 == NULL || llr_buffer == NULL) {
    goto clean_exit;
  }

  // node type pointers (one per stage) and the node types they point to.
  // Stage s has 2^(N-s) nodes, s = 0, ..., N: same total size as the LLRs of all stages.
  pp->param->node_type = malloc(n_stages * sizeof(uint8_t*));
  node_type_buffer     = aligned_alloc(align, node_type_bytes);
  if (pp->param->node_type == NULL || node_type_buffer == NULL) {
    goto clean_exit;
  }

  // All allocations succeeded: from here on the function cannot fail.

  // The LLRs of stage s start at offset 2^s of the shared buffer; llr1 points to their second half.
  pp->llr0[0] = llr_buffer;
  pp->llr1[0] = llr_buffer + 1;
  for (size_t s = 1; s < n_stages; s++) {
    pp->llr0[s] = llr_buffer + pp->param->code_stage_size[s];
    pp->llr1[s] = pp->llr0[s] + pp->param->code_stage_size[s - 1];
  }

  // Node types are stored starting from stage 0 (opposite to LLRs).
  pp->param->node_type[0] = node_type_buffer;
  for (size_t s = 1; s < n_stages; s++) {
    pp->param->node_type[s] = pp->param->node_type[s - 1] + pp->param->code_stage_size[n_stages - s];
  }

  init_node_type(frozen_set, pp->param);

  return pp;

clean_exit:
  free(node_type_buffer);
  free(llr_buffer);
  free(pp->llr1);
  free(pp->llr0);
  free(pp->est_bit);
  if (pp->state != NULL) {
    free(pp->state->active_node_per_stage);
    free(pp->state);
  }
  if (pp->param != NULL) {
    free(pp->param->node_type);
    free(pp->param->code_stage_size);
    free(pp->param);
  }
  if (pp->enc != NULL) {
    // the encoder is initialized right after its allocation: release its internals too
    srslte_polar_encoder_free(pp->enc);
    free(pp->enc);
  }
  free(pp);
  return NULL;
}
/*!
 * Moves one stage down the decoding tree, runs the handler that matches the type
 * of the active node at that stage (::RATE_1, ::RATE_0 or ::RATE_R) and then moves
 * back up to the parent stage. An unknown node type is reported and aborts the program.
 *
 * \param[in, out] p   Pointer to the float-valued SSC decoder (struct pSSC_f).
 * \param[out] message Buffer receiving the decoded message bits.
 */
static void simplified_node(void* p, uint8_t* message)
{
  struct pSSC_f* dec = p;

  // Descend: the node to process lives one stage below the caller's stage.
  dec->state->stage--;

  const uint8_t  level    = dec->state->stage;
  const uint16_t node_idx = dec->state->active_node_per_stage[level];
  const uint8_t  kind     = dec->param->node_type[level][node_idx];

  if (kind == RATE_1) {
    rate_1_node(dec, message);
  } else if (kind == RATE_0) {
    rate_0_node(dec);
  } else if (kind == RATE_R) {
    rate_r_node(dec, message);
  } else {
    printf("ERROR: wrong node type %d\n", kind);
    exit(-1);
  }

  // Ascend: restore the caller's stage.
  dec->state->stage++;
}
/*!
 * Handles a ::RATE_0 node: no bit is written. If the active stage-0 position is the
 * last bit of the codeword, decoding is flagged as finished; otherwise the active-node
 * index of every stage from 0 up to the current one is advanced past this node.
 *
 * \param[in, out] p Pointer to the float-valued SSC decoder (struct pSSC_f).
 */
static void rate_0_node(void* p)
{
  struct pSSC_f* dec = p;

  const uint8_t log_n  = dec->param->code_size_log;
  const int16_t n_bits = dec->param->code_stage_size[log_n];
  const uint8_t level  = dec->state->stage;
  uint16_t*     active = dec->state->active_node_per_stage;

  if (active[0] == n_bits - 1) {
    dec->state->flag_finished = true;
    return;
  }

  // A node at stage `level` spans code_stage_size[level - k] nodes of stage k.
  for (uint8_t k = 0; k <= level; k++) {
    active[k] += dec->param->code_stage_size[level - k];
  }
}
/*!
 * Handles a ::RATE_1 node: takes hard decisions on the LLRs of the current stage and
 * stores them in the estimated-bit buffer at the active bit position. At stage 0 the
 * decision is copied straight into \a message; at higher stages the decisions are
 * passed through the polar encoder to obtain the message bits. Finally the active-node
 * indices are advanced and the finished flag is raised once the whole codeword is covered.
 *
 * \param[in, out] p   Pointer to the float-valued SSC decoder (struct pSSC_f).
 * \param[out] message Buffer receiving the decoded message bits.
 */
static void rate_1_node(void* p, uint8_t* message)
{
  struct pSSC_f* dec = p;

  const uint8_t  level     = dec->state->stage;
  const uint16_t first_bit = dec->state->active_node_per_stage[0];
  const uint16_t n_bits    = dec->param->code_stage_size[dec->param->code_size_log];
  uint8_t*       decisions = dec->est_bit + first_bit;

  dec->hard_bit(dec->llr0[level], decisions, dec->param->code_stage_size[level]);

  if (level == 0) {
    message[first_bit] = decisions[0];
  } else {
    srslte_polar_encoder_encode(dec->enc, decisions, message + first_bit, level);
  }

  // Advance the active node of every stage from 0 up to the current one.
  uint16_t* active = dec->state->active_node_per_stage;
  for (uint8_t k = 0; k <= level; k++) {
    active[k] += dec->param->code_stage_size[level - k];
  }

  // All codeword positions processed?
  if (active[0] == n_bits) {
    dec->state->flag_finished = true;
  }
}
/*!
 * Handles a ::RATE_R node at stage s:
 *  -# runs function f on the two LLR halves of stage s and decodes the first child;
 *  -# runs function g, using the bits estimated by the first child, and decodes the second child;
 *  -# XORs the second child's estimated bits into the first child's ones (in place) and
 *     advances the active-node index of stage s.
 * The function returns early as soon as a child raises the finished flag.
 *
 * \param[in, out] p   Pointer to the float-valued SSC decoder (struct pSSC_f).
 * \param[out] message Buffer receiving the decoded message bits.
 */
static void rate_r_node(void* p, uint8_t* message)
{
  struct pSSC_f* dec = p;

  const uint8_t  level = dec->state->stage;
  const uint16_t full  = dec->param->code_stage_size[level];
  const uint16_t half  = dec->param->code_stage_size[level - 1];

  float* upper_llr = dec->llr0[level];
  float* lower_llr = dec->llr1[level];
  float* child_llr = dec->llr0[level - 1];

  // First child (left/up in the tree).
  dec->f(upper_llr, lower_llr, child_llr, half);
  simplified_node(dec, message);
  if (dec->state->flag_finished == true) { // (just in case). However for 5G frozen sets, the code can never end here.
    return;
  }

  // Second child (right/down in the tree): needs the bits just estimated by the first one.
  int16_t first_off = dec->state->active_node_per_stage[0] - half;
  dec->g(dec->est_bit + first_off, upper_llr, lower_llr, child_llr, half);
  simplified_node(dec, message);
  if (dec->state->flag_finished == true) {
    return;
  }

  // Combine both halves: first half <- first half XOR second half.
  first_off                = dec->state->active_node_per_stage[0] - full;
  const int16_t second_off = first_off + half;
  uint8_t*      first_est  = dec->est_bit + first_off;
  uint8_t*      second_est = dec->est_bit + second_off;
  dec->xor(first_est, second_est, first_est, half);

  // This node is done: step to the next node of this stage before returning to the parent.
  dec->state->active_node_per_stage[level]++;
}

@ -0,0 +1,71 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_ssc_f.h
* \brief Declaration of the SSC polar decoder inner functions working with
* float-valued LLRs.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef POLAR_DECODER_SSC_F_H
#define POLAR_DECODER_SSC_F_H
#include "polar_decoder_ssc_all.h"
/*!
 * Creates an SSC polar decoder structure of type pSSC, and allocates memory for the decoding buffers.
 * \param[in] frozen_set The position of the frozen bits in the codeword.
 * \param[in] code_size_log \f$log_2\f$ of the number of bits in the codeword.
 * \param[in] frozen_set_size Number of frozen bits.
 * \return A pointer to a pSSC structure if the function executes correctly, NULL otherwise.
 */
void* create_polar_decoder_ssc_f(uint16_t* frozen_set, uint8_t code_size_log, uint16_t frozen_set_size);
/*!
* The polar decoder SSC "destructor": it frees all the resources allocated to the decoder.
* \param[in, out] p A pointer to the dismantled decoder.
*/
void delete_polar_decoder_ssc_f(void* p);
/*!
* Initializes an SSC polar decoder before processing a new codeword.
* \param[in, out] p A void pointer used to declare a pSSC structure.
* \param[in] llr LLRs for the new codeword.
* \param[out] data_decoded Pointer to the decoded message.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int init_polar_decoder_ssc_f(void* p, const float* llr, uint8_t* data_decoded);
/*!
* Decodes a data message from a codeword with the specified decoder. Note that
* a pointer to the codeword LLRs is included in \a p and initialized by init_polar_decoder_ssc_f().
* \param[in] p A pointer to the desired decoder.
* \param[out] data The decoded message.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int polar_decoder_ssc_f(void* p, uint8_t* data);
#endif // POLAR_DECODER_SSC_F_H

@ -0,0 +1,430 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_ssc_s.c
* \brief Definition of the SSC polar decoder inner functions working with
* 16-bit integer-valued LLRs.
*
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
// IMPORTANT: polar_decoder_ssc_s.c is exactly the same as polar_decoder_ssc_f.c except for:
// (1) #include "polar_decoder_ssc_s.h"
// (2) the names of the external functions, which end with _s instead of _f
// (3) the initialization of the function pointers in create_polar_decoder_ssc_s:
//       pp->f        = srslte_vec_function_f_sss;
//       pp->g        = srslte_vec_function_g_bsss;
//       pp->xor      = srslte_vec_xor_bbb;
//       pp->hard_bit = srslte_vec_hard_bit_sc;
#include "polar_decoder_ssc_s.h"
#include "../utils_avx2.h"
#include "polar_decoder_vector.h"
#include "srslte/phy/fec/polar/polar_encoder.h"
#include "srslte/phy/utils/vector.h"
/*!
* \brief Type indicator for printing LLRs if debugging
*/
#define PRIllr "d" // for printing llrs if debugging
/*!
* \brief Describes an SSC polar decoder (16-bit version).
*/
struct pSSC_s {
int16_t** llr0; /*!< \brief Pointers to the upper half of LLRs values at all stages. */
int16_t** llr1; /*!< \brief Pointers to the lower half of LLRs values at all stages. */
uint8_t* est_bit; /*!< \brief Pointer to the temporary estimated bits. */
struct Params* param; /*!< \brief Pointer to a Params structure. */
struct State* state; /*!< \brief Pointer to a State. */
srslte_polar_encoder_t* enc; /*!< \brief Pointer to a srslte_polar_encoder_t. */
void (*f)(const int16_t* x,
const int16_t* y,
int16_t* z,
const uint16_t len); /*!< \brief Pointer to the function-f. */
void (*g)(const uint8_t* b,
const int16_t* x,
const int16_t* y,
int16_t* z,
const uint16_t len); /*!< \brief Pointer to the function-g. */
void (*xor)(const uint8_t* x,
const uint8_t* y,
uint8_t* z,
const uint32_t len); /*!< \brief Pointer to the bitwise-XOR function. */
void (*hard_bit)(const int16_t* x, uint8_t* z, const uint16_t len); /*!< \brief Pointer to the hard-bit function. */
};
/*!
* Switches between the different types of node (::RATE_1, ::RATE_0, ::RATE_R) for the SSC algorithm.
* Nodes in the decoding tree at stage \f$ s\f$ get the \f$2^s\f$ LLRs from the parent node and
* return the associated \f$2^s\f$ estimated bits.
*
*/
static void simplified_node(void* p, uint8_t* message);
/*!
* All decoded bits below a ::RATE_0 node are 0. The function updates the \a p->state->active_node_per_stage
* pointer to point to the next active node. It is assumed that message bits are initialized to 0.
*
*/
static void rate_0_node(void* p);
/*!
* ::RATE_1 nodes at stage \f$ s \f$ return the associated \f$2^s\f$ estimated bits by
* making a hard decision on them.
* ::RATE_1 nodes also update message bits vector.
*
*/
static void rate_1_node(void* p, uint8_t* message);
/*!
 * ::RATE_R nodes at stage \f$ s \f$ return the associated \f$2^s\f$ decoded bits by calling
 * the child nodes to the left and right of the decoding tree and then polar encoding (xor) their output.
 * At stage \f$ s \f$, this function runs the functions pointed to by \a f and \a g (in this 16-bit
 * version, srslte_vec_function_f_sss() and srslte_vec_function_g_bsss())
 * with vector size \f$2^{ s - 1}\f$ and updates \a llr0 and \a llr1 memory space for stage \f$(s - 1)\f$.
 * This function also runs srslte_vec_xor_bbb() with vector size \f$2^{s-1}\f$ and
 * updates \a estbits memory space for stage \f$(s + 1)\f$.
*
*/
static void rate_r_node(void* p, uint8_t* message);
/*!
 * Prepares a 16-bit SSC polar decoder for a new codeword: zeroes the output message and
 * the estimated-bit buffer, copies the input LLRs into the LLR buffers of the last stage
 * and resets the state of the decoding tree.
 *
 * \param[in, out] p        Decoder instance, as returned by create_polar_decoder_ssc_s().
 * \param[in] input_llr     Codeword LLRs; code_stage_size[code_size_log] values are read.
 * \param[out] data_decoded Decoded-message buffer; code_stage_size[code_size_log] bytes are zeroed.
 * \return 0 on success, -1 if any of the three pointers is NULL.
 */
int init_polar_decoder_ssc_s(void* p, const int16_t* input_llr, uint8_t* data_decoded)
{
  struct pSSC_s* pp = p;

  // Reject NULL buffers up front instead of crashing in memset()/the copy loop below.
  if (pp == NULL || input_llr == NULL || data_decoded == NULL) {
    return -1;
  }

  const uint8_t code_size_log = pp->param->code_size_log;
  // Sizes stay unsigned, like the code_stage_size table: a signed 16-bit copy could turn
  // negative and be converted into a huge size_t by memset().
  const uint16_t code_size      = pp->param->code_stage_size[code_size_log];
  const uint16_t code_half_size = pp->param->code_stage_size[code_size_log - 1];

  // Initializes the data_decoded vector to all zeros
  memset(data_decoded, 0, code_size);

  // Initializes the est_bit vector to all zeros
  memset(pp->est_bit, 0, code_size);

  // Initializes the LLR buffers of the last stage with the input LLRs:
  // first half of the codeword into llr0, second half into llr1.
  for (uint16_t i = 0; i < code_half_size; i++) {
    pp->llr0[code_size_log][i] = input_llr[i];
    pp->llr1[code_size_log][i] = input_llr[i + code_half_size];
  }

  // Initializes the state of the decoding tree
  pp->state->stage = code_size_log + 1; // start from the only one node at the last stage + 1.
  for (uint16_t i = 0; i < code_size_log + 1; i++) {
    pp->state->active_node_per_stage[i] = 0;
  }
  pp->state->flag_finished = false;

  return 0;
}
/*!
 * Runs the SSC decoding tree of a decoder previously set up by init_polar_decoder_ssc_s().
 *
 * \param[in, out] p        Decoder instance.
 * \param[out] data_decoded Buffer receiving the decoded message.
 * \return 0 if the decoder was run, -1 if \a p is NULL.
 */
int polar_decoder_ssc_s(void* p, uint8_t* data_decoded)
{
  int status = -1;

  if (p != NULL) {
    simplified_node(p, data_decoded);
    status = 0;
  }

  return status;
}
void delete_polar_decoder_ssc_s(void* p)
{
struct pSSC_s* pp = p;
if (p != NULL) {
free(pp->llr0[0]); // remove LLR buffer.
free(pp->llr0);
free(pp->llr1);
free(pp->param->node_type[0]);
free(pp->param->node_type);
free(pp->est_bit); // remove estbits buffer.
free(pp->param->code_stage_size);
free(pp->param);
free(pp->state->active_node_per_stage);
free(pp->state);
srslte_polar_encoder_free(pp->enc);
free(pp->enc);
free(pp);
}
}
/*!
 * Rounds \a size up to the next multiple of SRSLTE_AVX2_B_SIZE, so that it can be
 * passed to aligned_alloc() together with that alignment.
 */
static size_t ssc_s_aligned_size(size_t size)
{
  const size_t align = SRSLTE_AVX2_B_SIZE;
  return ((size + align - 1) / align) * align;
}

/*!
 * Creates a 16-bit SSC polar decoder and allocates all of its buffers.
 *
 * Every allocation is checked; on failure everything allocated so far is released
 * and NULL is returned.
 *
 * \param[in] frozen_set      The position of the frozen bits in the codeword.
 * \param[in] code_size_log   \f$log_2\f$ of the number of bits in the codeword.
 * \param[in] frozen_set_size Number of frozen bits.
 * \return A pointer to the new decoder (struct pSSC_s), NULL if any allocation fails.
 */
void* create_polar_decoder_ssc_s(uint16_t* frozen_set, const uint8_t code_size_log, const uint16_t frozen_set_size)
{
  // Number of stages: n = 0, ..., code_size_log.
  const size_t n_stages = (size_t)code_size_log + 1;
  // Stage n holds 2^n values, so all stages together need 2^(code_size_log + 1) - 1 entries;
  // one extra entry is reserved. Kept in size_t so the shift cannot wrap a 16-bit variable.
  const size_t llr_all_stages = (size_t)1 << n_stages;

  // Zero-initialized so that the failure path can tell what has been allocated.
  struct pSSC_s* pp = calloc(1, sizeof(*pp));
  if (pp == NULL) {
    return NULL;
  }

  // set functions
  pp->f        = srslte_vec_function_f_sss;
  pp->g        = srslte_vec_function_g_bsss;
  pp->xor      = srslte_vec_xor_bbb;
  pp->hard_bit = srslte_vec_hard_bit_sc;

  // encoder of maximum size
  if ((pp->enc = malloc(sizeof(srslte_polar_encoder_t))) == NULL) {
    goto fail;
  }
  srslte_polar_encoder_init(pp->enc, SRSLTE_POLAR_ENCODER_PIPELINED, code_size_log);

  // algorithm constants/parameters
  if ((pp->param = calloc(1, sizeof(struct Params))) == NULL) {
    goto fail;
  }
  if ((pp->param->code_stage_size = malloc(n_stages * sizeof(uint16_t))) == NULL) {
    goto fail;
  }
  // code_stage_size[i] = 2^i
  pp->param->code_stage_size[0] = 1;
  for (uint8_t i = 1; i < code_size_log + 1; i++) {
    pp->param->code_stage_size[i] = 2 * pp->param->code_stage_size[i - 1];
  }
  pp->param->code_size_log   = code_size_log;
  pp->param->frozen_set_size = frozen_set_size;

  // state -- its contents are initialized in init_polar_decoder_ssc_s
  if ((pp->state = calloc(1, sizeof(struct State))) == NULL) {
    goto fail;
  }
  if ((pp->state->active_node_per_stage = malloc(n_stages * sizeof(uint16_t))) == NULL) {
    goto fail;
  }

  // estimated bits: one byte per codeword bit, aligned for SIMD access
  pp->est_bit = aligned_alloc(SRSLTE_AVX2_B_SIZE, ssc_s_aligned_size(pp->param->code_stage_size[code_size_log]));
  if (pp->est_bit == NULL) {
    goto fail;
  }

  // per-stage LLR pointer tables (zeroed, so llr0[0] is NULL until its buffer is allocated)
  pp->llr0 = calloc(n_stages, sizeof(int16_t*));
  pp->llr1 = calloc(n_stages, sizeof(int16_t*));
  if (pp->llr0 == NULL || pp->llr1 == NULL) {
    goto fail;
  }

  // A single buffer holds the LLRs of all stages: stage s starts at offset 2^s
  // (upper half) and 2^s + 2^(s-1) (lower half).
  // Only the stages starting at multiples of SRSLTE_AVX2_B_SIZE are aligned.
  pp->llr0[0] = aligned_alloc(SRSLTE_AVX2_B_SIZE, ssc_s_aligned_size(llr_all_stages * sizeof(int16_t)));
  if (pp->llr0[0] == NULL) {
    goto fail;
  }
  pp->llr1[0] = pp->llr0[0] + 1;
  for (uint8_t s = 1; s < code_size_log + 1; s++) {
    pp->llr0[s] = pp->llr0[0] + pp->param->code_stage_size[s];
    pp->llr1[s] = pp->llr0[0] + pp->param->code_stage_size[s] + pp->param->code_stage_size[s - 1];
  }

  // node types: one pointer per stage into a single buffer with as many entries as the LLR one
  pp->param->node_type = calloc(n_stages, sizeof(uint8_t*));
  if (pp->param->node_type == NULL) {
    goto fail;
  }
  pp->param->node_type[0] = aligned_alloc(SRSLTE_AVX2_B_SIZE, ssc_s_aligned_size(llr_all_stages * sizeof(uint8_t)));
  if (pp->param->node_type[0] == NULL) {
    goto fail;
  }
  // initialize all node type pointers. (stage 0 is the first, opposite to LLRs)
  for (uint8_t s = 1; s < code_size_log + 1; s++) {
    pp->param->node_type[s] = pp->param->node_type[s - 1] + pp->param->code_stage_size[code_size_log - s + 1];
  }

  init_node_type(frozen_set, pp->param);

  return pp;

fail:
  // Release whatever was allocated before the failure (free(NULL) is a no-op).
  if (pp->param != NULL) {
    if (pp->param->node_type != NULL) {
      free(pp->param->node_type[0]);
    }
    free(pp->param->node_type);
    free(pp->param->code_stage_size);
    free(pp->param);
  }
  if (pp->llr0 != NULL) {
    free(pp->llr0[0]);
  }
  free(pp->llr0);
  free(pp->llr1);
  free(pp->est_bit);
  if (pp->state != NULL) {
    free(pp->state->active_node_per_stage);
    free(pp->state);
  }
  if (pp->enc != NULL) {
    // NOTE(review): mirrors delete_polar_decoder_ssc_s(); assumes srslte_polar_encoder_free()
    // is safe on an encoder that went through srslte_polar_encoder_init() -- confirm.
    srslte_polar_encoder_free(pp->enc);
    free(pp->enc);
  }
  free(pp);
  return NULL;
}
/*!
 * Moves one stage down the decoding tree, runs the handler that matches the type
 * of the active node at that stage (::RATE_1, ::RATE_0 or ::RATE_R) and then moves
 * back up to the parent stage. An unknown node type is reported and aborts the program.
 *
 * \param[in, out] p   Pointer to the 16-bit SSC decoder (struct pSSC_s).
 * \param[out] message Buffer receiving the decoded message bits.
 */
static void simplified_node(void* p, uint8_t* message)
{
  struct pSSC_s* dec = p;

  // Descend: the node to process lives one stage below the caller's stage.
  dec->state->stage--;

  const uint8_t  level    = dec->state->stage;
  const uint16_t node_idx = dec->state->active_node_per_stage[level];
  const uint8_t  kind     = dec->param->node_type[level][node_idx];

  if (kind == RATE_1) {
    rate_1_node(dec, message);
  } else if (kind == RATE_0) {
    rate_0_node(dec);
  } else if (kind == RATE_R) {
    rate_r_node(dec, message);
  } else {
    printf("ERROR: wrong node type %d\n", kind);
    exit(-1);
  }

  // Ascend: restore the caller's stage.
  dec->state->stage++;
}
/*!
 * Handles a ::RATE_0 node: no bit is written. If the active stage-0 position is the
 * last bit of the codeword, decoding is flagged as finished; otherwise the active-node
 * index of every stage from 0 up to the current one is advanced past this node.
 *
 * \param[in, out] p Pointer to the 16-bit SSC decoder (struct pSSC_s).
 */
static void rate_0_node(void* p)
{
  struct pSSC_s* dec = p;

  const uint8_t log_n  = dec->param->code_size_log;
  const int16_t n_bits = dec->param->code_stage_size[log_n];
  const uint8_t level  = dec->state->stage;
  uint16_t*     active = dec->state->active_node_per_stage;

  if (active[0] == n_bits - 1) {
    dec->state->flag_finished = true;
    return;
  }

  // A node at stage `level` spans code_stage_size[level - k] nodes of stage k.
  for (uint8_t k = 0; k <= level; k++) {
    active[k] += dec->param->code_stage_size[level - k];
  }
}
/*!
 * Handles a ::RATE_1 node: takes hard decisions on the LLRs of the current stage and
 * stores them in the estimated-bit buffer at the active bit position. At stage 0 the
 * decision is copied straight into \a message; at higher stages the decisions are
 * passed through the polar encoder to obtain the message bits. Finally the active-node
 * indices are advanced and the finished flag is raised once the whole codeword is covered.
 *
 * \param[in, out] p   Pointer to the 16-bit SSC decoder (struct pSSC_s).
 * \param[out] message Buffer receiving the decoded message bits.
 */
static void rate_1_node(void* p, uint8_t* message)
{
  struct pSSC_s* dec = p;

  const uint8_t  level     = dec->state->stage;
  const uint16_t first_bit = dec->state->active_node_per_stage[0];
  const uint16_t n_bits    = dec->param->code_stage_size[dec->param->code_size_log];
  uint8_t*       decisions = dec->est_bit + first_bit;

  dec->hard_bit(dec->llr0[level], decisions, dec->param->code_stage_size[level]);

  if (level == 0) {
    message[first_bit] = decisions[0];
  } else {
    srslte_polar_encoder_encode(dec->enc, decisions, message + first_bit, level);
  }

  // Advance the active node of every stage from 0 up to the current one.
  uint16_t* active = dec->state->active_node_per_stage;
  for (uint8_t k = 0; k <= level; k++) {
    active[k] += dec->param->code_stage_size[level - k];
  }

  // All codeword positions processed?
  if (active[0] == n_bits) {
    dec->state->flag_finished = true;
  }
}
/*!
 * Handles a ::RATE_R node at stage s:
 *  -# runs function f on the two LLR halves of stage s and decodes the first child;
 *  -# runs function g, using the bits estimated by the first child, and decodes the second child;
 *  -# XORs the second child's estimated bits into the first child's ones (in place) and
 *     advances the active-node index of stage s.
 * The function returns early as soon as a child raises the finished flag.
 *
 * \param[in, out] p   Pointer to the 16-bit SSC decoder (struct pSSC_s).
 * \param[out] message Buffer receiving the decoded message bits.
 */
static void rate_r_node(void* p, uint8_t* message)
{
  struct pSSC_s* dec = p;

  const uint8_t  level = dec->state->stage;
  const uint16_t full  = dec->param->code_stage_size[level];
  const uint16_t half  = dec->param->code_stage_size[level - 1];

  int16_t* upper_llr = dec->llr0[level];
  int16_t* lower_llr = dec->llr1[level];
  int16_t* child_llr = dec->llr0[level - 1];

  // First child (left/up in the tree).
  dec->f(upper_llr, lower_llr, child_llr, half);
  simplified_node(dec, message);
  if (dec->state->flag_finished == true) { // (just in case). However for 5G frozen sets, the code can never end here.
    return;
  }

  // Second child (right/down in the tree): needs the bits just estimated by the first one.
  int16_t first_off = dec->state->active_node_per_stage[0] - half;
  dec->g(dec->est_bit + first_off, upper_llr, lower_llr, child_llr, half);
  simplified_node(dec, message);
  if (dec->state->flag_finished == true) {
    return;
  }

  // Combine both halves: first half <- first half XOR second half.
  first_off                = dec->state->active_node_per_stage[0] - full;
  const int16_t second_off = first_off + half;
  uint8_t*      first_est  = dec->est_bit + first_off;
  uint8_t*      second_est = dec->est_bit + second_off;
  dec->xor(first_est, second_est, first_est, half);

  // This node is done: step to the next node of this stage before returning to the parent.
  dec->state->active_node_per_stage[level]++;
}

@ -0,0 +1,78 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_ssc_s.h
 * \brief Declaration of the SSC polar decoder inner functions working with
* 16-bit integer-valued LLRs.
* \author Jesus Gomez (CTTC) \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef POLAR_DECODER_SSC_S_H
#define POLAR_DECODER_SSC_S_H
#include "polar_decoder_ssc_all.h"
/*!
 * Creates an SSC polar decoder structure of type pSSC, and allocates memory for the decoding buffers.
 *
 * This function is exactly the same as the one for the floating-point version.
 * Note, however, that it works with a different pSSC structure (different function pointers
 * pSSC::f, pSSC::g, pSSC::xor and pSSC::hard_bit).
 *
 * \param[in] frozen_set The position of the frozen bits in the codeword.
 * \param[in] code_size_log \f$log_2\f$ of the number of bits in the codeword.
 * \param[in] frozen_set_size Number of frozen bits.
 * \return A pointer to a pSSC structure if the function executes correctly, NULL otherwise.
 */
void* create_polar_decoder_ssc_s(uint16_t* frozen_set, uint8_t code_size_log, uint16_t frozen_set_size);
/*!
* The 16-bit polar decoder SSC "destructor": it frees all the resources allocated to the decoder.
*
* \param[in, out] p A pointer to the dismantled decoder.
*/
void delete_polar_decoder_ssc_s(void* p);
/*!
* Initializes a 16-bit SSC polar decoder before processing a new codeword.
*
* \param[in, out] p A void pointer used to declare a pSSC structure.
* \param[in] llr LLRs for the new codeword.
* \param[out] data_decoded Pointer to the decoded message.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int init_polar_decoder_ssc_s(void* p, const int16_t* llr, uint8_t* data_decoded);
/*!
 * Decodes a data message from a 16-bit resolution codeword with the specified decoder. Note that
 * a pointer to the codeword LLRs is included in \a p and initialized by init_polar_decoder_ssc_s().
 *
 * \param[in] p A pointer to the desired decoder.
 * \param[out] data The decoded message.
 * \return An integer: 0 if the function executes correctly, -1 otherwise.
 */
int polar_decoder_ssc_s(void* p, uint8_t* data);
#endif // POLAR_DECODER_SSC_S_H

@ -0,0 +1,216 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_vector.c
* \brief Definition of the polar decoder vectorizable functions.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include "math.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h> //abs function
/*!
* Sign of a real number.
*/
static int sgn(float v)
{
  // Returns +1 for positive values, -1 for negative values and 0 otherwise
  // (zero of either sign, or NaN, for which both comparisons are false).
  if (v > 0) {
    return 1;
  }
  if (v < 0) {
    return -1;
  }
  return 0;
}
/*!
 * Hard-decision loop shared by the srslte_vec_hard_bit_* functions:
 * sets z[i] to 1 if \f$ (x[i] < 0) \f$ and to 0 if \f$ (x[i] >= 0) \f$, for i = 0, ..., len - 1.
 *
 * The macro expects the names \a x (input array), \a z (uint8_t output array) and
 * \a len (number of elements) to be in scope where it is expanded.
 * It is wrapped in do { } while (0) so that `hard_bit;` is a single statement and can be
 * used safely in unbraced if/else bodies.
 */
#define hard_bit                                                                                                       \
  do {                                                                                                                 \
    for (uint16_t i = 0; i < len; ++i) {                                                                               \
      /* the comparison yields exactly 0 or 1 */                                                                       \
      z[i] = (uint8_t)(x[i] < 0);                                                                                      \
    }                                                                                                                  \
  } while (0)
/*!
 * Box-plus (min-sum) operator on float LLRs: for each element,
 * z = sign(x) * sign(y) * min(|x|, |y|).
 */
void srslte_vec_function_f_fff(const float* x, const float* y, float* z, const uint16_t len)
{
  for (int k = 0; k < len; k++) {
    const float mag_x = fabsf(x[k]);
    const float mag_y = fabsf(y[k]);
    const float sign  = sgn(x[k]) * sgn(y[k]);

    // Smallest magnitude carrying the product of the two signs.
    z[k] = sign * ((mag_x >= mag_y) ? mag_y : mag_x);
  }
}
/*!
 * Box-plus (min-sum) operator on int16_t LLRs: for each element,
 * z = sign(x) * sign(y) * min(|x|, |y|).
 *
 * Magnitudes are computed in int, so that an input of INT16_MIN (whose magnitude does
 * not fit in int16_t) is compared correctly. The only result that cannot be
 * represented, +32768 for x = y = INT16_MIN, saturates to INT16_MAX.
 */
void srslte_vec_function_f_sss(const int16_t* x, const int16_t* y, int16_t* z, const uint16_t len)
{
  for (int i = 0; i < len; i++) {
    const int L0 = x[i];
    const int L1 = y[i];

    const int absL0 = (L0 < 0) ? -L0 : L0;
    const int absL1 = (L1 < 0) ? -L1 : L1;

    int min_abs = (absL0 >= absL1) ? absL1 : absL0;
    if (min_abs > INT16_MAX) {
      min_abs = INT16_MAX;
    }

    // If either input is 0 the minimum magnitude is 0, so the sign is irrelevant.
    const int negative = ((L0 < 0) != (L1 < 0));

    z[i] = (int16_t)(negative ? -min_abs : min_abs);
  }
}
/*!
 * Box-plus (min-sum) operator on int8_t LLRs: for each element,
 * z = sign(x) * sign(y) * min(|x|, |y|).
 *
 * Magnitudes are computed in int, so that an input of INT8_MIN (whose magnitude does
 * not fit in int8_t) is compared correctly. The only result that cannot be
 * represented, +128 for x = y = INT8_MIN, saturates to INT8_MAX.
 */
void srslte_vec_function_f_ccc(const int8_t* x, const int8_t* y, int8_t* z, const uint16_t len)
{
  for (int i = 0; i < len; i++) {
    const int L0 = x[i];
    const int L1 = y[i];

    const int absL0 = (L0 < 0) ? -L0 : L0;
    const int absL1 = (L1 < 0) ? -L1 : L1;

    int min_abs = (absL0 >= absL1) ? absL1 : absL0;
    if (min_abs > INT8_MAX) {
      min_abs = INT8_MAX;
    }

    // If either input is 0 the minimum magnitude is 0, so the sign is irrelevant.
    const int negative = ((L0 < 0) != (L1 < 0));

    z[i] = (int8_t)(negative ? -min_abs : min_abs);
  }
}
/*!
 * Hard decision on float LLRs: z[i] is 1 if x[i] < 0 and 0 otherwise.
 */
void srslte_vec_hard_bit_fc(const float* x, uint8_t* z, const uint16_t len)
{
  for (uint16_t k = 0; k < len; ++k) {
    z[k] = (x[k] < 0) ? 1 : 0;
  }
}
/*!
 * Hard decision on int16_t LLRs: z[i] is 1 if x[i] < 0 and 0 otherwise.
 */
void srslte_vec_hard_bit_sc(const int16_t* x, uint8_t* z, const uint16_t len)
{
  for (uint16_t k = 0; k < len; ++k) {
    z[k] = (x[k] < 0) ? 1 : 0;
  }
}
/*!
 * Hard decision on int8_t LLRs: z[i] is 1 if x[i] < 0 and 0 otherwise.
 */
void srslte_vec_hard_bit_cc(const int8_t* x, uint8_t* z, const uint16_t len)
{
  for (uint16_t k = 0; k < len; ++k) {
    z[k] = (x[k] < 0) ? 1 : 0;
  }
}
/*!
 * Function g on float LLRs: for each element, z = y + (1 - 2 * b) * x,
 * i.e. z = y + x when b is 0 and z = y - x when b is 1.
 */
void srslte_vec_function_g_bfff(const uint8_t* b, const float* x, const float* y, float* z, const uint16_t len)
{
  for (int k = 0; k < len; k++) {
    // +1 for bit 0, -1 for bit 1 (narrowed from int to int8_t)
    const int8_t flip = -2 * b[k] + 1;

    z[k] = y[k] + flip * x[k];
  }
}
/*!
 * Function g on int16_t LLRs: for each element, z = y + (1 - 2 * b) * x
 * (z = y + x when b is 0, z = y - x when b is 1), saturated to [-32767, 32767].
 */
void srslte_vec_function_g_bsss(const uint8_t* b, const int16_t* x, const int16_t* y, int16_t* z, const uint16_t len)
{
  for (int k = 0; k < len; k++) {
    // +1 for bit 0, -1 for bit 1 (narrowed from int to int8_t)
    const int8_t flip = -2 * b[k] + 1;

    // Accumulate in a wider type, then clamp symmetrically.
    long acc = (long)y[k] + flip * x[k];
    acc      = (acc > 32767) ? 32767 : acc;
    acc      = (acc < -32767) ? -32767 : acc;

    z[k] = (int16_t)acc;
  }
}
/*!
 * Function g on int8_t LLRs: for each element, z = y + (1 - 2 * b) * x
 * (z = y + x when b is 0, z = y - x when b is 1), saturated to [-127, 127].
 */
void srslte_vec_function_g_bccc(const uint8_t* b, const int8_t* x, const int8_t* y, int8_t* z, const uint16_t len)
{
  for (int k = 0; k < len; k++) {
    // +1 for bit 0, -1 for bit 1 (narrowed from int to int8_t)
    const int8_t flip = -2 * b[k] + 1;

    // Accumulate in a wider type, then clamp symmetrically.
    long acc = (long)y[k] + flip * x[k];
    acc      = (acc > 127) ? 127 : acc;
    acc      = (acc < -127) ? -127 : acc;

    z[k] = (int8_t)acc;
  }
}

@ -0,0 +1,119 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_vector.h
* \brief Declaration of the polar decoder vectorizable functions.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef POLAR_VECTOR_FUNCTIONS_H
#define POLAR_VECTOR_FUNCTIONS_H
#include "srslte/config.h"
#include <stdint.h>
/*!
* Computes \f$ z = sign(x) \times sign(y) \times \min(abs(x), abs(y)) \f$ elementwise (box-plus operator).
* \param[in] x A pointer to a vector of floats.
* \param[in] y A pointer to a vector of floats.
* \param[out] z A pointer to a vector of floats.
* \param[in] len Length of vectors x, y and z.
*/
SRSLTE_API void srslte_vec_function_f_fff(const float* x, const float* y, float* z, uint16_t len);
/*!
* Computes \f$ z = sign(x) \times sign(y) \times \min(abs(x), abs(y)) \f$ elementwise (box-plus operator).
* \param[in] x A pointer to a vector of int16_t.
* \param[in] y A pointer to a vector of int16_t.
* \param[out] z A pointer to a vector of int16_t.
* \param[in] len Length of vectors x, y and z.
*/
SRSLTE_API void srslte_vec_function_f_sss(const int16_t* x, const int16_t* y, int16_t* z, uint16_t len);
/*!
* Computes \f$ z = sign(x) \times sign(y) \times \min(abs(x), abs(y)) \f$ elementwise (box-plus operator).
* \param[in] x A pointer to a vector of int8_t.
* \param[in] y A pointer to a vector of int8_t.
* \param[out] z A pointer to a vector of int8_t.
* \param[in] len Length of vectors x, y and z.
*/
SRSLTE_API void srslte_vec_function_f_ccc(const int8_t* x, const int8_t* y, int8_t* z, uint16_t len);
/*!
 * Returns \f$ z = x + y \f$ if \f$ (b = 0) \f$ and \f$ z= -x + y \f$ if \f$ (b = 1)\f$.
 * \param[in] b A pointer to a vectors of uint8_t with 0's and 1's.
 * \param[in] x A pointer to a vector of floats.
 * \param[in] y A pointer to a vector of floats.
 * \param[out] z A pointer to a vector of floats.
 * \param[in] len Length of vectors b, x, y and z.
 */
SRSLTE_API void srslte_vec_function_g_bfff(const uint8_t* b, const float* x, const float* y, float* z, uint16_t len);
/*!
 * Returns \f$ z = x + y \f$ if \f$ (b = 0) \f$ and \f$ z= -x + y \f$ if \f$ (b = 1)\f$.
 * The result is saturated to the range [-32767, 32767].
 * \param[in] b A pointer to a vectors of uint8_t with 0's and 1's.
 * \param[in] x A pointer to a vector of int16_t.
 * \param[in] y A pointer to a vector of int16_t.
 * \param[out] z A pointer to a vector of int16_t.
 * \param[in] len Length of vectors b, x, y and z.
 */
SRSLTE_API void
srslte_vec_function_g_bsss(const uint8_t* b, const int16_t* x, const int16_t* y, int16_t* z, uint16_t len);
/*!
 * Returns \f$ z = x + y \f$ if \f$ (b = 0) \f$ and \f$ z= -x + y \f$ if \f$ (b = 1)\f$.
 * The result is saturated to the range [-127, 127].
 * \param[in] b A pointer to a vectors of uint8_t with 0's and 1's.
 * \param[in] x A pointer to a vector of int8_t.
 * \param[in] y A pointer to a vector of int8_t.
 * \param[out] z A pointer to a vector of int8_t.
 * \param[in] len Length of vectors b, x, y and z.
 */
SRSLTE_API void srslte_vec_function_g_bccc(const uint8_t* b, const int8_t* x, const int8_t* y, int8_t* z, uint16_t len);
/*!
* Returns 1 if \f$ (x < 0) \f$ and 0 if \f$ (x >= 0) \f$.
* \param[in] x A pointer to a vector of floats.
* \param[out] z A pointer to a vector of uint8_t with 0's and 1's.
* \param[in] len Length of vectors x and z.
*/
SRSLTE_API void srslte_vec_hard_bit_fc(const float* x, uint8_t* z, uint16_t len);
/*!
* Returns 1 if \f$ (x < 0) \f$ and 0 if \f$ (x >= 0) \f$.
* \param[in] x A pointer to a vector of int16_t.
* \param[out] z A pointer to a vector of uint8_t with 0's and 1's.
* \param[in] len Length of vectors x and z.
*/
SRSLTE_API void srslte_vec_hard_bit_sc(const int16_t* x, uint8_t* z, uint16_t len);
/*!
* Returns 1 if \f$ (x < 0) \f$ and 0 if \f$ (x >= 0) \f$.
* \param[in] x A pointer to a vector of int8_t.
* \param[out] z A pointer to a vector of uint8_t with 0's and 1's.
* \param[in] len Length of vectors x and z.
*/
SRSLTE_API void srslte_vec_hard_bit_cc(const int8_t* x, uint8_t* z, uint16_t len);
#endif // POLAR_VECTOR_FUNCTIONS_H

@ -0,0 +1,136 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_vector_avx2.c
* \brief Definition of the polar decoder vectorizable functions using AVX2 instructions.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#include "../utils_avx2.h"
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef LV_HAVE_AVX2
#include <immintrin.h>
/*!
* \brief Bit mask to extract the Most Significant Bit (MSB).
*/
#define MSB_MASK (-128) // 0b10000000
// General remarks
// We replace bits by {0, 128} (uint8_t) or {0, -128} (int8_t)
/*!
 * Box-plus (min-sum) combination of two int8_t vectors with AVX2 instructions:
 * \f$ z = sign(x) \times sign(y) \times \min(abs(x), abs(y)) \f$, elementwise.
 *
 * The vectors are walked in whole chunks of SRSLTE_AVX2_B_SIZE bytes: the last chunk is loaded from
 * x and y and stored to z in full even when len is not a multiple of the chunk size.
 *
 * \param[in]  x   A pointer to a vector of int8_t.
 * \param[in]  y   A pointer to a vector of int8_t.
 * \param[out] z   A pointer to a vector of int8_t.
 * \param[in]  len Length of vectors x, y and z.
 */
void srslte_vec_function_f_ccc_avx2(const int8_t* x, const int8_t* y, int8_t* z, const uint16_t len)
{
  int offset = 0;

  while (offset < len) {
    const __m256i llr_a = _mm256_loadu_si256((const __m256i*)(x + offset));
    const __m256i llr_b = _mm256_loadu_si256((const __m256i*)(y + offset));

    // smallest of the two magnitudes
    const __m256i magnitude = _mm256_min_epi8(_mm256_abs_epi8(llr_a), _mm256_abs_epi8(llr_b));

    // x negated where y < 0 and zeroed where y == 0: only its sign is used below
    const __m256i sign_carrier = _mm256_sign_epi8(llr_a, llr_b);

    _mm256_storeu_si256((__m256i*)(z + offset), _mm256_sign_epi8(magnitude, sign_carrier));

    offset += SRSLTE_AVX2_B_SIZE;
  }
}
/*!
 * Polar decoder g-function with AVX2 instructions: z = y + x where the bit in b is 0 and
 * z = y - x where the bit in b has its most significant bit set (bits are stored as {0, 128}).
 * The sum saturates and is additionally clipped from below at -127.
 *
 * The vectors are walked in whole chunks of SRSLTE_AVX2_B_SIZE bytes, including the last one.
 *
 * \param[in]  b   A pointer to a vector of uint8_t bits.
 * \param[in]  x   A pointer to a vector of int8_t.
 * \param[in]  y   A pointer to a vector of int8_t.
 * \param[out] z   A pointer to a vector of int8_t.
 * \param[in]  len Length of vectors b, x, y and z.
 */
void srslte_vec_function_g_bccc_avx2(const uint8_t* b, const int8_t* x, const int8_t* y, int8_t* z, const uint16_t len)
{
  const __m256i ones       = _mm256_set1_epi8(1);
  const __m256i lower_clip = _mm256_set1_epi8(-127);

  int pos = 0;
  while (pos < len) {
    const __m256i bits  = _mm256_loadu_si256((const __m256i*)(b + pos));
    const __m256i llr_x = _mm256_loadu_si256((const __m256i*)(x + pos));
    const __m256i llr_y = _mm256_loadu_si256((const __m256i*)(y + pos));

    // a selector equal to 0 would make _mm256_sign_epi8 return 0 instead of x, hence the OR with 1
    const __m256i selector = _mm256_or_si256(bits, ones);
    const __m256i signed_x = _mm256_sign_epi8(llr_x, selector);

    // saturating sum, then keep the result symmetric by never going below -127
    const __m256i sum = _mm256_adds_epi8(signed_x, llr_y);
    _mm256_storeu_si256((__m256i*)(z + pos), _mm256_max_epi8(lower_clip, sum));

    pos += SRSLTE_AVX2_B_SIZE;
  }
}
/*!
 * Elementwise \f$ z = x \oplus y \f$ with AVX2 instructions.
 * The vectors are walked in whole chunks of SRSLTE_AVX2_B_SIZE bytes, including the last one.
 *
 * \param[in]  x   A pointer to a vector of uint8_t.
 * \param[in]  y   A pointer to a vector of uint8_t.
 * \param[out] z   A pointer to a vector of uint8_t.
 * \param[in]  len Length of vectors x, y and z.
 */
void srslte_vec_xor_bbb_avx2(const uint8_t* x, const uint8_t* y, uint8_t* z, uint16_t len)
{
  int pos = 0;

  while (pos < len) {
    const __m256i lhs = _mm256_loadu_si256((const __m256i*)(x + pos));
    const __m256i rhs = _mm256_loadu_si256((const __m256i*)(y + pos));

    _mm256_storeu_si256((__m256i*)(z + pos), _mm256_xor_si256(lhs, rhs));

    pos += SRSLTE_AVX2_B_SIZE;
  }
}
void srslte_vec_hard_bit_cc_avx2(const int8_t* x, uint8_t* z, const uint16_t len)
{
const __m256i M_MSB_MASK = _mm256_set1_epi8(MSB_MASK);
for (int i = 0; i < len; i += SRSLTE_AVX2_B_SIZE) {
__m256i m_x = _mm256_loadu_si256((__m256i*)&x[i]);
__m256i m_z = _mm256_and_si256(m_x, M_MSB_MASK);
_mm256_storeu_si256((__m256i*)&z[i], m_z);
}
// restore, by setting to 0, the memory positions between z + len and z + len + SRSLTE_AVX2_B_SIZE
memset(z + len, 0, SRSLTE_AVX2_B_SIZE);
}
/*!
 * In-place conversion of bits stored as {0, 128} into {0, 1}.
 *
 * Full chunks of SRSLTE_AVX2_B_SIZE bytes are converted with AVX2 instructions (a byte equal to 0
 * stays 0, a byte with the most significant bit set becomes 1); the remaining bytes, if any, are
 * converted one by one with a right shift by 7.
 *
 * \param[in, out] x   A pointer to a vector of uint8_t.
 * \param[in]      len Length of vector x.
 */
void srslte_vec_sign_to_bit_c_avx2(uint8_t* x, uint16_t len)
{
  const __m256i minus_one = _mm256_set1_epi8(-1);

  int pos = 0;

  // vectorized part: only chunks that fit entirely inside the vector
  while (pos < len - SRSLTE_AVX2_B_SIZE + 1) {
    const __m256i bits = _mm256_loadu_si256((const __m256i*)(x + pos));

    // -1 negated where the input is negative gives 1; a zero input gives 0
    _mm256_storeu_si256((__m256i*)(x + pos), _mm256_sign_epi8(minus_one, bits));

    pos += SRSLTE_AVX2_B_SIZE;
  }

  // scalar part: bytes that do not fill a whole chunk
  while (pos < len) {
    x[pos] >>= 7U;
    pos++;
  }
}
#endif // LV_HAVE_AVX2

@ -0,0 +1,89 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_decoder_vector_avx2.h
* \brief Declaration of the 8-bit AVX2 polar decoder vectorizable functions.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef POLAR_VECTOR_FUNCTIONS_AVX2_H
#define POLAR_VECTOR_FUNCTIONS_AVX2_H
#include "../utils_avx2.h"
#include "srslte/config.h"
#include <stdint.h>
/*!
* Transforms input uint8_t bits represented by {0, 128} to {0, 1} with AVX2 instructions,
* the output must have size larger than \ref SRSLTE_AVX2_B_SIZE.
* Specifically, the function returns 0 if x=0 and 1 if x<0, otherwise the output is not defined.
* \param[in, out] x A pointer to a vector of uint8_t.
* \param[in] len Length of vector x.
*/
SRSLTE_API void srslte_vec_sign_to_bit_c_avx2(uint8_t* x, uint16_t len);
/*!
* Computes \f$ z = sign(x) \times sign(y) \times \min(abs(x), abs(y)) \f$ elementwise
* (box-plus operator) with AVX2 instructions,
* the output must have size larger than \ref SRSLTE_AVX2_B_SIZE.
* \param[in] x A pointer to a vector of int8_t.
* \param[in] y A pointer to a vector of int8_t.
* \param[out] z A pointer to a vector of int8_t.
* \param[in] len Length of vectors x, y and z.
*/
SRSLTE_API void srslte_vec_function_f_ccc_avx2(const int8_t* x, const int8_t* y, int8_t* z, uint16_t len);
/*!
* Returns \f$ z = x + y \f$ if \f$ (b = 0) \f$ and \f$ z= -x + y \f$ if \f$ (b = 1)\f$ with AVX2 instructions,
* the output must have size larger than \ref SRSLTE_AVX2_B_SIZE.
* \param[in] b A pointer to a vector of uint8_t bits, where a 0 bit is stored as 0 and a 1 bit as 128 (MSB set).
* \param[in] x A pointer to a vector of int8_t.
* \param[in] y A pointer to a vector of int8_t.
* \param[out] z A pointer to a vector of int8_t.
* \param[in] len Length of vectors b, x, y and z.
*/
SRSLTE_API void
srslte_vec_function_g_bccc_avx2(const uint8_t* b, const int8_t* x, const int8_t* y, int8_t* z, uint16_t len);
/*!
* Computes \f$ z = x \oplus y \f$ elementwise with AVX2 instructions,
* the output must have size larger than \ref SRSLTE_AVX2_B_SIZE.
* \param[in] x A pointer to a vector of uint8_t with 0's and 1's.
* \param[in] y A pointer to a vector of uint8_t with 0's and 1's.
* \param[out] z A pointer to a vector of uint8_t with 0's and 1's.
* \param[in] len Length of vectors x, y and z.
*/
SRSLTE_API void srslte_vec_xor_bbb_avx2(const uint8_t* x, const uint8_t* y, uint8_t* z, uint16_t len);
/*!
* Returns 1 if \f$ (x < 0) \f$ and 0 if \f$ (x >= 0) \f$ with AVX2 instructions,
* the output must have size larger than \ref SRSLTE_AVX2_B_SIZE.
* \param[in] x A pointer to a vector of int8_t.
* \param[out] z A pointer to a vector of uint8_t with 0's and 1's.
* \param[in] len Length of vectors x and z.
*/
SRSLTE_API void srslte_vec_hard_bit_cc_avx2(const int8_t* x, uint8_t* z, uint16_t len);
#endif // POLAR_VECTOR_FUNCTIONS_AVX2_H

@ -0,0 +1,130 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_encoder.c
* \brief Definition of the polar encoder.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
* 5G uses a polar encoder with maximum sizes \f$2^n\f$ with \f$n = 5,...,10\f$.
*
*/
#include "srslte/phy/fec/polar/polar_encoder.h"
#include "polar_encoder_avx2.h"
#include "polar_encoder_pipelined.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#ifdef LV_HAVE_AVX2
/*!
 * AVX2 polar encoder: forwards the call to the AVX2 backend stored in the generic encoder handle.
 *
 * \param[in]  o             A pointer to the srslte_polar_encoder_t handle.
 * \param[in]  input         The encoder input vector.
 * \param[out] output        The encoder output vector.
 * \param[in]  code_size_log The \f$ log_2\f$ of the number of bits of the encoder input/output vector.
 * \return The value returned by the backend: 0 if the encoding succeeds, -1 otherwise.
 */
static int encode_avx2(void* o, const uint8_t* input, uint8_t* output, const uint8_t code_size_log)
{
  srslte_polar_encoder_t* q = o;

  // propagate the backend status instead of reporting success unconditionally
  return polar_encoder_encode_avx2(q->ptr, input, output, code_size_log);
}
/*!
 * Carries out the actual destruction of the memory allocated to the AVX2 encoder.
 * Does nothing when the handle holds no backend (e.g. after a failed initialization).
 *
 * \param[in, out] o A pointer to the srslte_polar_encoder_t handle.
 */
static void free_avx2(void* o)
{
  srslte_polar_encoder_t* q = o;

  // the backend destructor dereferences its argument, so never hand it a NULL pointer
  if (q == NULL || q->ptr == NULL) {
    return;
  }

  delete_polar_encoder_avx2(q->ptr);
  q->ptr = NULL;
}
/*!
 * Initializes a polar encoder structure to use the AVX2 polar encoder algorithm.
 *
 * \param[out] q             A pointer to the encoder handle to initialize.
 * \param[in]  code_size_log The \f$ log_2\f$ of the maximum number of bits of the encoder input/output vector.
 * \return 0 if the backend is created, -1 otherwise (the handle is then left without callbacks).
 */
static int init_avx2(srslte_polar_encoder_t* q, const uint8_t code_size_log)
{
  q->ptr = create_polar_encoder_avx2(code_size_log);

  if (q->ptr == NULL) {
    // nothing was allocated: there is nothing to release, and the destructor must not be called
    // with a NULL backend. Leave the handle inert so that a later free is harmless.
    q->encode = NULL;
    q->free   = NULL;
    return -1;
  }

  q->encode = encode_avx2;
  q->free   = free_avx2;

  return 0;
}
#endif // LV_HAVE_AVX2
/*!
 * Pipelined polar encoder: forwards the call to the pipelined backend stored in the generic encoder handle.
 *
 * \param[in]  o             A pointer to the srslte_polar_encoder_t handle.
 * \param[in]  input         The encoder input vector.
 * \param[out] output        The encoder output vector.
 * \param[in]  code_size_log The \f$ log_2\f$ of the number of bits of the encoder input/output vector.
 * \return The value returned by the backend: 0 if the encoding succeeds, -1 otherwise.
 */
static int encode_pipelined(void* o, const uint8_t* input, uint8_t* output, const uint8_t code_size_log)
{
  srslte_polar_encoder_t* q = o;

  // propagate the backend status instead of reporting success unconditionally
  return polar_encoder_encode_pipelined(q->ptr, input, output, code_size_log);
}
/*!
 * Carries out the actual destruction of the memory allocated to the pipelined encoder.
 * Does nothing when the handle holds no backend (e.g. after a failed initialization).
 *
 * \param[in, out] o A pointer to the srslte_polar_encoder_t handle.
 */
static void free_pipelined(void* o)
{
  srslte_polar_encoder_t* q = o;

  // the backend destructor dereferences its argument, so never hand it a NULL pointer
  if (q == NULL || q->ptr == NULL) {
    return;
  }

  delete_polar_encoder_pipelined(q->ptr);
  q->ptr = NULL;
}
/*!
 * Initializes a polar encoder structure to use the pipelined polar encoder algorithm.
 *
 * \param[out] q             A pointer to the encoder handle to initialize.
 * \param[in]  code_size_log The \f$ log_2\f$ of the maximum number of bits of the encoder input/output vector.
 * \return 0 if the backend is created, -1 otherwise (the handle is then left without callbacks).
 */
static int init_pipelined(srslte_polar_encoder_t* q, const uint8_t code_size_log)
{
  q->ptr = create_polar_encoder_pipelined(code_size_log);

  if (q->ptr == NULL) {
    // nothing was allocated: there is nothing to release, and the destructor must not be called
    // with a NULL backend. Leave the handle inert so that a later free is harmless.
    q->encode = NULL;
    q->free   = NULL;
    return -1;
  }

  q->encode = encode_pipelined;
  q->free   = free_pipelined;

  return 0;
}
/*!
 * Initializes a polar encoder handle with the requested implementation.
 *
 * \param[out] q             A pointer to the encoder handle to initialize.
 * \param[in]  type          The encoder implementation (the AVX2 one is only available when the
 *                           library is built with LV_HAVE_AVX2).
 * \param[in]  code_size_log The \f$ log_2\f$ of the maximum number of bits of the encoder input/output vector.
 * \return 0 if the encoder is initialized, -1 if q is NULL, the type is not supported or the
 *         backend cannot be created.
 */
int srslte_polar_encoder_init(srslte_polar_encoder_t* q, srslte_polar_encoder_type_t type, const uint8_t code_size_log)
{
  if (q == NULL) {
    return -1;
  }

  // start from an inert handle, so that srslte_polar_encoder_free() finds no stale callback
  // when the requested type is not supported
  memset(q, 0, sizeof(srslte_polar_encoder_t));

  switch (type) { // NOLINT
    case SRSLTE_POLAR_ENCODER_PIPELINED:
      return init_pipelined(q, code_size_log);
#ifdef LV_HAVE_AVX2
    case SRSLTE_POLAR_ENCODER_AVX2:
      return init_avx2(q, code_size_log);
#endif // LV_HAVE_AVX2
    default:
      return -1;
  }
}
/*!
 * Releases the resources of a polar encoder through the destructor registered at initialization
 * and resets the handle to all zeros. A NULL handle is ignored.
 *
 * \param[in, out] q A pointer to the encoder handle.
 */
void srslte_polar_encoder_free(srslte_polar_encoder_t* q)
{
  if (q == NULL) {
    return;
  }

  if (q->free) {
    q->free(q);
  }

  memset(q, 0, sizeof(srslte_polar_encoder_t));
}
/*!
 * Encodes the input vector with the implementation selected at initialization.
 *
 * \param[in]  q             A pointer to an initialized encoder handle.
 * \param[in]  input         The encoder input vector.
 * \param[out] output        The encoder output vector.
 * \param[in]  code_size_log The \f$ log_2\f$ of the number of bits of the encoder input/output vector.
 * \return The value returned by the selected implementation, or -1 if the handle is NULL or has
 *         no encoding function registered.
 */
int srslte_polar_encoder_encode(srslte_polar_encoder_t* q,
                                const uint8_t*          input,
                                uint8_t*                output,
                                const uint8_t           code_size_log)
{
  // a handle that was never (successfully) initialized has no function to dispatch to
  if (q == NULL || q->encode == NULL) {
    return -1;
  }

  return q->encode(q, input, output, code_size_log);
}

@ -0,0 +1,200 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_encoder_avx2.c
* \brief Definition of the AVX2 polar encoder.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
* 5G uses a polar encoder with maximum sizes \f$2^n\f$ with \f$n = 5,...,10\f$.
*
*/
#include "../utils_avx2.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#ifdef LV_HAVE_AVX2
#include <emmintrin.h>
#include <immintrin.h>
#include <tmmintrin.h>
/*!
* \brief Describes an AVX2 polar encoder.
*
* Instances are allocated by create_polar_encoder_avx2() and released by delete_polar_encoder_avx2().
*/
struct pAVX2 {
uint8_t code_size_log; /*!< \brief The \f$ log_2\f$ of the maximum supported number of bits of the encoder
input/output vector, as passed to create_polar_encoder_avx2(). */
uint8_t* tmp; /*!< \brief Pointer to the working buffer where the codeword is computed in place; it is
allocated with the maximum code size or SRSLTE_AVX2_B_SIZE bytes, whichever is larger. */
};
/*!
 * Frees the working buffer and the structure of an AVX2 polar encoder.
 * A NULL pointer is accepted and ignored.
 *
 * \param[in, out] o A pointer to the pAVX2 structure to release.
 */
void delete_polar_encoder_avx2(void* o)
{
  struct pAVX2* q = o;

  if (q == NULL) {
    return;
  }

  free(q->tmp); // free(NULL) is a no-op, no guard needed
  free(q);
}
/*!
 * Allocates an AVX2 polar encoder able to encode codewords of up to \f$2^{code\_size\_log}\f$ bits.
 *
 * \param[in] code_size_log \f$log_2\f$ of the maximum number of bits in the codeword (must be below 32).
 * \return A pointer to a pAVX2 structure, or NULL if code_size_log is out of range or the memory
 *         cannot be allocated.
 */
void* create_polar_encoder_avx2(const uint8_t code_size_log)
{
  // the code size is computed as 1U << code_size_log (here and when encoding),
  // which is undefined for shift counts of 32 or more
  if (code_size_log >= 32) {
    return NULL;
  }

  // allocate memory to the polar encoder instance
  struct pAVX2* q = malloc(sizeof(*q));
  if (q == NULL) {
    return NULL;
  }

  // The working buffer holds a full codeword, and never less than the SRSLTE_AVX2_B_SIZE bytes
  // that the SIMD stage loads and stores at once. The size is kept in a size_t: a 16-bit
  // variable would wrap to 0 for code_size_log >= 16 and leave the buffer too small.
  size_t tmp_size = (size_t)1 << code_size_log;
  if (code_size_log <= SRSLTE_AVX2_B_SIZE_LOG) {
    tmp_size = SRSLTE_AVX2_B_SIZE;
  }

  q->tmp = malloc(tmp_size * sizeof(uint8_t));
  if (q->tmp == NULL) {
    perror("malloc"); // report before free() gets a chance to modify errno
    free(q);
    return NULL;
  }

  q->code_size_log = code_size_log;

  return q;
}
/*!
 * Encodes one block of 32 bytes (one bit per byte) held in an AVX2 register: it runs, in parallel,
 * \f$ 2^{5-stage}\f$ polar encoders of size \f$ 2^{stage} \f$ each, for stage = 1 to 5.
 *
 * Every encoding step XORs the register with a copy of itself shifted towards the lower bytes by
 * 16, 8, 4, 2 and 1 byte(s); a given stage applies the steps whose distance is below \f$ 2^{stage} \f$.
 * For any other value of stage a message is printed and nothing is written to z.
 *
 * \param[in]  x     A pointer to the 32 input bytes.
 * \param[out] z     A pointer to the 32 output bytes (it may coincide with x).
 * \param[in]  stage \f$log_2\f$ of the size of the parallel encoders, from 1 to 5.
 */
static inline void srslte_vec_polar_encoder_32_avx2(const uint8_t* x, uint8_t* z, uint8_t stage)
{
  const __m256i zeros = _mm256_set1_epi8(0);
  __m256i       acc   = _mm256_loadu_si256((__m256i*)x);

  if (stage < 1 || stage > 5) {
    printf("Wrong stage = %d\n", stage);
    return;
  }

  if (stage >= 5) {
    // distance 16: selector 0x21 puts the upper 128 bits of acc in the lower half and zeros in the upper half
    acc = _mm256_xor_si256(acc, _mm256_permute2x128_si256(acc, zeros, 0x21));
  }
  if (stage >= 4) {
    // distance 8: byte shift inside each 128-bit half
    acc = _mm256_xor_si256(acc, _mm256_srli_si256(acc, 8));
  }
  if (stage >= 3) {
    // distance 4: 32-bit shift inside each 64-bit element
    acc = _mm256_xor_si256(acc, _mm256_srli_epi64(acc, 32));
  }
  if (stage >= 2) {
    // distance 2: 16-bit shift inside each 32-bit element
    acc = _mm256_xor_si256(acc, _mm256_srli_epi32(acc, 16));
  }

  // distance 1: 8-bit shift inside each 16-bit element (always applied, stage >= 1 here)
  acc = _mm256_xor_si256(acc, _mm256_srli_epi16(acc, 8));

  _mm256_storeu_si256((__m256i*)z, acc);
}
/*!
 * Computes \f$ z = x \oplus y \f$ elementwise with AVX2 instructions, in whole chunks of
 * SRSLTE_AVX2_B_SIZE bytes (the last chunk is processed in full even if len is not a multiple
 * of the chunk size).
 *
 * \param[in]  x   A pointer to a vector of uint8_t.
 * \param[in]  y   A pointer to a vector of uint8_t.
 * \param[out] z   A pointer to a vector of uint8_t.
 * \param[in]  len Length of vectors x, y and z.
 */
static inline void srslte_vec_xor_bbb_avx2(const uint8_t* x, const uint8_t* y, uint8_t* z, uint16_t len)
{
  int pos = 0;

  while (pos < len) {
    const __m256i lhs = _mm256_loadu_si256((const __m256i*)(x + pos));
    const __m256i rhs = _mm256_loadu_si256((const __m256i*)(y + pos));

    _mm256_storeu_si256((__m256i*)(z + pos), _mm256_xor_si256(lhs, rhs));

    pos += SRSLTE_AVX2_B_SIZE;
  }
}
/*!
 * Encodes the input vector into a codeword with the AVX2 polar encoder.
 *
 * The input is copied to the internal working buffer, where the codeword is computed in place:
 * first the stages whose size exceeds SRSLTE_AVX2_B_SIZE (one XOR between the two halves of each
 * block per stage), then the remaining stages inside each block of SRSLTE_AVX2_B_SIZE bytes.
 * The result is finally copied to the output vector.
 *
 * \param[in]  p             A pointer to the pAVX2 structure returned by create_polar_encoder_avx2().
 * \param[in]  input         The encoder input vector (one bit per byte).
 * \param[out] output        The encoder output vector (one bit per byte).
 * \param[in]  code_size_log The \f$ log_2\f$ of the number of bits of the encoder input/output vector;
 *                           it cannot exceed the value the encoder was created with.
 * \return 0 if the function executes correctly, -1 if p is NULL or code_size_log is too large.
 */
int polar_encoder_encode_avx2(void* p, const uint8_t* input, uint8_t* output, const uint8_t code_size_log)
{
  struct pAVX2* q = p;

  // validate the handle before reading any of its members
  if (q == NULL) {
    return -1;
  }

  // validate the size before copying: the working buffer was dimensioned for q->code_size_log
  if (code_size_log > q->code_size_log) {
    printf("ERROR: max code size log %d, current code size log %d.\n", q->code_size_log, code_size_log);
    return -1;
  }

  uint8_t* tmp = q->tmp;
  uint8_t* x   = NULL;
  uint8_t* y   = NULL;
  uint8_t* z   = NULL;

  // load data
  uint32_t code_size = 1U << code_size_log;
  memcpy(tmp, input, code_size * sizeof(uint8_t));

  uint32_t code_size_stage      = 0;
  uint32_t code_half_size_stage = 0;
  uint32_t num_blocks           = 0;
  uint32_t s                    = code_size_log;

  // stages larger than one SIMD block: XOR the second half of every block into its first half
  for (; s > SRSLTE_AVX2_B_SIZE_LOG; s--) {
    code_size_stage      = 1U << s;
    code_half_size_stage = 1U << (s - 1);
    num_blocks           = 1U << (code_size_log - s);

    for (uint32_t b = 0; b < num_blocks; b++) {
      x = &tmp[b * code_size_stage];
      y = x + code_half_size_stage;
      z = x;
      srslte_vec_xor_bbb_avx2(x, y, z, code_half_size_stage);
    }
  }

  // remaining stages: here s is min(code_size_log, SRSLTE_AVX2_B_SIZE_LOG) and every SIMD block
  // is encoded independently
  uint32_t num_simd_size_blocks = 1;
  if (code_size_log > SRSLTE_AVX2_B_SIZE_LOG) {
    num_simd_size_blocks = 1U << (code_size_log - SRSLTE_AVX2_B_SIZE_LOG);
  }

  for (uint32_t b = 0; b < num_simd_size_blocks; b++) {
    x = &tmp[b * SRSLTE_AVX2_B_SIZE];
    z = x;
    srslte_vec_polar_encoder_32_avx2(x, z, s);
  }

  memcpy(output, tmp, code_size * sizeof(uint8_t));

  return 0;
}
#endif // LV_HAVE_AVX2

@ -0,0 +1,62 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_encoder_avx2.h
* \brief Declaration of the AVX2 polar encoder.
* \author Jesus Gomez (CTTC) \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef POLAR_ENCODER_AVX2_H
#define POLAR_ENCODER_AVX2_H
#include <stdint.h>
/*!
* The AVX2 polar encoder "destructor": it frees all the resources allocated to the encoder.
*
* \param[in, out] p A pointer to the dismantled encoder.
*/
void delete_polar_encoder_avx2(void* p);
/*!
* Encodes the input vector into a codeword with the specified polar encoder.
* \param[in] p A void pointer used to declare a AVX2 polar encoder structure.
* \param[in] input The encoder input vector.
* \param[out] output The encoder output vector.
* \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the encoder input/output vector.
* It can be less than or equal to the maximum code_size_log specified in q.code_size_log of the
* srslte_polar_encoder_t structure.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int polar_encoder_encode_avx2(void* p, const uint8_t* input, uint8_t* output, uint8_t code_size_log);
/*!
* Creates an AVX2 polar encoder structure of type pAVX2, and allocates memory for the encoding buffers.
*
* \param[in] code_size_log \f$log_2\f$ of the number of bits in the codeword.
* \return A pointer to a pAVX2 structure if the function executes correctly, NULL otherwise.
*/
void* create_polar_encoder_avx2(uint8_t code_size_log);
#endif // POLAR_ENCODER_AVX2_H

@ -0,0 +1,160 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_encoder_pipelined.c
* \brief Definition of the pipelined polar encoder.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
* The pipelined polar encoder is described in
* Erdal Arikan, "Polar code: A pipelined implementation" presented at "4th International Symposium on Broadband
* Communication (ISBC 2010) July 11-14, 2010, Melaka, Malaysia"
* 5G uses a polar encoder with maximum sizes \f$2^n\f$ with \f$n = 5,...,10\f$.
*
*/
#include "srslte/phy/fec/polar/polar_encoder.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
/*!
* \brief Describes a pipelined polar encoder.
*
* Instances are allocated and filled by create_polar_encoder_pipelined() and released by
* delete_polar_encoder_pipelined().
*/
struct pPIPELINED {
uint16_t code_size; /*!< \brief Maximum number of bits of the encoder input/output vector. */
uint8_t code_size_log; /*!< \brief The \f$ log_2\f$ of the maximum supported number of bits of the encoder
input/output vector. */
uint16_t code_half_size; /*!< \brief Half of the maximum number of bits of the encoder input/output vector. */
uint16_t* i_even; /*!< \brief Table of code_half_size entries with the even positions (0, 2, 4, ...) of the
encoder input/output vector. */
uint16_t* i_odd; /*!< \brief Table of code_half_size entries with the odd positions (1, 3, 5, ...) of the
encoder input/output vector. */
uint8_t* tmp; /*!< \brief Pointer to a temporary buffer of code_size bytes. */
};
/*!
 * Frees the index tables, the temporary buffer and the structure of a pipelined polar encoder.
 * A NULL pointer is accepted and ignored.
 *
 * \param[in, out] o A pointer to the pPIPELINED structure to release.
 */
void delete_polar_encoder_pipelined(void* o)
{
  struct pPIPELINED* q = o;

  if (q == NULL) {
    return;
  }

  // free(NULL) is a no-op, so the members need no individual guard
  free(q->i_even);
  free(q->i_odd);
  free(q->tmp);
  free(q);
}
/*!
 * Allocates a pipelined polar encoder able to encode codewords of up to \f$2^{code\_size\_log}\f$ bits
 * and fills the tables of even and odd positions used by the encoding stages.
 *
 * \param[in] code_size_log \f$log_2\f$ of the maximum number of bits in the codeword, from 1 to 15.
 * \return A pointer to a pPIPELINED structure, or NULL if code_size_log is out of range or the
 *         memory cannot be allocated.
 */
void* create_polar_encoder_pipelined(const uint8_t code_size_log)
{
  // Sizes and positions are stored as uint16_t: from code_size_log = 16 on, the code size would
  // wrap to 0 and the buffers would be too small. A code of a single bit (log = 0) has no
  // even/odd halves and cannot be processed by the encoding stages either.
  if (code_size_log < 1 || code_size_log > 15) {
    return NULL;
  }

  // allocate memory to the polar encoder instance
  struct pPIPELINED* q = malloc(sizeof(*q));
  if (q == NULL) {
    return NULL;
  }

  const uint16_t code_size      = (uint16_t)(1U << code_size_log);
  const uint16_t code_half_size = code_size / 2;

  q->i_odd  = malloc(code_half_size * sizeof(uint16_t));
  q->i_even = malloc(code_half_size * sizeof(uint16_t));
  q->tmp    = malloc(code_size * sizeof(uint8_t));

  if (q->i_odd == NULL || q->i_even == NULL || q->tmp == NULL) {
    perror("malloc");
    // release whatever was obtained; free(NULL) is a no-op
    free(q->tmp);
    free(q->i_even);
    free(q->i_odd);
    free(q);
    return NULL;
  }

  // even and odd positions of the input/output vector
  for (uint16_t i = 0; i < code_half_size; i++) {
    q->i_even[i] = 2 * i;
    q->i_odd[i]  = 2 * i + 1;
  }

  q->code_size      = code_size;
  q->code_size_log  = code_size_log;
  q->code_half_size = code_half_size;

  return q;
}
/*!
 * Encodes the input vector into a codeword with the pipelined polar encoder.
 *
 * The same stage is applied code_size_log times. A stage first copies the even positions of its
 * source to the first half of the temporary buffer and the odd positions to the second half, and
 * then writes to the output the odd positions of the buffer unchanged and the even positions
 * XORed with the following odd position. The first stage reads from the input vector, all the
 * others from the output vector.
 *
 * \param[in]  p             A pointer to the pPIPELINED structure returned by create_polar_encoder_pipelined().
 * \param[in]  input         The encoder input vector (one bit per byte).
 * \param[out] output        The encoder output vector (one bit per byte).
 * \param[in]  code_size_log The \f$\log_2\f$ of the number of bits of the encoder input/output vector;
 *                           at least 1 and not beyond the size the encoder was created with.
 * \return 0 if the function executes correctly, -1 if p is NULL or code_size_log is out of range.
 */
int polar_encoder_encode_pipelined(void* p, const uint8_t* input, uint8_t* output, const uint8_t code_size_log)
{
  struct pPIPELINED* q = p;

  if (q == NULL) {
    return -1;
  }

  // Requested code size, computed in 32 bits with a bounded shift: a 16-bit half size would wrap
  // to 0 for large code_size_log (and the call would silently do nothing), and shifting by
  // code_size_log - 1 is undefined when code_size_log is 0.
  const uint32_t code_size     = (code_size_log <= 30) ? (1U << code_size_log) : (1U << 30);
  const uint32_t max_code_size = 2U * q->code_half_size;

  if (code_size < 2 || code_size > max_code_size) {
    printf("ERROR: max code size %d, current code size %d.\n", (int)max_code_size, (int)code_size);
    return -1;
  }

  const uint16_t code_half_size = (uint16_t)(code_size / 2);

  // the first stage also initializes the output vector, the remaining ones work on it
  const uint8_t* src = input;

  for (uint16_t i = 0; i < code_size_log; i++) {
    // split: even positions to the first half of tmp, odd positions to the second half
    for (uint16_t j = 0; j < code_half_size; j++) {
      q->tmp[j]                  = src[q->i_even[j]];
      q->tmp[j + code_half_size] = src[q->i_odd[j]];
    }

    // combine
    for (uint16_t j = 0; j < code_half_size; j++) {
      output[q->i_odd[j]]  = q->tmp[q->i_odd[j]];
      output[q->i_even[j]] = q->tmp[q->i_even[j]] ^ q->tmp[q->i_odd[j]]; // bitXor
    }

    src = output;
  }

  return 0;
}

@ -0,0 +1,62 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_encoder_pipelined.h
* \brief Declaration of the pipelined polar encoder.
* \author Jesus Gomez (CTTC) \date 2020
*
* \copyright Software Radio Systems Limited
*
*/
#ifndef POLAR_ENCODER_PIPELINED_H
#define POLAR_ENCODER_PIPELINED_H
#include <stdint.h>
/*!
* The pipelined polar encoder "destructor": it frees all the resources allocated to the encoder.
*
* \param[in, out] p A pointer to the dismantled encoder.
*/
void delete_polar_encoder_pipelined(void* p);
/*!
* Encodes the input vector into a codeword with the specified polar encoder.
* \param[in] p A void pointer used to declare a pPIPELINED structure.
* \param[in] input The encoder input vector.
* \param[out] output The encoder output vector.
* \param[in] code_size_log The \f$\log_2\f$ of the number of bits of the encoder input/output vector.
* It can be less than or equal to the maximum code_size_log specified in q.code_size_log of the
* srslte_polar_encoder_t structure.
* \return An integer: 0 if the function executes correctly, -1 otherwise.
*/
int polar_encoder_encode_pipelined(void* p, const uint8_t* input, uint8_t* output, uint8_t code_size_log);
/*!
* Creates a pipelined polar encoder structure of type pPIPELINED, and allocates memory for the encoding buffers.
*
* \param[in] code_size_log \f$\log_2\f$ of the number of bits in the codeword.
* \return A pointer to a pPIPELINED structure if the function executes correctly, NULL otherwise.
*/
void* create_polar_encoder_pipelined(uint8_t code_size_log);
#endif // POLAR_ENCODER_PIPELINED_H

@ -0,0 +1,71 @@
#
# Project: 5GCoding-SRS
# Author: Jesus Gomez (CTTC)
# Copyright: Software Radio Systems Limited
#
# Helper library with the code-set reader and the subchannel allocation used by the test
add_library(polar_test_utils polar_sets.c subchannel_allocation.c)
# Test executable; it is written to <project source dir>/tests/polar, which is also the
# working directory of the tests registered below
add_executable(polar_chain_test polar_chain_test.c)
target_link_libraries(polar_chain_test srslte_phy polar_test_utils)
set_target_properties(polar_chain_test
PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/tests/polar"
)
# Frozen-set files available in the source tree (evaluated at configure time)
file(GLOB FROZEN_SETS
"frozensets/*.bin"
)
# Same file names, relocated to the frozensets folder next to the test executable:
# strip the source folder prefix, then prepend the destination folder
set(OUT_FROZEN_SETS ${FROZEN_SETS})
list(TRANSFORM OUT_FROZEN_SETS REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/frozensets/" "")
list(TRANSFORM OUT_FROZEN_SETS PREPEND "${PROJECT_SOURCE_DIR}/tests/polar/frozensets/")
# Copy the frozen-set files to the frozensets folder next to the test executable.
# CMake's own copy_directory is used instead of `cp -r`: it works on every host platform and
# always creates the destination folder, whereas `cp -r frozensets <dir>` copies the files
# directly into <dir> (without the frozensets level) when <dir> does not exist yet.
add_custom_command(
  OUTPUT ${OUT_FROZEN_SETS}
  COMMAND ${CMAKE_COMMAND} -E copy_directory frozensets "${PROJECT_SOURCE_DIR}/tests/polar/frozensets"
  DEPENDS ${FROZEN_SETS}
  WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
  COMMENT "Copying frozen set files"
  VERBATIM
)
# Target that triggers the copy of the frozen-set files; building the test executable depends on it
add_custom_target(polar_frozen_sets
DEPENDS ${OUT_FROZEN_SETS}
)
add_dependencies(polar_chain_test polar_frozen_sets)
### Test polar libs
# polar_unit_tests(<S>)
# Registers one test per code configuration listed below. <S> is passed to the test command as
# the -s option. The variables test_name and test_command are not arguments: they are read from
# the scope of the caller and must be set before calling the function.
function(polar_unit_tests)
set(S ${ARGV0}) # value of the -s (SNR) option: 101 means no noise, 100 scan
# The five lists are read in parallel: the i-th entry of each list defines the i-th test
set(listC 5 6 6 6 7 7 8 8 9 9 10) # -c option
set(listR 32 64 64 64 128 128 256 256 512 864 1024) # -r option
set(listM 31 31 36 63 36 64 36 128 256 56 512) # -m option
set(listP 0 0 0 0 0 0 0 0 0 0 0) # -p option
set(listW 0 0 0 0 0 0 0 0 0 0 0) # -w option
list(LENGTH listC len)
# foreach(RANGE n) includes n, so the last index is len - 1
math(EXPR lenr "${len} - 1")
foreach(num RANGE ${lenr})
list(GET listC ${num} cval)
list(GET listR ${num} rval)
list(GET listM ${num} mval)
list(GET listP ${num} pval)
list(GET listW ${num} wval)
# the tests run from the folder that contains the frozensets subfolder
add_test(NAME ${test_name}-s${S}-c${cval}-r${rval}-m${mval}-p${pval}-w${wval}
COMMAND ${test_command} -s${S} -c${cval} -r${rval} -m${mval} -p${pval} -w${wval}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/polar
)
endforeach()
endfunction()
# Unit tests: every configuration is run with -s101 (no noise)
set(test_name POLAR-UNIT-TEST)
set(test_command polar_chain_test)
polar_unit_tests(101)
# WER (performance) tests: every configuration is run with -s-3
# For these tests, run ctest --verbose
set(test_name POLAR-PERF-TEST)
set(test_command polar_chain_test)
polar_unit_tests(-3)

@ -0,0 +1,803 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_chain_test.c
* \brief Throughput and WER tests for the polar encoder/decoder.
*
* Synopsis: **polar_chain_test [options]**
*
* Options:
*
* - <b>-c \<number\></b> \f$log_2\f$ of the codeword length [Default 8]
*
* - <b>-r \<number\></b> Rate matching size [Default 256]
*
* - <b>-m \<number\></b> Message size [Default 128]
*
* - <b>-p \<number\></b> Parity-set size [Default 0]
*
* - <b>-w \<number\></b> nWmPC [Default 0]
*
* - <b>-s \<number\></b> SNR [dB, Default 3.00 dB] -- Use 100 for scan, and 101 for noiseless
*
* - <b>-o \<number\></b> Print output results [Default 0] -- Use 0 for detailed, Use 1 for 1 line, Use 2 for vector
* form
*
* It (1) generates a random set of bits (data); (2) passes the data bits
* through the subchannel allocation block where the input vector to the
* encoder is generated; (3) encodes the input vector; (4) adds Gaussian channel noise
* (optional); (5) decodes the resulting codeword; (6) passes the decoder output through the
* subchannel deallocation block where data bits are extracted; (7) compares the decoded
* bits with the original data bits and measures the throughput (in bit / s).
*
* The message, frozen and parity bit sets corresponding to the input
* parameters -c, -r, -m, -p, -w must be available in the subfolder \a
* frozensets of the execution directory.
* These sets are stored in files with the following name convention:
* > polar_code_<code_size>_<rate_matching_size>_<message_size>_<parity_set_size>_<wmPC>.bin
*
* See \ref polar for further details.
*
*/
#include "math.h"
#include "srslte/phy/channel/ch_awgn.h"
#include "srslte/phy/common/timestamp.h"
#include "srslte/phy/utils/bit.h"
#include "srslte/phy/utils/debug.h"
#include "srslte/phy/utils/phy_logger.h"
#include "srslte/phy/utils/random.h"
#include "srslte/phy/utils/vector.h" // srslte_convert_dB_to_amplitude
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
// cttc utils lib
#include "srslte/phy/utils/vector.h"
// polar libs
#include "polar_sets.h"
#include "srslte/phy/fec/polar/polar_decoder.h"
#include "srslte/phy/fec/polar/polar_encoder.h"
#include "subchannel_allocation.h"
#define SNR_POINTS 10 /*!< \brief Number of SNR evaluation points.*/
#define SNR_MIN (-2.0) /*!< \brief Min SNR [dB].*/
#define SNR_MAX 8.0 /*!< \brief Max SNR [dB].*/
#define BATCH_SIZE 100 /*!< \brief Number of codewords in a batch. */
#define MAX_N_BATCH 10000 /*!< \brief Max number of simulated batches. */
#define REQ_ERRORS 100 /*!< \brief Minimum number of errors for a significant simulation. */
// default values
uint8_t code_size_log = 8; /*!< \brief \f$log_2\f$ of code size. */
uint16_t message_size = 128; /*!< \brief Number of message bits (data and CRC). */
uint16_t rate_matching_size = 256; /*!< \brief Number of bits of the codeword after rate matching. */
uint8_t parity_set_size = 0; /*!< \brief Number of parity bits. */
uint8_t nWmPC = 0; /*!< \brief Number of parity bits of minimum weight type. */
double snr_db = 3; /*!< \brief SNR in dB (101 for no noise, 100 for scan). */
int print_output = 0; /*!< \brief print output form (0 for detailed, 1 for 1 line, 2 for vector). */
/*!
 * \brief Prints test help when a wrong parameter is passed as input.
 *
 * The default values shown are the current values of the corresponding global variables.
 *
 * \param[in] prog Name of the program, as it should appear in the synopsis line.
 */
void usage(char* prog)
{
  // the synopsis lists every option described below, -o included
  printf("Usage: %s [-cX] [-rX] [-mX] [-pX] [-wX] [-sX] [-oX]\n", prog);
  printf("\t-c log2 of the codeword length [Default %d]\n", code_size_log);
  printf("\t-r Rate matching size [Default %d]\n", rate_matching_size);
  printf("\t-m Message size [Default %d]\n", message_size);
  printf("\t-p Parity-set size [Default %d]\n", parity_set_size);
  printf("\t-w nWmPC [Default %d]\n", nWmPC);
  printf("\t-s SNR [dB, Default %.2f dB] -- Use 100 for scan, and 101 for noiseless\n", snr_db);
  printf("\t-o Print output results [Default %d] -- Use 0 for detailed, Use 1 for 1 line, Use 2 for vector form\n",
         print_output);
}
/*!
 * \brief Parses the input line.
 *
 * Every recognized option overwrites the corresponding global variable. Any other option prints
 * the usage message and terminates the program.
 *
 * \param[in] argc Number of command-line arguments.
 * \param[in] argv Command-line arguments.
 */
void parse_args(int argc, char** argv)
{
  int opt = 0;

  // only the options handled below are declared: -e and -t used to be accepted by getopt
  // although no case processes them
  while ((opt = getopt(argc, argv, "c:r:m:p:w:s:o:")) != -1) {
    switch (opt) {
      case 'c':
        code_size_log = (uint8_t)strtol(optarg, NULL, 10);
        break;
      case 'r':
        rate_matching_size = (uint16_t)strtol(optarg, NULL, 10);
        break;
      case 'm':
        message_size = (uint16_t)strtol(optarg, NULL, 10);
        break;
      case 'p':
        parity_set_size = (uint8_t)strtol(optarg, NULL, 10);
        break;
      case 'w':
        nWmPC = (uint8_t)strtol(optarg, NULL, 10);
        break;
      case 's':
        // snr_db is a double: parse it at full precision
        snr_db = strtod(optarg, NULL);
        break;
      case 'o':
        print_output = (int)strtol(optarg, NULL, 10);
        break;
      default:
        usage(argv[0]);
        exit(-1);
    }
  }
}
/*!
 * \brief Main function: runs the polar encoder/decoder chain test.
 *
 * After parsing the command line, the function reads the polar index sets, initializes the
 * subchannel allocation, the pipelined encoder and the float, 16-bit and 8-bit SSC decoders (plus
 * the AVX2 encoder and 8-bit decoder when \c LV_HAVE_AVX2 is defined). For each SNR point it then
 * processes batches of \c BATCH_SIZE codewords until the float decoder has accumulated the required
 * number of word errors or the maximum number of batches is reached. Each batch is encoded, mapped
 * to +1/-1 symbols, optionally corrupted with AWGN, and decoded by every decoder. Finally the
 * results are printed in the format selected by \c print_output.
 *
 * Exit status:
 *  - single noiseless point (SNR 101): 0 if no decoder produced a word error, 1 otherwise;
 *  - any other configuration: -1 if a fixed-point decoder produced more than 10 times the word
 *    errors of the float decoder at some SNR point, 0 otherwise;
 *  - -1 on allocation failure or if the AVX2 encoder output differs from the pipelined encoder.
 */
int main(int argc, char** argv)
{
  uint8_t* data_tx = NULL;
  uint8_t* data_rx = NULL;
  uint8_t* data_rx_s = NULL;
  uint8_t* data_rx_c = NULL;
  uint8_t* data_rx_c_avx2 = NULL;
  uint8_t* input_enc = NULL;         // input encoder
  uint8_t* output_enc = NULL;        // output encoder
  uint8_t* output_enc_avx2 = NULL;   // output encoder
  float* llr = NULL;                 // input decoder
  int16_t* llr_s = NULL;             // input decoder
  int8_t* llr_c = NULL;              // input decoder
  int8_t* llr_c_avx2 = NULL;         // input decoder
  uint8_t* output_dec = NULL;        // output decoder
  uint8_t* output_dec_s = NULL;      // output decoder
  uint8_t* output_dec_c = NULL;      // output decoder
  uint8_t* output_dec_c_avx2 = NULL; // output decoder

  double var[SNR_POINTS + 1];
  double snr_db_vec[SNR_POINTS + 1];

  int i = 0;
  int j = 0;
  int snr_points = 0;

  int errors_symb = 0;
  int errors_symb_s = 0;
  int errors_symb_c = 0;
  int errors_symb_c_avx2 = 0;

  int n_error_words[SNR_POINTS + 1];
  int n_error_words_s[SNR_POINTS + 1];
  int n_error_words_c[SNR_POINTS + 1];
  int n_error_words_c_avx2[SNR_POINTS + 1];

  int last_i_batch[SNR_POINTS + 1];

  struct timeval t[3];

  double elapsed_time_dec[SNR_POINTS + 1];
  double elapsed_time_dec_s[SNR_POINTS + 1];
  double elapsed_time_dec_c[SNR_POINTS + 1];
  double elapsed_time_dec_c_avx2[SNR_POINTS + 1];
  double elapsed_time_enc[SNR_POINTS + 1];
  double elapsed_time_enc_avx2[SNR_POINTS + 1];

  // 16-bit quantizer
  int16_t inf16 = (1U << 15U) - 1;
  int8_t inf8 = (1U << 7U) - 1;
  float gain_s = NAN;
  float gain_c = NAN;
  float gain_c_avx2 = NAN;

  srslte_polar_sets_t sets;
  srslte_subchn_alloc_t subch;
  srslte_polar_encoder_t enc;
  srslte_polar_decoder_t dec;
  srslte_polar_decoder_t dec_s; // 16-bit
  srslte_polar_decoder_t dec_c; // 8-bit
#ifdef LV_HAVE_AVX2
  srslte_polar_encoder_t enc_avx2;
  srslte_polar_decoder_t dec_c_avx2; // 8-bit
#endif                               // LV_HAVE_AVX2

  parse_args(argc, argv);

  uint16_t code_size = 1U << code_size_log;

  printf("Test POLAR chain:\n");
  printf(" Final code bits -> E = %d\n", rate_matching_size);
  printf(" Code bits -> N = %d\n", code_size);
  printf(" CRC + Data bits -> K = %d\n", message_size);
  printf(" Parity Check bits -> PC = %d \n", parity_set_size);
  printf(" Code rate -> (K + PC)/N = (%d + %d)/%d = %.2f\n",
         message_size,
         parity_set_size,
         code_size,
         (double)(message_size + parity_set_size) / code_size);

  // read polar index sets from a file
  srslte_polar_code_sets_read(&sets, message_size, code_size_log, rate_matching_size, parity_set_size, nWmPC);

  // subchannel allocation
  srslte_subchannel_allocation_init(&subch, code_size_log, message_size, sets.message_set);

  // initialize encoder pipeline
  srslte_polar_encoder_init(&enc, SRSLTE_POLAR_ENCODER_PIPELINED, code_size_log);

  // initialize a POLAR decoder (float)
  srslte_polar_decoder_init(&dec, SRSLTE_POLAR_DECODER_SSC_F, code_size_log, sets.frozen_set, sets.frozen_set_size);

  // initialize a POLAR decoder (16 bit)
  srslte_polar_decoder_init(&dec_s, SRSLTE_POLAR_DECODER_SSC_S, code_size_log, sets.frozen_set, sets.frozen_set_size);

  // initialize a POLAR decoder (8 bit)
  srslte_polar_decoder_init(&dec_c, SRSLTE_POLAR_DECODER_SSC_C, code_size_log, sets.frozen_set, sets.frozen_set_size);

#ifdef LV_HAVE_AVX2
  // initialize encoder avx2
  srslte_polar_encoder_init(&enc_avx2, SRSLTE_POLAR_ENCODER_AVX2, code_size_log);

  // initialize a POLAR decoder (8 bit, avx2)
  srslte_polar_decoder_init(
      &dec_c_avx2, SRSLTE_POLAR_DECODER_SSC_C_AVX2, code_size_log, sets.frozen_set, sets.frozen_set_size);
#endif // LV_HAVE_AVX2

#ifdef DATA_ALL_ONES
#else
  srslte_random_t random_gen = srslte_random_init(0);
#endif

  data_tx = srslte_vec_u8_malloc(message_size * BATCH_SIZE);
  data_rx = srslte_vec_u8_malloc(message_size * BATCH_SIZE);
  data_rx_s = srslte_vec_u8_malloc(message_size * BATCH_SIZE);
  data_rx_c = srslte_vec_u8_malloc(message_size * BATCH_SIZE);
  data_rx_c_avx2 = srslte_vec_u8_malloc(message_size * BATCH_SIZE);

  input_enc = srslte_vec_u8_malloc(code_size * BATCH_SIZE);
  output_enc = srslte_vec_u8_malloc(code_size * BATCH_SIZE);
  output_enc_avx2 = srslte_vec_u8_malloc(code_size * BATCH_SIZE);

  llr = srslte_vec_f_malloc(code_size * BATCH_SIZE);
  llr_s = srslte_vec_i16_malloc(code_size * BATCH_SIZE);
  llr_c = srslte_vec_i8_malloc(code_size * BATCH_SIZE);
  llr_c_avx2 = srslte_vec_i8_malloc(code_size * BATCH_SIZE);

  output_dec = srslte_vec_u8_malloc(code_size * BATCH_SIZE);
  output_dec_s = srslte_vec_u8_malloc(code_size * BATCH_SIZE);
  output_dec_c = srslte_vec_u8_malloc(code_size * BATCH_SIZE);
  output_dec_c_avx2 = srslte_vec_u8_malloc(code_size * BATCH_SIZE);

  if (!data_tx || !data_rx || !data_rx_s || !data_rx_c || !data_rx_c_avx2 || !input_enc || !output_enc ||
      !output_enc_avx2 || !llr || !llr_s || !llr_c || !llr_c_avx2 || !output_dec || !output_dec_s || !output_dec_c ||
      !output_dec_c_avx2) {
    perror("malloc");
    exit(-1);
  }

  // if snr_db = 100 compute a range from SNR_MIN to SNR_MAX with SNR_POINTS
  // else use the specified SNR.
  // The request is latched here because snr_db is overwritten while the scan vector is filled.
  const int scan_mode = (snr_db == 100.0);
  double snr_inc = (SNR_MAX - SNR_MIN) / SNR_POINTS;

  if (scan_mode) {
    snr_points = SNR_POINTS;
    for (int32_t i = 0; i < snr_points; i++) {
      snr_db = SNR_MIN + i * snr_inc;
      snr_db_vec[i] = snr_db;
      var[i] = srslte_convert_dB_to_amplitude(-snr_db);
    }
    snr_db_vec[snr_points] = 101; // include the no noise case
    snr_points++;
  } else {
    snr_db_vec[0] = snr_db;
    var[0] = srslte_convert_dB_to_amplitude(-snr_db);
    snr_points = 1;
  }

  if (scan_mode) { // scan
    printf(" SNR_MIN = %f, SNR_INC = %f, SNR_MAX = %f, snr_points: %d\n",
           SNR_MIN,
           snr_inc,
           SNR_MIN + snr_inc * snr_points,
           snr_points);
  }

  for (int32_t i_snr = 0; i_snr < snr_points; i_snr++) {
    if (snr_db_vec[i_snr] == 101) {
      printf("\n Signal-to-Noise Ratio -> infinite\n");
    } else {
      printf("\n Signal-to-Noise Ratio -> %.1f dB\n", snr_db_vec[i_snr]);
    }

    elapsed_time_enc[i_snr] = 0;
    elapsed_time_enc_avx2[i_snr] = 0;
    elapsed_time_dec[i_snr] = 0;
    elapsed_time_dec_s[i_snr] = 0;
    elapsed_time_dec_c[i_snr] = 0;
    elapsed_time_dec_c_avx2[i_snr] = 0;

    n_error_words[i_snr] = 0;
    n_error_words_s[i_snr] = 0;
    n_error_words_c[i_snr] = 0;
    n_error_words_c_avx2[i_snr] = 0;

    int i_batch = 0;
    printf("\nBatch:\n ");

    // A single batch suffices in the noiseless case; otherwise keep going until enough errors are seen.
    int req_errors = 0;
    int max_n_batch = 0;
    if (snr_db_vec[i_snr] == 101) {
      req_errors = 1;
      max_n_batch = 1;
    } else {
      req_errors = REQ_ERRORS;
      max_n_batch = MAX_N_BATCH;
    }

    while ((n_error_words[i_snr] < req_errors) && (i_batch < max_n_batch)) {
      i_batch++;

      if (!(i_batch % 10)) {
        printf("%8d", i_batch);
        if (!(i_batch % 90)) {
          printf("\n ");
        }
      }

      // generate data_tx
#ifdef DATA_ALL_ONES
      for (i = 0; i < BATCH_SIZE; i++) {
        for (j = 0; j < message_size; j++) {
          data_tx[i * message_size + j] = 1;
        }
      }
#else
      for (i = 0; i < BATCH_SIZE; i++) {
        for (j = 0; j < message_size; j++) {
          data_tx[i * message_size + j] = srslte_random_uniform_int_dist(random_gen, 0, 1);
        }
      }
#endif

      // subchannel_allocation block
      for (i = 0; i < BATCH_SIZE; i++) {
        srslte_subchannel_allocation(&subch, data_tx + i * message_size, input_enc + i * code_size);
      }

      // encoding pipeline
      gettimeofday(&t[1], NULL);
      for (j = 0; j < BATCH_SIZE; j++) {
        srslte_polar_encoder_encode(&enc, input_enc + j * code_size, output_enc + j * code_size, code_size_log);
      }
      gettimeofday(&t[2], NULL);
      get_time_interval(t);
      elapsed_time_enc[i_snr] += t[0].tv_sec + 1e-6 * t[0].tv_usec;

#ifdef LV_HAVE_AVX2
      // encoding avx2
      gettimeofday(&t[1], NULL);
      for (j = 0; j < BATCH_SIZE; j++) {
        srslte_polar_encoder_encode(
            &enc_avx2, input_enc + j * code_size, output_enc_avx2 + j * code_size, code_size_log);
      }
      gettimeofday(&t[2], NULL);
      get_time_interval(t);
      elapsed_time_enc_avx2[i_snr] += t[0].tv_sec + 1e-6 * t[0].tv_usec;

      // check encoders have the same output.
      // check errors with respect the output of the pipeline encoder
      for (i = 0; i < BATCH_SIZE; i++) {
        if (srslte_bit_diff(output_enc + i * code_size, output_enc_avx2 + i * code_size, code_size) != 0) {
          printf("ERROR: Wrong avx2 encoder output. SNR= %f, Batch: %d\n", snr_db_vec[i_snr], i);
          exit(-1);
        }
      }
#endif // LV_HAVE_AVX2

      // map bits to symbols: 0 -> +1, 1 -> -1
      for (j = 0; j < code_size * BATCH_SIZE; j++) {
        llr[j] = output_enc[j] ? -1 : 1;
      }

      // add noise
      if (snr_db_vec[i_snr] != 101) {
        srslte_ch_awgn_f(llr, llr, var[i_snr], BATCH_SIZE * code_size);

        // Convert symbols into LLRs
        for (j = 0; j < BATCH_SIZE * code_size; j++) {
          llr[j] *= 2 / (var[i_snr] * var[i_snr]);
        }
      }

      // decoding float point
      gettimeofday(&t[1], NULL);
      for (j = 0; j < BATCH_SIZE; j++) {
        srslte_polar_decoder_decode_f(&dec, llr + j * code_size, output_dec + j * code_size);
      }
      gettimeofday(&t[2], NULL);
      get_time_interval(t);
      elapsed_time_dec[i_snr] += t[0].tv_sec + 1e-6 * t[0].tv_usec;

      // extract message bits - float decoder
      for (j = 0; j < BATCH_SIZE; j++) {
        srslte_subchannel_deallocation(&subch, output_dec + j * code_size, data_rx + j * message_size);
      }

      // check errors - float decoder
      for (i = 0; i < BATCH_SIZE; i++) {
        errors_symb = srslte_bit_diff(data_tx + i * message_size, data_rx + i * message_size, message_size);
        if (errors_symb != 0) {
          n_error_words[i_snr]++;
        }
      }

      // decoding 16-bit
      // 16-quantization
      if (snr_db_vec[i_snr] == 101) {
        srslte_vec_quant_fs(llr, llr_s, 8192, 0, 32767, BATCH_SIZE * code_size);
      } else {
        gain_s = inf16 * var[i_snr] / 20 / (1 / var[i_snr] + 2);
        srslte_vec_quant_fs(llr, llr_s, gain_s, 0, inf16, BATCH_SIZE * code_size);
      }

      // decoding 16-bit
      gettimeofday(&t[1], NULL);
      for (j = 0; j < BATCH_SIZE; j++) {
        srslte_polar_decoder_decode_s(&dec_s, llr_s + j * code_size, output_dec_s + j * code_size);
      }
      gettimeofday(&t[2], NULL);
      get_time_interval(t);
      elapsed_time_dec_s[i_snr] += t[0].tv_sec + 1e-6 * t[0].tv_usec;

      // extract message bits 16-bit decoder
      for (j = 0; j < BATCH_SIZE; j++) {
        srslte_subchannel_deallocation(&subch, output_dec_s + j * code_size, data_rx_s + j * message_size);
      }

      // check errors 16-bit decoder
      for (i = 0; i < BATCH_SIZE; i++) {
        errors_symb_s = srslte_bit_diff(data_tx + i * message_size, data_rx_s + i * message_size, message_size);
        if (errors_symb_s != 0) {
          n_error_words_s[i_snr]++;
        }
      }

      // 8-bit decoding
      // 8-bit quantization
      if (snr_db_vec[i_snr] == 101) {
        srslte_vec_quant_fc(llr, llr_c, 32, 0, 127, BATCH_SIZE * code_size);
      } else {
        gain_c = inf8 * var[i_snr] / 20 / (1 / var[i_snr] + 2);
        srslte_vec_quant_fc(llr, llr_c, gain_c, 0, inf8, BATCH_SIZE * code_size);
      }

      gettimeofday(&t[1], NULL);
      for (j = 0; j < BATCH_SIZE; j++) {
        srslte_polar_decoder_decode_c(&dec_c, llr_c + j * code_size, output_dec_c + j * code_size);
      }
      gettimeofday(&t[2], NULL);
      get_time_interval(t);
      elapsed_time_dec_c[i_snr] += t[0].tv_sec + 1e-6 * t[0].tv_usec;

      // extract message bits
      for (j = 0; j < BATCH_SIZE; j++) {
        srslte_subchannel_deallocation(&subch, output_dec_c + j * code_size, data_rx_c + j * message_size);
      }

      // check errors 8-bits decoder
      for (i = 0; i < BATCH_SIZE; i++) {
        errors_symb_c = srslte_bit_diff(data_tx + i * message_size, data_rx_c + i * message_size, message_size);
        if (errors_symb_c != 0) {
          n_error_words_c[i_snr]++;
        }
      }

#ifdef LV_HAVE_AVX2
      // 8-bit avx2 decoding
      // 8-bit quantization
      if (snr_db_vec[i_snr] == 101) {
        srslte_vec_quant_fc(llr, llr_c_avx2, 32, 0, 127, BATCH_SIZE * code_size);
      } else {
        gain_c_avx2 = inf8 * var[i_snr] / 20 / (1 / var[i_snr] + 2);
        srslte_vec_quant_fc(llr, llr_c_avx2, gain_c_avx2, 0, inf8, BATCH_SIZE * code_size);
      }

      gettimeofday(&t[1], NULL);
      for (j = 0; j < BATCH_SIZE; j++) {
        srslte_polar_decoder_decode_c(&dec_c_avx2, llr_c_avx2 + j * code_size, output_dec_c_avx2 + j * code_size);
      }
      gettimeofday(&t[2], NULL);
      get_time_interval(t);
      elapsed_time_dec_c_avx2[i_snr] += t[0].tv_sec + 1e-6 * t[0].tv_usec;

      // extract message bits
      for (j = 0; j < BATCH_SIZE; j++) {
        srslte_subchannel_deallocation(&subch, output_dec_c_avx2 + j * code_size, data_rx_c_avx2 + j * message_size);
      }

      // check errors 8-bits decoder
      for (i = 0; i < BATCH_SIZE; i++) {
        errors_symb_c_avx2 =
            srslte_bit_diff(data_tx + i * message_size, data_rx_c_avx2 + i * message_size, message_size);
        if (errors_symb_c_avx2 != 0) {
          n_error_words_c_avx2[i_snr]++;
        }
      }
#endif // LV_HAVE_AVX2

      last_i_batch[i_snr] = i_batch;
    } // end while BATCH
  }   // snr_db

  printf("\n");
  switch (print_output) {
    case 2:
      printf("SNR=[");
      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
        printf("%3.1f ", snr_db_vec[i_snr] - 3);
      }
      printf("];\n");
      printf("WER=[");
      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
        printf("%e ", (float)n_error_words[i_snr] / last_i_batch[i_snr] / BATCH_SIZE);
      }
      printf("];\n");
      printf("WER_16=[");
      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
        printf("%e ", (float)n_error_words_s[i_snr] / last_i_batch[i_snr] / BATCH_SIZE);
      }
      printf("];\n");
      printf("WER_8=[");
      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
        printf("%e ", (float)n_error_words_c[i_snr] / last_i_batch[i_snr] / BATCH_SIZE);
      }
      printf("];\n");
#ifdef LV_HAVE_AVX2
      printf("WER_8_AVX2=[");
      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
        printf("%e ", (float)n_error_words_c_avx2[i_snr] / last_i_batch[i_snr] / BATCH_SIZE);
      }
      printf("];\n");
#endif // LV_HAVE_AVX2
      break;
    case 1:
      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
        printf("SNR: %3.1f\t enc_pipe_thrpt(Mbps): %.2f\t enc_avx2_thrpt(Mbps): "
               "%.2f\n",
               snr_db_vec[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * code_size / (1000000 * elapsed_time_enc[i_snr]),
               last_i_batch[i_snr] * BATCH_SIZE * code_size / (1000000 * elapsed_time_enc_avx2[i_snr]));

        printf("SNR: %3.1f\t FLOAT WER: %.8f %d/%d \t dec_thrput(Mbps): %.2f\n",
               snr_db_vec[i_snr],
               (double)n_error_words[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
               n_error_words[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * code_size,
               last_i_batch[i_snr] * BATCH_SIZE * code_size / (1000000 * elapsed_time_dec[i_snr]));
        printf("SNR: %3.1f\t INT16 WER: %.8f %d/%d \t dec_thrput(Mbps): %.2f\n",
               snr_db_vec[i_snr],
               (double)n_error_words_s[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
               n_error_words_s[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * code_size,
               last_i_batch[i_snr] * BATCH_SIZE * code_size / (1000000 * elapsed_time_dec_s[i_snr]));
        printf("SNR: %3.1f\t INT8 WER: %.8f %d/%d \t dec_thrput(Mbps): %.2f\n",
               snr_db_vec[i_snr],
               (double)n_error_words_c[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
               n_error_words_c[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * code_size,
               last_i_batch[i_snr] * BATCH_SIZE * code_size / (1000000 * elapsed_time_dec_c[i_snr]));
#ifdef LV_HAVE_AVX2
        printf("SNR: %3.1f\t INT8-AVX2 WER: %.8f %d/%d \t dec_thrput(Mbps): %.2f\n",
               snr_db_vec[i_snr],
               (double)n_error_words_c_avx2[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
               n_error_words_c_avx2[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * code_size,
               last_i_batch[i_snr] * BATCH_SIZE * code_size / (1000000 * elapsed_time_dec_c_avx2[i_snr]));
#endif // LV_HAVE_AVX2
        printf("\n");
      }
      break;
    default:
      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
        printf("**** PIPELINE ENCODER ****\n");
        printf("Estimated throughput:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
               last_i_batch[i_snr] * BATCH_SIZE / elapsed_time_enc[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * message_size / elapsed_time_enc[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * code_size / elapsed_time_enc[i_snr]);
#ifdef LV_HAVE_AVX2
        printf("\n**** AVX2 ENCODER ****\n");
        printf("Estimated throughput:\n %e word/s\n %e bit/s (information)\n %e bit/s "
               "(encoded)\n",
               last_i_batch[i_snr] * BATCH_SIZE / elapsed_time_enc_avx2[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * message_size / elapsed_time_enc_avx2[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * code_size / elapsed_time_enc_avx2[i_snr]);
#endif // LV_HAVE_AVX2

        printf("\n**** FLOATING POINT ****");
        printf("\nEstimated word error rate:\n %e (%d errors)\n",
               (double)n_error_words[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
               n_error_words[i_snr]);
        printf("Estimated throughput decoder:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
               last_i_batch[i_snr] * BATCH_SIZE / elapsed_time_dec[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * message_size / elapsed_time_dec[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * code_size / elapsed_time_dec[i_snr]);

        printf("\n**** FIXED POINT (16 bits) ****");
        printf("\nEstimated word error rate:\n %e (%d errors)\n",
               (double)n_error_words_s[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
               n_error_words_s[i_snr]);
        printf("Estimated throughput decoder:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
               last_i_batch[i_snr] * BATCH_SIZE / elapsed_time_dec_s[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * message_size / elapsed_time_dec_s[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * code_size / elapsed_time_dec_s[i_snr]);

        printf("\n**** FIXED POINT (8 bits) ****");
        printf("\nEstimated word error rate:\n %e (%d errors)\n",
               (double)n_error_words_c[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
               n_error_words_c[i_snr]);
        printf("Estimated throughput decoder:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
               last_i_batch[i_snr] * BATCH_SIZE / elapsed_time_dec_c[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * message_size / elapsed_time_dec_c[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * code_size / elapsed_time_dec_c[i_snr]);
#ifdef LV_HAVE_AVX2
        printf("\n**** FIXED POINT (8 bits, AVX2) ****");
        printf("\nEstimated word error rate:\n %e (%d errors)\n",
               (double)n_error_words_c_avx2[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
               n_error_words_c_avx2[i_snr]);
        printf("Estimated throughput decoder:\n %e word/s\n %e bit/s (information)\n %e bit/s (encoded)\n",
               last_i_batch[i_snr] * BATCH_SIZE / elapsed_time_dec_c_avx2[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * message_size / elapsed_time_dec_c_avx2[i_snr],
               last_i_batch[i_snr] * BATCH_SIZE * code_size / elapsed_time_dec_c_avx2[i_snr]);
#endif // LV_HAVE_AVX2
        printf("\n");
      }
      break;
  }

  // Release every buffer allocated above, including the AVX2 ones, which are allocated unconditionally.
  free(data_tx);
  free(data_rx);
  free(data_rx_s);
  free(data_rx_c);
  free(data_rx_c_avx2);
  free(input_enc);
  free(output_enc);
  free(output_enc_avx2);
  free(llr);
  free(llr_s);
  free(llr_c);
  free(llr_c_avx2);
  free(output_dec);
  free(output_dec_s);
  free(output_dec_c);
  free(output_dec_c_avx2);

#ifdef DATA_ALL_ONES
#else
  srslte_random_free(random_gen);
#endif

  // free sets
  srslte_polar_code_sets_free(&sets);
  srslte_polar_encoder_free(&enc);
  srslte_polar_decoder_free(&dec);
  srslte_polar_decoder_free(&dec_s);
  srslte_polar_decoder_free(&dec_c);
#ifdef LV_HAVE_AVX2
  srslte_polar_encoder_free(&enc_avx2);
  srslte_polar_decoder_free(&dec_c_avx2);
#endif // LV_HAVE_AVX2

  int expected_errors = 0;
  if (snr_db_vec[0] == 101) {
    // Single noiseless point: no decoder is allowed to make any error.
    if (n_error_words[0] > expected_errors) {
      printf("\n(float) Test failed!\n\n");
    } else {
      printf("\n(float) Test completed successfully!\n\n");
    }
    printf("\r");

    if (n_error_words_s[0] > expected_errors) {
      printf("\n(16 bit) Test failed!\n\n");
    } else {
      printf("\n(16 bit) Test completed successfully!\n\n");
    }
    printf("\r");

    if (n_error_words_c[0] > expected_errors) {
      printf("\n(8 bit) Test failed!\n\n");
    } else {
      printf("\n(8 bit) Test completed successfully!\n\n");
    }
    printf("\r");
#ifdef LV_HAVE_AVX2
    if (n_error_words_c_avx2[0] > expected_errors) {
      printf("\n(8 bit, avx2) Test failed!\n\n");
    } else {
      printf("\n(8 bit, avx2) Test completed successfully!\n\n");
    }
#endif // LV_HAVE_AVX2
    printf("\r");

    exit((n_error_words[0] > expected_errors) || (n_error_words_s[0] > expected_errors) ||
         (n_error_words_c[0] > expected_errors)
#ifdef LV_HAVE_AVX2
         || (n_error_words_c_avx2[0] > expected_errors)
#endif // LV_HAVE_AVX2
    );
  } else {
    // Noisy points: the fixed-point decoders may not be more than 10 times worse than the float one.
    // perror() does not format its argument and appends an unrelated errno text, hence fprintf().
    for (int i_snr = 0; i_snr < snr_points; i_snr++) {
      if (n_error_words_s[i_snr] > 10 * n_error_words[i_snr]) {
        fprintf(stderr, "16-bit performance at SNR = %.1f too low!\n", snr_db_vec[i_snr]);
        exit(-1);
      }
      if (n_error_words_c[i_snr] > 10 * n_error_words[i_snr]) {
        fprintf(stderr, "8-bit performance at SNR = %.1f too low!\n", snr_db_vec[i_snr]);
        exit(-1);
      }
#ifdef LV_HAVE_AVX2
      if (n_error_words_c_avx2[i_snr] > 10 * n_error_words[i_snr]) {
        fprintf(stderr, "8-bit avx2 performance at SNR = %.1f too low!\n", snr_db_vec[i_snr]);
        exit(-1);
      }
#endif // LV_HAVE_AVX2
    }
    printf("\nTest completed successfully!\n\n");
    printf("\r");
  }

  return 0;
}

@ -0,0 +1,119 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_sets.c
* \brief Definition of the auxiliary function that reads polar index sets from a file.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
* The message and parity check sets provided by this function are needed by
* the subchannel allocation block.
* The frozen bit set provided by this function is used by the polar decoder.
*
*/
#include "polar_sets.h"
#include <inttypes.h>
#include <srslte/phy/utils/vector.h>
#include <stdio.h>
#include <stdlib.h> //exit
#include <string.h>
/*!
 * Releases the four index arrays referenced by a polar set. The structure itself is not freed.
 * Every pointer is reset to NULL after being released, so that calling this function twice on the
 * same structure does not free the arrays a second time.
 * \param[in,out] c A pointer to the polar set whose arrays are released. A NULL pointer is ignored.
 */
void srslte_polar_code_sets_free(srslte_polar_sets_t* c)
{
  if (c == NULL) {
    return;
  }

  free(c->frozen_set);
  c->frozen_set = NULL;

  free(c->info_set);
  c->info_set = NULL;

  free(c->message_set);
  c->message_set = NULL;

  free(c->parity_set);
  c->parity_set = NULL;
}
/*!
 * Allocates the index sets of a polar code and fills them from the binary file
 * <tt>frozensets/polar_code_sets_<N>_<E>_<K>_<PC>_<nWmPC>.bin</tt>, where \f$N = 2^{code\_size\_log}\f$.
 * The file is read as four consecutive arrays of 16-bit indices: info set, message set, parity set
 * and frozen set, in this order.
 *
 * Every failure (inconsistent sizes, allocation error, missing or truncated file) prints a message
 * and terminates the process; the function only returns on success.
 *
 * \param[out] c                  A pointer to the polar set to initialize.
 * \param[in]  message_size       Number of data + CRC bits.
 * \param[in]  code_size_log      Base-2 logarithm of the code size.
 * \param[in]  rate_matching_size Number of bits of the codeword after rate matching (only used to
 *                                build the file name).
 * \param[in]  parity_set_size    Number of parity bits.
 * \param[in]  nWmPC              Number of parity bits of minimum weight type (only used to build
 *                                the file name).
 * \return 0 on success.
 */
int srslte_polar_code_sets_read(srslte_polar_sets_t* c,
                                const uint16_t       message_size,
                                const uint8_t        code_size_log,
                                const uint16_t       rate_matching_size,
                                const uint8_t        parity_set_size,
                                const uint8_t        nWmPC)
{
  FILE* fptr = NULL;
  // Longest possible name is 56 characters plus the terminator (five-digit N, E and K, three-digit
  // PC and nWmPC).
  char     filename[80];
  uint16_t code_size = 1U << code_size_log;

  // The frozen set size is computed by subtraction in 16 bits: reject sizes that would wrap around.
  if ((uint32_t)message_size + parity_set_size > code_size) {
    printf("Error! message size (%u) plus parity set size (%u) exceeds the code size (%u).\n",
           (unsigned)message_size,
           (unsigned)parity_set_size,
           (unsigned)code_size);
    exit(-1);
  }

  c->frozen_set_size  = code_size - message_size - parity_set_size;
  c->parity_set_size  = parity_set_size;
  c->info_set_size    = message_size + parity_set_size;
  c->message_set_size = message_size;

  c->frozen_set = srslte_vec_u16_malloc(c->frozen_set_size);
  if (!c->frozen_set) {
    perror("malloc");
    exit(-1);
  }

  c->info_set = srslte_vec_u16_malloc(c->info_set_size);
  if (!c->info_set) {
    perror("malloc");
    exit(-1);
  }

  c->message_set = srslte_vec_u16_malloc(c->message_set_size);
  if (!c->message_set) {
    perror("malloc");
    exit(-1);
  }

  c->parity_set = srslte_vec_u16_malloc(parity_set_size);
  if (!c->parity_set) {
    perror("malloc");
    exit(-1);
  }

  const int name_len = snprintf(filename,
                                sizeof(filename),
                                "frozensets/polar_code_sets_%hu_%hu_%hu_%hu_%u.bin",
                                code_size,
                                rate_matching_size,
                                c->message_set_size,
                                c->parity_set_size,
                                (unsigned)nWmPC);
  if (name_len < 0 || (size_t)name_len >= sizeof(filename)) {
    printf("Error! could not build the name of the polar set file.\n");
    exit(1);
  }

  fptr = fopen(filename, "rbe");
  if (fptr == NULL) {
    printf("Error! file: %s does not exist. Probably, the polar set file is missing in folder "
           "/frozensets for the provided code parameters.\n",
           filename);
    exit(1);
  }

  // A short read would leave part of the sets uninitialized: stop at the first incomplete array.
  const int read_ok =
      (fread(c->info_set, sizeof(uint16_t), c->info_set_size, fptr) == c->info_set_size) &&
      (fread(c->message_set, sizeof(uint16_t), c->message_set_size, fptr) == c->message_set_size) &&
      (fread(c->parity_set, sizeof(uint16_t), c->parity_set_size, fptr) == c->parity_set_size) &&
      (fread(c->frozen_set, sizeof(uint16_t), c->frozen_set_size, fptr) == c->frozen_set_size);

  fclose(fptr);

  if (!read_ok) {
    printf("Error! file: %s is shorter than expected for the provided code parameters.\n", filename);
    exit(1);
  }

  return 0;
}

@ -0,0 +1,80 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file polar_sets.h
* \brief Declaration of the auxiliary function that reads polar index sets from a file.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
* The message and parity check sets provided by this function are needed by
* the subchannel allocation block.
* The frozen bit set provided by this function is used by the polar decoder.
*
*/
#ifndef SRSLTE_POLAR_SETS_H
#define SRSLTE_POLAR_SETS_H
#include "srslte/config.h"
#include <stdint.h>
/*!
* \brief Describes a polar set.
*
* Groups the four index arrays of a polar code together with their lengths. The arrays are
* allocated and filled by srslte_polar_code_sets_read() and released by
* srslte_polar_code_sets_free(). As set by srslte_polar_code_sets_read(), \a info_set_size equals
* \a message_set_size plus \a parity_set_size, and \a frozen_set_size equals the code size minus
* \a info_set_size.
*/
typedef struct {
uint16_t message_set_size; /*!< \brief Number of message bits (data and CRC); length of \a message_set. */
uint16_t info_set_size; /*!< \brief Number of message bits plus parity bits; length of \a info_set. */
uint16_t parity_set_size; /*!< \brief Number of parity check bits; length of \a parity_set. */
uint16_t frozen_set_size; /*!< \brief Number of frozen bits; length of \a frozen_set. */
uint16_t* message_set; /*!< \brief Pointer to the indices of the encoder input vector containing data and CRC bits. */
uint16_t* info_set; /*!< \brief Pointer to the indices of the encoder input vector containing data, CRC and
parity check bits.*/
uint16_t* parity_set; /*!< \brief Pointer to the indices of the encoder input vector containing the parity bits.*/
uint16_t* frozen_set; /*!< \brief Pointer to the indices of the encoder input vector containing frozen bits.*/
} srslte_polar_sets_t;
/*!
* Initializes the different index sets as needed by the subchannel allocation block and/or by the polar decoder.
* \param[out] c A pointer to the initialized polar set.
* \param[in] message_size Number of data + CRC bits.
* \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the decoder input/output vector.
* \param[in] rate_matching_size Number of bits of the codeword after rate matching.
* \param[in] parity_set_size Number of parity bits.
* \param[in] nWmPC Number of parity bits of minimum weight type.
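* \return An integer: 0 if the function executes correctly. Note that the current implementation does not
* return on failure (allocation error or missing set file): it terminates the process by calling exit().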
*/
int srslte_polar_code_sets_read(srslte_polar_sets_t* c,
uint16_t message_size,
uint8_t code_size_log,
uint16_t rate_matching_size,
uint8_t parity_set_size,
uint8_t nWmPC);
/*!
* The polar set "destructor": it frees all the resources.
* \param[in] c A pointer to the dismantled polar set.
*/
void srslte_polar_code_sets_free(srslte_polar_sets_t* c);
#endif // SRSLTE_POLAR_SETS_H

@ -0,0 +1,66 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file subchannel_allocation.c
* \brief Definition of the auxiliary subchannel allocation block.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
* These functions are not fully functional nor tested to be 3gpp-5G compliant.
* Please, use only for testing purposes.
*
*/
#include "subchannel_allocation.h"
#include <string.h> //memset
/*!
 * Fills a subchannel allocation descriptor: the code size is derived from its base-2 logarithm,
 * while the message size and the pointer to the message index set are stored as given (the index
 * array is referenced, not copied).
 */
void srslte_subchannel_allocation_init(srslte_subchn_alloc_t* c,
                                       const uint8_t          code_size_log,
                                       const uint16_t         message_set_size,
                                       uint16_t*              message_set)
{
  // keep a reference to the caller's index set
  c->message_set  = message_set;
  c->message_size = message_set_size;

  // N = 2^code_size_log
  c->code_size = 1U << code_size_log;
}
/*!
 * Builds the encoder input vector: all \a c->code_size positions are cleared first, then the k-th
 * message bit is written at position \a c->message_set[k].
 */
void srslte_subchannel_allocation(const srslte_subchn_alloc_t* c, const uint8_t* message, uint8_t* input_encoder)
{
  // every position that does not carry a message bit stays at zero
  memset(input_encoder, 0, c->code_size * sizeof(uint8_t));

  uint16_t k = 0;
  while (k < c->message_size) {
    const uint16_t position = c->message_set[k];

    input_encoder[position] = message[k];
    k++;
  }
}
/*!
 * Recovers the message bits from the decoder output vector: the k-th message bit is read from
 * position \a c->message_set[k].
 */
void srslte_subchannel_deallocation(const srslte_subchn_alloc_t* c, const uint8_t* output_decoder, uint8_t* message)
{
  uint16_t k = 0;
  while (k < c->message_size) {
    const uint16_t position = c->message_set[k];

    message[k] = output_decoder[position];
    k++;
  }
}

@ -0,0 +1,86 @@
/*
* Copyright 2013-2020 Software Radio Systems Limited
*
* This file is part of srsLTE.
*
* srsLTE is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* srsLTE is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* A copy of the GNU Affero General Public License can be found in
* the LICENSE file in the top-level directory of this distribution
* and at http://www.gnu.org/licenses/.
*
*/
/*!
* \file subchannel_allocation.h
* \brief Declaration of the auxiliary subchannel allocation block.
* \author Jesus Gomez (CTTC)
* \date 2020
*
* \copyright Software Radio Systems Limited
*
* These functions are not fully functional nor tested to be 3gpp-5G compliant.
* Please, use only for testing purposes.
*
*/
#ifndef SRSLTE_SUB_CHANNEL_ALLOC_H
#define SRSLTE_SUB_CHANNEL_ALLOC_H
#include "srslte/config.h"
#include "stdint.h"
/*!
* \brief Describes a subchannel allocation.
*
* The structure is filled by srslte_subchannel_allocation_init(), which stores the \a message_set
* pointer it receives without copying the array: the index array must therefore remain valid for as
* long as the structure is used.
*/
typedef struct SRSLTE_API {
uint16_t code_size; /*!< \brief Number of bits, \f$N\f$, of the encoder input/output vector. */
uint16_t message_size; /*!< \brief Number of bits, \f$K\f$, of data + CRC; length of \a message_set. */
uint16_t* message_set; /*!< \brief Pointer to the indices of the encoder input vector containing data and CRC bits. */
} srslte_subchn_alloc_t;
/*!
* Initializes a subchannel allocation instance.
* \param[out] c A pointer to the srslte_subchn_alloc_t structure
* containing the parameters needed by the subchannel allocation function.
* \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the decoder input/output vector.
* \param[in] message_set_size Number of data + CRC bits.
* \param[in] message_set Pointer to the indices of the encoder input vector containing
* data and CRC bits.
*/
void srslte_subchannel_allocation_init(srslte_subchn_alloc_t* c,
uint8_t code_size_log,
uint16_t message_set_size,
uint16_t* message_set);
/*!
* Allocates message bits (data + CRC) to the encoder input bit vector at the
* positions specified in \a c->message_set and zeros to the remaining
* positions. This function is not fully 5G compliant as parity bits positions
* are set to 0.
* \param[in] c A pointer to the srslte_subchn_alloc_t structure containing
* the parameters needed by the subchannel allocation function.
* \param[in] message A pointer to the vector with the message bits (data and CRC).
* \param[out] input_encoder A pointer to the encoder input bit vector.
*/
void srslte_subchannel_allocation(const srslte_subchn_alloc_t* c, const uint8_t* message, uint8_t* input_encoder);
/*!
* Extracts message bits (data + CRC) from the decoder output vector
* according to the positions specified in \a c->message_set.
* \param[in] c A pointer to the srslte_subchn_alloc_t structure containing the
* parameters needed by the subchannel allocation function.
* \param[in] output_decoder A pointer to the decoder output bit vector.
* \param[out] message A pointer to the vector with the message bits (data and CRC).
*/
void srslte_subchannel_deallocation(const srslte_subchn_alloc_t* c, const uint8_t* output_decoder, uint8_t* message);
#endif // SRSLTE_SUB_CHANNEL_ALLOC_H

@ -65,7 +65,7 @@ void srslte_scrambling_c_offset(srslte_sequence_t* s, cf_t* data, int offset, in
static inline void scrambling_b(uint8_t* c, uint8_t* data, int len)
{
srslte_vec_xor_bbb((int8_t*)c, (int8_t*)data, (int8_t*)data, len);
srslte_vec_xor_bbb(c, data, data, len);
}
void srslte_scrambling_b(srslte_sequence_t* s, uint8_t* data)

@ -19,12 +19,12 @@
*
*/
#include "srslte/srslte.h"
#include <stdlib.h>
#include <string.h>
#include "srslte/phy/dft/dft.h"
#include "srslte/phy/utils/convolution.h"
#include "srslte/phy/utils/debug.h"
#include "srslte/phy/utils/vector.h"
int srslte_conv_fft_cc_init(srslte_conv_fft_cc_t* q, uint32_t input_len, uint32_t filter_len)

@ -87,7 +87,7 @@ float squared_error(cf_t a, cf_t b)
}
TEST(
srslte_vec_xor_bbb, MALLOC(int8_t, x); MALLOC(int8_t, y); MALLOC(int8_t, z);
srslte_vec_xor_bbb, MALLOC(uint8_t, x); MALLOC(uint8_t, y); MALLOC(uint8_t, z);
cf_t gold = 0.0f;
for (int i = 0; i < block_size; i++) {

@ -31,7 +31,7 @@
#include "srslte/phy/utils/vector.h"
#include "srslte/phy/utils/vector_simd.h"
void srslte_vec_xor_bbb(int8_t* x, int8_t* y, int8_t* z, const uint32_t len)
void srslte_vec_xor_bbb(const uint8_t* x, const uint8_t* y, uint8_t* z, const uint32_t len)
{
srslte_vec_xor_bbb_simd(x, y, z, len);
}
@ -597,6 +597,62 @@ uint32_t srslte_vec_max_abs_ci(const cf_t* x, const uint32_t len)
return srslte_vec_max_ci_simd(x, len);
}
/*!
 * Quantizes a vector of floats into 16-bit signed integers.
 *
 * Each finite sample is mapped to <tt>offset + gain * in[i]</tt>, rounded to the nearest integer
 * and saturated to the interval [-clip, clip]. Infinite samples are mapped to +/-32767 regardless
 * of \a clip. Samples for which the scaled value is not a number are mapped to 0.
 *
 * \param[in]  in     Input vector.
 * \param[out] out    Output vector of quantized samples.
 * \param[in]  gain   Scaling factor applied to each input sample.
 * \param[in]  offset Offset added after scaling.
 * \param[in]  clip   Saturation level.
 * \param[in]  len    Number of samples.
 */
void srslte_vec_quant_fs(const float*   in,
                         int16_t*       out,
                         const float    gain,
                         const float    offset,
                         const float    clip,
                         const uint32_t len)
{
  const int16_t inf = (1U << 15U) - 1;
  // Converting a double that does not fit in a long is undefined behaviour. Values are therefore
  // saturated to this bound (representable by a long on every platform) before the conversion;
  // this does not alter the result for any clip level that fits in the output type.
  const double safe_limit = 1.0e9;

  for (uint32_t i = 0; i < len; i++) {
    long tmp = 0;

    if (isinf(in[i])) {
      tmp = inf * (-2 * (in[i] < 0) + 1);
    } else {
      // Adding INT16_MAX before truncating (and removing it afterwards) turns the truncation
      // towards zero into a round-to-nearest for negative values too.
      double biased = offset + gain * in[i] + INT16_MAX + 0.5;

      if (isnan(biased)) {
        tmp = 0;
      } else {
        if (biased > safe_limit) {
          biased = safe_limit;
        } else if (biased < -safe_limit) {
          biased = -safe_limit;
        }
        tmp = (long)biased - INT16_MAX;
        if (tmp < -clip) {
          tmp = -clip;
        }
        if (tmp > clip) {
          tmp = clip;
        }
      }
    }
    out[i] = (int16_t)tmp;
  }
}
/*!
 * Quantizes a vector of floats into 8-bit signed integers.
 *
 * Each finite sample is mapped to <tt>offset + gain * in[i]</tt>, converted to an integer and
 * saturated to the interval [-clip, clip]. Infinite samples are mapped to +/-127 regardless of
 * \a clip. Samples for which the scaled value is not a number are mapped to 0.
 *
 * \param[in]  in     Input vector.
 * \param[out] out    Output vector of quantized samples.
 * \param[in]  gain   Scaling factor applied to each input sample.
 * \param[in]  offset Offset added after scaling.
 * \param[in]  clip   Saturation level.
 * \param[in]  len    Number of samples.
 */
void srslte_vec_quant_fc(const float*   in,
                         int8_t*        out,
                         const float    gain,
                         const float    offset,
                         const float    clip,
                         const uint32_t len)
{
  // Converting a double that does not fit in a long is undefined behaviour. Values are therefore
  // saturated to this bound (representable by a long on every platform) before the conversion;
  // this does not alter the result for any clip level that fits in the output type.
  const double safe_limit = 1.0e9;

  for (uint32_t i = 0; i < len; i++) {
    long tmp = 0;

    if (isinf(in[i])) {
      tmp = 127 * (-2 * (in[i] < 0) + 1);
    } else {
      // Adding INT8_MAX before truncating (and removing it afterwards) turns the truncation
      // towards zero into a round-to-nearest for values down to -INT8_MAX.
      double biased = offset + gain * in[i] + INT8_MAX + 0.5;

      if (isnan(biased)) {
        tmp = 0;
      } else {
        if (biased > safe_limit) {
          biased = safe_limit;
        } else if (biased < -safe_limit) {
          biased = -safe_limit;
        }
        tmp = (long)biased - INT8_MAX;
        if (tmp < -clip) {
          tmp = -clip;
        }
        if (tmp > clip) {
          tmp = clip;
        }
      }
    }
    out[i] = (int8_t)tmp;
  }
}
void srslte_vec_quant_fus(const float* in,
uint16_t* out,
const float gain,

@ -30,27 +30,27 @@
#include "srslte/phy/utils/simd.h"
#include "srslte/phy/utils/vector_simd.h"
void srslte_vec_xor_bbb_simd(const int8_t* x, const int8_t* y, int8_t* z, const int len)
void srslte_vec_xor_bbb_simd(const uint8_t* x, const uint8_t* y, uint8_t* z, const int len)
{
int i = 0;
#if SRSLTE_SIMD_B_SIZE
if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
simd_b_t a = srslte_simd_b_load(&x[i]);
simd_b_t b = srslte_simd_b_load(&y[i]);
simd_b_t a = srslte_simd_b_load((int8_t*)&x[i]);
simd_b_t b = srslte_simd_b_load((int8_t*)&y[i]);
simd_b_t r = srslte_simd_b_xor(a, b);
srslte_simd_b_store(&z[i], r);
srslte_simd_b_store((int8_t*)&z[i], r);
}
} else {
for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
simd_b_t a = srslte_simd_b_loadu(&x[i]);
simd_b_t b = srslte_simd_b_loadu(&y[i]);
simd_b_t a = srslte_simd_b_loadu((int8_t*)&x[i]);
simd_b_t b = srslte_simd_b_loadu((int8_t*)&y[i]);
simd_b_t r = srslte_simd_b_xor(a, b);
srslte_simd_b_storeu(&z[i], r);
srslte_simd_b_storeu((int8_t*)&z[i], r);
}
}
#endif /* SRSLTE_SIMD_B_SIZE */

Loading…
Cancel
Save