Added Polar and LDPC forward error correction

4 years ago · 2c4aa1e379
parent a351b2534e
commit 2c4aa1e379
90 changed files with 23323 additions and 20 deletions
--- a/lib/include/srslte/phy/fec/ldpc/base_graph.h
+++ b/lib/include/srslte/phy/fec/ldpc/base_graph.h
@ -0,0 +1,107 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file base_graph.h
+ * \brief Declaration of the two LDPC base graphs employed in the 5G NR
+ * standard.
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * This file declares the dimensions of the base graphs and provides an interface
+ * for obtaining the set index and the permutation matrix corresponding to a
+ * given *lifting size*.
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef SRSLTE_BASEGRAPH_H
+#define SRSLTE_BASEGRAPH_H
+
+#include "srslte/config.h"
+
+#include <stdint.h>
+
+#define BG1Nfull 68 /*!< \brief Number of variable nodes in BG1. */
+#define BG1N 66     /*!< \brief Number of variable nodes in BG1 after puncturing. */
+#define BG1M 46     /*!< \brief Number of check nodes in BG1. */
+#define BG1K 22     /*!< \brief Number of "uncoded bits" in BG1. */
+
+#define BG2Nfull 52 /*!< \brief Number of variable nodes in BG2. */
+#define BG2N 50     /*!< \brief Number of variable nodes in BG2 after puncturing. */
+#define BG2M 42     /*!< \brief Number of check nodes in BG2. */
+#define BG2K 10     /*!< \brief Number of "uncoded bits" in BG2. */
+
+#define MAX_CNCT 20 /*!< \brief Maximum number (+1) of connected variables per check node. */
+
+#define NOF_LIFTSIZE 8 /*!< \brief Number of possible lifting size indices. */
+
+#define MAX_LIFTSIZE 384 /*!< \brief Maximum lifting size. */
+
+#define VOID_LIFTSIZE 255 /*!< \brief Identifies an invalid lifting size in the lookup table. */
+/*!
+ * \brief Identifies a missing connection between a check node and a variable node
+ * in the protograph. */
+#define NO_CNCT 0xFFFF
+
+/*! \brief Selects one of the two 5G NR LDPC base graphs, BG1 or BG2. */
+typedef enum SRSLTE_API {
+  BG1 = 0, /*!< \brief Base Graph 1. */
+  BG2,     /*!< \brief Base Graph 2. */
+} srslte_basegraph_t;
+
+/*!
+ * Creates the parity-check matrix for the given base graph and lifting size
+ * in the compact form (a normalized permutation matrix). Also returns the
+ * indices of the variable nodes associated to each check node.
+ * \param[out] pcm       The compact parity-check matrix: entry \f$(m,n)\f$ is an
+ *                       integer between 0 and LS-1 if check-node \f$m\f$ is
+ *                       connected to variable node \f$n\f$ in the protograph.
+ *                       This number specifies the order of the circular
+ *                       rotation applied to the identity matrix in the full
+ *                       graph (see also Section 3.4.1 of Deliverable 1). This
+ *                       pointer can be safely cast to 'uint16_t(*)[BGbgNfull]'
+ *                       (see also ::BG1Nfull and ::BG2Nfull) to get a BGbgM x
+ *                       BGbgNfull matrix.
+ * \param[out] positions For each check node, the corresponding row of this
+ *                       matrix contains the indices of the connected variable
+ *                       nodes (see also ::BG1_positions and ::BG2_positions).
+ * \param[in]  bg        The desired base graph (BG1 or BG2).
+ * \param[in]  ls        The desired lifting size.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int create_compact_pcm(uint16_t* pcm, int8_t (*positions)[MAX_CNCT], srslte_basegraph_t bg, uint16_t ls);
+
+/*!
+ * Looks up the set index of a lifting size in the table \c LSindex (declared extern
+ * here). Values above ::MAX_LIFTSIZE are rejected without reading the table.
+ * \param[in] ls A lifting size.
+ * \return An integer between 0 and 7 (included), or ::VOID_LIFTSIZE if \a ls is
+ * an invalid lifting size.
+ */
+static inline uint8_t get_ls_index(uint16_t ls)
+{
+  extern const uint8_t LSindex[];
+  return (ls <= MAX_LIFTSIZE ? LSindex[ls] : VOID_LIFTSIZE);
+}
+
+#endif // SRSLTE_BASEGRAPH_H
--- a/lib/include/srslte/phy/fec/ldpc/ldpc_common.h
+++ b/lib/include/srslte/phy/fec/ldpc/ldpc_common.h
@ -0,0 +1,37 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_common.h
+ * \brief Declaration of elements common to both the LDPC encoder and the LDPC decoder.
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef SRSLTE_LDPCCOMMON_H
+#define SRSLTE_LDPCCOMMON_H
+
+#define FILLER_BIT 254 /*!< \brief Identifies a filler bit. */
+
+#endif // SRSLTE_LDPCCOMMON_H
--- a/lib/include/srslte/phy/fec/ldpc/ldpc_decoder.h
+++ b/lib/include/srslte/phy/fec/ldpc/ldpc_decoder.h
@ -0,0 +1,150 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_decoder.h
+ * \brief Declaration of the LDPC decoder.
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef SRSLTE_LDPCDECODER_H
+#define SRSLTE_LDPCDECODER_H
+
+#include "srslte/phy/fec/ldpc/base_graph.h"
+
+/*!
+ * \brief Types of LDPC decoder, distinguished by LLR representation, scheduling and SIMD variant.
+ */
+typedef enum {
+  SRSLTE_LDPC_DECODER_F,            /*!< \brief %Decoder working with real-valued LLRs. */
+  SRSLTE_LDPC_DECODER_S,            /*!< \brief %Decoder working with 16-bit integer-valued LLRs. */
+  SRSLTE_LDPC_DECODER_C,            /*!< \brief %Decoder working with 8-bit integer-valued LLRs. */
+  SRSLTE_LDPC_DECODER_C_FLOOD,      /*!< \brief %Decoder working with 8-bit integer-valued LLRs, flooded scheduling. */
+  SRSLTE_LDPC_DECODER_C_AVX2,       /*!< \brief %Decoder working with 8-bit integer-valued LLRs (AVX2 version). */
+  SRSLTE_LDPC_DECODER_C_AVX2_FLOOD, /*!< \brief %Decoder working with 8-bit integer-valued LLRs, flooded scheduling
+                                     (AVX2 version). */
+} srslte_ldpc_decoder_type_t;
+
+/*!
+ * \brief Describes an LDPC decoder.
+ */
+typedef struct SRSLTE_API {
+  void*              ptr;   /*!< \brief Registers used by the decoder. */
+  srslte_basegraph_t bg;    /*!< \brief Current base graph. */
+  uint16_t           ls;    /*!< \brief Current lifting size. */
+  uint8_t            bgN;   /*!< \brief Number of variable nodes in the BG. */
+  uint16_t           liftN; /*!< \brief Number of variable nodes in the lifted graph. */
+  uint8_t            bgM;   /*!< \brief Number of check nodes in the BG. */
+  uint16_t           liftM; /*!< \brief Number of check nodes in the lifted graph. */
+  uint8_t            bgK;   /*!< \brief Number of "uncoded bits" in the BG. */
+  uint16_t           liftK; /*!< \brief Number of uncoded bits in the lifted graph. */
+  uint16_t*          pcm;   /*!< \brief Pointer to the parity check matrix (compact form). */
+
+  int8_t (*var_indices)[MAX_CNCT]; /*!< \brief Pointer to lists of variable indices connected to a given check node. */
+
+  float scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum algorithm. */
+
+  void (*free)(void*); /*!< \brief Pointer to a "destructor". */
+
+  int (*decode_f)(void*,
+                  const float*,
+                  uint8_t*,
+                  uint32_t); /*!< \brief Pointer to the decoding function (float version). */
+  int (*decode_s)(void*,
+                  const int16_t*,
+                  uint8_t*,
+                  uint32_t); /*!< \brief Pointer to the decoding function (16-bit version). */
+  int (*decode_c)(void*,
+                  const int8_t*,
+                  uint8_t*,
+                  uint32_t); /*!< \brief Pointer to the decoding function (8-bit version). */
+} srslte_ldpc_decoder_t;
+
+/*!
+ * Initializes all the LDPC decoder variables according to the given base graph
+ * and lifting size.
+ * \param[out] q            A pointer to a srslte_ldpc_decoder_t structure.
+ * \param[in]  type         Type of LDPC decoder.
+ * \param[in]  bg           The desired base graph (BG1 or BG2).
+ * \param[in]  ls           The desired lifting size.
+ * \param[in]  scaling_fctr Scaling factor of the normalized min-sum algorithm.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_ldpc_decoder_init(srslte_ldpc_decoder_t*     q,
+                                        srslte_ldpc_decoder_type_t type,
+                                        srslte_basegraph_t         bg,
+                                        uint16_t                   ls,
+                                        float                      scaling_fctr);
+
+/*!
+ * The LDPC decoder "destructor": it frees all the resources allocated to the decoder.
+ * \param[in] q A pointer to the dismantled decoder.
+ */
+SRSLTE_API void srslte_ldpc_decoder_free(srslte_ldpc_decoder_t* q);
+
+/*!
+ * Carries out the actual decoding with real-valued LLRs.
+ * \param[in] q A pointer to the LDPC decoder (a srslte_ldpc_decoder_t structure
+ *    instance) that carries out the decoding.
+ * \param[in] llrs The LLRs obtained from the channel samples that correspond to
+ *    the codeword to be decoded.
+ * \param[out] message The message (uncoded bits) resulting from the decoding
+ *    operation.
+ * \param[in] cdwd_rm_length The number of bits forming the codeword (after rate matching).
+ */
+SRSLTE_API int
+srslte_ldpc_decoder_decode_f(srslte_ldpc_decoder_t* q, const float* llrs, uint8_t* message, uint32_t cdwd_rm_length);
+
+/*!
+ * Carries out the actual decoding with 16-bit integer-valued LLRs. It is
+ * recommended to use a 15-bit representation for the LLRs, given that all
+ * values exceeding \f$ 2^{15}-1 \f$ (in magnitude) will be considered as infinity.
+ * \param[in] q A pointer to the LDPC decoder (a srslte_ldpc_decoder_t structure
+ *    instance) that carries out the decoding.
+ * \param[in] llrs The LLRs obtained from the channel samples that correspond to
+ *    the codeword to be decoded.
+ * \param[out] message The message (uncoded bits) resulting from the decoding
+ *    operation.
+ * \param[in] cdwd_rm_length The number of bits forming the codeword (after rate matching).
+ */
+SRSLTE_API int
+srslte_ldpc_decoder_decode_s(srslte_ldpc_decoder_t* q, const int16_t* llrs, uint8_t* message, uint32_t cdwd_rm_length);
+
+/*!
+ * Carries out the actual decoding with 8-bit integer-valued LLRs. It is
+ * recommended to use a 7-bit representation for the LLRs, given that all
+ * values exceeding \f$ 2^{7}-1 \f$ (in magnitude) will be considered as infinity.
+ * \param[in] q A pointer to the LDPC decoder (a srslte_ldpc_decoder_t structure
+ *    instance) that carries out the decoding.
+ * \param[in] llrs The LLRs obtained from the channel samples that correspond to
+ *    the codeword to be decoded.
+ * \param[out] message The message (uncoded bits) resulting from the decoding
+ *    operation.
+ * \param[in] cdwd_rm_length The number of bits forming the codeword (after rate matching).
+ */
+SRSLTE_API int
+srslte_ldpc_decoder_decode_c(srslte_ldpc_decoder_t* q, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length);
+
+#endif // SRSLTE_LDPCDECODER_H
--- a/lib/include/srslte/phy/fec/ldpc/ldpc_encoder.h
+++ b/lib/include/srslte/phy/fec/ldpc/ldpc_encoder.h
@ -0,0 +1,104 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_encoder.h
+ * \brief Declaration of the LDPC encoder.
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef SRSLTE_LDPCENCODER_H
+#define SRSLTE_LDPCENCODER_H
+
+#include "srslte/phy/fec/ldpc/base_graph.h"
+
+/*!
+ * \brief Types of LDPC encoder (the AVX2 type is only declared when LV_HAVE_AVX2 is set).
+ */
+typedef enum SRSLTE_API {
+  SRSLTE_LDPC_ENCODER_C = 0, /*!< \brief Non-optimized encoder. */
+#if LV_HAVE_AVX2
+  SRSLTE_LDPC_ENCODER_AVX2, /*!< \brief SIMD-optimized (AVX2) encoder. */
+#endif                      // LV_HAVE_AVX2
+} srslte_ldpc_encoder_type_t;
+
+/*!
+ * \brief Describes an LDPC encoder.
+ */
+typedef struct SRSLTE_API {
+  void*              ptr;   /*!< \brief %Encoder auxiliary registers. */
+  srslte_basegraph_t bg;    /*!< \brief Current base graph. */
+  uint16_t           ls;    /*!< \brief Current lifting size. */
+  uint8_t            bgN;   /*!< \brief Number of variable nodes in the BG. */
+  uint16_t           liftN; /*!< \brief Number of variable nodes in the lifted graph. */
+  uint8_t            bgM;   /*!< \brief Number of check nodes in the BG. */
+  uint16_t           liftM; /*!< \brief Number of check nodes in the lifted graph. */
+  uint8_t            bgK;   /*!< \brief Number of "uncoded bits" in the BG. */
+  uint16_t           liftK; /*!< \brief Number of uncoded bits in the lifted graph. */
+  uint16_t*          pcm;   /*!< \brief Pointer to the parity check matrix (compact form). */
+  void (*free)(void*);      /*!< \brief Pointer to a "destructor". */
+  /*! \brief Pointer to the encoder function. */
+  int (*encode)(void*, const uint8_t*, uint8_t*, uint32_t, uint32_t);
+  /*! \brief Pointer to the encoder for the high-rate region. */
+  void (*encode_high_rate)(void*, uint8_t*);
+  /*! \brief Pointer to the encoder for the high-rate region (SIMD-optimized version). */
+  void (*encode_high_rate_avx2)(void*);
+
+} srslte_ldpc_encoder_t;
+
+/*!
+ * Initializes all the LDPC encoder variables according to the given base graph
+ * and lifting size.
+ * \param[out] q A pointer to a srslte_ldpc_encoder_t structure.
+ * \param[in] type The encoder type.
+ * \param[in] bg The desired base graph (BG1 or BG2).
+ * \param[in] ls The desired lifting size.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int
+srslte_ldpc_encoder_init(srslte_ldpc_encoder_t* q, srslte_ldpc_encoder_type_t type, srslte_basegraph_t bg, uint16_t ls);
+
+/*!
+ * The LDPC encoder "destructor": it frees all the resources allocated to the encoder.
+ * \param[in] q A pointer to the dismantled encoder.
+ */
+SRSLTE_API void srslte_ldpc_encoder_free(srslte_ldpc_encoder_t* q);
+
+/*!
+ * Encodes a message into a codeword with the specified encoder.
+ * \param[in] q A pointer to the desired encoder.
+ * \param[in] input The message to encode.
+ * \param[out] output The resulting codeword.
+ * \param[in] input_length The number of uncoded bits in the input message.
+ * \param[in] cdwd_rm_length The codeword length after rate matching.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_ldpc_encoder_encode(srslte_ldpc_encoder_t* q,
+                                          const uint8_t*         input,
+                                          uint8_t*               output,
+                                          uint32_t               input_length,
+                                          uint32_t               cdwd_rm_length);
+
+#endif // SRSLTE_LDPCENCODER_H
--- a/lib/include/srslte/phy/fec/ldpc/ldpc_rm.h
+++ b/lib/include/srslte/phy/fec/ldpc/ldpc_rm.h
@ -0,0 +1,224 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_rm.h
+ * \brief Declaration of the LDPC RateMatcher and RateDematcher.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef SRSLTE_LDPCRM_H
+#define SRSLTE_LDPCRM_H
+
+#include "srslte/phy/fec/ldpc/base_graph.h"
+
+/*!
+ * \brief Types of modulation, each with an associated modulation order (enumerators are numbered 0 to 4).
+ */
+typedef enum SRSLTE_API {
+  BPSK,  /*!< \brief pi/2-BPSK. */
+  QPSK,  /*!< \brief QPSK. */
+  QAM16, /*!< \brief QAM16. */
+  QAM64, /*!< \brief QAM64. */
+  QAM256 /*!< \brief QAM256. */
+} mod_type_t;
+
+/*!
+ * \brief Describes a rate matcher or rate dematcher (K and F are ignored by the rate matcher).
+ */
+typedef struct SRSLTE_API {
+  void*              ptr;       /*!< \brief %Rate Matcher auxiliary registers. */
+  srslte_basegraph_t bg;        /*!< \brief Current base graph. */
+  uint16_t           ls;        /*!< \brief Current lifting size. */
+  uint32_t           N;         /*!< \brief Codeword size. */
+  uint32_t           E;         /*!< \brief Rate-Matched codeword size. */
+  uint32_t           K;         /*!< \brief Codeblock size (including punctured and filler bits). */
+  uint32_t           F;         /*!< \brief Number of filler bits in the codeblock. */
+  uint32_t           k0;        /*!< \brief Starting position in the circular buffer. */
+  uint32_t           mod_order; /*!< \brief Modulation order. */
+  uint32_t           Ncb;       /*!< \brief Limit to the number of bits in the circular buffer. */
+} srslte_ldpc_rm_t;
+
+/*!
+ * Initializes the Rate Matcher for the maximum rate-matched codeword length
+ * \param[out] q           A pointer to a srslte_ldpc_rm_t structure.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_ldpc_rm_tx_init(srslte_ldpc_rm_t* q);
+
+/*!
+ * Carries out the actual rate-matching.
+ * \param[in] q            A pointer to the Rate-Matcher (a srslte_ldpc_rm_t structure
+ *                         instance) that carries out the rate matching.
+ * \param[in] input        The codeword obtained from the ldpc encoder.
+ * \param[out] output      The rate-matched codeword resulting from the rate-matching
+ *                         operation.
+ * \param[in]  E           Rate-matched codeword length.
+ * \param[in]  bg          Current base graph.
+ * \param[in]  ls          Current lifting size.
+ * \param[in]  rv          Redundancy version 0,1,2,3.
+ * \param[in]  mod_type    Modulation type.
+ * \param[in]  Nref        Size of limited buffer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_ldpc_rm_tx(srslte_ldpc_rm_t*        q,
+                                 const uint8_t*           input,
+                                 uint8_t*                 output,
+                                 const uint32_t           E,
+                                 const srslte_basegraph_t bg,
+                                 const uint32_t           ls,
+                                 const uint8_t            rv,
+                                 const mod_type_t         mod_type,
+                                 const uint32_t           Nref);
+
+/*!
+ * Initializes all the Rate DeMatcher variables.
+ * \param[out] q           A pointer to a srslte_ldpc_rm_t structure.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_ldpc_rm_rx_init_f(srslte_ldpc_rm_t* q);
+
+/*!
+ * Carries out the actual rate-dematching.
+ * \param[in] q          A pointer to the Rate-DeMatcher (a srslte_ldpc_rm_t structure
+ *                       instance) that carries out the rate dematching.
+ * \param[in] input      The LLRs obtained from the channel samples that correspond to
+ *                       the codeword to be first rate-dematched and then decoded.
+ * \param[out] output    The rate-dematched codeword resulting from the rate-dematching
+ *                       operation.
+ * \param[in] E          Rate-matched codeword length.
+ * \param[in] F          Number of filler bits.
+ * \param[in] bg         Current base graph.
+ * \param[in] ls         Current lifting size.
+ * \param[in] rv         Redundancy version 0,1,2,3.
+ * \param[in] mod_type   Modulation type.
+ * \param[in] Nref        Size of limited buffer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_ldpc_rm_rx_f(srslte_ldpc_rm_t*        q,
+                                   const float*             input,
+                                   float*                   output,
+                                   const uint32_t           E,
+                                   const uint32_t           F,
+                                   const srslte_basegraph_t bg,
+                                   const uint32_t           ls,
+                                   const uint8_t            rv,
+                                   const mod_type_t         mod_type,
+                                   const uint32_t           Nref);
+
+/*!
+ * Initializes all the Rate DeMatcher variables (short inputs).
+ * \param[out] q           A pointer to a srslte_ldpc_rm_t structure.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_ldpc_rm_rx_init_s(srslte_ldpc_rm_t* q);
+
+/*!
+ * Carries out the actual rate-dematching (short symbols).
+ * \param[in] q           A pointer to the Rate-DeMatcher (a srslte_ldpc_rm_t structure
+ *                        instance) that carries out the rate dematching.
+ * \param[in] input       The LLRs obtained from the channel samples that correspond to
+ *                        the codeword to be first rate-dematched and then decoded.
+ * \param[in] E           Rate-matched codeword length.
+ * \param[in] F           Number of filler bits.
+ * \param[in] bg          Current base graph.
+ * \param[in] ls          Current lifting size.
+ * \param[in] rv          Redundancy version 0,1,2,3.
+ * \param[in] mod_type    Modulation type.
+ * \param[in] Nref        Size of limited buffer.
+ * \param[out] output     The rate-dematched codeword resulting from the rate-dematching operation.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_ldpc_rm_rx_s(srslte_ldpc_rm_t*        q,
+                                   const int16_t*           input,
+                                   int16_t*                 output,
+                                   const uint32_t           E,
+                                   const uint32_t           F,
+                                   const srslte_basegraph_t bg,
+                                   const uint32_t           ls,
+                                   const uint8_t            rv,
+                                   const mod_type_t         mod_type,
+                                   const uint32_t           Nref);
+
+/*!
+ * Initializes all the Rate DeMatcher variables (char inputs).
+ * \param[out] q           A pointer to a srslte_ldpc_rm_t structure.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_ldpc_rm_rx_init_c(srslte_ldpc_rm_t* q);
+
+/*!
+ * Carries out the actual rate-dematching (int8_t symbols).
+ * \param[in] q           A pointer to the Rate-DeMatcher (a srslte_ldpc_rm_t structure
+ *                        instance) that carries out the rate dematching.
+ * \param[in] input       The LLRs obtained from the channel samples that correspond to
+ *                        the codeword to be first rate-dematched and then decoded.
+ * \param[out] output     The rate-dematched codeword resulting from the rate-dematching
+ *                        operation.
+ * \param[in] E           Rate-matched codeword length.
+ * \param[in] F           Number of filler bits.
+ * \param[in] bg          Current base graph.
+ * \param[in] ls          Current lifting size.
+ * \param[in] rv          Redundancy version 0,1,2,3.
+ * \param[in] mod_type    Modulation type.
+ * \param[in] Nref        Size of limited buffer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_ldpc_rm_rx_c(srslte_ldpc_rm_t*        q,
+                                   const int8_t*            input,
+                                   int8_t*                  output,
+                                   const uint32_t           E,
+                                   const uint32_t           F,
+                                   const srslte_basegraph_t bg,
+                                   const uint32_t           ls,
+                                   const uint8_t            rv,
+                                   const mod_type_t         mod_type,
+                                   const uint32_t           Nref);
+
+/*!
+ * The Rate Matcher "destructor": it frees all the resources allocated to the rate-matcher.
+ * \param[in] q A pointer to the dismantled rate-matcher.
+ */
+SRSLTE_API void srslte_ldpc_rm_tx_free(srslte_ldpc_rm_t* q);
+
+/*!
+ * The Rate Dematcher "destructor": it frees all the resources allocated to the rate-dematcher.
+ * \param[in] q A pointer to the dismantled rate-dematcher.
+ */
+SRSLTE_API void srslte_ldpc_rm_rx_free_f(srslte_ldpc_rm_t* q);
+
+/*!
+ * The Rate Dematcher "destructor" for short symbols: it frees all the resources allocated to the rate-dematcher.
+ * \param[in] q A pointer to the dismantled rate-dematcher.
+ */
+SRSLTE_API void srslte_ldpc_rm_rx_free_s(srslte_ldpc_rm_t* q);
+
+/*!
+ * The Rate Dematcher "destructor" for int8_t symbols: it frees all the resources allocated to the rate-dematcher.
+ * \param[in] q A pointer to the dismantled rate-dematcher.
+ */
+SRSLTE_API void srslte_ldpc_rm_rx_free_c(srslte_ldpc_rm_t* q);
+
+#endif // SRSLTE_LDPCRM_H
--- a/lib/include/srslte/phy/fec/polar/polar_decoder.h
+++ b/lib/include/srslte/phy/fec/polar/polar_decoder.h
@ -0,0 +1,118 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder.h
+ * \brief Declaration of the polar decoder.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ * 5G uses a polar decoder with maximum sizes \f$2^n\f$ with \f$n = 5,...,10\f$.
+ *
+ */
+
+#ifndef SRSLTE_POLARDECODER_H
+#define SRSLTE_POLARDECODER_H
+#include "srslte/config.h"
+#include <stdbool.h>
+#include <stdint.h>
+
+/*!
+ * Lists the different types of polar decoder.
+ */
+typedef enum {
+  SRSLTE_POLAR_DECODER_SSC_F = 0, /*!< \brief Floating-point Simplified Successive Cancellation (SSC) decoder. */
+  SRSLTE_POLAR_DECODER_SSC_S = 1, /*!< \brief Fixed-point (16 bit) Simplified Successive Cancellation (SSC) decoder. */
+  SRSLTE_POLAR_DECODER_SSC_C = 2, /*!< \brief Fixed-point (8 bit) Simplified Successive Cancellation (SSC) decoder. */
+  SRSLTE_POLAR_DECODER_SSC_C_AVX2 =
+      3 /*!< \brief Fixed-point (8 bit, AVX2) Simplified Successive Cancellation (SSC) decoder. */
+} srslte_polar_decoder_type_t;
+
+/*!
+ * \brief Describes a polar decoder: an implementation handle, one decode callback per LLR type and a destructor.
+ */
+typedef struct SRSLTE_API {
+  void* ptr; /*!< \brief Pointer to the actual polar decoder structure. */
+  int (*decode_f)(void*        ptr,
+                  const float* symbols,
+                  uint8_t*     data_decoded); /*!< \brief Pointer to the decoder function (float version). */
+  int (*decode_s)(void*          ptr,
+                  const int16_t* symbols,
+                  uint8_t*       data_decoded); /*!< \brief Pointer to the decoder function (16-bit version). */
+  int (*decode_c)(void*         ptr,
+                  const int8_t* symbols,
+                  uint8_t*      data_decoded); /*!< \brief Pointer to the decoder function (8-bit version). */
+  void (*free)(void*);                    /*!< \brief Pointer to a "destructor". */
+} srslte_polar_decoder_t;
+
+/*!
+ * Initializes all the polar decoder variables according to the selected decoding
+ * algorithm and the given code size.
+ * \param[out] q A pointer to the initialized polar decoder.
+ * \param[in] polar_decoder_type Polar decoder type.
+ * \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the decoder input/output vector.
+ * \param[in] frozen_set A pointer to the frozen-bit set (array of indices).
+ * \param[in] frozen_set_size Number of frozen bits.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_polar_decoder_init(srslte_polar_decoder_t*     q,
+                                         srslte_polar_decoder_type_t polar_decoder_type,
+                                         uint16_t                    code_size_log,
+                                         uint16_t*                   frozen_set,
+                                         uint16_t                    frozen_set_size);
+
+/*!
+ * The polar decoder "destructor": it frees all the resources.
+ * \param[in, out] q A pointer to the dismantled decoder.
+ */
+SRSLTE_API void srslte_polar_decoder_free(srslte_polar_decoder_t* q);
+
+/*!
+ * Decodes the input (float) codeword with the specified polar decoder.
+ * \param[in] q A pointer to the desired polar decoder.
+ * \param[in] input_llr The decoder LLR input vector.
+ * \param[out] data_decoded The decoder output vector.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_polar_decoder_decode_f(srslte_polar_decoder_t* q, const float* input_llr, uint8_t* data_decoded);
+
+/*!
+ * Decodes the input (int16_t) codeword with the specified polar decoder.
+ * \param[in] q A pointer to the desired polar decoder.
+ * \param[in] input_llr The decoder LLR input vector.
+ * \param[out] data_decoded The decoder output vector.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int
+srslte_polar_decoder_decode_s(srslte_polar_decoder_t* q, const int16_t* input_llr, uint8_t* data_decoded);
+
+/*!
+ * Decodes the input (int8_t) codeword with the specified polar decoder.
+ * \param[in] q A pointer to the desired polar decoder.
+ * \param[in] input_llr The decoder LLR input vector.
+ * \param[out] data_decoded The decoder output vector.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_polar_decoder_decode_c(srslte_polar_decoder_t* q, const int8_t* input_llr, uint8_t* data_decoded);
+
+#endif // SRSLTE_POLARDECODER_H
--- a/lib/include/srslte/phy/fec/polar/polar_encoder.h
+++ b/lib/include/srslte/phy/fec/polar/polar_encoder.h
@ -0,0 +1,90 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_encoder.h
+ * \brief Declaration of the polar encoder.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ * 5G uses a polar encoder with maximum sizes \f$2^n\f$ with \f$n = 5,...,10\f$.
+ *
+ */
+
+#ifndef SRSLTE_POLAR_ENCODER_H
+#define SRSLTE_POLAR_ENCODER_H
+
+#include "srslte/config.h"
+#include <stdint.h>
+
+/*!
+ * Lists the different types of polar encoder.
+ */
+typedef enum SRSLTE_API {
+  SRSLTE_POLAR_ENCODER_PIPELINED = 0, /*!< \brief Non-optimized version of the pipelined polar encoder. */
+  SRSLTE_POLAR_ENCODER_AVX2      = 1, /*!< \brief SIMD implementation of the polar encoder. */
+} srslte_polar_encoder_type_t;
+
+/*!
+ * \brief Describes a polar encoder.
+ *
+ * The structure holds an untyped pointer to the actual encoder together with
+ * pointers to its encoding function and to its "destructor".
+ */
+typedef struct srslte_polar_encoder_t {
+  void* ptr; /*!< \brief Pointer to the actual polar encoder structure. */
+  int (*encode)(void*          ptr,
+                const uint8_t* input,
+                uint8_t*       output,
+                const uint8_t  code_size_log); /*!< \brief Pointer to the encoder function. */
+  void (*free)(void*);                        /*!< \brief Pointer to a "destructor". */
+} srslte_polar_encoder_t;
+
+/*!
+ * Initializes all the polar encoder variables according to the given code size.
+ * \param[out] q A pointer to the initialized polar encoder.
+ * \param[in] polar_encoder_type Polar encoder type.
+ * \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the encoder input/output vector.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int srslte_polar_encoder_init(srslte_polar_encoder_t*     q,
+                                         srslte_polar_encoder_type_t polar_encoder_type,
+                                         uint8_t                     code_size_log);
+
+/*!
+ * The polar encoder "destructor": it frees all the resources.
+ * \param[in, out] q A pointer to the dismantled encoder.
+ */
+SRSLTE_API void srslte_polar_encoder_free(srslte_polar_encoder_t* q);
+
+/*!
+ * Encodes the input vector into a codeword with the specified polar encoder.
+ * \param[in] q A pointer to the desired polar encoder.
+ * \param[in] input The encoder input vector.
+ * \param[out] output The encoder output vector.
+ * \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the encoder input/output vector.
+ *     It cannot be larger than the maximum code_size_log the encoder was set up for
+ *     (presumably the value passed to srslte_polar_encoder_init(); note that
+ *     srslte_polar_encoder_t itself exposes no code_size_log member).
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+SRSLTE_API int
+srslte_polar_encoder_encode(srslte_polar_encoder_t* q, const uint8_t* input, uint8_t* output, uint8_t code_size_log);
+
+#endif // SRSLTE_POLAR_ENCODER_H
--- a/lib/include/srslte/phy/fec/polar/test/polar_sets.h
+++ b/lib/include/srslte/phy/fec/polar/test/polar_sets.h
@ -0,0 +1,80 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_sets.h
+ * \brief Declaration of the auxiliary function that reads polar index sets from a file.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ * The message and parity check sets provided by this function are needed by
+ * the subchannel allocation block.
+ * The frozen bit set provided by this function is used by the polar decoder.
+ *
+ */
+
+#ifndef SRSLTE_POLAR_SETS_H
+#define SRSLTE_POLAR_SETS_H
+
+#include "srslte/config.h"
+#include <stdint.h>
+
+/*!
+ * \brief Describes a polar set.
+ */
+typedef struct {
+  uint16_t  message_set_size; /*!< \brief Number of message bits (data and CRC). */
+  uint16_t  info_set_size;    /*!< \brief Number of message bits plus parity bits. */
+  uint16_t  parity_set_size;  /*!< \brief Number of parity check bits. */
+  uint16_t  frozen_set_size;  /*!< \brief Number of frozen bits. */
+  uint16_t* message_set; /*!< \brief Pointer to the indices of the encoder input vector containing data and CRC bits. */
+  uint16_t* info_set;    /*!< \brief Pointer to the indices of the encoder input vector containing data, CRC and
+                       parity check bits.*/
+  uint16_t* parity_set;  /*!< \brief Pointer to the indices of the encoder input vector containing the parity bits.*/
+  uint16_t* frozen_set;  /*!< \brief Pointer to the indices of the encoder input vector containing frozen bits.*/
+} srslte_polar_sets_t;
+
+/*!
+ * Initializes the different index sets as needed by the subchannel allocation block and/or by the polar decoder.
+ * \param[out] c A pointer to the initialized polar set.
+ * \param[in] message_size Number of data + CRC bits.
+ * \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the decoder input/output vector.
+ * \param[in] rate_matching_size Number of bits of the codeword after rate matching.
+ * \param[in] parity_set_size Number of parity bits.
+ * \param[in] nWmPC Number of parity bits of minimum weight type.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int srslte_polar_code_sets_read(srslte_polar_sets_t* c,
+                                uint16_t             message_size,
+                                uint8_t              code_size_log,
+                                uint16_t             rate_matching_size,
+                                uint8_t              parity_set_size,
+                                uint8_t              nWmPC);
+
+/*!
+ * The polar set "destructor": it frees all the resources.
+ * \param[in] c A pointer to the dismantled polar set.
+ */
+void srslte_polar_code_sets_free(srslte_polar_sets_t* c);
+
+#endif // SRSLTE_POLAR_SETS_H
--- a/lib/include/srslte/phy/fec/polar/test/subchannel_allocation.h
+++ b/lib/include/srslte/phy/fec/polar/test/subchannel_allocation.h
@ -0,0 +1,86 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file subchannel_allocation.h
+ * \brief Declaration of the auxiliary subchannel allocation block.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ * These functions are neither fully functional nor tested to be 3GPP 5G compliant.
+ * Please, use only for testing purposes.
+ *
+ */
+
+#ifndef SRSLTE_SUB_CHANNEL_ALLOC_H
+#define SRSLTE_SUB_CHANNEL_ALLOC_H
+
+#include "srslte/config.h"
+#include "stdint.h"
+
+/*!
+ * \brief Describes a subchannel allocation.
+ */
+typedef struct SRSLTE_API srslte_subchn_alloc_t {
+  uint16_t  code_size;    /*!< \brief Number of bits, \f$N\f$, of the encoder input/output vector. */
+  uint16_t  message_size; /*!< \brief Number of bits, \f$K\f$, of data + CRC. */
+  uint16_t* message_set; /*!< \brief Pointer to the indices of the encoder input vector containing data and CRC bits. */
+} srslte_subchn_alloc_t;
+
+/*!
+ * Initializes a subchannel allocation instance.
+ * \param[out] c  A pointer to the  srslte_subchn_alloc_t structure
+ *     containing the parameters needed by the subchannel allocation function.
+ * \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the decoder input/output vector.
+ * \param[in] message_set_size Number of data + CRC bits.
+ * \param[in] message_set Pointer to the indices of the encoder input vector containing
+ * data and CRC bits.
+ */
+void srslte_subchannel_allocation_init(srslte_subchn_alloc_t* c,
+                                       uint8_t                code_size_log,
+                                       uint16_t               message_set_size,
+                                       uint16_t*              message_set);
+
+/*!
+ * Allocates message bits (data + CRC) to the encoder input bit vector at the
+ * positions specified in \a c->message_set and zeros to the remaining
+ * positions. This function is not fully 5G compliant as parity bit positions
+ * are set to 0.
+ * \param[in] c A pointer to the srslte_subchn_alloc_t structure containing
+ *     the parameters needed by the subchannel allocation function.
+ * \param[in] message A pointer to the vector with the message bits (data and CRC).
+ * \param[out] input_encoder A pointer to the encoder input bit vector.
+ */
+void srslte_subchannel_allocation(const srslte_subchn_alloc_t* c, const uint8_t* message, uint8_t* input_encoder);
+
+/*!
+ * Extracts message bits (data + CRC) from the decoder output vector
+ * according to the positions specified in  \a c->message_set.
+ * \param[in] c A pointer to the srslte_subchn_alloc_t structure containing the
+ *     parameters needed by the subchannel allocation function.
+ * \param[in] output_decoder A pointer to the decoder output bit vector.
+ * \param[out] message A pointer to the vector with the message bits (data and CRC).
+ */
+void srslte_subchannel_deallocation(const srslte_subchn_alloc_t* c, const uint8_t* output_decoder, uint8_t* message);
+
+#endif // SRSLTE_SUB_CHANNEL_ALLOC_H
--- a/lib/include/srslte/phy/utils/convolution.h
+++ b/lib/include/srslte/phy/utils/convolution.h
@ -32,6 +32,7 @@

 #include "srslte/config.h"
 #include "srslte/phy/dft/dft.h"
+#include <stdint.h>

 typedef struct SRSLTE_API {
  cf_t*             input_fft;
@ -72,12 +73,12 @@ SRSLTE_API uint32_t srslte_conv_fft_cc_run_opt(srslte_conv_fft_cc_t* q,
                                               cf_t*                 output);

 SRSLTE_API uint32_t
-           srslte_conv_cc(const cf_t* input, const cf_t* filter, cf_t* output, uint32_t input_len, uint32_t filter_len);
+srslte_conv_cc(const cf_t* input, const cf_t* filter, cf_t* output, uint32_t input_len, uint32_t filter_len);

 SRSLTE_API uint32_t
-           srslte_conv_same_cf(cf_t* input, float* filter, cf_t* output, uint32_t input_len, uint32_t filter_len);
+srslte_conv_same_cf(cf_t* input, float* filter, cf_t* output, uint32_t input_len, uint32_t filter_len);

 SRSLTE_API uint32_t
-           srslte_conv_same_cc(cf_t* input, cf_t* filter, cf_t* output, uint32_t input_len, uint32_t filter_len);
+srslte_conv_same_cc(cf_t* input, cf_t* filter, cf_t* output, uint32_t input_len, uint32_t filter_len);

 #endif // SRSLTE_CONVOLUTION_H
--- a/lib/include/srslte/phy/utils/vector.h
+++ b/lib/include/srslte/phy/utils/vector.h
@ -72,8 +72,14 @@ static inline float srslte_convert_dB_to_power(float v)
  return powf(10.0f, v / 10.0f);
 }

-/*logical operations */
-SRSLTE_API void srslte_vec_xor_bbb(int8_t* x, int8_t* y, int8_t* z, const uint32_t len);
+/*!
+ * Computes \f$ z = x \oplus y \f$ elementwise.
+ * \param[in] x A pointer to a vector of uint8_t with 0's and 1's.
+ * \param[in] y A pointer to a vector of uint8_t with 0's and 1's.
+ * \param[out] z A pointer to a vector of uint8_t with 0's and 1's.
+ * \param[in] len Length of vectors x, y and z.
+ */
+SRSLTE_API void srslte_vec_xor_bbb(const uint8_t* x, const uint8_t* y, uint8_t* z, const uint32_t len);

 /** Return the sum of all the elements */
 SRSLTE_API float srslte_vec_acc_ff(const float* x, const uint32_t len);
@ -194,6 +200,32 @@ SRSLTE_API uint32_t srslte_vec_max_fi(const float* x, const uint32_t len);
 SRSLTE_API uint32_t srslte_vec_max_abs_fi(const float* x, const uint32_t len);
 SRSLTE_API uint32_t srslte_vec_max_abs_ci(const cf_t* x, const uint32_t len);

+/*!
+ * Quantizes an array of floats into an array of 16-bit signed integers. It is
+ * ensured that *-inf* and *inf* map to -32767 and 32767, respectively (useful
+ * when quantizing on less than 16 bits).
+ * \param[in]  in     Real values to be quantized.
+ * \param[out] out    Quantized values.
+ * \param[in]  gain   Quantization gain, controls the output range.
+ * \param[in]  offset Quantization offset, for asymmetric quantization.
+ * \param[in]  clip   Saturation value.
+ * \param[in]  len    Number of values to be quantized.
+ */
+SRSLTE_API void srslte_vec_quant_fs(const float* in, int16_t* out, float gain, float offset, float clip, uint32_t len);
+
+/*!
+ * Quantizes an array of floats into an array of 8-bit signed integers. It is
+ * ensured that *-inf* and *inf* map to -127 and 127, respectively (useful
+ * when quantizing on less than 8 bits).
+ * \param[in]  in     Real values to be quantized.
+ * \param[out] out    Quantized values.
+ * \param[in]  gain   Quantization gain, controls the output range.
+ * \param[in]  offset Quantization offset, for asymmetric quantization.
+ * \param[in]  clip   Saturation value.
+ * \param[in]  len    Number of values to be quantized.
+ */
+SRSLTE_API void srslte_vec_quant_fc(const float* in, int8_t* out, float gain, float offset, float clip, uint32_t len);
+
 /* quantify vector of floats or int16 and convert to uint8_t */
 SRSLTE_API void srslte_vec_quant_fuc(const float*   in,
                                     uint8_t*       out,
--- a/lib/include/srslte/phy/utils/vector_simd.h
+++ b/lib/include/srslte/phy/utils/vector_simd.h
@ -31,7 +31,7 @@ extern "C" {
 #include <stdio.h>

 /*SIMD Logical operations*/
-SRSLTE_API void srslte_vec_xor_bbb_simd(const int8_t* x, const int8_t* y, int8_t* z, int len);
+SRSLTE_API void srslte_vec_xor_bbb_simd(const uint8_t* x, const uint8_t* y, uint8_t* z, int len);

 /* SIMD Basic vector math */
 SRSLTE_API void srslte_vec_sum_sss_simd(const int16_t* x, const int16_t* y, int16_t* z, int len);
--- a/lib/src/phy/fec/CMakeLists.txt
+++ b/lib/src/phy/fec/CMakeLists.txt
@ -22,8 +22,10 @@ set(FEC_SOURCES
        crc.c
        softbuffer.c)

-add_subdirectory(test)
 add_subdirectory(convolutional)
+add_subdirectory(ldpc)
+add_subdirectory(polar)
+add_subdirectory(test)
 add_subdirectory(turbo)

 add_library(srslte_fec OBJECT ${FEC_SOURCES})
--- a/lib/src/phy/fec/convolutional/CMakeLists.txt
+++ b/lib/src/phy/fec/convolutional/CMakeLists.txt
@ -29,6 +29,4 @@ set(FEC_SOURCES ${FEC_SOURCES}
        convolutional/viterbi37_sse.c
        PARENT_SCOPE)

-message(STATUS "aaaaa ${FEC_SOURCES}")
-
 add_subdirectory(test)
--- a/lib/src/phy/fec/ldpc/CMakeLists.txt
+++ b/lib/src/phy/fec/ldpc/CMakeLists.txt
@ -0,0 +1,39 @@
+#
+# Copyright 2013-2020 Software Radio Systems Limited
+#
+# This file is part of srsLTE
+#
+# srsLTE is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# srsLTE is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# A copy of the GNU Affero General Public License can be found in
+# the LICENSE file in the top-level directory of this distribution
+# and at http://www.gnu.org/licenses/.
+#
+
+set(FEC_SOURCES ${FEC_SOURCES}
+        ldpc/base_graph.c
+        ldpc/ldpc_dec_f.c
+        ldpc/ldpc_dec_s.c
+        ldpc/ldpc_dec_c.c
+        ldpc/ldpc_dec_c_flood.c
+        ldpc/ldpc_dec_c_avx2.c
+        ldpc/ldpc_dec_c_avx2long.c
+        ldpc/ldpc_dec_c_avx2_flood.c
+        ldpc/ldpc_dec_c_avx2long_flood.c
+        ldpc/ldpc_decoder.c
+        ldpc/ldpc_enc_c.c
+        ldpc/ldpc_enc_avx2.c
+        ldpc/ldpc_enc_avx2long.c
+        ldpc/ldpc_encoder.c
+        ldpc/ldpc_rm.c
+        PARENT_SCOPE)
+
+add_subdirectory(test)
--- a/lib/src/phy/fec/ldpc/base_graph.c
+++ b/lib/src/phy/fec/ldpc/base_graph.c
--- a/lib/src/phy/fec/ldpc/ldpc_avx2_consts.h
+++ b/lib/src/phy/fec/ldpc/ldpc_avx2_consts.h
@ -0,0 +1,170 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_avx2_consts.h
+ * \brief Declaration of constants and masks for the AVX2-based implementation
+ *   of the LDPC encoder and decoder.
+ *
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef LDPC_AVX2_CONSTS_H
+#define LDPC_AVX2_CONSTS_H
+
+#include <immintrin.h>
+
+#include "../utils_avx2.h"
+
+/*!
+ * \brief Packed 8-bit zeros.
+ */
+static const __m256i zero_epi8 = {0, 0, 0, 0};
+
+/*!
+ * \brief Packed 8-bit ones.
+ */
+static const __m256i one_epi8 = {0x0101010101010101LL,
+                                 0x0101010101010101LL,
+                                 0x0101010101010101LL,
+                                 0x0101010101010101LL};
+
+/*!
+ * \brief Packed 8-bit 127 (that is \f$2^7 - 1\f$).
+ */
+static const __m256i infty8_epi8 = {0x7F7F7F7F7F7F7F7FLL,
+                                    0x7F7F7F7F7F7F7F7FLL,
+                                    0x7F7F7F7F7F7F7F7FLL,
+                                    0x7F7F7F7F7F7F7F7FLL};
+/*!
+ * \brief Packed 8-bit --127 (that is \f$-2^7 + 1\f$).
+ */
+static const __m256i neg_infty8_epi8 = {0x8181818181818181LL,  // NOLINT
+                                        0x8181818181818181LL,  // NOLINT
+                                        0x8181818181818181LL,  // NOLINT
+                                        0x8181818181818181LL}; // NOLINT
+
+/*!
+ * \brief Packed 8-bit 63 (that is \f$2^6 - 1\f$).
+ */
+static const __m256i infty7_epi8 = {0x3F3F3F3F3F3F3F3FLL,
+                                    0x3F3F3F3F3F3F3F3FLL,
+                                    0x3F3F3F3F3F3F3F3FLL,
+                                    0x3F3F3F3F3F3F3F3FLL};
+/*!
+ * \brief Packed 8-bit --63 (that is \f$-2^6 + 1\f$).
+ */
+static const __m256i neg_infty7_epi8 = {0xC1C1C1C1C1C1C1C1LL,  // NOLINT
+                                        0xC1C1C1C1C1C1C1C1LL,  // NOLINT
+                                        0xC1C1C1C1C1C1C1C1LL,  // NOLINT
+                                        0xC1C1C1C1C1C1C1C1LL}; // NOLINT
+
+/*!
+ * \brief Identifies even-indexed 8-bit packets.
+ */
+static const __m256i mask_even_epi8 = {0x00FF00FF00FF00FF,
+                                       0x00FF00FF00FF00FF,
+                                       0x00FF00FF00FF00FF,
+                                       0x00FF00FF00FF00FF}; // NOLINT
+
+/*!
+ * \brief Mask needed for node rotation: mask_least_epi8[i] marks the bits
+ * corresponding to the \b i least significant chars.
+ */
+static const __m256i mask_least_epi8[SRSLTE_AVX2_B_SIZE + 1] = {
+    {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0x00000000000000FF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0x000000000000FFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0x0000000000FFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0x00000000FFFFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0x000000FFFFFFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0x0000FFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0x00FFFFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0x00000000000000FF, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0x000000000000FFFF, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0x0000000000FFFFFF, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0x000000FFFFFFFFFF, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0x0000FFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0x00FFFFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000000000000000, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x00000000000000FF, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x000000000000FFFF, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000000000FFFFFF, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x000000FFFFFFFFFF, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000FFFFFFFFFFFF, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x00FFFFFFFFFFFFFF, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000000000000000},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x00000000000000FF},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x000000000000FFFF},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000000000FFFFFF},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x000000FFFFFFFFFF},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x0000FFFFFFFFFFFF},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x00FFFFFFFFFFFFFF},  // NOLINT
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF}}; // NOLINT
+
+/*!
+ * \brief Mask needed for node rotation: mask_most_epi8[i] marks the bits
+ * corresponding to the SRSLTE_AVX2_B_SIZE - \b i most significant chars.
+ */
+static const __m256i mask_most_epi8[SRSLTE_AVX2_B_SIZE + 1] = {
+    {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0xFFFFFFFFFFFFFF00, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0xFFFFFFFFFFFF0000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0xFFFFFFFFFF000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0xFFFFFF0000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0xFFFF000000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0xFF00000000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0xFFFFFFFFFFFFFF00, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0xFFFFFFFFFFFF0000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0xFFFFFFFFFF000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0xFFFFFF0000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0xFFFF000000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0xFF00000000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFFFFFF00, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFFFF0000, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFF000000, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0xFFFFFF0000000000, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0xFFFF000000000000, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0xFF00000000000000, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFFFFFFFF},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFFFFFF00},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFFFF0000},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFFFFFFFF000000},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFFFFFF00000000},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFFFF0000000000},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFFFF000000000000},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFF00000000000000},  // NOLINT
+    {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000}}; // NOLINT
+
+#endif // LDPC_AVX2_CONSTS_H
--- a/lib/src/phy/fec/ldpc/ldpc_dec_all.h
+++ b/lib/src/phy/fec/ldpc/ldpc_dec_all.h
@ -0,0 +1,602 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_all.h
+ * \brief Declaration of the LDPC decoder inner functions.
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef SRSLTE_LDPCDEC_ALL_H
+#define SRSLTE_LDPCDEC_ALL_H
+
+#include <srslte/phy/fec/ldpc/base_graph.h>
+#include <stdint.h>
+
+/*!
+ * Creates the registers used by the float-based implementation of the LDPC decoder.
+ * \param[in] bgN          Codeword length.
+ * \param[in] bgM          Number of check nodes.
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
+ * \return A pointer to the created registers (an ldpc_regs structure).
+ */
+void* create_ldpc_dec_f(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
+
+/*!
+ * Destroys the inner registers of the float-based LDPC decoder.
+ * \param[in] p A pointer to the decoder registers to be destroyed (an ldpc_regs structure).
+ */
+void delete_ldpc_dec_f(void* p);
+
+/*!
+ * Initializes the inner registers of the float-based LDPC decoder before
+ * carrying out the actual decoding.
+ * \param[in,out] p    A pointer to the decoder registers (an ldpc_regs structure).
+ * \param[in]     llrs A pointer to the array of (float-valued) LLR values from the channel.
+ * \param[in]     ls   The lifting size.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int init_ldpc_dec_f(void* p, const float* llrs, uint16_t ls);
+
+/*!
+ * Updates the messages from variable nodes to check nodes (float version).
+ * \param[in,out] p       A pointer to the decoder registers (an ldpc_regs structure).
+ * \param[in]     i_layer The index of the layer whose variable-to-check messages are updated.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_var_to_check_f(void* p, int i_layer);
+
+/*!
+ * Updates the messages from check nodes to variable nodes (float version).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs structure).
+ * \param[in]     i_layer  The index of the layer whose check-to-variable messages are updated.
+ * \param[in]     this_pcm A pointer to the row of the parity check matrix (i.e. base
+ *                         graph) corresponding to the selected layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_check_to_var_f(void*           p,
+                               int             i_layer,
+                               const uint16_t* this_pcm,
+                               const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Updates the current estimate of the (soft) bits of the codeword (float version).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs structure).
+ * \param[in]     i_layer  The index of the current layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_soft_bits_f(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Returns the decoded message (hard bits) from the current soft bits (float-based decoder).
+ * \param[in]  p       A pointer to the decoder registers (an ldpc_regs structure).
+ * \param[out] message A pointer to the array of uint8_t where the decoded message is written.
+ * \param[in]  liftK   The length of the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int extract_ldpc_message_f(void* p, uint8_t* message, uint16_t liftK);
+
+/*!
+ * Creates the registers used by the 16-bit integer-based implementation of the LDPC decoder.
+ * \param[in] bgN          Codeword length.
+ * \param[in] bgM          Number of check nodes.
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
+ * \return A pointer to the created registers (an ldpc_regs_s structure).
+ */
+void* create_ldpc_dec_s(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
+
+/*!
+ * Destroys the inner registers of the 16-bit integer-based LDPC decoder.
+ * \param[in] p A pointer to the decoder registers to be destroyed (an ldpc_regs_s structure).
+ */
+void delete_ldpc_dec_s(void* p);
+
+/*!
+ * Initializes the inner registers of the 16-bit integer-based LDPC decoder before
+ * carrying out the actual decoding.
+ * \param[in,out] p    A pointer to the decoder registers (an ldpc_regs_s structure).
+ * \param[in]     llrs A pointer to the array of 16-bit integer LLR values from the channel.
+ * \param[in]     ls   The lifting size.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int init_ldpc_dec_s(void* p, const int16_t* llrs, uint16_t ls);
+
+/*!
+ * Updates the messages from variable nodes to check nodes (16-bit version).
+ * \param[in,out] p       A pointer to the decoder registers (an ldpc_regs_s structure).
+ * \param[in]     i_layer The index of the layer whose variable-to-check messages are updated.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_var_to_check_s(void* p, int i_layer);
+
+/*!
+ * Updates the messages from check nodes to variable nodes (16-bit version).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_s structure).
+ * \param[in]     i_layer  The index of the layer whose check-to-variable messages are updated.
+ * \param[in]     this_pcm A pointer to the row of the parity check matrix (i.e. base
+ *                         graph) corresponding to the selected layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_check_to_var_s(void*           p,
+                               int             i_layer,
+                               const uint16_t* this_pcm,
+                               const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Updates the current estimate of the (soft) bits of the codeword (16-bit version).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_s structure).
+ * \param[in]     i_layer  The index of the current layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_soft_bits_s(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Returns the decoded message (hard bits) from the current soft bits (16-bit integer-based decoder).
+ * \param[in]  p       A pointer to the decoder registers (an ldpc_regs_s structure).
+ * \param[out] message A pointer to the array of uint8_t where the decoded message is written.
+ * \param[in]  liftK   The length of the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int extract_ldpc_message_s(void* p, uint8_t* message, uint16_t liftK);
+
+/*!
+ * Creates the registers used by the 8-bit integer-based implementation of the LDPC decoder.
+ * \param[in] bgN          Codeword length.
+ * \param[in] bgM          Number of check nodes.
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
+ * \return A pointer to the created registers (an ldpc_regs_c structure).
+ */
+void* create_ldpc_dec_c(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
+
+/*!
+ * Destroys the inner registers of the 8-bit integer-based LDPC decoder.
+ * \param[in] p A pointer to the decoder registers to be destroyed (an ldpc_regs_c structure).
+ */
+void delete_ldpc_dec_c(void* p);
+
+/*!
+ * Initializes the inner registers of the 8-bit integer-based LDPC decoder before
+ * carrying out the actual decoding.
+ * \param[in,out] p    A pointer to the decoder registers (an ldpc_regs_c structure).
+ * \param[in]     llrs A pointer to the array of 8-bit integer LLR values from the channel.
+ * \param[in]     ls   The lifting size.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int init_ldpc_dec_c(void* p, const int8_t* llrs, uint16_t ls);
+
+/*!
+ * Updates the messages from variable nodes to check nodes (8-bit version).
+ * \param[in,out] p       A pointer to the decoder registers (an ldpc_regs_c structure).
+ * \param[in]     i_layer The index of the layer whose variable-to-check messages are updated.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_var_to_check_c(void* p, int i_layer);
+
+/*!
+ * Updates the messages from check nodes to variable nodes (8-bit version).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c structure).
+ * \param[in]     i_layer  The index of the layer whose check-to-variable messages are updated.
+ * \param[in]     this_pcm A pointer to the row of the parity check matrix (i.e. base
+ *                         graph) corresponding to the selected layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_check_to_var_c(void*           p,
+                               int             i_layer,
+                               const uint16_t* this_pcm,
+                               const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Updates the current estimate of the (soft) bits of the codeword (8-bit version).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c structure).
+ * \param[in]     i_layer  The index of the current layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_soft_bits_c(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Returns the decoded message (hard bits) from the current soft bits (8-bit integer-based decoder).
+ * \param[in]  p       A pointer to the decoder registers (an ldpc_regs_c structure).
+ * \param[out] message A pointer to the array of uint8_t where the decoded message is written.
+ * \param[in]  liftK   The length of the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int extract_ldpc_message_c(void* p, uint8_t* message, uint16_t liftK);
+
+/*!
+ * Creates the registers used by the 8-bit integer-based implementation of the LDPC decoder (flooded scheduling).
+ * \param[in] bgN          Codeword length.
+ * \param[in] bgM          Number of check nodes.
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
+ * \return A pointer to the created registers (an ldpc_regs_c_flood structure).
+ */
+void* create_ldpc_dec_c_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
+
+/*!
+ * Destroys the inner registers of the 8-bit integer-based LDPC decoder (flooded scheduling).
+ * \param[in] p A pointer to the decoder registers to be destroyed (an ldpc_regs_c_flood structure).
+ */
+void delete_ldpc_dec_c_flood(void* p);
+
+/*!
+ * Initializes the inner registers of the 8-bit integer-based LDPC decoder (flooded scheduling) before
+ * carrying out the actual decoding.
+ * \param[in,out] p    A pointer to the decoder registers (an ldpc_regs_c_flood structure).
+ * \param[in]     llrs A pointer to the array of 8-bit integer LLR values from the channel.
+ * \param[in]     ls   The lifting size.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int init_ldpc_dec_c_flood(void* p, const int8_t* llrs, uint16_t ls);
+
+/*!
+ * Updates the messages from variable nodes to check nodes (8-bit version, flooded scheduling).
+ * \param[in,out] p       A pointer to the decoder registers (an ldpc_regs_c_flood structure).
+ * \param[in]     i_layer The index of the layer whose variable-to-check messages are updated.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_var_to_check_c_flood(void* p, int i_layer);
+
+/*!
+ * Updates the messages from check nodes to variable nodes (8-bit version, flooded scheduling).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c_flood structure).
+ * \param[in]     i_layer  The index of the layer whose check-to-variable messages are updated.
+ * \param[in]     this_pcm A pointer to the row of the parity check matrix (i.e. base
+ *                         graph) corresponding to the selected layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_check_to_var_c_flood(void*           p,
+                                     int             i_layer,
+                                     const uint16_t* this_pcm,
+                                     const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Updates the current estimate of the (soft) bits of the codeword (8-bit version, flooded scheduling).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c_flood structure).
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to each layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ * \note Unlike update_ldpc_soft_bits_c(), this function takes no layer index.
+ */
+int update_ldpc_soft_bits_c_flood(void* p, const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Returns the decoded message (hard bits) from the current soft bits
+ * (8-bit integer-based decoder, flooded scheduling).
+ * \param[in]  p       A pointer to the decoder registers (an ldpc_regs_c_flood structure).
+ * \param[out] message A pointer to the array of uint8_t where the decoded message is written.
+ * \param[in]  liftK   The length of the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int extract_ldpc_message_c_flood(void* p, uint8_t* message, uint16_t liftK);
+
+/*!
+ * Creates the registers used by the optimized 8-bit integer-based implementation of the LDPC decoder
+ * (LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in] bgN          Codeword length.
+ * \param[in] bgM          Number of check nodes.
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
+ * \return A pointer to the created registers (an ldpc_regs_c_avx2 structure).
+ */
+void* create_ldpc_dec_c_avx2(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
+
+/*!
+ * Destroys the inner registers of the optimized 8-bit integer-based LDPC decoder (LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in] p A pointer to the decoder registers to be destroyed (an ldpc_regs_c_avx2 structure).
+ */
+void delete_ldpc_dec_c_avx2(void* p);
+
+/*!
+ * Initializes the inner registers of the optimized 8-bit integer-based LDPC decoder before
+ * carrying out the actual decoding (LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p    A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
+ * \param[in]     llrs A pointer to the array of 8-bit integer LLR values from the channel.
+ * \param[in]     ls   The lifting size.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int init_ldpc_dec_c_avx2(void* p, const int8_t* llrs, uint16_t ls);
+
+/*!
+ * Updates the messages from variable nodes to check nodes (optimized 8-bit version, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p       A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
+ * \param[in]     i_layer The index of the layer whose variable-to-check messages are updated.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_var_to_check_c_avx2(void* p, int i_layer);
+
+/*!
+ * Updates the messages from check nodes to variable nodes (optimized 8-bit version, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
+ * \param[in]     i_layer  The index of the layer whose check-to-variable messages are updated.
+ * \param[in]     this_pcm A pointer to the row of the parity check matrix (i.e. base
+ *                         graph) corresponding to the selected layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_check_to_var_c_avx2(void*           p,
+                                    int             i_layer,
+                                    const uint16_t* this_pcm,
+                                    const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Updates the current estimate of the (soft) bits of the codeword
+ * (optimized 8-bit version, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
+ * \param[in]     i_layer  The index of the current layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_soft_bits_c_avx2(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Returns the decoded message (hard bits) from the current soft bits
+ * (optimized 8-bit version, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in]  p       A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
+ * \param[out] message A pointer to the array of uint8_t where the decoded message is written.
+ * \param[in]  liftK   The length of the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int extract_ldpc_message_c_avx2(void* p, uint8_t* message, uint16_t liftK);
+
+/*!
+ * Creates the registers used by the optimized 8-bit integer-based implementation of the LDPC decoder
+ * (LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in] bgN          Codeword length.
+ * \param[in] bgM          Number of check nodes.
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
+ * \return A pointer to the created registers (an ldpc_regs_c_avx2long structure).
+ */
+void* create_ldpc_dec_c_avx2long(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
+
+/*!
+ * Destroys the inner registers of the optimized 8-bit integer-based LDPC decoder (LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in] p A pointer to the decoder registers to be destroyed (an ldpc_regs_c_avx2long structure).
+ */
+void delete_ldpc_dec_c_avx2long(void* p);
+
+/*!
+ * Initializes the inner registers of the optimized 8-bit integer-based LDPC decoder before
+ * carrying out the actual decoding (LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p    A pointer to the decoder registers (an ldpc_regs_c_avx2long structure).
+ * \param[in]     llrs A pointer to the array of 8-bit integer LLR values from the channel.
+ * \param[in]     ls   The lifting size.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int init_ldpc_dec_c_avx2long(void* p, const int8_t* llrs, uint16_t ls);
+
+/*!
+ * Updates the messages from variable nodes to check nodes (optimized 8-bit version, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p       A pointer to the decoder registers (an ldpc_regs_c_avx2long structure).
+ * \param[in]     i_layer The index of the layer whose variable-to-check messages are updated.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_var_to_check_c_avx2long(void* p, int i_layer);
+
+/*!
+ * Updates the messages from check nodes to variable nodes (optimized 8-bit version, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c_avx2long structure).
+ * \param[in]     i_layer  The index of the layer whose check-to-variable messages are updated.
+ * \param[in]     this_pcm A pointer to the row of the parity check matrix (i.e. base
+ *                         graph) corresponding to the selected layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_check_to_var_c_avx2long(void*           p,
+                                        int             i_layer,
+                                        const uint16_t* this_pcm,
+                                        const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Updates the current estimate of the (soft) bits of the codeword
+ * (optimized 8-bit version, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c_avx2long structure).
+ * \param[in]     i_layer  The index of the current layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_soft_bits_c_avx2long(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Returns the decoded message (hard bits) from the current soft bits
+ * (optimized 8-bit version, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in]  p       A pointer to the decoder registers (an ldpc_regs_c_avx2long structure).
+ * \param[out] message A pointer to the array of uint8_t where the decoded message is written.
+ * \param[in]  liftK   The length of the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int extract_ldpc_message_c_avx2long(void* p, uint8_t* message, uint16_t liftK);
+
+/*!
+ * Creates the registers used by the optimized 8-bit integer-based implementation of the LDPC decoder
+ * (flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in] bgN          Codeword length.
+ * \param[in] bgM          Number of check nodes.
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
+ * \return A pointer to the created registers (an ldpc_regs_c_avx2_flood structure).
+ */
+void* create_ldpc_dec_c_avx2_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
+
+/*!
+ * Destroys the inner registers of the optimized 8-bit integer-based LDPC decoder
+ * (flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in] p A pointer to the decoder registers to be destroyed (an ldpc_regs_c_avx2_flood structure).
+ */
+void delete_ldpc_dec_c_avx2_flood(void* p);
+
+/*!
+ * Initializes the inner registers of the optimized 8-bit integer-based LDPC decoder before
+ * carrying out the actual decoding (flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p    A pointer to the decoder registers (an ldpc_regs_c_avx2_flood structure).
+ * \param[in]     llrs A pointer to the array of 8-bit integer LLR values from the channel.
+ * \param[in]     ls   The lifting size.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int init_ldpc_dec_c_avx2_flood(void* p, const int8_t* llrs, uint16_t ls);
+
+/*!
+ * Updates the messages from variable nodes to check nodes
+ * (optimized 8-bit version, flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p       A pointer to the decoder registers (an ldpc_regs_c_avx2_flood structure).
+ * \param[in]     i_layer The index of the layer whose variable-to-check messages are updated.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_var_to_check_c_avx2_flood(void* p, int i_layer);
+
+/*!
+ * Updates the messages from check nodes to variable nodes
+ * (optimized 8-bit version, flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c_avx2_flood structure).
+ * \param[in]     i_layer  The index of the layer whose check-to-variable messages are updated.
+ * \param[in]     this_pcm A pointer to the row of the parity check matrix (i.e. base
+ *                         graph) corresponding to the selected layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_check_to_var_c_avx2_flood(void*           p,
+                                          int             i_layer,
+                                          const uint16_t* this_pcm,
+                                          const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Updates the current estimate of the (soft) bits of the codeword
+ * (optimized 8-bit version, flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c_avx2_flood structure).
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to each layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ * \note Unlike update_ldpc_soft_bits_c_avx2(), this function takes no layer index.
+ */
+int update_ldpc_soft_bits_c_avx2_flood(void* p, const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Returns the decoded message (hard bits) from the current soft bits
+ * (optimized 8-bit version, flooded scheduling, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in]  p       A pointer to the decoder registers (an ldpc_regs_c_avx2_flood structure).
+ * \param[out] message A pointer to the array of uint8_t where the decoded message is written.
+ * \param[in]  liftK   The length of the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int extract_ldpc_message_c_avx2_flood(void* p, uint8_t* message, uint16_t liftK);
+
+/*!
+ * Creates the registers used by the optimized 8-bit integer-based implementation of the LDPC decoder
+ * (flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in] bgN          Codeword length.
+ * \param[in] bgM          Number of check nodes.
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
+ * \return A pointer to the created registers (an ldpc_regs_c_avx2long_flood structure).
+ */
+void* create_ldpc_dec_c_avx2long_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr);
+
+/*!
+ * Destroys the inner registers of the optimized 8-bit integer-based LDPC decoder
+ * (flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in] p A pointer to the decoder registers to be destroyed (an ldpc_regs_c_avx2long_flood structure).
+ */
+void delete_ldpc_dec_c_avx2long_flood(void* p);
+
+/*!
+ * Initializes the inner registers of the optimized 8-bit integer-based LDPC decoder before
+ * carrying out the actual decoding (flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p    A pointer to the decoder registers (an ldpc_regs_c_avx2long_flood structure).
+ * \param[in]     llrs A pointer to the array of 8-bit integer LLR values from the channel.
+ * \param[in]     ls   The lifting size.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int init_ldpc_dec_c_avx2long_flood(void* p, const int8_t* llrs, uint16_t ls);
+
+/*!
+ * Updates the messages from variable nodes to check nodes (optimized 8-bit version,
+ * flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p       A pointer to the decoder registers (an ldpc_regs_c_avx2long_flood structure).
+ * \param[in]     i_layer The index of the layer whose variable-to-check messages are updated.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_var_to_check_c_avx2long_flood(void* p, int i_layer);
+
+/*!
+ * Updates the messages from check nodes to variable nodes (optimized 8-bit version,
+ * flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c_avx2long_flood structure).
+ * \param[in]     i_layer  The index of the layer whose check-to-variable messages are updated.
+ * \param[in]     this_pcm A pointer to the row of the parity check matrix (i.e. base
+ *                         graph) corresponding to the selected layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int update_ldpc_check_to_var_c_avx2long_flood(void*           p,
+                                              int             i_layer,
+                                              const uint16_t* this_pcm,
+                                              const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Updates the current estimate of the (soft) bits of the codeword (optimized 8-bit version,
+ * flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c_avx2long_flood structure).
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to each layer.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ * \note Unlike update_ldpc_soft_bits_c_avx2long(), this function takes no layer index.
+ */
+int update_ldpc_soft_bits_c_avx2long_flood(void* p, const int8_t (*these_var_indices)[MAX_CNCT]);
+
+/*!
+ * Returns the decoded message (hard bits) from the current soft bits (optimized 8-bit version,
+ * flooded scheduling, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in]  p       A pointer to the decoder registers (an ldpc_regs_c_avx2long_flood structure).
+ * \param[out] message A pointer to the array of uint8_t where the decoded message is written.
+ * \param[in]  liftK   The length of the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int extract_ldpc_message_c_avx2long_flood(void* p, uint8_t* message, uint16_t liftK);
+
+#endif // SRSLTE_LDPCDEC_ALL_H
--- a/lib/src/phy/fec/ldpc/ldpc_dec_c.c
+++ b/lib/src/phy/fec/ldpc/ldpc_dec_c.c
@ -0,0 +1,363 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_c.c
+ * \brief Definition of the LDPC decoder inner functions working
+ *    with 8-bit integer-valued LLRs.
+ *
+ * Even if the inner representation is based on 8 bits, check-to-variable and
+ * variable-to-check messages are actually represented with 7 bits, the
+ * remaining bit is used to represent infinity.
+ *
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include "ldpc_dec_all.h"
+#include "srslte/phy/fec/ldpc/base_graph.h"
+#include "srslte/phy/utils/vector.h"
+
+#define F2I 100 /*!< \brief Used for float to int conversion---float f is stored as (int)(f*F2I) and applied as (value * stored) / F2I. */
+
+/*!
+ * \brief Maximum message magnitude.
+ * Messages use a 7-bit quantization, hence their magnitude is limited to
+ * \f$2^6 - 1 = 63\f$. Soft bits use the remaining bit to denote infinity.
+ */
+static const int8_t infinity7 = (1U << 6U) - 1;
+
+/*!
+ * \brief Inner registers for the LDPC decoder that works with 8-bit integer-valued LLRs.
+ */
+struct ldpc_regs_c {
+  int8_t* soft_bits;    /*!< \brief A-posteriori log-likelihood ratios (liftN entries). */
+  int8_t* check_to_var; /*!< \brief Check-to-variable messages (hrrN + ls entries per layer, bgM layers). */
+  int8_t* var_to_check; /*!< \brief Variable-to-check messages of the layer being processed (hrrN + ls entries). */
+  int8_t (*min_v2c)[2]; /*!< \brief Helper register: smallest ([0]) and second smallest ([1]) message magnitude per check node. */
+  int* min_v_index;     /*!< \brief Helper register: index of the message that attains the smallest magnitude. */
+  int* prod_v2c;        /*!< \brief Helper register: running product of the message signs (+1 or -1). */
+
+  uint16_t liftN;        /*!< \brief Total number of variable nodes (after lifting). */
+  uint16_t hrrN;         /*!< \brief Number of variable nodes in the high-rate region (after lifting). */
+  uint8_t  bgM;          /*!< \brief Number of check nodes (before lifting). */
+  uint16_t ls;           /*!< \brief Lifting size. */
+  int      scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm, stored multiplied by F2I. */
+};
+
+/*!
+ * Carries out the actual update of the variable-to-check messages. It basically
+ * consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
+ * \f$\lvert x[i] \rvert = 2^{7}-1 \f$ (our representation of infinity) to
+ * ensure it is properly propagated. Also, the subtraction is saturated between
+ * \f$- clip\f$ and \f$+ clip\f$.
+ * \param[in] x     Minuend: array we subtract from (in practice, the soft bits).
+ * \param[in] y     Subtrahend: array to be subtracted (in practice, the
+ *                  check-to-variable messages).
+ * \param[out] z    Resulting difference array(in practice, the updated
+ *                  variable-to-check messages).
+ * \param[in]  clip The saturation value.
+ * \param[in]  len  The length of the vectors.
+ */
+static void inner_var_to_check_c(const int8_t* x, const int8_t* y, int8_t* z, uint8_t clip, uint32_t len);
+
+/*!
+ * Creates the registers of the 8-bit LDPC decoder and allocates all of its buffers.
+ * \param[in] bgN          Number of variable nodes of the base graph.
+ * \param[in] bgM          Number of check nodes of the base graph.
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
+ * \return A pointer to the new registers, or NULL if any allocation fails (in
+ *         which case everything allocated up to that point is released).
+ */
+void* create_ldpc_dec_c(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
+{
+  uint8_t  bgK   = bgN - bgM;
+  uint16_t liftN = bgN * ls;
+  uint16_t hrrN  = (bgK + 4) * ls;
+
+  struct ldpc_regs_c* regs = malloc(sizeof(struct ldpc_regs_c));
+  if (regs == NULL) {
+    return NULL;
+  }
+
+  // Null every buffer pointer up front so that the shared failure path can
+  // free all of them unconditionally (free(NULL) is a no-op).
+  regs->soft_bits    = NULL;
+  regs->check_to_var = NULL;
+  regs->var_to_check = NULL;
+  regs->min_v2c      = NULL;
+  regs->min_v_index  = NULL;
+  regs->prod_v2c     = NULL;
+
+  regs->soft_bits = srslte_vec_i8_malloc(liftN);
+  if (regs->soft_bits == NULL) {
+    goto alloc_failed;
+  }
+
+  regs->check_to_var = srslte_vec_i8_malloc((hrrN + ls) * bgM);
+  if (regs->check_to_var == NULL) {
+    goto alloc_failed;
+  }
+
+  regs->var_to_check = srslte_vec_i8_malloc((hrrN + ls));
+  if (regs->var_to_check == NULL) {
+    goto alloc_failed;
+  }
+
+  regs->min_v2c = malloc(ls * sizeof(int8_t[2]));
+  if (regs->min_v2c == NULL) {
+    goto alloc_failed;
+  }
+
+  regs->min_v_index = srslte_vec_i32_malloc(ls);
+  if (regs->min_v_index == NULL) {
+    goto alloc_failed;
+  }
+
+  regs->prod_v2c = srslte_vec_i32_malloc(ls);
+  if (regs->prod_v2c == NULL) {
+    goto alloc_failed;
+  }
+
+  regs->bgM   = bgM;
+  regs->liftN = liftN;
+  regs->hrrN  = hrrN;
+  regs->ls    = ls;
+
+  // The scaling factor is kept as an integer, in units of 1/F2I.
+  regs->scaling_fctr = (int)(scaling_fctr * F2I);
+
+  return regs;
+
+alloc_failed:
+  // Release in reverse order of allocation.
+  free(regs->prod_v2c);
+  free(regs->min_v_index);
+  free(regs->min_v2c);
+  free(regs->var_to_check);
+  free(regs->check_to_var);
+  free(regs->soft_bits);
+  free(regs);
+  return NULL;
+}
+
+/*!
+ * Releases the decoder registers together with every buffer they own.
+ * \param[in] p A pointer to the decoder registers (an ldpc_regs_c structure).
+ *              A NULL pointer is accepted and ignored.
+ */
+void delete_ldpc_dec_c(void* p)
+{
+  if (p == NULL) {
+    return;
+  }
+
+  struct ldpc_regs_c* regs = p;
+
+  // Buffers go first (reverse order of allocation), the container last.
+  free(regs->prod_v2c);
+  free(regs->min_v_index);
+  free(regs->min_v2c);
+  free(regs->var_to_check);
+  free(regs->check_to_var);
+  free(regs->soft_bits);
+  free(regs);
+}
+
+/*!
+ * Loads a new set of LLRs into the decoder and clears all the messages.
+ * \param[in,out] p    A pointer to the decoder registers (an ldpc_regs_c structure).
+ * \param[in]     llrs The input log-likelihood ratios; liftN - 2 * ls values are read.
+ * \param[in]     ls   The lifting size.
+ * \return An integer: 0 if the function executes correctly, -1 if \b p is NULL.
+ */
+int init_ldpc_dec_c(void* p, const int8_t* llrs, uint16_t ls)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs_c* regs        = p;
+  int                 n_punctured = 2 * ls;
+
+  // The first 2 * ls soft bits receive no input LLR: they start from zero.
+  bzero(regs->soft_bits, n_punctured * sizeof(int8_t));
+
+  // The remaining soft bits are a plain copy of the input LLRs.
+  const int8_t* src = llrs;
+  for (int i_bit = n_punctured; i_bit < regs->liftN; i_bit++) {
+    regs->soft_bits[i_bit] = *src++;
+  }
+
+  // No message has been exchanged yet.
+  bzero(regs->check_to_var, (regs->hrrN + regs->ls) * regs->bgM * sizeof(int8_t));
+  bzero(regs->var_to_check, (regs->hrrN + regs->ls) * sizeof(int8_t));
+  return 0;
+}
+
+/*!
+ * Updates the variable-to-check messages of one layer by subtracting the
+ * layer's check-to-variable messages from the current soft bits.
+ * \param[in,out] p       A pointer to the decoder registers (an ldpc_regs_c structure).
+ * \param[in]     i_layer The index of the layer to update.
+ * \return An integer: 0 if the function executes correctly, -1 if \b p is NULL.
+ */
+int update_ldpc_var_to_check_c(void* p, int i_layer)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs_c* regs = p;
+
+  // Each layer owns hrrN + ls consecutive check-to-variable messages.
+  const int layer_len = regs->hrrN + regs->ls;
+  int8_t*   layer_c2v = regs->check_to_var + i_layer * layer_len;
+
+  // High-rate region: always processed.
+  inner_var_to_check_c(regs->soft_bits, layer_c2v, regs->var_to_check, infinity7, regs->hrrN);
+
+  // The first four layers have no extension node.
+  if (i_layer < 4) {
+    return 0;
+  }
+
+  // Extension region: one block of ls soft bits per layer, stored after the high-rate region.
+  const int ext_offset = regs->hrrN + (i_layer - 4) * regs->ls;
+  inner_var_to_check_c(
+      regs->soft_bits + ext_offset, layer_c2v + regs->hrrN, regs->var_to_check + regs->hrrN, infinity7, regs->ls);
+
+  return 0;
+}
+
+/*!
+ * Updates the check-to-variable messages of one layer with the normalized
+ * min-sum rule: every outgoing message carries the smallest magnitude among
+ * the other incoming variable-to-check messages (scaled by scaling_fctr / F2I)
+ * and the product of their signs.
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c structure).
+ * \param[in]     i_layer  The index of the layer to update.
+ * \param[in]     this_pcm A pointer to the row of the parity check matrix (i.e. base
+ *                         graph) corresponding to the selected layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer; the list ends at the first -1
+ *                         or after MAX_CNCT entries.
+ * \return An integer: 0 if the function executes correctly, -1 if \b p is NULL.
+ */
+int update_ldpc_check_to_var_c(void*           p,
+                               int             i_layer,
+                               const uint16_t* this_pcm,
+                               const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_c* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  int i = 0;
+  int j = 0;
+
+  for (i = 0; i < vp->ls; i++) {
+    vp->prod_v2c[i] = 1;
+    // No minimum found yet: -1 never matches a real message index, so a stale
+    // or uninitialized index is never compared against below.
+    vp->min_v_index[i] = -1;
+    for (j = 0; j < 2; j++) {
+      vp->min_v2c[i][j] = INT8_MAX;
+    }
+  }
+
+  uint16_t shift      = 0;
+  int      index      = 0;
+  int8_t   this_v2c   = 0;
+  int      is_min     = 0;
+  int      i_v2c      = 0;
+  int      i_v2c_base = 0;
+
+  int8_t current_var_index = -1;
+
+  // First pass: for every check node, find the two smallest magnitudes, the
+  // position of the smallest one and the product of all the signs.
+  for (i = 0; i < MAX_CNCT; i++) {
+    // Fetch the index only after the bound check so that a completely full row
+    // is never read past its end.
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = current_var_index * vp->ls;
+    // All the extension nodes share the single slot that follows the high-rate region.
+    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;
+    for (j = 0; j < vp->ls; j++) {
+      // Check node reached by lifted variable node j after the circular shift.
+      index    = (j + vp->ls - shift) % vp->ls;
+      i_v2c    = i_v2c_base + j;
+      this_v2c = abs(vp->var_to_check[i_v2c]);
+      is_min   = this_v2c < vp->min_v2c[index][0];
+      vp->min_v2c[index][1] =
+          (this_v2c >= vp->min_v2c[index][1]) ? vp->min_v2c[index][1] : (is_min ? vp->min_v2c[index][0] : this_v2c);
+      vp->min_v2c[index][0]  = is_min ? this_v2c : vp->min_v2c[index][0];
+      vp->min_v_index[index] = is_min ? i_v2c : vp->min_v_index[index];
+
+      vp->prod_v2c[index] *= (vp->var_to_check[i_v2c] >= 0) ? 1 : -1;
+    }
+  }
+
+  int8_t* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);
+
+  // Second pass: build the outgoing messages.
+  for (i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = current_var_index * vp->ls;
+    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;
+    for (j = 0; j < vp->ls; j++) {
+      index = (j + vp->ls - shift) % vp->ls;
+      i_v2c = i_v2c_base + j;
+
+      // The node that provided the minimum receives the second smallest value.
+      this_check_to_var[i_v2c] = (i_v2c != vp->min_v_index[index]) ? vp->min_v2c[index][0] : vp->min_v2c[index][1];
+      this_check_to_var[i_v2c] = this_check_to_var[i_v2c] * vp->scaling_fctr / F2I;
+
+      // Sign of the product of all the other messages: total product times own sign.
+      this_check_to_var[i_v2c] *= vp->prod_v2c[index] * ((vp->var_to_check[i_v2c] >= 0) ? 1 : -1);
+    }
+  }
+
+  return 0;
+}
+
+/*!
+ * Updates the soft bits of the variable nodes connected to one layer as the
+ * sum of the layer's check-to-variable and variable-to-check messages. Sums
+ * whose magnitude exceeds the 7-bit range are mapped to +/- INT8_MAX, the
+ * 8-bit representation of infinity.
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c structure).
+ * \param[in]     i_layer  The index of the layer being processed.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer; the list ends at the first -1
+ *                         or after MAX_CNCT entries.
+ * \return An integer: 0 if the function executes correctly, -1 if \b p is NULL.
+ */
+int update_ldpc_soft_bits_c(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_c* vp = p;
+  if (p == NULL) {
+    return -1;
+  }
+
+  int     i_bit             = 0;
+  int     i_bit_tmp         = 0;
+  int8_t* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);
+  int8_t* this_var_to_check = vp->var_to_check;
+
+  long tmp = 0;
+
+  int8_t current_var_index     = -1;
+  int    current_var_index_ext = 0;
+
+  for (int i = 0; i < MAX_CNCT; i++) {
+    // Fetch the index only after the bound check so that a completely full row
+    // is never read past its end.
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    current_var_index_ext = current_var_index * vp->ls;
+    for (int j = 0; j < vp->ls; j++) {
+      i_bit = current_var_index_ext + j;
+      // Messages of the extension nodes live in the slot that follows the high-rate region.
+      i_bit_tmp = (current_var_index_ext <= vp->hrrN) ? i_bit : vp->hrrN + j;
+      tmp       = (long)this_check_to_var[i_bit_tmp] + this_var_to_check[i_bit_tmp];
+      if (tmp > infinity7) {
+        tmp = INT8_MAX;
+      }
+      if (tmp < -infinity7) {
+        tmp = -INT8_MAX;
+      }
+      vp->soft_bits[i_bit] = (int8_t)tmp;
+    }
+  }
+
+  return 0;
+}
+
+/*!
+ * Returns the decoded message (hard bits) from the current soft bits: a bit is
+ * 1 when the corresponding soft bit is strictly negative, 0 otherwise.
+ * \param[in]  p       A pointer to the decoder registers (an ldpc_regs_c structure).
+ * \param[out] message A pointer to the decoded message.
+ * \param[in]  liftK   The length of the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 if \b p is NULL.
+ */
+int extract_ldpc_message_c(void* p, uint8_t* message, uint16_t liftK)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  const struct ldpc_regs_c* regs = p;
+  const int8_t*             soft = regs->soft_bits;
+
+  for (int i_bit = 0; i_bit < liftK; i_bit++) {
+    message[i_bit] = (soft[i_bit] < 0) ? 1 : 0;
+  }
+
+  return 0;
+}
+
+/*
+ * Computes z = x - y element by element, saturating the result to
+ * [-clip, +clip]. Entries of x equal to +/-127 (the 8-bit representation of
+ * infinity) skip the subtraction and are propagated unchanged.
+ */
+void inner_var_to_check_c(const int8_t* x, const int8_t* y, int8_t* z, const uint8_t clip, const uint32_t len)
+{
+  const long infinity8 = (1U << 7U) - 1; // Max positive value in 8-bit representation
+  const long upper     = clip;
+  const long lower     = -(long)clip;
+
+  for (uint32_t k = 0; k < len; k++) {
+    const long minuend = x[k];
+
+    if (minuend >= infinity8) {
+      // +infinity stays +infinity, whatever is subtracted.
+      z[k] = (int8_t)infinity8;
+    } else if (minuend <= -infinity8) {
+      // -infinity stays -infinity, whatever is subtracted.
+      z[k] = (int8_t)(-infinity8);
+    } else {
+      long diff = minuend - y[k];
+      if (diff > upper) {
+        diff = upper;
+      } else if (diff < lower) {
+        diff = lower;
+      }
+      z[k] = (int8_t)diff;
+    }
+  }
+}
--- a/lib/src/phy/fec/ldpc/ldpc_dec_c_avx2.c
+++ b/lib/src/phy/fec/ldpc/ldpc_dec_c_avx2.c
@ -0,0 +1,545 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_c_avx2.c
+ * \brief Definition of the LDPC decoder inner functions working
+ *    with 8-bit integer-valued LLRs (AVX2 version).
+ *
+ * Even if the inner representation is based on 8 bits, check-to-variable and
+ * variable-to-check messages are actually represented with 7 bits, the
+ * remaining bit is used to represent infinity.
+ *
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include "../utils_avx2.h"
+#include "ldpc_dec_all.h"
+#include "srslte/phy/fec/ldpc/base_graph.h"
+#include "srslte/phy/utils/vector.h"
+
+#ifdef LV_HAVE_AVX2
+
+#include <immintrin.h>
+
+#include "ldpc_avx2_consts.h"
+
+#define F2I 65535 /*!< \brief Used for float to int conversion---float f is stored as (uint16_t)(f*F2I). */
+
+/*!
+ * \brief Represents a node of the base factor graph.
+ *
+ * The two members overlay the same storage: \b c gives access to the single
+ * lifted nodes, \b v to all of them at once as an AVX2 vector.
+ */
+typedef union bg_node_t {
+  int8_t  c[SRSLTE_AVX2_B_SIZE]; /*!< Each base node may contain up to \ref SRSLTE_AVX2_B_SIZE lifted nodes. */
+  __m256i v;                     /*!< All the lifted nodes of the current base node as a 256-bit line. */
+} bg_node_t;
+
+/*!
+ * \brief Maximum message magnitude.
+ * Messages use a 7-bit quantization, hence their magnitude is limited to
+ * \f$2^6 - 1 = 63\f$. Soft bits use the remaining bit to denote infinity.
+ */
+static const int8_t infinity7 = (1U << 6U) - 1;
+
+/*!
+ * \brief Inner registers for the LDPC decoder that works with 8-bit integer-valued LLRs.
+ */
+struct ldpc_regs_c_avx2 {
+  __m256i scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm (16-bit lanes, each holding the factor times F2I). */
+
+  bg_node_t* soft_bits;    /*!< \brief A-posteriori log-likelihood ratios (bgN nodes). */
+  __m256i*   check_to_var; /*!< \brief Check-to-variable messages (hrr + 1 vectors per layer, bgM layers). */
+  __m256i*   var_to_check; /*!< \brief Variable-to-check messages of the layer being processed (hrr + 1 vectors). */
+  __m256i*   rotated_v2c;  /*!< \brief To store a rotated version of the variable-to-check messages (hrr + 1 vectors). */
+
+  uint16_t ls;  /*!< \brief Lifting size. */
+  uint8_t  hrr; /*!< \brief Number of variable nodes in the high-rate region (before lifting). */
+  uint8_t  bgM; /*!< \brief Number of check nodes (before lifting). */
+  uint8_t  bgN; /*!< \brief Number of variable nodes (before lifting). */
+};
+
+/*!
+ * Carries out the actual update of the variable-to-check messages. It basically
+ * consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
+ * \f$\lvert x[i] \rvert = 2^{7}-1 \f$ (our representation of infinity) to
+ * ensure it is properly propagated. Also, the subtraction is saturated between
+ * \f$- clip\f$ and \f$+ clip\f$.
+ * \param[in] x     Minuend: array we subtract from (in practice, the soft bits).
+ * \param[in] y     Subtrahend: array to be subtracted (in practice, the
+ *                  check-to-variable messages).
+ * \param[out] z    Resulting difference array(in practice, the updated
+ *                  variable-to-check messages).
+ * \param[in]  clip The saturation value.
+ * \param[in]  len  The length of the vectors.
+ */
+static void inner_var_to_check_c_avx2(const __m256i* x, const __m256i* y, __m256i* z, uint8_t clip, uint32_t len);
+
+/*!
+ * Rotate the content of an __m256i vector (first input) towards the left by
+ * the number of chars specified by the second input (i.e., the \b imm * 8 most
+ * significant bits become the \b imm * 8 least significant bits).
+ * \param[in]  a    Vector to circularly shift.
+ * \param[in]  imm  The shift order in chars.
+ * \return          The shifted vector.
+ */
+static __m256i _mm256_rotatelli_si256(__m256i a, int imm);
+
+/*!
+ * Rotate the content of an __m256i vector (first input) towards the right by
+ * the number of chars specified by the second input (i.e., the \b imm * 8 least
+ * significant bits become the \b imm * 8 most significant bits).
+ * \param[in]  a    Vector to circularly shift.
+ * \param[in]  imm  The shift order in chars.
+ * \return          The shifted vector.
+ */
+static __m256i _mm256_rotaterli_si256(__m256i a, int imm);
+
+/*!
+ * Rotate the contents of a node towards the left by \b imm chars, that is the
+ * \b imm * 8 most significant bits become the least significant ones.
+ * \param[in]  a    The node to rotate.
+ * \param[in]  imm  The order of the rotation in number of chars.
+ * \param[in]  ls   The size of the node (lifting size).
+ * \return     The rotated node.
+ */
+static __m256i rotate_node_left(__m256i a, int imm, uint16_t ls);
+
+/*!
+ * Rotate the contents of a node towards the right by \b imm chars, that is the
+ * \b imm * 8 least significant bits become the most significant ones.
+ * \param[in]  a    The node to rotate.
+ * \param[in]  imm  The order of the rotation in number of chars.
+ * \param[in]  ls   The size of the node (lifting size).
+ * \return     The rotated node.
+ */
+static __m256i rotate_node_right(__m256i a, int imm, uint16_t ls);
+
+/*!
+ * Scale packed 8-bit integers in \b a by the scaling factor \b sf / #F2I.
+ * \param[in] a   Vector of packed 8-bit integers.
+ * \param[in] sf  Scaling factor.
+ * \return    Vector of packed 8-bit integers with the scaling result.
+ */
+static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf);
+
+/*!
+ * Creates the registers of the 8-bit AVX2 LDPC decoder and allocates all of its buffers.
+ * \param[in] bgN          Number of variable nodes of the base graph.
+ * \param[in] bgM          Number of check nodes of the base graph.
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
+ * \return A pointer to the new registers, or NULL if any allocation fails (in
+ *         which case everything allocated up to that point is released).
+ */
+void* create_ldpc_dec_c_avx2(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
+{
+  uint8_t  bgK = bgN - bgM;
+  uint16_t hrr = bgK + 4;
+
+  struct ldpc_regs_c_avx2* regs = srslte_vec_malloc(sizeof(struct ldpc_regs_c_avx2));
+  if (regs == NULL) {
+    return NULL;
+  }
+
+  // Null every buffer pointer up front so that the shared failure path can
+  // free all of them unconditionally (free(NULL) is a no-op).
+  regs->soft_bits    = NULL;
+  regs->check_to_var = NULL;
+  regs->var_to_check = NULL;
+  regs->rotated_v2c  = NULL;
+
+  regs->soft_bits = srslte_vec_malloc(bgN * sizeof(bg_node_t));
+  if (regs->soft_bits == NULL) {
+    goto alloc_failed;
+  }
+
+  regs->check_to_var = srslte_vec_malloc((hrr + 1) * bgM * sizeof(__m256i));
+  if (regs->check_to_var == NULL) {
+    goto alloc_failed;
+  }
+
+  regs->var_to_check = srslte_vec_malloc((hrr + 1) * sizeof(__m256i));
+  if (regs->var_to_check == NULL) {
+    goto alloc_failed;
+  }
+
+  regs->rotated_v2c = srslte_vec_malloc((hrr + 1) * sizeof(__m256i));
+  if (regs->rotated_v2c == NULL) {
+    goto alloc_failed;
+  }
+
+  regs->bgM = bgM;
+  regs->bgN = bgN;
+  regs->hrr = hrr;
+  regs->ls  = ls;
+
+  // The scaling factor is replicated in every 16-bit lane, in units of 1/F2I.
+  regs->scaling_fctr = _mm256_set1_epi16((uint16_t)(scaling_fctr * F2I));
+
+  return regs;
+
+alloc_failed:
+  // Release in reverse order of allocation.
+  free(regs->rotated_v2c);
+  free(regs->var_to_check);
+  free(regs->check_to_var);
+  free(regs->soft_bits);
+  free(regs);
+  return NULL;
+}
+
+/*!
+ * Releases the decoder registers together with every buffer they own.
+ * \param[in] p A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
+ *              A NULL pointer is accepted and ignored.
+ */
+void delete_ldpc_dec_c_avx2(void* p)
+{
+  if (p == NULL) {
+    return;
+  }
+
+  struct ldpc_regs_c_avx2* regs = p;
+
+  // Buffers go first (reverse order of allocation), the container last.
+  free(regs->rotated_v2c);
+  free(regs->var_to_check);
+  free(regs->check_to_var);
+  free(regs->soft_bits);
+  free(regs);
+}
+
+/*!
+ * Loads a new set of LLRs into the decoder and clears the check-to-variable
+ * and variable-to-check messages.
+ * \param[in,out] p    A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
+ * \param[in]     llrs The input log-likelihood ratios; (bgN - 2) * ls values are read.
+ * \param[in]     ls   The lifting size.
+ * \return An integer: 0 if the function executes correctly, -1 if \b p is NULL.
+ */
+int init_ldpc_dec_c_avx2(void* p, const int8_t* llrs, uint16_t ls)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs_c_avx2* regs = p;
+
+  // the first 2 x LS bits of the codeword are not sent
+  regs->soft_bits[0].v = _mm256_set1_epi8(0);
+  regs->soft_bits[1].v = _mm256_set1_epi8(0);
+
+  // Every other node takes ls consecutive LLRs; the unused tail of the vector is zeroed.
+  const int8_t* src = llrs;
+  for (int i_node = 2; i_node < regs->bgN; i_node++) {
+    int8_t* node = regs->soft_bits[i_node].c;
+    for (int j = 0; j < ls; j++) {
+      node[j] = src[j];
+    }
+    bzero(node + ls, (SRSLTE_AVX2_B_SIZE - ls) * sizeof(int8_t));
+    src += ls;
+  }
+
+  // No message has been exchanged yet.
+  bzero(regs->check_to_var, (regs->hrr + 1) * regs->bgM * sizeof(__m256i));
+  bzero(regs->var_to_check, (regs->hrr + 1) * sizeof(__m256i));
+  return 0;
+}
+
+/*!
+ * Updates the variable-to-check messages of one layer by subtracting the
+ * layer's check-to-variable messages from the current soft bits.
+ * \param[in,out] p       A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
+ * \param[in]     i_layer The index of the layer to update.
+ * \return An integer: 0 if the function executes correctly, -1 if \b p is NULL.
+ */
+int update_ldpc_var_to_check_c_avx2(void* p, int i_layer)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs_c_avx2* regs = p;
+
+  // Each layer owns hrr + 1 consecutive check-to-variable vectors.
+  __m256i*       layer_c2v = regs->check_to_var + i_layer * (regs->hrr + 1);
+  const __m256i* soft      = &(regs->soft_bits[0].v);
+
+  // High-rate region: always processed.
+  inner_var_to_check_c_avx2(soft, layer_c2v, regs->var_to_check, infinity7, regs->hrr);
+
+  // The first four layers have no extension node.
+  if (i_layer < 4) {
+    return 0;
+  }
+
+  // Extension region: a single node per layer, stored after the high-rate region.
+  inner_var_to_check_c_avx2(
+      soft + regs->hrr + i_layer - 4, layer_c2v + regs->hrr, regs->var_to_check + regs->hrr, infinity7, 1);
+
+  return 0;
+}
+
+/*!
+ * Updates the check-to-variable messages of one layer with the normalized
+ * min-sum rule: every outgoing message carries the smallest magnitude among
+ * the other incoming variable-to-check messages (scaled by the scaling factor)
+ * and the combined sign of those messages.
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
+ * \param[in]     i_layer  The index of the layer to update.
+ * \param[in]     this_pcm A pointer to the row of the parity check matrix (i.e. base
+ *                         graph) corresponding to the selected layer.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer; the list ends at the first -1
+ *                         or after MAX_CNCT entries.
+ * \return An integer: 0 if the function executes correctly, -1 if \b p is NULL.
+ */
+int update_ldpc_check_to_var_c_avx2(void*           p,
+                                    int             i_layer,
+                                    const uint16_t* this_pcm,
+                                    const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_c_avx2* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  int i = 0;
+
+  uint16_t shift      = 0;
+  int      i_v2c_base = 0;
+
+  __m256i* this_rotated_v2c = NULL;
+
+  __m256i this_abs_v2c_epi8;
+  __m256i minp_v2c_epi8 = _mm256_set1_epi8(INT8_MAX);
+  __m256i mins_v2c_epi8 = _mm256_set1_epi8(INT8_MAX);
+  __m256i prod_v2c_epi8 = _mm256_set1_epi8(0);
+  __m256i mask_sign_epi8;
+  __m256i mask_min_epi8;
+  __m256i help_min_epi8;
+  // -1 never matches a connection index: lanes where no magnitude ever drops
+  // below INT8_MAX keep this value instead of an indeterminate one.
+  __m256i min_ix_epi8 = _mm256_set1_epi8(-1);
+  __m256i current_ix_epi8;
+
+  int8_t current_var_index = -1;
+
+  // First pass: track, lane by lane, the two smallest magnitudes, the position
+  // of the smallest one and the parity of the negative signs.
+  for (i = 0; i < MAX_CNCT; i++) {
+    // Fetch the index only after the bound check so that a completely full row
+    // is never read past its end.
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    shift = this_pcm[current_var_index];
+    // All the extension nodes share the single slot that follows the high-rate region.
+    i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
+
+    current_ix_epi8 = _mm256_set1_epi8((int8_t)i);
+
+    this_rotated_v2c  = vp->rotated_v2c + i;
+    *this_rotated_v2c = rotate_node_right(vp->var_to_check[i_v2c_base], shift, vp->ls);
+    // mask_sign is 1 if this_rotated_v2c is strictly negative
+    mask_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, *this_rotated_v2c);
+    prod_v2c_epi8  = _mm256_xor_si256(prod_v2c_epi8, mask_sign_epi8);
+
+    this_abs_v2c_epi8 = _mm256_abs_epi8(*this_rotated_v2c);
+    // mask_min is 1 if this_abs_v2c is strictly smaller than minp_v2c
+    mask_min_epi8 = _mm256_cmpgt_epi8(minp_v2c_epi8, this_abs_v2c_epi8);
+    // help_min holds the candidate for the second minimum: the old minimum if
+    // it has just been beaten, the current magnitude otherwise
+    help_min_epi8 = _mm256_blendv_epi8(this_abs_v2c_epi8, minp_v2c_epi8, mask_min_epi8);
+    minp_v2c_epi8 = _mm256_blendv_epi8(minp_v2c_epi8, this_abs_v2c_epi8, mask_min_epi8);
+    min_ix_epi8   = _mm256_blendv_epi8(min_ix_epi8, current_ix_epi8, mask_min_epi8);
+
+    // mask_min is 1 if this_abs_v2c is strictly smaller than mins_v2c
+    mask_min_epi8 = _mm256_cmpgt_epi8(mins_v2c_epi8, this_abs_v2c_epi8);
+    mins_v2c_epi8 = _mm256_blendv_epi8(mins_v2c_epi8, help_min_epi8, mask_min_epi8);
+  }
+
+  __m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1);
+
+  __m256i mask_is_min_epi8;
+  __m256i this_c2v_epi8;
+  __m256i help_c2v_epi8;
+  __m256i final_sign_epi8;
+
+  // Second pass: build the outgoing messages and rotate them back.
+  for (i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
+
+    this_rotated_v2c = vp->rotated_v2c + i;
+    // final_sign is 1 if the product of the signs of all the other messages is negative
+    final_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, *this_rotated_v2c);
+    final_sign_epi8 = _mm256_xor_si256(final_sign_epi8, prod_v2c_epi8);
+
+    // the node that provided the minimum receives the second smallest value
+    current_ix_epi8  = _mm256_set1_epi8((int8_t)i);
+    mask_is_min_epi8 = _mm256_cmpeq_epi8(current_ix_epi8, min_ix_epi8);
+    this_c2v_epi8    = _mm256_blendv_epi8(minp_v2c_epi8, mins_v2c_epi8, mask_is_min_epi8);
+    this_c2v_epi8    = _mm256_scalei_epi8(this_c2v_epi8, vp->scaling_fctr);
+    help_c2v_epi8    = _mm256_sign_epi8(this_c2v_epi8, final_sign_epi8);
+    this_c2v_epi8    = _mm256_blendv_epi8(this_c2v_epi8, help_c2v_epi8, final_sign_epi8);
+
+    this_check_to_var[i_v2c_base] = rotate_node_left(this_c2v_epi8, shift, vp->ls);
+  }
+
+  return 0;
+}
+
+/*!
+ * Updates the soft bits of the variable nodes connected to one layer as the
+ * saturated sum of the layer's check-to-variable and variable-to-check
+ * messages. Sums outside the 7-bit range are replaced by the 8-bit
+ * representation of +/- infinity.
+ * \param[in,out] p        A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
+ * \param[in]     i_layer  The index of the layer being processed.
+ * \param[in]     these_var_indices
+ *                         Contains the indices of the variable nodes connected
+ *                         to the current layer; the list ends at the first -1
+ *                         or after MAX_CNCT entries.
+ * \return An integer: 0 if the function executes correctly, -1 if \b p is NULL.
+ */
+int update_ldpc_soft_bits_c_avx2(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_c_avx2* vp = p;
+  if (p == NULL) {
+    return -1;
+  }
+
+  __m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1);
+
+  int i_bit_tmp_base = 0;
+
+  __m256i tmp_epi8;
+  __m256i mask_epi8;
+
+  int8_t current_var_index = -1;
+
+  for (int i = 0; i < MAX_CNCT; i++) {
+    // Fetch the index only after the bound check so that a completely full row
+    // is never read past its end.
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    // Messages of the extension nodes live in the slot that follows the high-rate region.
+    i_bit_tmp_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
+
+    tmp_epi8 = _mm256_adds_epi8(this_check_to_var[i_bit_tmp_base], vp->var_to_check[i_bit_tmp_base]);
+
+    // tmp = (tmp > infty7) ? infty8 : tmp
+    mask_epi8 = _mm256_cmpgt_epi8(tmp_epi8, infty7_epi8);
+    tmp_epi8  = _mm256_blendv_epi8(tmp_epi8, infty8_epi8, mask_epi8);
+
+    // tmp = (tmp < -infty7) ? -infty8 : tmp
+    mask_epi8                          = _mm256_cmpgt_epi8(neg_infty7_epi8, tmp_epi8);
+    vp->soft_bits[current_var_index].v = _mm256_blendv_epi8(tmp_epi8, neg_infty8_epi8, mask_epi8);
+  }
+
+  return 0;
+}
+
+/*!
+ * Returns the decoded message (hard bits) from the current soft bits: a bit is
+ * 1 when the corresponding soft bit is strictly negative, 0 otherwise. Only
+ * the first ls entries of each of the first liftK / ls nodes are read.
+ * \param[in]  p       A pointer to the decoder registers (an ldpc_regs_c_avx2 structure).
+ * \param[out] message A pointer to the decoded message.
+ * \param[in]  liftK   The length of the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 if \b p is NULL.
+ */
+int extract_ldpc_message_c_avx2(void* p, uint8_t* message, uint16_t liftK)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  const struct ldpc_regs_c_avx2* regs    = p;
+  const int                      n_nodes = liftK / regs->ls;
+  uint8_t*                       out     = message;
+
+  for (int i_node = 0; i_node < n_nodes; i_node++) {
+    const int8_t* node = regs->soft_bits[i_node].c;
+    for (int j = 0; j < regs->ls; j++) {
+      *out++ = (node[j] < 0);
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Computes z = x - y on vectors of packed 8-bit integers, saturating the result
+ * to [-clip, +clip]. Lanes of x holding +/- infinity (8-bit representation)
+ * are propagated as +/- infinity regardless of y.
+ */
+static void
+inner_var_to_check_c_avx2(const __m256i* x, const __m256i* y, __m256i* z, const uint8_t clip, const uint32_t len)
+{
+  const __m256i upper_epi8 = _mm256_set1_epi8(clip);
+  const __m256i lower_epi8 = _mm256_set1_epi8((char)(-clip));
+
+  for (unsigned k = 0; k < len; k++) {
+    const __m256i minuend_epi8 = x[k];
+
+    // saturated difference
+    __m256i out_epi8 = _mm256_subs_epi8(minuend_epi8, y[k]);
+
+    // out = (out > clip) ? clip : out
+    __m256i sel_epi8 = _mm256_cmpgt_epi8(out_epi8, upper_epi8);
+    out_epi8         = _mm256_blendv_epi8(out_epi8, upper_epi8, sel_epi8);
+
+    // out = (out < -clip) ? -clip : out
+    sel_epi8 = _mm256_cmpgt_epi8(lower_epi8, out_epi8);
+    out_epi8 = _mm256_blendv_epi8(out_epi8, lower_epi8, sel_epi8);
+
+    // infinite inputs stay infinite:
+    // out = (x < infinity) ? out : infinity
+    sel_epi8 = _mm256_cmpgt_epi8(infty8_epi8, minuend_epi8);
+    out_epi8 = _mm256_blendv_epi8(infty8_epi8, out_epi8, sel_epi8);
+
+    // out = (x > -infinity) ? out : -infinity
+    sel_epi8 = _mm256_cmpgt_epi8(minuend_epi8, neg_infty8_epi8);
+    z[k]     = _mm256_blendv_epi8(neg_infty8_epi8, out_epi8, sel_epi8);
+  }
+}
+
+/*
+ * Rotates the 256-bit vector a towards the left by imm bytes. The rotation is
+ * split into a coarse step (whole 64-bit blocks, via permutation) and a fine
+ * step (bytes within each block, via shifts plus a carry from the neighbouring
+ * block). imm / 8 indexes a 4-entry table, hence imm must lie in [0, 31].
+ */
+static __m256i _mm256_rotatelli_si256(__m256i a, int imm)
+{
+  // coarse rotation, in 64-bit blocks
+  const int n_blocks = imm / 8;
+  // fine rotation, in bytes
+  const int n_bytes = imm % 8;
+  // block rotation that provides the carry-over
+  const int carry_ix = (n_blocks + 1) % 4;
+
+  // by_blocks[k] is a rotated left by k 64-bit blocks
+  __m256i by_blocks[4];
+  by_blocks[0] = a;                                // blocks 0 - 1 - 2 - 3
+  by_blocks[1] = _mm256_permute4x64_epi64(a, 147); // 3 - 0 - 1 - 2
+  by_blocks[2] = _mm256_permute4x64_epi64(a, 78);  // 2 - 3 - 0 - 1
+  by_blocks[3] = _mm256_permute4x64_epi64(a, 57);  // 1 - 2 - 3 - 0
+
+  // shift left each block
+  __m256i shifted = _mm256_slli_epi64(by_blocks[n_blocks], n_bytes * 8);
+  // bytes pushed out of each block re-enter from the neighbouring one
+  __m256i carried = _mm256_srli_epi64(by_blocks[carry_ix], (8 - n_bytes) * 8);
+
+  return _mm256_xor_si256(shifted, carried);
+}
+
+/*
+ * Rotates the 256-bit vector a towards the right by imm bytes. The rotation is
+ * split into a coarse step (whole 64-bit blocks, via permutation) and a fine
+ * step (bytes within each block, via shifts plus a carry from the neighbouring
+ * block). imm / 8 indexes a 4-entry table, hence imm must lie in [0, 31].
+ */
+static __m256i _mm256_rotaterli_si256(__m256i a, int imm)
+{
+  // coarse rotation, in 64-bit blocks
+  const int n_blocks = imm / 8;
+  // fine rotation, in bytes
+  const int n_bytes = imm % 8;
+  // block rotation that provides the carry-over
+  const int carry_ix = (n_blocks + 1) % 4;
+
+  // by_blocks[k] is a rotated right by k 64-bit blocks
+  __m256i by_blocks[4];
+  by_blocks[0] = a;                                // blocks 0 - 1 - 2 - 3
+  by_blocks[1] = _mm256_permute4x64_epi64(a, 57);  // 1 - 2 - 3 - 0
+  by_blocks[2] = _mm256_permute4x64_epi64(a, 78);  // 2 - 3 - 0 - 1
+  by_blocks[3] = _mm256_permute4x64_epi64(a, 147); // 3 - 0 - 1 - 2
+
+  // shift right each block
+  __m256i shifted = _mm256_srli_epi64(by_blocks[n_blocks], n_bytes * 8);
+  // bytes pushed out of each block re-enter from the neighbouring one
+  __m256i carried = _mm256_slli_epi64(by_blocks[carry_ix], (8 - n_bytes) * 8);
+
+  return _mm256_xor_si256(shifted, carried);
+}
+
+/*
+ * Rotates a node of ls lifted entries towards the left by imm chars. When the
+ * node fills the whole vector a plain 256-bit rotation is enough; otherwise the
+ * result is stitched together from a left rotation by imm and a right rotation
+ * by ls - imm, each filtered by the imm-th entry of mask_most_epi8 and
+ * mask_least_epi8 (tables defined outside this block; presumably they keep
+ * complementary byte ranges -- confirm against their definition).
+ */
+static __m256i rotate_node_left(__m256i a, int imm, uint16_t ls)
+{
+  // Nothing to rotate.
+  if (imm == 0) {
+    return a;
+  }
+
+  __m256i moved_up = _mm256_rotatelli_si256(a, imm);
+
+  // Full-size node: the rotation over the whole vector is already the answer.
+  if (ls == SRSLTE_AVX2_B_SIZE) {
+    return moved_up;
+  }
+
+  __m256i wrapped = _mm256_rotaterli_si256(a, ls - imm);
+
+  moved_up = _mm256_and_si256(moved_up, mask_most_epi8[imm]);
+  wrapped  = _mm256_and_si256(wrapped, mask_least_epi8[imm]);
+
+  return _mm256_xor_si256(moved_up, wrapped);
+}
+
+/*
+ * Rotates a node of ls lifted entries towards the right by imm chars. When the
+ * node fills the whole vector a plain 256-bit rotation is enough; otherwise the
+ * result is stitched together from a right rotation by imm and a left rotation
+ * by ls - imm, each filtered by the (ls - imm)-th entry of mask_least_epi8 and
+ * mask_most_epi8 (tables defined outside this block; presumably they keep
+ * complementary byte ranges -- confirm against their definition).
+ */
+static __m256i rotate_node_right(__m256i a, int imm, uint16_t ls)
+{
+  // Nothing to rotate.
+  if (imm == 0) {
+    return a;
+  }
+
+  __m256i moved_down = _mm256_rotaterli_si256(a, imm);
+
+  // Full-size node: the rotation over the whole vector is already the answer.
+  if (ls == SRSLTE_AVX2_B_SIZE) {
+    return moved_down;
+  }
+
+  const int complement = ls - imm;
+  __m256i   wrapped    = _mm256_rotatelli_si256(a, complement);
+
+  moved_down = _mm256_and_si256(moved_down, mask_least_epi8[complement]);
+  wrapped    = _mm256_and_si256(wrapped, mask_most_epi8[complement]);
+
+  return _mm256_xor_si256(moved_down, wrapped);
+}
+
+/*
+ * Scales packed 8-bit integers by sf. The bytes of a are widened to two sets
+ * of 16-bit lanes (low byte of each lane via mask_even_epi8, high byte via a
+ * logical shift, so bytes are treated as unsigned), multiplied by sf keeping
+ * only the upper 16 bits of each product, and finally packed back into bytes.
+ */
+static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf)
+{
+  // low byte of every 16-bit lane
+  __m256i lo_epi16 = _mm256_and_si256(a, mask_even_epi8);
+  // high byte of every 16-bit lane, moved down to the low position
+  __m256i hi_epi16 = _mm256_srli_epi16(a, 8);
+
+  // upper 16 bits of the unsigned products
+  __m256i scaled_lo_epi16 = _mm256_mulhi_epu16(lo_epi16, sf);
+  __m256i scaled_hi_epi16 = _mm256_mulhi_epu16(hi_epi16, sf);
+
+  // put the scaled high bytes back in place and merge
+  scaled_hi_epi16 = _mm256_slli_epi16(scaled_hi_epi16, 8);
+
+  return _mm256_xor_si256(scaled_lo_epi16, scaled_hi_epi16);
+}
+
+#endif // LV_HAVE_AVX2
--- a/lib/src/phy/fec/ldpc/ldpc_dec_c_avx2_flood.c
+++ b/lib/src/phy/fec/ldpc/ldpc_dec_c_avx2_flood.c
@ -0,0 +1,572 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_c_avx2_flood.c
+ * \brief Definition of the LDPC decoder inner functions working
+ *    with 8-bit integer-valued LLRs (AVX2 version, flooded scheduling).
+ *
+ * Even if the inner representation is based on 8 bits, check-to-variable and
+ * variable-to-check messages are actually represented with 7 bits, the
+ * remaining bit is used to represent infinity.
+ *
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include "../utils_avx2.h"
+#include "ldpc_dec_all.h"
+#include "srslte/phy/fec/ldpc/base_graph.h"
+#include "srslte/phy/utils/vector.h"
+
+#ifdef LV_HAVE_AVX2
+
+#include <immintrin.h>
+
+#include "ldpc_avx2_consts.h"
+
+#define F2I 65535 /*!< \brief Used for float to int conversion---float f is stored as (int)(f*F2I). */
+
+/*!
+ * \brief Represents a node of the base factor graph.
+ *
+ * The union gives two views of the same storage: byte-wise access to the
+ * individual lifted nodes (member \b c) and access to all of them at once as
+ * a single AVX2 register (member \b v).
+ */
+typedef union bg_node_t {
+  int8_t  c[SRSLTE_AVX2_B_SIZE]; /*!< Each base node may contain up to \ref SRSLTE_AVX2_B_SIZE lifted nodes. */
+  __m256i v;                     /*!< All the lifted nodes of the current base node as a 256-bit line. */
+} bg_node_t;
+
+/*!
+ * \brief Maximum message magnitude.
+ * Messages use a 7-bit quantization. Soft bits use the remaining bit to denote infinity.
+ * The value is 2^6 - 1 = 63; it is passed as the clipping level when the
+ * variable-to-check messages are updated.
+ */
+static const int8_t infinity7 = (1U << 6U) - 1;
+
+/*!
+ * \brief Inner registers for the LDPC decoder that works with 8-bit integer-valued LLRs.
+ *
+ * All the buffers are allocated by create_ldpc_dec_c_avx2_flood() and released
+ * by delete_ldpc_dec_c_avx2_flood().
+ */
+struct ldpc_regs_c_avx2_flood {
+  __m256i scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
+
+  bg_node_t* soft_bits;    /*!< \brief A-posteriori log-likelihood ratios (bgN nodes). */
+  __m256i*   llrs;         /*!< \brief A-priori log-likelihood ratios (bgN nodes). */
+  __m256i*   check_to_var; /*!< \brief Check-to-variable messages: (hrr + 1) entries for each of the bgM layers. */
+  __m256i*   var_to_check; /*!< \brief Variable-to-check messages: (hrr + 1) entries for each of the bgM layers. */
+  __m256i*   rotated_v2c;  /*!< \brief To store a rotated version of the variable-to-check messages. */
+
+  uint16_t ls;  /*!< \brief Lifting size. */
+  uint8_t  hrr; /*!< \brief Number of variable nodes in the high-rate region (before lifting). */
+  uint8_t  bgM; /*!< \brief Number of check nodes (before lifting). */
+  uint8_t  bgN; /*!< \brief Number of variable nodes (before lifting). */
+};
+
+/*!
+ * Carries out the actual update of the variable-to-check messages. It basically
+ * consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
+ * \f$\lvert x[i] \rvert = 2^{7}-1 \f$ (our representation of infinity) to
+ * ensure it is properly propagated. Also, the subtraction is saturated between
+ * \f$- clip\f$ and \f$+ clip\f$.
+ * \param[in] x     Minuend: array we subtract from (in practice, the soft bits).
+ * \param[in] y     Subtrahend: array to be subtracted (in practice, the
+ *                  check-to-variable messages).
+ * \param[out] z    Resulting difference array(in practice, the updated
+ *                  variable-to-check messages).
+ * \param[in]  clip The saturation value.
+ * \param[in]  len  The length of the vectors.
+ */
+static void inner_var_to_check_c_avx2(const __m256i* x, const __m256i* y, __m256i* z, uint8_t clip, uint32_t len);
+
+/*!
+ * Rotate the content of an __m256i vector (first input) towards the left by
+ * the number of chars specified by the second input (i.e., the \b imm * 8 most
+ * significant bits become the \b imm * 8 least significant bits).
+ * \param[in]  a    Vector to circularly shift.
+ * \param[in]  imm  The shift order in chars.
+ * \return          The shifted vector.
+ */
+static __m256i _mm256_rotatelli_si256(__m256i a, int imm);
+
+/*!
+ * Rotate the content of an __m256i vector (first input) towards the right by
+ * the number of chars specified by the second input (i.e., the \b imm * 8 least
+ * significant bits become the \b imm * 8 most significant bits).
+ * \param[in]  a    Vector to circularly shift.
+ * \param[in]  imm  The shift order in chars.
+ * \return          The shifted vector.
+ */
+static __m256i _mm256_rotaterli_si256(__m256i a, int imm);
+
+/*!
+ * Rotate the contents of a node towards the left by \b imm chars, that is the
+ * \b imm * 8 most significant bits become the least significant ones.
+ * \param[in]  a    The node to rotate.
+ * \param[in]  imm  The order of the rotation in number of chars.
+ * \param[in]  ls   The size of the node (lifting size).
+ * \return     The rotated node.
+ */
+static __m256i rotate_node_left(__m256i a, int imm, uint16_t ls);
+
+/*!
+ * Rotate the contents of a node towards the right by \b imm chars, that is the
+ * \b imm * 8 least significant bits become the most significant ones.
+ * \param[in]  a    The node to rotate.
+ * \param[in]  imm  The order of the rotation in number of chars.
+ * \param[in]  ls   The size of the node (lifting size).
+ * \return     The rotated node.
+ */
+static __m256i rotate_node_right(__m256i a, int imm, uint16_t ls);
+
+/*!
+ * Scale packed 8-bit integers in \b a by the scaling factor \b sf / #F2I.
+ * \param[in] a   Vector of packed 8-bit integers.
+ * \param[in] sf  Scaling factor.
+ * \return    Vector of packed 8-bit integers with the scaling result.
+ */
+static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf);
+
+/*
+ * Allocates the decoder registers and all their message buffers.
+ * Returns a pointer to the registers or NULL if any allocation fails, in which
+ * case everything allocated so far is released before returning.
+ */
+void* create_ldpc_dec_c_avx2_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
+{
+  const uint8_t  bgK = bgN - bgM;
+  const uint16_t hrr = bgK + 4;
+
+  struct ldpc_regs_c_avx2_flood* vp = srslte_vec_malloc(sizeof(struct ldpc_regs_c_avx2_flood));
+  if (vp == NULL) {
+    return NULL;
+  }
+
+  vp->llrs = srslte_vec_malloc(bgN * sizeof(__m256i));
+  if (vp->llrs == NULL) {
+    goto free_regs;
+  }
+
+  vp->soft_bits = srslte_vec_malloc(bgN * sizeof(bg_node_t));
+  if (vp->soft_bits == NULL) {
+    goto free_llrs;
+  }
+
+  vp->check_to_var = srslte_vec_malloc((hrr + 1) * bgM * sizeof(__m256i));
+  if (vp->check_to_var == NULL) {
+    goto free_soft_bits;
+  }
+
+  vp->var_to_check = srslte_vec_malloc((hrr + 1) * bgM * sizeof(__m256i));
+  if (vp->var_to_check == NULL) {
+    goto free_check_to_var;
+  }
+
+  vp->rotated_v2c = srslte_vec_malloc((hrr + 1) * sizeof(__m256i));
+  if (vp->rotated_v2c == NULL) {
+    goto free_var_to_check;
+  }
+
+  // Everything is in place: store the code dimensions and the scaling factor.
+  vp->bgM = bgM;
+  vp->bgN = bgN;
+  vp->hrr = hrr;
+  vp->ls  = ls;
+
+  vp->scaling_fctr = _mm256_set1_epi16((uint16_t)(scaling_fctr * F2I));
+
+  return vp;
+
+  // Error path: release the buffers in reverse order of allocation.
+free_var_to_check:
+  free(vp->var_to_check);
+free_check_to_var:
+  free(vp->check_to_var);
+free_soft_bits:
+  free(vp->soft_bits);
+free_llrs:
+  free(vp->llrs);
+free_regs:
+  free(vp);
+  return NULL;
+}
+
+/*
+ * Releases the decoder registers and all the buffers they own.
+ * A NULL pointer is accepted and ignored.
+ */
+void delete_ldpc_dec_c_avx2_flood(void* p)
+{
+  struct ldpc_regs_c_avx2_flood* regs = p;
+
+  if (regs == NULL) {
+    return;
+  }
+
+  // Buffers first (reverse order of allocation), then the container itself.
+  free(regs->rotated_v2c);
+  free(regs->var_to_check);
+  free(regs->check_to_var);
+  free(regs->soft_bits);
+  free(regs->llrs);
+  free(regs);
+}
+
+/*
+ * Loads the channel LLRs into the decoder and resets all the messages.
+ * p:    decoder registers (as returned by create_ldpc_dec_c_avx2_flood()).
+ * llrs: input LLRs, read as (bgN - 2) consecutive groups of ls values.
+ * ls:   lifting size; must not exceed SRSLTE_AVX2_B_SIZE since each node is
+ *       stored in a single bg_node_t.
+ * Returns 0 on success, -1 if an argument is invalid.
+ */
+int init_ldpc_dec_c_avx2_flood(void* p, const int8_t* llrs, uint16_t ls)
+{
+  struct ldpc_regs_c_avx2_flood* vp = p;
+  int                            i  = 0;
+  int                            j  = 0;
+
+  if ((p == NULL) || (llrs == NULL)) {
+    return -1;
+  }
+
+  // A larger lifting size would overflow the node and make the padding length
+  // below negative (i.e., a huge unsigned value).
+  if (ls > SRSLTE_AVX2_B_SIZE) {
+    return -1;
+  }
+
+  // the first 2 x LS bits of the codeword are not sent
+  vp->soft_bits[0].v = _mm256_set1_epi8(0);
+  vp->soft_bits[1].v = _mm256_set1_epi8(0);
+  vp->llrs[0]        = _mm256_set1_epi8(0);
+  vp->llrs[1]        = _mm256_set1_epi8(0);
+  for (i = 2; i < vp->bgN; i++) {
+    for (j = 0; j < ls; j++) {
+      vp->soft_bits[i].c[j] = llrs[(i - 2) * ls + j];
+    }
+    // zero-pad the unused part of the node
+    bzero(&(vp->soft_bits[i].c[ls]), (SRSLTE_AVX2_B_SIZE - ls) * sizeof(int8_t));
+    // keep a copy of the a-priori values
+    vp->llrs[i] = vp->soft_bits[i].v;
+  }
+
+  bzero(vp->check_to_var, (vp->hrr + 1) * vp->bgM * sizeof(__m256i));
+  bzero(vp->var_to_check, (vp->hrr + 1) * vp->bgM * sizeof(__m256i));
+  return 0;
+}
+
+/*
+ * Updates the variable-to-check messages of layer i_layer from the current
+ * soft bits and the check-to-variable messages of the same layer.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int update_ldpc_var_to_check_c_avx2_flood(void* p, int i_layer)
+{
+  struct ldpc_regs_c_avx2_flood* vp = p;
+
+  if (vp == NULL) {
+    return -1;
+  }
+
+  // Both message buffers store (hrr + 1) entries per layer.
+  const int      layer_offset = i_layer * (vp->hrr + 1);
+  const __m256i* layer_c2v    = vp->check_to_var + layer_offset;
+  __m256i*       layer_v2c    = vp->var_to_check + layer_offset;
+  const __m256i* soft         = &(vp->soft_bits[0].v);
+
+  // Update the high-rate region.
+  inner_var_to_check_c_avx2(soft, layer_c2v, layer_v2c, infinity7, vp->hrr);
+
+  if (i_layer < 4) {
+    return 0;
+  }
+
+  // Update the extension region: a single node, stored in the last entry of the layer.
+  inner_var_to_check_c_avx2(soft + vp->hrr + i_layer - 4, layer_c2v + vp->hrr, layer_v2c + vp->hrr, infinity7, 1);
+
+  return 0;
+}
+
+/*
+ * Updates the check-to-variable messages of layer i_layer.
+ *
+ * First pass: every connected variable-to-check message is rotated and, per
+ * lifted entry, the smallest magnitude (minp), the second smallest (mins),
+ * the position of the smallest (min_ix) and the parity of the signs (prod)
+ * are accumulated. Second pass: each outgoing message takes magnitude minp
+ * (or mins for the position that provided the minimum), is scaled by the
+ * stored scaling factor, gets its sign and is rotated back.
+ *
+ * p:                 decoder registers.
+ * i_layer:           index of the layer (check node of the base graph).
+ * this_pcm:          rotation applied to each variable node, indexed by variable node.
+ * these_var_indices: variable nodes connected to this check node; the list
+ *                    ends at the first -1 or after MAX_CNCT entries.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int update_ldpc_check_to_var_c_avx2_flood(void*           p,
+                                          int             i_layer,
+                                          const uint16_t* this_pcm,
+                                          const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_c_avx2_flood* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  int i = 0;
+
+  uint16_t shift      = 0;
+  int      i_v2c_base = 0;
+
+  __m256i* this_rotated_v2c = NULL;
+
+  __m256i this_abs_v2c_epi8;
+  __m256i minp_v2c_epi8 = _mm256_set1_epi8(INT8_MAX);
+  __m256i mins_v2c_epi8 = _mm256_set1_epi8(INT8_MAX);
+  __m256i prod_v2c_epi8 = _mm256_set1_epi8(0);
+  __m256i mask_sign_epi8;
+  __m256i mask_min_epi8;
+  __m256i help_min_epi8;
+  // Must start from a defined value since it is an input of the first blend.
+  // Entries that never get an index keep minp == mins == INT8_MAX, hence the
+  // actual initial value does not affect the result.
+  __m256i min_ix_epi8 = _mm256_set1_epi8(0);
+  __m256i current_ix_epi8;
+
+  int8_t current_var_index = 0;
+
+  __m256i* this_var_to_check = vp->var_to_check + i_layer * (vp->hrr + 1);
+
+  // The bound is checked before reading the index list, so that a row without
+  // the -1 terminator is never read past its MAX_CNCT entries.
+  for (i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
+
+    current_ix_epi8 = _mm256_set1_epi8((int8_t)i);
+
+    this_rotated_v2c  = vp->rotated_v2c + i;
+    *this_rotated_v2c = rotate_node_right(this_var_to_check[i_v2c_base], shift, vp->ls);
+    // mask_sign is 1 if this_rotated_v2c is strictly negative
+    mask_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, *this_rotated_v2c);
+    prod_v2c_epi8  = _mm256_xor_si256(prod_v2c_epi8, mask_sign_epi8);
+
+    this_abs_v2c_epi8 = _mm256_abs_epi8(*this_rotated_v2c);
+    // mask_min is 1 if this_abs_v2c is strictly smaller than minp_v2c
+    mask_min_epi8 = _mm256_cmpgt_epi8(minp_v2c_epi8, this_abs_v2c_epi8);
+    // help_min is the larger between the new magnitude and the old minimum
+    help_min_epi8 = _mm256_blendv_epi8(this_abs_v2c_epi8, minp_v2c_epi8, mask_min_epi8);
+    minp_v2c_epi8 = _mm256_blendv_epi8(minp_v2c_epi8, this_abs_v2c_epi8, mask_min_epi8);
+    min_ix_epi8   = _mm256_blendv_epi8(min_ix_epi8, current_ix_epi8, mask_min_epi8);
+
+    // mask_min is 1 if this_abs_v2c is strictly smaller than mins_v2c
+    mask_min_epi8 = _mm256_cmpgt_epi8(mins_v2c_epi8, this_abs_v2c_epi8);
+    mins_v2c_epi8 = _mm256_blendv_epi8(mins_v2c_epi8, help_min_epi8, mask_min_epi8);
+  }
+
+  __m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1);
+
+  __m256i mask_is_min_epi8;
+  __m256i this_c2v_epi8;
+  __m256i help_c2v_epi8;
+  __m256i final_sign_epi8;
+
+  for (i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
+
+    this_rotated_v2c = vp->rotated_v2c + i;
+    // final_sign is 1 if the product of the signs of all the other messages is negative
+    final_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, *this_rotated_v2c);
+    final_sign_epi8 = _mm256_xor_si256(final_sign_epi8, prod_v2c_epi8);
+
+    // the position that provided the minimum gets the second minimum
+    current_ix_epi8  = _mm256_set1_epi8((int8_t)i);
+    mask_is_min_epi8 = _mm256_cmpeq_epi8(current_ix_epi8, min_ix_epi8);
+    this_c2v_epi8    = _mm256_blendv_epi8(minp_v2c_epi8, mins_v2c_epi8, mask_is_min_epi8);
+    this_c2v_epi8    = _mm256_scalei_epi8(this_c2v_epi8, vp->scaling_fctr);
+    // negate the entries whose final sign is negative
+    help_c2v_epi8 = _mm256_sign_epi8(this_c2v_epi8, final_sign_epi8);
+    this_c2v_epi8 = _mm256_blendv_epi8(this_c2v_epi8, help_c2v_epi8, final_sign_epi8);
+
+    this_check_to_var[i_v2c_base] = rotate_node_left(this_c2v_epi8, shift, vp->ls);
+  }
+
+  return 0;
+}
+
+/*
+ * Recomputes the soft bits: each one restarts from its a-priori value and
+ * accumulates (with saturation) the check-to-variable messages of all the
+ * layers it is connected to.
+ *
+ * p:                 decoder registers.
+ * these_var_indices: for each of the bgM layers, the list of connected
+ *                    variable nodes; a list ends at the first -1 or after
+ *                    MAX_CNCT entries.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int update_ldpc_soft_bits_c_avx2_flood(void* p, const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_c_avx2_flood* vp = p;
+  if (p == NULL) {
+    return -1;
+  }
+
+  __m256i* this_check_to_var = NULL;
+
+  int    i                 = 0;
+  int    i_layer           = 0;
+  int    i_bit_tmp_base    = 0;
+  int8_t current_var_index = 0;
+
+  __m256i tmp_epi8;
+  __m256i mask_epi8;
+
+  // restart from the a-priori values
+  for (i = 0; i < vp->bgN; i++) {
+    vp->soft_bits[i].v = vp->llrs[i];
+  }
+
+  for (i_layer = 0; i_layer < vp->bgM; i_layer++) {
+    this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1);
+
+    // The bound is checked before reading the index list, so that a row
+    // without the -1 terminator is never read past its MAX_CNCT entries.
+    for (i = 0; i < MAX_CNCT; i++) {
+      current_var_index = these_var_indices[i_layer][i];
+      if (current_var_index == -1) {
+        break;
+      }
+
+      i_bit_tmp_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
+
+      tmp_epi8 = _mm256_adds_epi8(this_check_to_var[i_bit_tmp_base], vp->soft_bits[current_var_index].v);
+
+      // tmp = (tmp > infty7) ? infty8 : tmp
+      mask_epi8 = _mm256_cmpgt_epi8(tmp_epi8, infty7_epi8);
+      tmp_epi8  = _mm256_blendv_epi8(tmp_epi8, infty8_epi8, mask_epi8);
+
+      // tmp = (tmp < -infty7) ? -infty8 : tmp
+      mask_epi8                          = _mm256_cmpgt_epi8(neg_infty7_epi8, tmp_epi8);
+      vp->soft_bits[current_var_index].v = _mm256_blendv_epi8(tmp_epi8, neg_infty8_epi8, mask_epi8);
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Takes hard decisions on the first liftK / ls nodes of soft bits: an output
+ * entry is 1 when the corresponding soft bit is strictly negative and 0
+ * otherwise.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int extract_ldpc_message_c_avx2_flood(void* p, uint8_t* message, uint16_t liftK)
+{
+  const struct ldpc_regs_c_avx2_flood* vp = p;
+
+  if (vp == NULL) {
+    return -1;
+  }
+
+  const uint16_t ls      = vp->ls;
+  const int      n_nodes = liftK / ls;
+
+  // The output is written sequentially, ls entries per node.
+  uint8_t* out = message;
+  for (int i_node = 0; i_node < n_nodes; i_node++) {
+    const int8_t* soft = vp->soft_bits[i_node].c;
+    for (int i_bit = 0; i_bit < ls; i_bit++) {
+      *out++ = (soft[i_bit] < 0);
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Computes z = x - y entry-wise over len 256-bit vectors. The difference is
+ * saturated to [-clip, +clip], except that entries of x that are not strictly
+ * inside (-infty8, +infty8) force the corresponding output to -infty8/+infty8
+ * (infty8_epi8 and neg_infty8_epi8 are defined outside this block).
+ */
+static void
+inner_var_to_check_c_avx2(const __m256i* x, const __m256i* y, __m256i* z, const uint8_t clip, const uint32_t len)
+{
+  const __m256i upper_epi8 = _mm256_set1_epi8(clip);
+  const __m256i lower_epi8 = _mm256_set1_epi8((char)(-clip));
+
+  for (uint32_t k = 0; k < len; k++) {
+    const __m256i minuend = x[k];
+
+    // saturated difference, then clipped from above and from below
+    __m256i diff = _mm256_subs_epi8(minuend, y[k]);
+    diff         = _mm256_blendv_epi8(diff, upper_epi8, _mm256_cmpgt_epi8(diff, upper_epi8));
+    diff         = _mm256_blendv_epi8(diff, lower_epi8, _mm256_cmpgt_epi8(lower_epi8, diff));
+
+    // infinite inputs must produce infinite outputs:
+    // keep diff only where x < infinity ...
+    diff = _mm256_blendv_epi8(infty8_epi8, diff, _mm256_cmpgt_epi8(infty8_epi8, minuend));
+    // ... and only where x > -infinity
+    z[k] = _mm256_blendv_epi8(neg_infty8_epi8, diff, _mm256_cmpgt_epi8(minuend, neg_infty8_epi8));
+  }
+}
+
+/*
+ * Rotates the 32 bytes of a towards the left (towards the most significant
+ * end) by imm bytes. The rotation is split into a rotation by whole 64-bit
+ * blocks (imm / 8) and a shift by the remaining bytes (imm % 8) inside each
+ * block, with the bytes shifted out of a block carried into the next one.
+ */
+static __m256i _mm256_rotatelli_si256(__m256i a, int imm)
+{
+  // a rotated left by 0, 1, 2 and 3 whole 64-bit blocks
+  const __m256i by_blocks[4] = {
+      a,                                 // blocks 0 - 1 - 2 - 3
+      _mm256_permute4x64_epi64(a, 147),  // 3 - 0 - 1 - 2
+      _mm256_permute4x64_epi64(a, 78),   // 2 - 3 - 0 - 1
+      _mm256_permute4x64_epi64(a, 57),   // 1 - 2 - 3 - 0
+  };
+
+  const int n_blocks  = imm / 8;            // whole-block part of the rotation
+  const int n_bytes   = imm % 8;            // remaining bytes inside a block
+  const int carry_src = (n_blocks + 1) % 4; // rotation providing the carry-over
+
+  // shift left each block
+  const __m256i shifted = _mm256_slli_epi64(by_blocks[n_blocks], n_bytes * 8);
+  // bytes carried over from the neighbouring block
+  const __m256i carried = _mm256_srli_epi64(by_blocks[carry_src], (8 - n_bytes) * 8);
+
+  return _mm256_xor_si256(shifted, carried);
+}
+
+/*
+ * Rotates the 32 bytes of a towards the right (towards the least significant
+ * end) by imm bytes. The rotation is split into a rotation by whole 64-bit
+ * blocks (imm / 8) and a shift by the remaining bytes (imm % 8) inside each
+ * block, with the bytes shifted out of a block carried into the next one.
+ */
+static __m256i _mm256_rotaterli_si256(__m256i a, int imm)
+{
+  // a rotated right by 0, 1, 2 and 3 whole 64-bit blocks
+  const __m256i by_blocks[4] = {
+      a,                                 // blocks 0 - 1 - 2 - 3
+      _mm256_permute4x64_epi64(a, 57),   // 1 - 2 - 3 - 0
+      _mm256_permute4x64_epi64(a, 78),   // 2 - 3 - 0 - 1
+      _mm256_permute4x64_epi64(a, 147),  // 3 - 0 - 1 - 2
+  };
+
+  const int n_blocks  = imm / 8;            // whole-block part of the rotation
+  const int n_bytes   = imm % 8;            // remaining bytes inside a block
+  const int carry_src = (n_blocks + 1) % 4; // rotation providing the carry-over
+
+  // shift right each block
+  const __m256i shifted = _mm256_srli_epi64(by_blocks[n_blocks], n_bytes * 8);
+  // bytes carried over from the neighbouring block
+  const __m256i carried = _mm256_slli_epi64(by_blocks[carry_src], (8 - n_bytes) * 8);
+
+  return _mm256_xor_si256(shifted, carried);
+}
+
+/*
+ * Rotates a node made of ls one-byte entries towards the left by imm bytes.
+ * The result combines two whole-register rotations of a: one to the left by
+ * imm bytes and one to the right by (ls - imm) bytes, each filtered by a byte
+ * mask indexed by imm.
+ * NOTE(review): mask_most_epi8 / mask_least_epi8 are defined outside this
+ * block; presumably they select complementary byte ranges -- confirm there.
+ */
+static __m256i rotate_node_left(__m256i a, int imm, uint16_t ls)
+{
+  if (imm == 0) {
+    return a;
+  }
+
+  // Rotation of the whole 256-bit register.
+  __m256i step1 = _mm256_rotatelli_si256(a, imm);
+  if (ls == SRSLTE_AVX2_B_SIZE) {
+    // The node fills the register: the register rotation is the node rotation.
+    return step1;
+  }
+
+  // Opposite rotation, providing the entries that wrap around inside the node.
+  __m256i step2 = _mm256_rotaterli_si256(a, ls - imm);
+
+  step1 = _mm256_and_si256(step1, mask_most_epi8[imm]);
+  step2 = _mm256_and_si256(step2, mask_least_epi8[imm]);
+
+  return _mm256_xor_si256(step1, step2);
+}
+
+/*
+ * Rotates a node made of ls one-byte entries towards the right by imm bytes.
+ * Built from a whole-register right rotation by imm bytes and a whole-register
+ * left rotation by (ls - imm) bytes, each filtered by a byte mask indexed by
+ * (ls - imm).
+ * NOTE(review): the masks are defined outside this block -- confirm their layout.
+ */
+static __m256i rotate_node_right(__m256i a, int imm, uint16_t ls)
+{
+  if (imm == 0) {
+    return a;
+  }
+
+  const __m256i direct = _mm256_rotaterli_si256(a, imm);
+  if (ls == SRSLTE_AVX2_B_SIZE) {
+    // The node fills the register: nothing to patch up.
+    return direct;
+  }
+
+  const int     complement = ls - imm;
+  const __m256i wrapped    = _mm256_rotatelli_si256(a, complement);
+
+  const __m256i kept_direct  = _mm256_and_si256(direct, mask_least_epi8[complement]);
+  const __m256i kept_wrapped = _mm256_and_si256(wrapped, mask_most_epi8[complement]);
+
+  return _mm256_xor_si256(kept_direct, kept_wrapped);
+}
+
+/*
+ * Scales packed 8-bit values by sf: the two bytes of every 16-bit lane are
+ * widened to 16 bits, multiplied by sf keeping only the upper 16 bits of each
+ * product (_mm256_mulhi_epu16), and packed back into their byte positions.
+ * NOTE(review): mask_even_epi8 is defined outside this block; presumably it
+ * keeps the low byte of each 16-bit lane -- confirm there.
+ */
+static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf)
+{
+  // Low byte of each lane (masked) and high byte of each lane (shifted down).
+  const __m256i low_bytes  = _mm256_and_si256(a, mask_even_epi8);
+  const __m256i high_bytes = _mm256_srli_epi16(a, 8);
+
+  const __m256i scaled_low  = _mm256_mulhi_epu16(low_bytes, sf);
+  const __m256i scaled_high = _mm256_mulhi_epu16(high_bytes, sf);
+
+  // Move the scaled high bytes back to the upper half of their lanes and merge.
+  return _mm256_xor_si256(scaled_low, _mm256_slli_epi16(scaled_high, 8));
+}
+
+#endif // LV_HAVE_AVX2
--- a/lib/src/phy/fec/ldpc/ldpc_dec_c_avx2long.c
+++ b/lib/src/phy/fec/ldpc/ldpc_dec_c_avx2long.c
@ -0,0 +1,541 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_c_avx2long.c
+ * \brief Definition of the LDPC decoder inner functions working
+ *    with 8-bit integer-valued LLRs (AVX2 version, large lifting size).
+ *
+ * Even if the inner representation is based on 8 bits, check-to-variable and
+ * variable-to-check messages are actually represented with 7 bits, the
+ * remaining bit is used to represent infinity.
+ *
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include "../utils_avx2.h"
+#include "ldpc_dec_all.h"
+#include "srslte/phy/fec/ldpc/base_graph.h"
+#include "srslte/phy/utils/vector.h"
+
+#ifdef LV_HAVE_AVX2
+
+#include <immintrin.h>
+
+#include "ldpc_avx2_consts.h"
+
+#define F2I 65535 /*!< \brief Used for float to int conversion---float f is stored as (int)(f*F2I). */
+
+/*!
+ * \brief Represents a node of the base factor graph.
+ *
+ * The union gives two views of the same storage: byte-wise access to the
+ * individual lifted nodes (member \b c) and access to all of them at once as
+ * a single AVX2 register (member \b v).
+ */
+typedef union bg_node_t {
+  int8_t  c[SRSLTE_AVX2_B_SIZE]; /*!< Each base node may contain up to \ref SRSLTE_AVX2_B_SIZE lifted nodes. */
+  __m256i v;                     /*!< All the lifted nodes of the current base node as a 256-bit line. */
+} bg_node_t;
+
+/*!
+ * \brief Maximum message magnitude.
+ * Messages use a 7-bit quantization. Soft bits use the remaining bit to denote infinity.
+ * The value is 2^6 - 1 = 63; it is passed as the clipping level when the
+ * variable-to-check messages are updated.
+ */
+static const int8_t infinity7 = (1U << 6U) - 1;
+
+/*!
+ * \brief Inner registers for the LDPC decoder that works with 8-bit integer-valued LLRs.
+ *
+ * Each node of the base graph is split into \b n_subnodes consecutive 256-bit
+ * subnodes. All the buffers are allocated by create_ldpc_dec_c_avx2long() and
+ * released by delete_ldpc_dec_c_avx2long().
+ */
+struct ldpc_regs_c_avx2long {
+  __m256i scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
+
+  bg_node_t* soft_bits;    /*!< \brief A-posteriori log-likelihood ratios. */
+  __m256i*   check_to_var; /*!< \brief Check-to-variable messages (stored for all the bgM layers). */
+  __m256i*   var_to_check; /*!< \brief Variable-to-check messages (storage for a single layer). */
+
+  __m256i* rotated_v2c;   /*!< \brief To store a rotated version of the variable-to-check messages. */
+  __m256i* this_c2v_epi8; /*!< \brief Helper register for the current c2v node. */
+  __m256i* minp_v2c_epi8; /*!< \brief Helper register for the minimum v2c message. */
+  __m256i* mins_v2c_epi8; /*!< \brief Helper register for the second minimum v2c message. */
+  __m256i* prod_v2c_epi8; /*!< \brief Helper register for the sign of the product of all v2c messages. */
+  __m256i* min_ix_epi8;   /*!< \brief Helper register for the index of the minimum v2c message. */
+
+  uint16_t ls;  /*!< \brief Lifting size. */
+  uint8_t  hrr; /*!< \brief Number of variable nodes in the high-rate region (before lifting). */
+  uint8_t  bgM; /*!< \brief Number of check nodes (before lifting). */
+  uint8_t  bgN; /*!< \brief Number of variable nodes (before lifting). */
+
+  uint8_t n_subnodes; /*!< \brief Number of subnodes: ls / SRSLTE_AVX2_B_SIZE, rounded up. */
+};
+
+/*!
+ * Carries out the actual update of the variable-to-check messages. It basically
+ * consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
+ * \f$\lvert x[i] \rvert = 2^{7}-1 \f$ (our representation of infinity) to
+ * ensure it is properly propagated. Also, the subtraction is saturated between
+ * \f$- clip\f$ and \f$+ clip\f$.
+ * \param[in] x     Minuend: array we subtract from (in practice, the soft bits).
+ * \param[in] y     Subtrahend: array to be subtracted (in practice, the
+ *                  check-to-variable messages).
+ * \param[out] z    Resulting difference array(in practice, the updated
+ *                  variable-to-check messages).
+ * \param[in]  clip The saturation value.
+ * \param[in]  len  The length of the vectors.
+ */
+static void inner_var_to_check_c_avx2long(const __m256i* x, const __m256i* y, __m256i* z, uint8_t clip, uint32_t len);
+
+/*!
+ * Rotate the contents of a node towards the right by \b shift chars, that is the
+ * \b shift * 8 most significant bits become the least significant ones.
+ * \param[in]  in_256i    The node to rotate.
+ * \param[out] out        The rotated node.
+ * \param[in]  shift      The order of the rotation in number of chars.
+ * \param[in]  ls         The size of the node (lifting size).
+ * \param[in]  n_subnodes The number of subnodes in each node.
+ * \note       The rotated node is written to \b out; the function itself returns nothing.
+ */
+static void rotate_node_right(const __m256i* in_256i, __m256i* out, uint16_t shift, uint16_t ls, int8_t n_subnodes);
+
+/*!
+ * Scale packed 8-bit integers in \b a by the scaling factor \b sf / #F2I.
+ * \param[in] a   Vector of packed 8-bit integers.
+ * \param[in] sf  Scaling factor.
+ * \return    Vector of packed 8-bit integers with the scaling result.
+ */
+static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf);
+
+/*
+ * Allocates the decoder registers and all their message/helper buffers.
+ * Returns a pointer to the registers or NULL if any allocation fails, in which
+ * case everything allocated so far is released before returning.
+ */
+void* create_ldpc_dec_c_avx2long(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
+{
+  const uint8_t  bgK = bgN - bgM;
+  const uint16_t hrr = bgK + 4;
+
+  struct ldpc_regs_c_avx2long* vp = srslte_vec_malloc(sizeof(struct ldpc_regs_c_avx2long));
+  if (vp == NULL) {
+    return NULL;
+  }
+
+  // compute number of subnodes (the last one may be only partially used)
+  const int left_out   = ls % SRSLTE_AVX2_B_SIZE;
+  const int n_subnodes = ls / SRSLTE_AVX2_B_SIZE + (left_out > 0);
+
+  vp->soft_bits = srslte_vec_malloc(bgN * n_subnodes * sizeof(bg_node_t));
+  if (vp->soft_bits == NULL) {
+    goto free_regs;
+  }
+
+  vp->check_to_var = srslte_vec_malloc((hrr + 1) * bgM * n_subnodes * sizeof(__m256i));
+  if (vp->check_to_var == NULL) {
+    goto free_soft_bits;
+  }
+
+  vp->var_to_check = srslte_vec_malloc((hrr + 1) * n_subnodes * sizeof(__m256i));
+  if (vp->var_to_check == NULL) {
+    goto free_check_to_var;
+  }
+
+  vp->minp_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i));
+  if (vp->minp_v2c_epi8 == NULL) {
+    goto free_var_to_check;
+  }
+
+  vp->mins_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i));
+  if (vp->mins_v2c_epi8 == NULL) {
+    goto free_minp;
+  }
+
+  vp->prod_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i));
+  if (vp->prod_v2c_epi8 == NULL) {
+    goto free_mins;
+  }
+
+  vp->min_ix_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i));
+  if (vp->min_ix_epi8 == NULL) {
+    goto free_prod;
+  }
+
+  vp->rotated_v2c = srslte_vec_malloc((hrr + 1) * n_subnodes * sizeof(__m256i));
+  if (vp->rotated_v2c == NULL) {
+    goto free_min_ix;
+  }
+
+  vp->this_c2v_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i));
+  if (vp->this_c2v_epi8 == NULL) {
+    goto free_rotated_v2c;
+  }
+
+  // Everything is in place: store the code dimensions and the scaling factor.
+  vp->bgM = bgM;
+  vp->bgN = bgN;
+  vp->hrr = hrr;
+  vp->ls  = ls;
+
+  vp->n_subnodes = n_subnodes;
+
+  vp->scaling_fctr = _mm256_set1_epi16((uint16_t)(scaling_fctr * F2I));
+
+  return vp;
+
+  // Error path: release the buffers in reverse order of allocation.
+free_rotated_v2c:
+  free(vp->rotated_v2c);
+free_min_ix:
+  free(vp->min_ix_epi8);
+free_prod:
+  free(vp->prod_v2c_epi8);
+free_mins:
+  free(vp->mins_v2c_epi8);
+free_minp:
+  free(vp->minp_v2c_epi8);
+free_var_to_check:
+  free(vp->var_to_check);
+free_check_to_var:
+  free(vp->check_to_var);
+free_soft_bits:
+  free(vp->soft_bits);
+free_regs:
+  free(vp);
+  return NULL;
+}
+
+/*
+ * Releases the decoder registers and all the buffers they own.
+ * A NULL pointer is accepted and ignored.
+ */
+void delete_ldpc_dec_c_avx2long(void* p)
+{
+  struct ldpc_regs_c_avx2long* regs = p;
+
+  if (regs == NULL) {
+    return;
+  }
+
+  // Buffers first (reverse order of allocation), then the container itself.
+  free(regs->this_c2v_epi8);
+  free(regs->rotated_v2c);
+  free(regs->min_ix_epi8);
+  free(regs->prod_v2c_epi8);
+  free(regs->mins_v2c_epi8);
+  free(regs->minp_v2c_epi8);
+  free(regs->var_to_check);
+  free(regs->check_to_var);
+  free(regs->soft_bits);
+  free(regs);
+}
+
+/*
+ * Loads the channel LLRs into the soft bits and resets all the messages.
+ * The first two nodes are set to zero; each following node receives ls
+ * consecutive input values, spread over its subnodes, and the unused tail of
+ * its last subnode is zero-padded.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int init_ldpc_dec_c_avx2long(void* p, const int8_t* llrs, uint16_t ls)
+{
+  struct ldpc_regs_c_avx2long* vp = p;
+
+  if (vp == NULL) {
+    return -1;
+  }
+
+  const int n_sub  = vp->n_subnodes;
+  int       i_sub  = 0;
+  int       i_byte = 0;
+
+  // nodes 0 and 1 carry no input value
+  for (i_sub = 0; i_sub < n_sub; i_sub++) {
+    vp->soft_bits[i_sub].v         = _mm256_set1_epi8(0);
+    vp->soft_bits[n_sub + i_sub].v = _mm256_set1_epi8(0);
+  }
+
+  for (int i_node = 2; i_node < vp->bgN; i_node++) {
+    bg_node_t*    node      = vp->soft_bits + i_node * n_sub;
+    const int8_t* node_llrs = llrs + (i_node - 2) * ls;
+
+    for (i_sub = 0; i_sub < n_sub; i_sub++) {
+      const int offset = i_sub * SRSLTE_AVX2_B_SIZE;
+      for (i_byte = 0; (i_byte < SRSLTE_AVX2_B_SIZE) && (offset + i_byte < ls); i_byte++) {
+        node[i_sub].c[i_byte] = node_llrs[offset + i_byte];
+      }
+    }
+    // i_byte is the number of entries written to the last subnode: clear the rest
+    bzero(&(node[i_sub - 1].c[i_byte]), (SRSLTE_AVX2_B_SIZE - i_byte) * sizeof(int8_t));
+  }
+
+  bzero(vp->check_to_var, (vp->hrr + 1) * vp->bgM * vp->n_subnodes * sizeof(__m256i));
+  bzero(vp->var_to_check, (vp->hrr + 1) * vp->n_subnodes * sizeof(__m256i));
+  return 0;
+}
+
+/*
+ * Updates the variable-to-check messages for layer i_layer from the current
+ * soft bits and the check-to-variable messages of that layer. The results are
+ * written to the single-layer var_to_check buffer.
+ * Returns 0 on success, -1 if p is NULL.
+ */
+int update_ldpc_var_to_check_c_avx2long(void* p, int i_layer)
+{
+  struct ldpc_regs_c_avx2long* vp = p;
+
+  if (vp == NULL) {
+    return -1;
+  }
+
+  const int      n_sub     = vp->n_subnodes;
+  const int      hrr_len   = vp->hrr * n_sub; // subnodes in the high-rate region
+  const __m256i* soft      = &(vp->soft_bits[0].v);
+  const __m256i* layer_c2v = vp->check_to_var + i_layer * (vp->hrr + 1) * n_sub;
+
+  // Update the high-rate region.
+  inner_var_to_check_c_avx2long(soft, layer_c2v, vp->var_to_check, infinity7, hrr_len);
+
+  if (i_layer < 4) {
+    return 0;
+  }
+
+  // Update the extension region: one node, stored right after the high-rate region.
+  inner_var_to_check_c_avx2long(
+      soft + (vp->hrr + i_layer - 4) * n_sub, layer_c2v + hrr_len, vp->var_to_check + hrr_len, infinity7, n_sub);
+
+  return 0;
+}
+
+/*!
+ * Updates the check-to-variable messages of one layer.
+ *
+ * A first pass over the variable nodes connected to the layer rotates their
+ * variable-to-check messages and accumulates, lane by lane, the smallest
+ * magnitude, the second smallest magnitude, the position of the smallest one
+ * and the XOR of all sign masks. A second pass builds each outgoing message
+ * from the smallest magnitude among the *other* connections, scales it by
+ * \c scaling_fctr, applies the sign of the other messages and rotates the
+ * result back into \c check_to_var.
+ *
+ * \param[in,out] p                 The decoder registers.
+ * \param[in]     i_layer           Index of the layer whose check-to-variable messages are written.
+ * \param[in]     this_pcm          Rotation (shift) to apply, indexed by variable node.
+ * \param[in]     these_var_indices Indices of the variable nodes connected to this layer. The list
+ *                                  ends at the first -1 or after \c MAX_CNCT entries.
+ * \return 0 on success, -1 if \c p is NULL.
+ */
+int update_ldpc_check_to_var_c_avx2long(void*           p,
+                                        int             i_layer,
+                                        const uint16_t* this_pcm,
+                                        const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_c_avx2long* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  int i = 0;
+  int j = 0;
+
+  uint16_t shift             = 0;
+  int      i_v2c_base        = 0;
+  int8_t   current_var_index = 0;
+
+  __m256i* this_rotated_v2c = NULL;
+
+  __m256i this_abs_v2c_epi8;
+  __m256i mask_sign_epi8;
+  __m256i mask_min_epi8;
+  __m256i help_min_epi8;
+  __m256i current_ix_epi8;
+
+  for (j = 0; j < vp->n_subnodes; j++) {
+    vp->minp_v2c_epi8[j] = _mm256_set1_epi8(INT8_MAX);
+    vp->mins_v2c_epi8[j] = _mm256_set1_epi8(INT8_MAX);
+    vp->prod_v2c_epi8[j] = _mm256_set1_epi8(0);
+    // Connection positions are never negative: -1 marks "no minimum recorded yet", so that
+    // lanes in which no magnitude falls below INT8_MAX do not read an indeterminate index.
+    vp->min_ix_epi8[j] = _mm256_set1_epi8(-1);
+  }
+
+  // First pass: rotate the incoming messages and collect minima and signs.
+  for (i = 0; i < MAX_CNCT; i++) {
+    // The index is fetched only after the bound check: a row with MAX_CNCT connections
+    // carries no -1 terminator and element MAX_CNCT does not belong to the row.
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
+    i_v2c_base *= vp->n_subnodes;
+
+    current_ix_epi8 = _mm256_set1_epi8((int8_t)i);
+
+    this_rotated_v2c = vp->rotated_v2c + i * vp->n_subnodes;
+    rotate_node_right(vp->var_to_check + i_v2c_base, this_rotated_v2c, shift, vp->ls, vp->n_subnodes);
+
+    for (j = 0; j < vp->n_subnodes; j++) {
+      // mask_sign is 1 if this_v2c_epi8 is strictly negative
+      mask_sign_epi8       = _mm256_cmpgt_epi8(zero_epi8, this_rotated_v2c[j]);
+      vp->prod_v2c_epi8[j] = _mm256_xor_si256(vp->prod_v2c_epi8[j], mask_sign_epi8);
+
+      this_abs_v2c_epi8 = _mm256_abs_epi8(this_rotated_v2c[j]);
+      // mask_min is 1 if this_abs_v2c is strictly smaller than minp_v2c
+      mask_min_epi8 = _mm256_cmpgt_epi8(vp->minp_v2c_epi8[j], this_abs_v2c_epi8);
+      // candidate for the second minimum: the old minimum if it has just been beaten,
+      // the current magnitude otherwise
+      help_min_epi8        = _mm256_blendv_epi8(this_abs_v2c_epi8, vp->minp_v2c_epi8[j], mask_min_epi8);
+      vp->minp_v2c_epi8[j] = _mm256_blendv_epi8(vp->minp_v2c_epi8[j], this_abs_v2c_epi8, mask_min_epi8);
+      vp->min_ix_epi8[j]   = _mm256_blendv_epi8(vp->min_ix_epi8[j], current_ix_epi8, mask_min_epi8);
+
+      // mask_min is 1 if this_abs_v2c is strictly smaller than mins_v2c
+      mask_min_epi8        = _mm256_cmpgt_epi8(vp->mins_v2c_epi8[j], this_abs_v2c_epi8);
+      vp->mins_v2c_epi8[j] = _mm256_blendv_epi8(vp->mins_v2c_epi8[j], help_min_epi8, mask_min_epi8);
+    }
+  }
+
+  __m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1) * vp->n_subnodes;
+
+  __m256i mask_is_min_epi8;
+  __m256i help_c2v_epi8;
+  __m256i final_sign_epi8;
+
+  // Second pass: build, scale, sign and rotate back the outgoing messages.
+  for (i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
+    i_v2c_base *= vp->n_subnodes;
+
+    this_rotated_v2c = vp->rotated_v2c + i * vp->n_subnodes;
+    current_ix_epi8  = _mm256_set1_epi8((int8_t)i);
+
+    for (j = 0; j < vp->n_subnodes; j++) {
+      // final_sign is all ones where the product of the *other* messages is negative
+      final_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, this_rotated_v2c[j]);
+      final_sign_epi8 = _mm256_xor_si256(final_sign_epi8, vp->prod_v2c_epi8[j]);
+
+      // the connection holding the minimum receives the second minimum instead
+      mask_is_min_epi8     = _mm256_cmpeq_epi8(current_ix_epi8, vp->min_ix_epi8[j]);
+      vp->this_c2v_epi8[j] = _mm256_blendv_epi8(vp->minp_v2c_epi8[j], vp->mins_v2c_epi8[j], mask_is_min_epi8);
+      vp->this_c2v_epi8[j] = _mm256_scalei_epi8(vp->this_c2v_epi8[j], vp->scaling_fctr);
+      // negate only the lanes flagged by final_sign, leave the others untouched
+      help_c2v_epi8        = _mm256_sign_epi8(vp->this_c2v_epi8[j], final_sign_epi8);
+      vp->this_c2v_epi8[j] = _mm256_blendv_epi8(vp->this_c2v_epi8[j], help_c2v_epi8, final_sign_epi8);
+    }
+    // rotating right LS - shift positions is the same as rotating left shift positions
+    rotate_node_right(vp->this_c2v_epi8, this_check_to_var + i_v2c_base, vp->ls - shift, vp->ls, vp->n_subnodes);
+  }
+
+  return 0;
+}
+
+/*!
+ * Updates the soft bits of the variable nodes connected to one layer as the
+ * saturated sum of the layer's check-to-variable message and the stored
+ * variable-to-check message. Sums above \c infty7_epi8 are replaced by
+ * \c infty8_epi8 and sums below \c neg_infty7_epi8 by \c neg_infty8_epi8.
+ *
+ * \param[in,out] p                 The decoder registers.
+ * \param[in]     i_layer           Index of the layer whose messages are used.
+ * \param[in]     these_var_indices Indices of the variable nodes connected to this layer. The list
+ *                                  ends at the first -1 or after \c MAX_CNCT entries.
+ * \return 0 on success, -1 if \c p is NULL.
+ */
+int update_ldpc_soft_bits_c_avx2long(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_c_avx2long* vp = p;
+  if (p == NULL) {
+    return -1;
+  }
+
+  int j = 0;
+
+  __m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1) * vp->n_subnodes;
+
+  int i_bit_tmp_base = 0;
+  int i_bit_subnode  = 0;
+
+  __m256i tmp_epi8;
+  __m256i mask_epi8;
+
+  int8_t current_var_index         = 0;
+  int    current_var_index_subnode = 0;
+
+  for (int i = 0; i < MAX_CNCT; i++) {
+    // The index is fetched only after the bound check: a row with MAX_CNCT connections
+    // carries no -1 terminator and element MAX_CNCT does not belong to the row.
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+
+    current_var_index_subnode = current_var_index * vp->n_subnodes;
+    // Variable nodes beyond the high-rate region all use message slot number hrr.
+    i_bit_tmp_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
+
+    for (j = 0; j < vp->n_subnodes; j++) {
+      i_bit_subnode = i_bit_tmp_base * vp->n_subnodes + j;
+
+      tmp_epi8 = _mm256_adds_epi8(this_check_to_var[i_bit_subnode], vp->var_to_check[i_bit_subnode]);
+
+      // promote everything above +infty7 to +infty8 ...
+      mask_epi8 = _mm256_cmpgt_epi8(tmp_epi8, infty7_epi8);
+      tmp_epi8  = _mm256_blendv_epi8(tmp_epi8, infty8_epi8, mask_epi8);
+
+      // ... and everything below -infty7 to -infty8
+      mask_epi8 = _mm256_cmpgt_epi8(neg_infty7_epi8, tmp_epi8);
+
+      vp->soft_bits[current_var_index_subnode + j].v = _mm256_blendv_epi8(tmp_epi8, neg_infty8_epi8, mask_epi8);
+    }
+  }
+
+  return 0;
+}
+
+/*!
+ * Writes the hard decisions of the first \c liftK / \c ls nodes to \b message:
+ * each output byte is 1 if the corresponding soft bit is negative, 0 otherwise.
+ *
+ * \param[in]  p       The decoder registers.
+ * \param[out] message Destination of the hard bits, one per byte.
+ * \param[in]  liftK   Number of bits to extract (a whole number of nodes is processed).
+ * \return 0 on success, -1 if \c p is NULL.
+ */
+int extract_ldpc_message_c_avx2long(void* p, uint8_t* message, uint16_t liftK)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs_c_avx2long* vp = p;
+
+  const int n_nodes = liftK / vp->ls;
+
+  for (int node = 0; node < n_nodes; node++) {
+    for (int sub = 0; sub < vp->n_subnodes; sub++) {
+      for (int lane = 0; lane < SRSLTE_AVX2_B_SIZE; lane++) {
+        // the last subnode of a node may be only partially filled
+        if (!(sub * SRSLTE_AVX2_B_SIZE + lane < vp->ls)) {
+          break;
+        }
+        message[node * vp->ls + sub * SRSLTE_AVX2_B_SIZE + lane] =
+            (vp->soft_bits[node * vp->n_subnodes + sub].c[lane] < 0);
+      }
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Computes z = x - y with saturating 8-bit arithmetic, limits the result to
+ * [-clip, +clip] and finally forces +infty8 / -infty8 in the lanes where x is
+ * not strictly inside (neg_infty8, infty8).
+ */
+static void
+inner_var_to_check_c_avx2long(const __m256i* x, const __m256i* y, __m256i* z, const uint8_t clip, const uint32_t len)
+{
+  const __m256i upper_epi8 = _mm256_set1_epi8(clip);
+  const __m256i lower_epi8 = _mm256_set1_epi8((char)(-clip));
+
+  for (unsigned n = 0; n < len; n++) {
+    const __m256i minuend_epi8 = x[n];
+
+    // saturated difference, then clipping from above and from below
+    __m256i diff_epi8 = _mm256_subs_epi8(minuend_epi8, y[n]);
+    diff_epi8         = _mm256_blendv_epi8(diff_epi8, upper_epi8, _mm256_cmpgt_epi8(diff_epi8, upper_epi8));
+    diff_epi8         = _mm256_blendv_epi8(diff_epi8, lower_epi8, _mm256_cmpgt_epi8(lower_epi8, diff_epi8));
+
+    // keep the difference only where x < +infty8, otherwise output +infty8
+    diff_epi8 = _mm256_blendv_epi8(infty8_epi8, diff_epi8, _mm256_cmpgt_epi8(infty8_epi8, minuend_epi8));
+
+    // keep the difference only where x > -infty8, otherwise output -infty8
+    z[n] = _mm256_blendv_epi8(neg_infty8_epi8, diff_epi8, _mm256_cmpgt_epi8(minuend_epi8, neg_infty8_epi8));
+  }
+}
+
+/*
+ * Builds in `out` a rotated copy of the `ls`-byte node stored at `in_256i`,
+ * using unaligned 32-byte loads taken at byte offsets inside the input node.
+ *
+ * The output subnodes are produced in three groups:
+ *  - "type 1": whole subnodes read starting at byte `shift` of the input;
+ *  - one mixed subnode, blended from the last bytes of the input and its first bytes;
+ *  - "type 2": the remaining subnodes, read `gap` bytes before the input subnode boundaries.
+ *
+ * NOTE(review): this presumably yields out[k] = in[(k + shift) mod ls], assuming
+ * mask_most_epi8[gap] (defined elsewhere) selects tmp2 for byte positions >= gap --- confirm.
+ * NOTE(review): the loads at `in - gap` and at `in + shift + i * SRSLTE_AVX2_B_SIZE` can touch
+ * bytes outside [in, in + ls). The blend is presumably meant to discard them, but the memory
+ * still has to be readable --- confirm against the buffer layout used by the callers.
+ */
+static void rotate_node_right(const __m256i* in_256i, __m256i* out, uint16_t shift, uint16_t ls, int8_t n_subnodes)
+{
+  // byte-wise view of the input, needed for the unaligned offsets below
+  const int8_t* in = (const int8_t*)in_256i;
+
+  // number of output subnodes taken entirely from the part of the node after `shift`;
+  // the (ls == SRSLTE_AVX2_B_SIZE) terms adjust the counts when the node is exactly one subnode
+  int16_t n_type1 = (ls - shift) / SRSLTE_AVX2_B_SIZE - (ls == SRSLTE_AVX2_B_SIZE);
+  // number of output subnodes that follow the mixed one
+  int16_t n_type2 = n_subnodes - n_type1 - 1 - (ls == SRSLTE_AVX2_B_SIZE);
+  // bytes left after the whole type-1 subnodes, i.e. taken from the input tail in the mixed subnode
+  int16_t gap     = (ls - shift) % SRSLTE_AVX2_B_SIZE;
+
+  int16_t i = 0;
+  for (; i < n_type1; i++) {
+    out[i] = _mm256_loadu_si256((const __m256i*)(in + shift + i * SRSLTE_AVX2_B_SIZE));
+  }
+
+  // mixed subnode: tmp1 starts at the input tail, tmp2 starts `gap` bytes before the node
+  __m256i tmp1 = _mm256_loadu_si256((const __m256i*)(in + shift + i * SRSLTE_AVX2_B_SIZE));
+  __m256i tmp2 = _mm256_loadu_si256((const __m256i*)(in - gap));
+
+  out[i] = _mm256_blendv_epi8(tmp1, tmp2, mask_most_epi8[gap]);
+
+  for (i = 1; i <= n_type2; i++) {
+    out[n_type1 + i] = _mm256_loadu_si256((const __m256i*)(in - gap + i * SRSLTE_AVX2_B_SIZE));
+  }
+}
+
+/*
+ * Scales the packed bytes of `a` by the 16-bit factor replicated in `sf`.
+ * The two bytes of every 16-bit lane are widened separately, multiplied by
+ * `sf` keeping the upper half of the product, and packed back together.
+ */
+static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf)
+{
+  // bytes selected by mask_even_epi8 (presumably the low byte of each 16-bit lane --- confirm)
+  const __m256i lo_bytes_epi16 = _mm256_and_si256(a, mask_even_epi8);
+  // high byte of each 16-bit lane, moved down to the low position
+  const __m256i hi_bytes_epi16 = _mm256_srli_epi16(a, 8);
+
+  const __m256i lo_scaled_epi16 = _mm256_mulhi_epu16(lo_bytes_epi16, sf);
+  // scaled high bytes go back to the upper half of their lane
+  const __m256i hi_scaled_epi16 = _mm256_slli_epi16(_mm256_mulhi_epu16(hi_bytes_epi16, sf), 8);
+
+  return _mm256_xor_si256(lo_scaled_epi16, hi_scaled_epi16);
+}
+
+#endif // LV_HAVE_AVX2
--- a/lib/src/phy/fec/ldpc/ldpc_dec_c_avx2long_flood.c
+++ b/lib/src/phy/fec/ldpc/ldpc_dec_c_avx2long_flood.c
@ -0,0 +1,576 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_c_avx2long_flood.c
+ * \brief Definition of the LDPC decoder inner functions working
+ *    with 8-bit integer-valued LLRs (flooded scheduling, AVX2 version, large lifting size).
+ *
+ * Even if the inner representation is based on 8 bits, check-to-variable and
+ * variable-to-check messages are actually represented with 7 bits, the
+ * remaining bit is used to represent infinity.
+ *
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include "../utils_avx2.h"
+#include "ldpc_dec_all.h"
+#include "srslte/phy/fec/ldpc/base_graph.h"
+#include "srslte/phy/utils/vector.h"
+
+#ifdef LV_HAVE_AVX2
+
+#include <immintrin.h>
+
+#include "ldpc_avx2_consts.h"
+
+// The decoder constructor stores the scaling factor as (uint16_t)(scaling_fctr * F2I),
+// replicated in every 16-bit lane of a 256-bit register.
+#define F2I 65535 /*!< \brief Used for float to int conversion---float f is stored as (int)(f*F2I). */
+
+/*!
+ * \brief Represents a node of the base factor graph.
+ *
+ * The union gives two views of the same 256 bits: \c c for byte-wise access
+ * to the individual lifted nodes and \c v for vectorized AVX2 operations.
+ */
+typedef union bg_node_t {
+  int8_t  c[SRSLTE_AVX2_B_SIZE]; /*!< Each base node may contain up to \ref SRSLTE_AVX2_B_SIZE lifted nodes. */
+  __m256i v;                     /*!< All the lifted nodes of the current base node as a 256-bit line. */
+} bg_node_t;
+
+/*!
+ * \brief Maximum message magnitude.
+ * Messages use a 7-bit quantization. Soft bits use the remaining bit to denote infinity.
+ * The value is \f$2^6 - 1 = 63\f$; it is passed as the clipping value when the
+ * variable-to-check messages are updated.
+ */
+static const int8_t infinity7 = (1U << 6U) - 1;
+
+/*!
+ * \brief Inner registers for the LDPC decoder that works with 8-bit integer-valued LLRs.
+ *
+ * All buffers are allocated by create_ldpc_dec_c_avx2long_flood() and released by
+ * delete_ldpc_dec_c_avx2long_flood(). Message buffers hold, for each of the \c bgM
+ * layers, <tt>hrr + 1</tt> nodes of \c n_subnodes 256-bit words each.
+ */
+struct ldpc_regs_c_avx2long_flood {
+  __m256i scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
+
+  bg_node_t* soft_bits;    /*!< \brief A-posteriori log-likelihood ratios. */
+  __m256i*   llrs;         /*!< \brief A-priori log-likelihood ratios. */
+  __m256i*   check_to_var; /*!< \brief Check-to-variable messages. */
+  __m256i*   var_to_check; /*!< \brief Variable-to-check messages. */
+
+  __m256i* rotated_v2c;   /*!< \brief To store a rotated version of the variable-to-check messages. */
+  __m256i* this_c2v_epi8; /*!< \brief Helper register for the current c2v node. */
+  __m256i* minp_v2c_epi8; /*!< \brief Helper register for the minimum v2c message. */
+  __m256i* mins_v2c_epi8; /*!< \brief Helper register for the second minimum v2c message. */
+  __m256i* prod_v2c_epi8; /*!< \brief Helper register for the sign of the product of all v2c messages. */
+  __m256i* min_ix_epi8;   /*!< \brief Helper register for the index of the minimum v2c message. */
+
+  uint16_t ls;         /*!< \brief Lifting size. */
+  uint8_t  n_subnodes; /*!< \brief Number of subnodes. */
+  uint8_t  hrr;        /*!< \brief Number of variable nodes in the high-rate region (before lifting). */
+  uint8_t  bgM;        /*!< \brief Number of check nodes (before lifting). */
+  uint8_t  bgN;        /*!< \brief Number of variable nodes (before lifting). */
+};
+
+/*!
+ * Carries out the actual update of the variable-to-check messages. It basically
+ * consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
+ * \f$\lvert x[i] \rvert = 2^{7}-1 \f$ (our representation of infinity) to
+ * ensure it is properly propagated. Also, the subtraction is saturated between
+ * \f$- clip\f$ and \f$+ clip\f$.
+ * \param[in] x     Minuend: array we subtract from (in practice, the soft bits).
+ * \param[in] y     Subtrahend: array to be subtracted (in practice, the
+ *                  check-to-variable messages).
+ * \param[out] z    Resulting difference array(in practice, the updated
+ *                  variable-to-check messages).
+ * \param[in]  clip The saturation value.
+ * \param[in]  len  The length of the vectors.
+ */
+static void inner_var_to_check_c_avx2(const __m256i* x, const __m256i* y, __m256i* z, uint8_t clip, uint32_t len);
+
+/*!
+ * Rotate the contents of a node towards the right by \b shift chars, that is the
+ * \b shift * 8 most significant bits become the least significant ones.
+ * \param[in]  in_256i    The node to rotate.
+ * \param[out] out        The rotated node.
+ * \param[in]  shift      The order of the rotation in number of chars.
+ * \param[in]  ls         The size of the node (lifting size).
+ * \param[in]  n_subnodes The number of subnodes in each node.
+ * \note       The function returns nothing: the rotated node is written to \b out.
+ */
+static void rotate_node_right(const __m256i* in_256i, __m256i* out, uint16_t shift, uint16_t ls, int8_t n_subnodes);
+
+/*!
+ * Scale packed 8-bit integers in \b a by the scaling factor \b sf / #F2I.
+ * \param[in] a   Vector of packed 8-bit integers.
+ * \param[in] sf  Scaling factor.
+ * \return    Vector of packed 8-bit integers with the scaling result.
+ */
+static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf);
+
+/*!
+ * Allocates the registers of the flooded, AVX2, large-lifting-size decoder.
+ *
+ * \param[in] bgN          Number of variable nodes of the base graph.
+ * \param[in] bgM          Number of check nodes of the base graph.
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor, stored as <tt>(uint16_t)(scaling_fctr * F2I)</tt>.
+ * \return A pointer to the new registers, or NULL if any allocation fails (in which
+ *         case everything allocated so far is released).
+ */
+void* create_ldpc_dec_c_avx2long_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
+{
+  struct ldpc_regs_c_avx2long_flood* vp = NULL;
+
+  uint8_t  bgK = bgN - bgM;
+  uint16_t hrr = bgK + 4;
+
+  // compute number of subnodes: one per SRSLTE_AVX2_B_SIZE lifted nodes, rounded up
+  int left_out   = ls % SRSLTE_AVX2_B_SIZE;
+  int n_subnodes = ls / SRSLTE_AVX2_B_SIZE + (left_out > 0);
+
+  vp = srslte_vec_malloc(sizeof(struct ldpc_regs_c_avx2long_flood));
+  if (vp == NULL) {
+    return NULL;
+  }
+
+  vp->llrs = srslte_vec_malloc(bgN * n_subnodes * sizeof(__m256i));
+  if (vp->llrs == NULL) {
+    goto free_regs;
+  }
+
+  vp->soft_bits = srslte_vec_malloc(bgN * n_subnodes * sizeof(bg_node_t));
+  if (vp->soft_bits == NULL) {
+    goto free_llrs;
+  }
+
+  vp->check_to_var = srslte_vec_malloc((hrr + 1) * bgM * n_subnodes * sizeof(__m256i));
+  if (vp->check_to_var == NULL) {
+    goto free_soft_bits;
+  }
+
+  vp->var_to_check = srslte_vec_malloc((hrr + 1) * bgM * n_subnodes * sizeof(__m256i));
+  if (vp->var_to_check == NULL) {
+    goto free_check_to_var;
+  }
+
+  vp->minp_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i));
+  if (vp->minp_v2c_epi8 == NULL) {
+    goto free_var_to_check;
+  }
+
+  vp->mins_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i));
+  if (vp->mins_v2c_epi8 == NULL) {
+    goto free_minp;
+  }
+
+  vp->prod_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i));
+  if (vp->prod_v2c_epi8 == NULL) {
+    goto free_mins;
+  }
+
+  vp->min_ix_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i));
+  if (vp->min_ix_epi8 == NULL) {
+    goto free_prod;
+  }
+
+  vp->rotated_v2c = srslte_vec_malloc((hrr + 1) * n_subnodes * sizeof(__m256i));
+  if (vp->rotated_v2c == NULL) {
+    goto free_min_ix;
+  }
+
+  vp->this_c2v_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i));
+  if (vp->this_c2v_epi8 == NULL) {
+    goto free_rotated_v2c;
+  }
+
+  vp->bgM        = bgM;
+  vp->bgN        = bgN;
+  vp->hrr        = hrr;
+  vp->ls         = ls;
+  vp->n_subnodes = n_subnodes;
+
+  vp->scaling_fctr = _mm256_set1_epi16((uint16_t)(scaling_fctr * F2I));
+
+  return vp;
+
+  // Clean-up ladder: each label releases the buffer allocated just before the
+  // failing one and falls through to the older buffers.
+free_rotated_v2c:
+  free(vp->rotated_v2c);
+free_min_ix:
+  free(vp->min_ix_epi8);
+free_prod:
+  free(vp->prod_v2c_epi8);
+free_mins:
+  free(vp->mins_v2c_epi8);
+free_minp:
+  free(vp->minp_v2c_epi8);
+free_var_to_check:
+  free(vp->var_to_check);
+free_check_to_var:
+  free(vp->check_to_var);
+free_soft_bits:
+  free(vp->soft_bits);
+free_llrs:
+  free(vp->llrs);
+free_regs:
+  free(vp);
+  return NULL;
+}
+
+/*!
+ * Releases every buffer owned by the decoder registers and the registers
+ * themselves. A NULL pointer is accepted and ignored.
+ *
+ * \param[in] p The decoder registers.
+ */
+void delete_ldpc_dec_c_avx2long_flood(void* p)
+{
+  struct ldpc_regs_c_avx2long_flood* vp = p;
+
+  if (vp == NULL) {
+    return;
+  }
+
+  // buffers are released in reverse order of allocation
+  void* const owned[] = {vp->this_c2v_epi8,
+                         vp->rotated_v2c,
+                         vp->min_ix_epi8,
+                         vp->prod_v2c_epi8,
+                         vp->mins_v2c_epi8,
+                         vp->minp_v2c_epi8,
+                         vp->var_to_check,
+                         vp->check_to_var,
+                         vp->soft_bits,
+                         vp->llrs};
+
+  for (size_t n = 0; n < sizeof(owned) / sizeof(owned[0]); n++) {
+    free(owned[n]);
+  }
+  free(vp);
+}
+
+/*!
+ * Loads a new set of a-priori LLRs and resets all messages.
+ *
+ * The first two nodes get all-zero soft bits and LLRs (no input is read for
+ * them). Every following node takes \b ls consecutive input values, and the
+ * unused lanes at the end of its last subnode are set to zero.
+ *
+ * \param[in,out] p    The decoder registers.
+ * \param[in]     llrs The input LLRs, \b ls values for each node from the third one on.
+ * \param[in]     ls   The lifting size.
+ * \return 0 on success, -1 if \c p is NULL.
+ */
+int init_ldpc_dec_c_avx2long_flood(void* p, const int8_t* llrs, uint16_t ls)
+{
+  struct ldpc_regs_c_avx2long_flood* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  const int n_sub = vp->n_subnodes;
+
+  // nodes 0 and 1: no input available, start from zero
+  for (int s = 0; s < 2 * n_sub; s++) {
+    vp->soft_bits[s].v = _mm256_set1_epi8(0);
+    vp->llrs[s]        = _mm256_set1_epi8(0);
+  }
+
+  for (int node = 2; node < vp->bgN; node++) {
+    bg_node_t* node_soft = vp->soft_bits + node * n_sub;
+    __m256i*   node_llrs = vp->llrs + node * n_sub;
+
+    // declared outside the loops: their final values locate the unused tail below
+    int sub  = 0;
+    int lane = 0;
+
+    for (sub = 0; sub < n_sub; sub++) {
+      for (lane = 0; (lane < SRSLTE_AVX2_B_SIZE) && (sub * SRSLTE_AVX2_B_SIZE + lane < ls); lane++) {
+        node_soft[sub].c[lane] = llrs[(node - 2) * ls + sub * SRSLTE_AVX2_B_SIZE + lane];
+      }
+      node_llrs[sub] = node_soft[sub].v;
+    }
+
+    // clear the lanes of the last subnode that received no input
+    bzero(&(node_soft[sub - 1].c[lane]), (SRSLTE_AVX2_B_SIZE - lane) * sizeof(int8_t));
+    bzero((int8_t*)(node_llrs + sub - 1) + lane, (SRSLTE_AVX2_B_SIZE - lane) * sizeof(int8_t));
+  }
+
+  bzero(vp->check_to_var, (vp->hrr + 1) * vp->bgM * vp->n_subnodes * sizeof(__m256i));
+  bzero(vp->var_to_check, (vp->hrr + 1) * vp->bgM * vp->n_subnodes * sizeof(__m256i));
+  return 0;
+}
+
+/*!
+ * Updates the variable-to-check messages of one layer from the current soft
+ * bits and the layer's check-to-variable messages.
+ *
+ * \param[in,out] p       The decoder registers.
+ * \param[in]     i_layer Index of the layer to update.
+ * \return 0 on success, -1 if \c p is NULL.
+ */
+int update_ldpc_var_to_check_c_avx2long_flood(void* p, int i_layer)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs_c_avx2long_flood* vp = p;
+
+  const int hrr_len      = vp->hrr * vp->n_subnodes;
+  const int layer_offset = i_layer * (vp->hrr + 1) * vp->n_subnodes;
+
+  __m256i* layer_c2v = vp->check_to_var + layer_offset;
+  __m256i* layer_v2c = vp->var_to_check + layer_offset;
+
+  // Update the high-rate region.
+  inner_var_to_check_c_avx2(&(vp->soft_bits[0].v), layer_c2v, layer_v2c, infinity7, hrr_len);
+
+  // The first four layers have no extension-region node.
+  if (i_layer < 4) {
+    return 0;
+  }
+
+  // Update the extension region: a single node, stored right after the high-rate ones.
+  inner_var_to_check_c_avx2(&(vp->soft_bits[0].v) + (vp->hrr + i_layer - 4) * vp->n_subnodes,
+                            layer_c2v + hrr_len,
+                            layer_v2c + hrr_len,
+                            infinity7,
+                            vp->n_subnodes);
+
+  return 0;
+}
+
+/*!
+ * Updates the check-to-variable messages of one layer.
+ *
+ * A first pass over the variable nodes connected to the layer rotates their
+ * variable-to-check messages and accumulates, lane by lane, the smallest
+ * magnitude, the second smallest magnitude, the position of the smallest one
+ * and the XOR of all sign masks. A second pass builds each outgoing message
+ * from the smallest magnitude among the *other* connections, scales it by
+ * \c scaling_fctr, applies the sign of the other messages and rotates the
+ * result back into \c check_to_var.
+ *
+ * \param[in,out] p                 The decoder registers.
+ * \param[in]     i_layer           Index of the layer to update.
+ * \param[in]     this_pcm          Rotation (shift) to apply, indexed by variable node.
+ * \param[in]     these_var_indices Indices of the variable nodes connected to this layer. The list
+ *                                  ends at the first -1 or after \c MAX_CNCT entries.
+ * \return 0 on success, -1 if \c p is NULL.
+ */
+int update_ldpc_check_to_var_c_avx2long_flood(void*           p,
+                                              int             i_layer,
+                                              const uint16_t* this_pcm,
+                                              const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_c_avx2long_flood* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  int i = 0;
+  int j = 0;
+
+  uint16_t shift             = 0;
+  int      i_v2c_base        = 0;
+  int8_t   current_var_index = 0;
+
+  __m256i* this_rotated_v2c = NULL;
+
+  __m256i* this_var_to_check = vp->var_to_check + i_layer * (vp->hrr + 1) * vp->n_subnodes;
+
+  __m256i this_abs_v2c_epi8;
+  __m256i mask_sign_epi8;
+  __m256i mask_min_epi8;
+  __m256i help_min_epi8;
+  __m256i current_ix_epi8;
+
+  for (j = 0; j < vp->n_subnodes; j++) {
+    vp->minp_v2c_epi8[j] = _mm256_set1_epi8(INT8_MAX);
+    vp->mins_v2c_epi8[j] = _mm256_set1_epi8(INT8_MAX);
+    vp->prod_v2c_epi8[j] = _mm256_set1_epi8(0);
+    // Connection positions are never negative: -1 marks "no minimum recorded yet", so that
+    // lanes in which no magnitude falls below INT8_MAX do not read an indeterminate index.
+    vp->min_ix_epi8[j] = _mm256_set1_epi8(-1);
+  }
+
+  // First pass: rotate the incoming messages and collect minima and signs.
+  for (i = 0; i < MAX_CNCT; i++) {
+    // The index is fetched only after the bound check: a row with MAX_CNCT connections
+    // carries no -1 terminator and element MAX_CNCT does not belong to the row.
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
+    i_v2c_base *= vp->n_subnodes;
+
+    current_ix_epi8 = _mm256_set1_epi8((int8_t)i);
+
+    this_rotated_v2c = vp->rotated_v2c + i * vp->n_subnodes;
+    rotate_node_right(this_var_to_check + i_v2c_base, this_rotated_v2c, shift, vp->ls, vp->n_subnodes);
+
+    for (j = 0; j < vp->n_subnodes; j++) {
+      // mask_sign is 1 if this_v2c_epi8 is strictly negative
+      mask_sign_epi8       = _mm256_cmpgt_epi8(zero_epi8, this_rotated_v2c[j]);
+      vp->prod_v2c_epi8[j] = _mm256_xor_si256(vp->prod_v2c_epi8[j], mask_sign_epi8);
+
+      this_abs_v2c_epi8 = _mm256_abs_epi8(this_rotated_v2c[j]);
+      // mask_min is 1 if this_abs_v2c is strictly smaller than minp_v2c
+      mask_min_epi8 = _mm256_cmpgt_epi8(vp->minp_v2c_epi8[j], this_abs_v2c_epi8);
+      // candidate for the second minimum: the old minimum if it has just been beaten,
+      // the current magnitude otherwise
+      help_min_epi8        = _mm256_blendv_epi8(this_abs_v2c_epi8, vp->minp_v2c_epi8[j], mask_min_epi8);
+      vp->minp_v2c_epi8[j] = _mm256_blendv_epi8(vp->minp_v2c_epi8[j], this_abs_v2c_epi8, mask_min_epi8);
+      vp->min_ix_epi8[j]   = _mm256_blendv_epi8(vp->min_ix_epi8[j], current_ix_epi8, mask_min_epi8);
+
+      // mask_min is 1 if this_abs_v2c is strictly smaller than mins_v2c
+      mask_min_epi8        = _mm256_cmpgt_epi8(vp->mins_v2c_epi8[j], this_abs_v2c_epi8);
+      vp->mins_v2c_epi8[j] = _mm256_blendv_epi8(vp->mins_v2c_epi8[j], help_min_epi8, mask_min_epi8);
+    }
+  }
+
+  __m256i* this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1) * vp->n_subnodes;
+
+  __m256i mask_is_min_epi8;
+  __m256i help_c2v_epi8;
+  __m256i final_sign_epi8;
+
+  // Second pass: build, scale, sign and rotate back the outgoing messages.
+  for (i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
+    i_v2c_base *= vp->n_subnodes;
+
+    this_rotated_v2c = vp->rotated_v2c + i * vp->n_subnodes;
+    current_ix_epi8  = _mm256_set1_epi8((int8_t)i);
+
+    for (j = 0; j < vp->n_subnodes; j++) {
+      // final_sign is all ones where the product of the *other* messages is negative
+      final_sign_epi8 = _mm256_cmpgt_epi8(zero_epi8, this_rotated_v2c[j]);
+      final_sign_epi8 = _mm256_xor_si256(final_sign_epi8, vp->prod_v2c_epi8[j]);
+
+      // the connection holding the minimum receives the second minimum instead
+      mask_is_min_epi8     = _mm256_cmpeq_epi8(current_ix_epi8, vp->min_ix_epi8[j]);
+      vp->this_c2v_epi8[j] = _mm256_blendv_epi8(vp->minp_v2c_epi8[j], vp->mins_v2c_epi8[j], mask_is_min_epi8);
+      vp->this_c2v_epi8[j] = _mm256_scalei_epi8(vp->this_c2v_epi8[j], vp->scaling_fctr);
+      // negate only the lanes flagged by final_sign, leave the others untouched
+      help_c2v_epi8        = _mm256_sign_epi8(vp->this_c2v_epi8[j], final_sign_epi8);
+      vp->this_c2v_epi8[j] = _mm256_blendv_epi8(vp->this_c2v_epi8[j], help_c2v_epi8, final_sign_epi8);
+    }
+    // rotating right LS - shift positions is the same as rotating left shift positions
+    rotate_node_right(vp->this_c2v_epi8, this_check_to_var + i_v2c_base, vp->ls - shift, vp->ls, vp->n_subnodes);
+  }
+
+  return 0;
+}
+
+/*!
+ * Recomputes all the soft bits: they are first reset to the a-priori LLRs and
+ * then, layer by layer, the check-to-variable message of every connected
+ * variable node is added with saturation. Sums above \c infty7_epi8 are
+ * replaced by \c infty8_epi8 and sums below \c neg_infty7_epi8 by
+ * \c neg_infty8_epi8.
+ *
+ * \param[in,out] p                 The decoder registers.
+ * \param[in]     these_var_indices One row per layer with the indices of the connected variable
+ *                                  nodes. Each row ends at the first -1 or after \c MAX_CNCT entries.
+ * \return 0 on success, -1 if \c p is NULL.
+ */
+int update_ldpc_soft_bits_c_avx2long_flood(void* p, const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_c_avx2long_flood* vp = p;
+  if (p == NULL) {
+    return -1;
+  }
+
+  int i_layer = 0;
+  int i       = 0;
+  int j       = 0;
+
+  __m256i* this_check_to_var = NULL;
+
+  int i_bit_tmp_base = 0;
+  int i_bit_subnode  = 0;
+
+  __m256i tmp_epi8;
+  __m256i mask_epi8;
+
+  int8_t current_var_index         = 0;
+  int    current_var_index_subnode = 0;
+
+  // start again from the a-priori information
+  for (i = 0; i < vp->bgN * vp->n_subnodes; i++) {
+    vp->soft_bits[i].v = vp->llrs[i];
+  }
+
+  for (i_layer = 0; i_layer < vp->bgM; i_layer++) {
+    this_check_to_var = vp->check_to_var + i_layer * (vp->hrr + 1) * vp->n_subnodes;
+
+    for (i = 0; i < MAX_CNCT; i++) {
+      // The index is fetched only after the bound check: a row with MAX_CNCT connections
+      // carries no -1 terminator, and for the last layer element MAX_CNCT lies past the table.
+      current_var_index = these_var_indices[i_layer][i];
+      if (current_var_index == -1) {
+        break;
+      }
+
+      current_var_index_subnode = current_var_index * vp->n_subnodes;
+      // Variable nodes beyond the high-rate region all use message slot number hrr.
+      i_bit_tmp_base = (current_var_index <= vp->hrr) ? current_var_index : vp->hrr;
+
+      for (j = 0; j < vp->n_subnodes; j++) {
+        i_bit_subnode = i_bit_tmp_base * vp->n_subnodes + j;
+
+        tmp_epi8 = _mm256_adds_epi8(this_check_to_var[i_bit_subnode], vp->soft_bits[current_var_index_subnode + j].v);
+
+        // promote everything above +infty7 to +infty8 ...
+        mask_epi8 = _mm256_cmpgt_epi8(tmp_epi8, infty7_epi8);
+        tmp_epi8  = _mm256_blendv_epi8(tmp_epi8, infty8_epi8, mask_epi8);
+
+        // ... and everything below -infty7 to -infty8
+        mask_epi8 = _mm256_cmpgt_epi8(neg_infty7_epi8, tmp_epi8);
+
+        vp->soft_bits[current_var_index_subnode + j].v = _mm256_blendv_epi8(tmp_epi8, neg_infty8_epi8, mask_epi8);
+      }
+    }
+  }
+
+  return 0;
+}
+
+/*!
+ * Writes the hard decisions of the first \c liftK / \c ls nodes to \b message:
+ * each output byte is 1 if the corresponding soft bit is negative, 0 otherwise.
+ *
+ * \param[in]  p       The decoder registers.
+ * \param[out] message Destination of the hard bits, one per byte.
+ * \param[in]  liftK   Number of bits to extract (a whole number of nodes is processed).
+ * \return 0 on success, -1 if \c p is NULL.
+ */
+int extract_ldpc_message_c_avx2long_flood(void* p, uint8_t* message, uint16_t liftK)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs_c_avx2long_flood* vp = p;
+
+  const int n_nodes = liftK / vp->ls;
+
+  for (int node = 0; node < n_nodes; node++) {
+    for (int sub = 0; sub < vp->n_subnodes; sub++) {
+      for (int lane = 0; lane < SRSLTE_AVX2_B_SIZE; lane++) {
+        // the last subnode of a node may be only partially filled
+        if (!(sub * SRSLTE_AVX2_B_SIZE + lane < vp->ls)) {
+          break;
+        }
+        message[node * vp->ls + sub * SRSLTE_AVX2_B_SIZE + lane] =
+            (vp->soft_bits[node * vp->n_subnodes + sub].c[lane] < 0);
+      }
+    }
+  }
+
+  return 0;
+}
+
+/*
+ * Computes z = x - y with saturating 8-bit arithmetic, limits the result to
+ * [-clip, +clip] and finally forces +infty8 / -infty8 in the lanes where x is
+ * not strictly inside (neg_infty8, infty8).
+ */
+static void
+inner_var_to_check_c_avx2(const __m256i* x, const __m256i* y, __m256i* z, const uint8_t clip, const uint32_t len)
+{
+  const __m256i upper_epi8 = _mm256_set1_epi8(clip);
+  const __m256i lower_epi8 = _mm256_set1_epi8((char)(-clip));
+
+  for (unsigned n = 0; n < len; n++) {
+    const __m256i minuend_epi8 = x[n];
+
+    // saturated difference, then clipping from above and from below
+    __m256i diff_epi8 = _mm256_subs_epi8(minuend_epi8, y[n]);
+    diff_epi8         = _mm256_blendv_epi8(diff_epi8, upper_epi8, _mm256_cmpgt_epi8(diff_epi8, upper_epi8));
+    diff_epi8         = _mm256_blendv_epi8(diff_epi8, lower_epi8, _mm256_cmpgt_epi8(lower_epi8, diff_epi8));
+
+    // keep the difference only where x < +infty8, otherwise output +infty8
+    diff_epi8 = _mm256_blendv_epi8(infty8_epi8, diff_epi8, _mm256_cmpgt_epi8(infty8_epi8, minuend_epi8));
+
+    // keep the difference only where x > -infty8, otherwise output -infty8
+    z[n] = _mm256_blendv_epi8(neg_infty8_epi8, diff_epi8, _mm256_cmpgt_epi8(minuend_epi8, neg_infty8_epi8));
+  }
+}
+
+/*
+ * Builds in `out` a rotated copy of the `ls`-byte node stored at `in_256i`,
+ * using unaligned 32-byte loads taken at byte offsets inside the input node.
+ *
+ * The output subnodes are produced in three groups:
+ *  - "type 1": whole subnodes read starting at byte `shift` of the input;
+ *  - one mixed subnode, blended from the last bytes of the input and its first bytes;
+ *  - "type 2": the remaining subnodes, read `gap` bytes before the input subnode boundaries.
+ *
+ * NOTE(review): this presumably yields out[k] = in[(k + shift) mod ls], assuming
+ * mask_most_epi8[gap] (defined elsewhere) selects tmp2 for byte positions >= gap --- confirm.
+ * NOTE(review): the loads at `in - gap` and at `in + shift + i * SRSLTE_AVX2_B_SIZE` can touch
+ * bytes outside [in, in + ls). The blend is presumably meant to discard them, but the memory
+ * still has to be readable --- confirm against the buffer layout used by the callers.
+ */
+static void rotate_node_right(const __m256i* in_256i, __m256i* out, uint16_t shift, uint16_t ls, int8_t n_subnodes)
+{
+  // byte-wise view of the input, needed for the unaligned offsets below
+  const int8_t* in = (const int8_t*)in_256i;
+
+  // number of output subnodes taken entirely from the part of the node after `shift`;
+  // the (ls == SRSLTE_AVX2_B_SIZE) terms adjust the counts when the node is exactly one subnode
+  int16_t n_type1 = (ls - shift) / SRSLTE_AVX2_B_SIZE - (ls == SRSLTE_AVX2_B_SIZE);
+  // number of output subnodes that follow the mixed one
+  int16_t n_type2 = n_subnodes - n_type1 - 1 - (ls == SRSLTE_AVX2_B_SIZE);
+  // bytes left after the whole type-1 subnodes, i.e. taken from the input tail in the mixed subnode
+  int16_t gap     = (ls - shift) % SRSLTE_AVX2_B_SIZE;
+
+  int16_t i = 0;
+  for (; i < n_type1; i++) {
+    out[i] = _mm256_loadu_si256((const __m256i*)(in + shift + i * SRSLTE_AVX2_B_SIZE));
+  }
+
+  // mixed subnode: tmp1 starts at the input tail, tmp2 starts `gap` bytes before the node
+  __m256i tmp1 = _mm256_loadu_si256((const __m256i*)(in + shift + i * SRSLTE_AVX2_B_SIZE));
+  __m256i tmp2 = _mm256_loadu_si256((const __m256i*)(in - gap));
+
+  out[i] = _mm256_blendv_epi8(tmp1, tmp2, mask_most_epi8[gap]);
+
+  for (i = 1; i <= n_type2; i++) {
+    out[n_type1 + i] = _mm256_loadu_si256((const __m256i*)(in - gap + i * SRSLTE_AVX2_B_SIZE));
+  }
+}
+
+/*
+ * Scales the packed bytes of `a` by the 16-bit factor replicated in `sf`.
+ * The two bytes of every 16-bit lane are widened separately, multiplied by
+ * `sf` keeping the upper half of the product, and packed back together.
+ */
+static __m256i _mm256_scalei_epi8(__m256i a, __m256i sf)
+{
+  // bytes selected by mask_even_epi8 (presumably the low byte of each 16-bit lane --- confirm)
+  const __m256i lo_bytes_epi16 = _mm256_and_si256(a, mask_even_epi8);
+  // high byte of each 16-bit lane, moved down to the low position
+  const __m256i hi_bytes_epi16 = _mm256_srli_epi16(a, 8);
+
+  const __m256i lo_scaled_epi16 = _mm256_mulhi_epu16(lo_bytes_epi16, sf);
+  // scaled high bytes go back to the upper half of their lane
+  const __m256i hi_scaled_epi16 = _mm256_slli_epi16(_mm256_mulhi_epu16(hi_bytes_epi16, sf), 8);
+
+  return _mm256_xor_si256(lo_scaled_epi16, hi_scaled_epi16);
+}
+
+#endif // LV_HAVE_AVX2
--- a/lib/src/phy/fec/ldpc/ldpc_dec_c_flood.c
+++ b/lib/src/phy/fec/ldpc/ldpc_dec_c_flood.c
@ -0,0 +1,391 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_c_flood.c
+ * \brief Definition of the LDPC decoder inner functions working
+ *    with 8-bit integer-valued LLRs. Flooded scheduling.
+ *
+ * Even if the inner representation is based on 8 bits, check-to-variable and
+ * variable-to-check messages are actually represented with 7 bits, the
+ * remaining bit is used to represent infinity.
+ *
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include "ldpc_dec_all.h"
+#include "srslte/phy/fec/ldpc/base_graph.h"
+#include "srslte/phy/utils/vector.h"
+
+#define F2I 100 /*!< \brief Used for float to int conversion---float f is stored as (int)(f*F2I). */
+
+/*!
+ * \brief Maximum message magnitude.
+ * Messages use a 7-bit quantization, hence their magnitude is at most 2^6 - 1 = 63.
+ * Soft bits use the remaining bit to denote infinity.
+ */
+static const int8_t infinity7 = (1U << 6U) - 1;
+
+/*!
+ * \brief Inner registers for the LDPC decoder that works with 8-bit integer-valued LLRs (flooded scheduling).
+ */
+struct ldpc_regs_c_flood {
+  int8_t* llrs;         /*!< \brief A-priori log-likelihood ratios. */
+  int8_t* soft_bits;    /*!< \brief A-posteriori log-likelihood ratios. */
+  int8_t* check_to_var; /*!< \brief Check-to-variable messages, one segment of (hrrN + ls) entries per layer. */
+  int8_t* var_to_check; /*!< \brief Variable-to-check messages, one segment of (hrrN + ls) entries per layer. */
+  int8_t (*min_v2c)[2]; /*!< \brief Helper register: two smallest message magnitudes for each lifted check node. */
+  int* min_v_index;     /*!< \brief Helper register: position of the smallest magnitude for each lifted check node. */
+  int* prod_v2c;        /*!< \brief Helper register: product of the message signs for each lifted check node. */
+
+  uint16_t liftN;        /*!< \brief Total number of variable nodes (after lifting). */
+  uint16_t hrrN;         /*!< \brief Number of variable nodes in the high-rate region (after lifting). */
+  uint8_t  bgM;          /*!< \brief Number of check nodes (before lifting). */
+  uint16_t ls;           /*!< \brief Lifting size. */
+  int      scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm, stored multiplied by F2I. */
+};
+
+/*!
+ * Carries out the actual update of the variable-to-check messages. It basically
+ * consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
+ * \f$\lvert x[i] \rvert = 2^{7}-1 \f$ (our representation of infinity) to
+ * ensure it is properly propagated: in that case \f$ z[i] = \pm (2^{7}-1) \f$,
+ * with the sign of \f$ x[i] \f$ and irrespective of \f$ y[i] \f$. Otherwise,
+ * the subtraction is saturated between \f$- clip\f$ and \f$+ clip\f$.
+ * \param[in] x     Minuend: array we subtract from (in practice, the soft bits).
+ * \param[in] y     Subtrahend: array to be subtracted (in practice, the
+ *                  check-to-variable messages).
+ * \param[out] z    Resulting difference array (in practice, the updated
+ *                  variable-to-check messages).
+ * \param[in]  clip The saturation value.
+ * \param[in]  len  The length of the vectors.
+ */
+static void inner_var_to_check_c(const int8_t* x, const int8_t* y, int8_t* z, uint8_t clip, uint32_t len);
+
+/*!
+ * Allocates the registers of the LDPC decoder that works with 8-bit LLRs and
+ * flooded scheduling.
+ *
+ * \param[in] bgN          Number of variable nodes of the base graph (before lifting).
+ * \param[in] bgM          Number of check nodes of the base graph (before lifting).
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm; it is
+ *                         stored in fixed point as (int)(scaling_fctr * F2I).
+ * \return A pointer to the new registers, or NULL if any allocation fails (in
+ *         which case everything allocated so far is released).
+ */
+void* create_ldpc_dec_c_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
+{
+  uint8_t  bgK   = bgN - bgM;
+  uint16_t liftN = bgN * ls;
+  uint16_t hrrN  = (bgK + 4) * ls;
+
+  struct ldpc_regs_c_flood* regs = malloc(sizeof(struct ldpc_regs_c_flood));
+  if (regs == NULL) {
+    return NULL;
+  }
+
+  // Each failure jumps to the label that releases what was allocated before it.
+  regs->llrs = srslte_vec_i8_malloc(liftN);
+  if (regs->llrs == NULL) {
+    goto free_regs;
+  }
+
+  regs->soft_bits = srslte_vec_i8_malloc(liftN);
+  if (regs->soft_bits == NULL) {
+    goto free_llrs;
+  }
+
+  regs->check_to_var = srslte_vec_i8_malloc((hrrN + ls) * bgM);
+  if (regs->check_to_var == NULL) {
+    goto free_soft_bits;
+  }
+
+  regs->var_to_check = srslte_vec_i8_malloc((hrrN + ls) * bgM);
+  if (regs->var_to_check == NULL) {
+    goto free_check_to_var;
+  }
+
+  regs->min_v2c = malloc(ls * sizeof(int8_t[2]));
+  if (regs->min_v2c == NULL) {
+    goto free_var_to_check;
+  }
+
+  regs->min_v_index = srslte_vec_i32_malloc(ls);
+  if (regs->min_v_index == NULL) {
+    goto free_min_v2c;
+  }
+
+  regs->prod_v2c = srslte_vec_i32_malloc(ls);
+  if (regs->prod_v2c == NULL) {
+    goto free_min_v_index;
+  }
+
+  regs->bgM          = bgM;
+  regs->liftN        = liftN;
+  regs->hrrN         = hrrN;
+  regs->ls           = ls;
+  regs->scaling_fctr = (int)(scaling_fctr * F2I);
+
+  return regs;
+
+free_min_v_index:
+  free(regs->min_v_index);
+free_min_v2c:
+  free(regs->min_v2c);
+free_var_to_check:
+  free(regs->var_to_check);
+free_check_to_var:
+  free(regs->check_to_var);
+free_soft_bits:
+  free(regs->soft_bits);
+free_llrs:
+  free(regs->llrs);
+free_regs:
+  free(regs);
+  return NULL;
+}
+
+/*!
+ * Releases the registers created by create_ldpc_dec_c_flood().
+ * \param[in] p Pointer to the registers; a NULL pointer is ignored.
+ */
+void delete_ldpc_dec_c_flood(void* p)
+{
+  struct ldpc_regs_c_flood* regs = p;
+
+  if (regs == NULL) {
+    return;
+  }
+
+  // Inner buffers first (reverse allocation order), then the container.
+  free(regs->prod_v2c);
+  free(regs->min_v_index);
+  free(regs->min_v2c);
+  free(regs->var_to_check);
+  free(regs->check_to_var);
+  free(regs->soft_bits);
+  free(regs->llrs);
+  free(regs);
+}
+
+/*!
+ * Loads a new set of LLRs into the decoder registers and clears all messages.
+ *
+ * The first 2 * \p ls a-priori LLRs and soft bits are set to zero; the
+ * remaining ones, up to liftN, are copied from \p llrs.
+ *
+ * \param[in,out] p    Pointer to the decoder registers.
+ * \param[in]     llrs Input LLRs (at least liftN - 2 * \p ls values are read).
+ * \param[in]     ls   Lifting size.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int init_ldpc_dec_c_flood(void* p, const int8_t* llrs, uint16_t ls)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs_c_flood* regs = p;
+  int                       skip = 2 * ls;
+
+  // The first "skip" positions carry no channel information.
+  bzero(regs->llrs, skip * sizeof(int8_t));
+  bzero(regs->soft_bits, skip * sizeof(int8_t));
+
+  for (int k = 0; k + skip < regs->liftN; k++) {
+    regs->llrs[k + skip]      = llrs[k];
+    regs->soft_bits[k + skip] = llrs[k];
+  }
+
+  // No message has been exchanged yet.
+  bzero(regs->check_to_var, (regs->hrrN + regs->ls) * regs->bgM * sizeof(int8_t));
+  bzero(regs->var_to_check, (regs->hrrN + regs->ls) * regs->bgM * sizeof(int8_t));
+
+  return 0;
+}
+
+/*!
+ * Updates the variable-to-check messages of one layer as the saturated
+ * difference between the soft bits and the check-to-variable messages of the
+ * same layer (see inner_var_to_check_c()).
+ *
+ * \param[in,out] p       Pointer to the decoder registers.
+ * \param[in]     i_layer Index of the layer to update.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int update_ldpc_var_to_check_c_flood(void* p, int i_layer)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs_c_flood* regs = p;
+
+  // Messages of this layer live in a segment of (hrrN + ls) entries.
+  int8_t* c2v = regs->check_to_var + i_layer * (regs->hrrN + regs->ls);
+  int8_t* v2c = regs->var_to_check + i_layer * (regs->hrrN + regs->ls);
+
+  // High-rate region.
+  inner_var_to_check_c(regs->soft_bits, c2v, v2c, infinity7, regs->hrrN);
+
+  if (i_layer < 4) {
+    return 0;
+  }
+
+  // Extension region: only layers from the fifth one on have it.
+  inner_var_to_check_c(
+      regs->soft_bits + regs->hrrN + (i_layer - 4) * regs->ls, c2v + regs->hrrN, v2c + regs->hrrN, infinity7, regs->ls);
+
+  return 0;
+}
+
+/*!
+ * Updates the check-to-variable messages of one layer according to the
+ * normalized min-sum rule.
+ *
+ * A first pass over the variable nodes connected to the layer records, for each
+ * of the \c ls lifted check nodes, the two smallest variable-to-check
+ * magnitudes, the position of the smallest one and the product of the message
+ * signs. A second pass writes, for every connected variable node, the smallest
+ * magnitude among the other nodes, scaled by scaling_fctr / F2I and with the
+ * sign given by the product of the signs of the other messages.
+ *
+ * \param[in,out] p                 Pointer to the decoder registers.
+ * \param[in]     i_layer           Index of the layer to update.
+ * \param[in]     this_pcm          Circular shifts of this layer, indexed by
+ *                                  (base graph) variable node.
+ * \param[in]     these_var_indices Indices of the variable nodes connected to
+ *                                  this layer. The list ends at the first -1 or
+ *                                  after MAX_CNCT entries, whichever comes first.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int update_ldpc_check_to_var_c_flood(void*           p,
+                                     int             i_layer,
+                                     const uint16_t* this_pcm,
+                                     const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_c_flood* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  int i = 0;
+  int j = 0;
+
+  for (i = 0; i < vp->ls; i++) {
+    vp->prod_v2c[i] = 1;
+    // No minimum found yet: -1 never matches a valid message position, so the
+    // second pass never reads a stale or uninitialized index.
+    vp->min_v_index[i] = -1;
+    for (j = 0; j < 2; j++) {
+      vp->min_v2c[i][j] = INT8_MAX;
+    }
+  }
+
+  uint16_t shift      = 0;
+  int      index      = 0;
+  int8_t   this_v2c   = 0;
+  int      is_min     = 0;
+  int      i_v2c      = 0;
+  int      i_v2c_base = 0;
+
+  int8_t current_var_index = 0;
+
+  int8_t* this_var_to_check = vp->var_to_check + i_layer * (vp->hrrN + vp->ls);
+
+  // First pass: two smallest magnitudes and sign product per lifted check node.
+  // The entry is read only after checking i < MAX_CNCT, so that a row without
+  // the -1 terminator is never read past its end.
+  for (i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = current_var_index * vp->ls;
+    // All extension-region nodes share the last ls entries of the segment.
+    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;
+    for (j = 0; j < vp->ls; j++) {
+      index    = (j + vp->ls - shift) % vp->ls;
+      i_v2c    = i_v2c_base + j;
+      this_v2c = abs(this_var_to_check[i_v2c]);
+      is_min   = this_v2c < vp->min_v2c[index][0];
+      vp->min_v2c[index][1] =
+          (this_v2c >= vp->min_v2c[index][1]) ? vp->min_v2c[index][1] : (is_min ? vp->min_v2c[index][0] : this_v2c);
+      vp->min_v2c[index][0]  = is_min ? this_v2c : vp->min_v2c[index][0];
+      vp->min_v_index[index] = is_min ? i_v2c : vp->min_v_index[index];
+
+      vp->prod_v2c[index] *= (this_var_to_check[i_v2c] >= 0) ? 1 : -1;
+    }
+  }
+
+  int8_t* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);
+
+  // Second pass: write the outgoing messages.
+  for (i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = current_var_index * vp->ls;
+    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;
+    for (j = 0; j < vp->ls; j++) {
+      index = (j + vp->ls - shift) % vp->ls;
+      i_v2c = i_v2c_base + j;
+
+      // The node holding the minimum receives the second smallest magnitude.
+      this_check_to_var[i_v2c] = (i_v2c != vp->min_v_index[index]) ? vp->min_v2c[index][0] : vp->min_v2c[index][1];
+      this_check_to_var[i_v2c] = this_check_to_var[i_v2c] * vp->scaling_fctr / F2I;
+
+      // Multiplying by the own sign removes it from the overall sign product.
+      this_check_to_var[i_v2c] *= vp->prod_v2c[index] * ((this_var_to_check[i_v2c] >= 0) ? 1 : -1);
+    }
+  }
+
+  return 0;
+}
+
+/*!
+ * Recomputes all soft bits as the a-priori LLRs plus the check-to-variable
+ * messages of all layers.
+ *
+ * Partial sums whose magnitude exceeds infinity7 are replaced by
+ * \f$\pm\f$INT8_MAX, the 8-bit representation of infinity.
+ *
+ * \param[in,out] p                 Pointer to the decoder registers.
+ * \param[in]     these_var_indices For each layer, the indices of the connected
+ *                                  variable nodes. Each list ends at the first
+ *                                  -1 or after MAX_CNCT entries, whichever
+ *                                  comes first.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int update_ldpc_soft_bits_c_flood(void* p, const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_c_flood* vp = p;
+  if (p == NULL) {
+    return -1;
+  }
+
+  int     i                     = 0;
+  int     j                     = 0;
+  int     i_layer               = 0;
+  int     i_bit                 = 0;
+  int     i_bit_tmp             = 0;
+  int8_t  current_var_index     = 0;
+  int     current_var_index_ext = 0;
+  int8_t* this_check_to_var     = NULL;
+
+  long tmp = 0;
+
+  // Restart from the a-priori information.
+  for (i = 0; i < vp->liftN; i++) {
+    vp->soft_bits[i] = vp->llrs[i];
+  }
+
+  for (i_layer = 0; i_layer < vp->bgM; i_layer++) {
+    this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);
+    // The entry is read only after checking i < MAX_CNCT, so that a row without
+    // the -1 terminator is never read past its end.
+    for (i = 0; i < MAX_CNCT; i++) {
+      current_var_index = these_var_indices[i_layer][i];
+      if (current_var_index == -1) {
+        break;
+      }
+      current_var_index_ext = current_var_index * vp->ls;
+      for (j = 0; j < vp->ls; j++) {
+        i_bit = current_var_index_ext + j;
+        // Messages of extension-region nodes are stored right after the high-rate region.
+        i_bit_tmp = (current_var_index_ext <= vp->hrrN) ? i_bit : vp->hrrN + j;
+        tmp       = (long)this_check_to_var[i_bit_tmp] + vp->soft_bits[i_bit];
+        if (tmp > infinity7) {
+          tmp = INT8_MAX;
+        }
+        if (tmp < -infinity7) {
+          tmp = -INT8_MAX;
+        }
+        vp->soft_bits[i_bit] = (int8_t)tmp;
+      }
+    }
+  }
+
+  return 0;
+}
+
+/*!
+ * Takes hard decisions on the first \p liftK soft bits: a bit is 1 if the
+ * corresponding soft bit is negative, 0 otherwise.
+ *
+ * \param[in]  p       Pointer to the decoder registers.
+ * \param[out] message Decoded bits, one per byte (\p liftK entries are written).
+ * \param[in]  liftK   Number of bits to extract.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int extract_ldpc_message_c_flood(void* p, uint8_t* message, uint16_t liftK)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  const struct ldpc_regs_c_flood* regs = p;
+  const int8_t*                   soft = regs->soft_bits;
+
+  for (int k = 0; k < liftK; k++) {
+    message[k] = (soft[k] < 0) ? 1 : 0;
+  }
+
+  return 0;
+}
+
+/*!
+ * Computes z = x - y entry by entry, saturating the result to [-clip, clip].
+ * Entries of x equal to +/-127 or beyond (the 8-bit representation of
+ * infinity) are propagated as +/-127 irrespective of y.
+ */
+void inner_var_to_check_c(const int8_t* x, const int8_t* y, int8_t* z, const uint8_t clip, const uint32_t len)
+{
+  const long inf8  = (1U << 7U) - 1; // Max positive value in 8-bit representation
+  const long bound = clip;
+
+  for (unsigned k = 0; k < len; k++) {
+    const long minuend = x[k];
+
+    if (minuend >= inf8) {
+      // Plus infinity stays plus infinity.
+      z[k] = (int8_t)inf8;
+    } else if (minuend <= -inf8) {
+      // Minus infinity stays minus infinity.
+      z[k] = (int8_t)(-inf8);
+    } else {
+      long diff = minuend - y[k];
+      if (diff > bound) {
+        diff = bound;
+      } else if (diff < -bound) {
+        diff = -bound;
+      }
+      z[k] = (int8_t)diff;
+    }
+  }
+}
--- a/lib/src/phy/fec/ldpc/ldpc_dec_f.c
+++ b/lib/src/phy/fec/ldpc/ldpc_dec_f.c
@ -0,0 +1,302 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_f.c
+ * \brief Definition of the LDPC decoder inner functions working
+ *    with float-valued LLRs.
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include "ldpc_dec_all.h"
+#include "math.h"
+#include "srslte/phy/fec/ldpc/base_graph.h"
+
+#include "srslte/phy/utils/vector.h"
+
+/*!
+ * \brief Inner registers for the LDPC decoder that works with real-valued LLRs.
+ */
+struct ldpc_regs {
+  float* soft_bits;    /*!< \brief A-posteriori log-likelihood ratios. */
+  float* check_to_var; /*!< \brief Check-to-variable messages, one segment of (hrrN + ls) entries per layer. */
+  float* var_to_check; /*!< \brief Variable-to-check messages of the layer being processed ((hrrN + ls) entries). */
+  float (*min_v2c)[2]; /*!< \brief Helper register: two smallest message magnitudes for each lifted check node. */
+  int* min_v_index;    /*!< \brief Helper register: position of the smallest magnitude for each lifted check node. */
+  int* prod_v2c;       /*!< \brief Helper register: product of the message signs for each lifted check node. */
+
+  uint16_t liftN;        /*!< \brief Total number of variable nodes (after lifting). */
+  uint16_t hrrN;         /*!< \brief Number of variable nodes in the high-rate region (after lifting). */
+  uint8_t  bgM;          /*!< \brief Number of check nodes (before lifting). */
+  uint16_t ls;           /*!< \brief Lifting size. */
+  float    scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
+};
+
+/*!
+ * Allocates the registers of the LDPC decoder that works with float-valued LLRs.
+ *
+ * \param[in] bgN          Number of variable nodes of the base graph (before lifting).
+ * \param[in] bgM          Number of check nodes of the base graph (before lifting).
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm.
+ * \return A pointer to the new registers, or NULL if any allocation fails (in
+ *         which case everything allocated so far is released).
+ */
+void* create_ldpc_dec_f(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
+{
+  uint8_t  bgK   = bgN - bgM;
+  uint16_t liftN = bgN * ls;
+  uint16_t hrrN  = (bgK + 4) * ls;
+
+  struct ldpc_regs* regs = malloc(sizeof(struct ldpc_regs));
+  if (regs == NULL) {
+    return NULL;
+  }
+
+  // Each failure jumps to the label that releases what was allocated before it.
+  regs->soft_bits = srslte_vec_f_malloc(liftN);
+  if (regs->soft_bits == NULL) {
+    goto free_regs;
+  }
+
+  regs->check_to_var = srslte_vec_f_malloc((hrrN + ls) * bgM);
+  if (regs->check_to_var == NULL) {
+    goto free_soft_bits;
+  }
+
+  // Only the messages of one layer are kept at a time.
+  regs->var_to_check = srslte_vec_f_malloc((hrrN + ls));
+  if (regs->var_to_check == NULL) {
+    goto free_check_to_var;
+  }
+
+  regs->min_v2c = malloc(ls * sizeof(float[2]));
+  if (regs->min_v2c == NULL) {
+    goto free_var_to_check;
+  }
+
+  regs->min_v_index = srslte_vec_i32_malloc(ls);
+  if (regs->min_v_index == NULL) {
+    goto free_min_v2c;
+  }
+
+  regs->prod_v2c = srslte_vec_i32_malloc(ls);
+  if (regs->prod_v2c == NULL) {
+    goto free_min_v_index;
+  }
+
+  regs->bgM          = bgM;
+  regs->liftN        = liftN;
+  regs->hrrN         = hrrN;
+  regs->ls           = ls;
+  regs->scaling_fctr = scaling_fctr;
+
+  return regs;
+
+free_min_v_index:
+  free(regs->min_v_index);
+free_min_v2c:
+  free(regs->min_v2c);
+free_var_to_check:
+  free(regs->var_to_check);
+free_check_to_var:
+  free(regs->check_to_var);
+free_soft_bits:
+  free(regs->soft_bits);
+free_regs:
+  free(regs);
+  return NULL;
+}
+
+/*!
+ * Releases the registers created by create_ldpc_dec_f().
+ * \param[in] p Pointer to the registers; a NULL pointer is ignored.
+ */
+void delete_ldpc_dec_f(void* p)
+{
+  struct ldpc_regs* regs = p;
+
+  if (regs == NULL) {
+    return;
+  }
+
+  // Inner buffers first (reverse allocation order), then the container.
+  free(regs->prod_v2c);
+  free(regs->min_v_index);
+  free(regs->min_v2c);
+  free(regs->var_to_check);
+  free(regs->check_to_var);
+  free(regs->soft_bits);
+  free(regs);
+}
+
+/*!
+ * Loads a new set of LLRs into the decoder registers and clears all messages.
+ *
+ * The first 2 * \p ls soft bits are set to zero; the remaining ones, up to
+ * liftN, are copied from \p llrs.
+ *
+ * \param[in,out] p    Pointer to the decoder registers.
+ * \param[in]     llrs Input LLRs (at least liftN - 2 * \p ls values are read).
+ * \param[in]     ls   Lifting size.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int init_ldpc_dec_f(void* p, const float* llrs, uint16_t ls)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs* regs = p;
+  int               skip = 2 * ls;
+
+  // The first "skip" positions carry no channel information.
+  bzero(regs->soft_bits, skip * sizeof(float));
+
+  for (int k = 0; k + skip < regs->liftN; k++) {
+    regs->soft_bits[k + skip] = llrs[k];
+  }
+
+  // No message has been exchanged yet.
+  bzero(regs->check_to_var, (regs->hrrN + regs->ls) * regs->bgM * sizeof(float));
+  bzero(regs->var_to_check, (regs->hrrN + regs->ls) * sizeof(float));
+
+  return 0;
+}
+
+/*!
+ * Updates the variable-to-check messages of one layer as the difference
+ * between the soft bits and the check-to-variable messages of that layer.
+ *
+ * \param[in,out] p       Pointer to the decoder registers.
+ * \param[in]     i_layer Index of the layer to update.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int update_ldpc_var_to_check_f(void* p, int i_layer)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs* regs = p;
+
+  // Check-to-variable messages of this layer live in a segment of (hrrN + ls) entries.
+  float* c2v = regs->check_to_var + i_layer * (regs->hrrN + regs->ls);
+
+  // High-rate region.
+  srslte_vec_sub_fff(regs->soft_bits, c2v, regs->var_to_check, regs->hrrN);
+
+  if (i_layer < 4) {
+    return 0;
+  }
+
+  // Extension region: only layers from the fifth one on have it.
+  srslte_vec_sub_fff(
+      regs->soft_bits + regs->hrrN + (i_layer - 4) * regs->ls, c2v + regs->hrrN, regs->var_to_check + regs->hrrN, regs->ls);
+
+  return 0;
+}
+
+/*!
+ * Updates the check-to-variable messages of one layer according to the
+ * normalized min-sum rule.
+ *
+ * A first pass over the variable nodes connected to the layer records, for each
+ * of the \c ls lifted check nodes, the two smallest variable-to-check
+ * magnitudes, the position of the smallest one and the product of the message
+ * signs. A second pass writes, for every connected variable node, the smallest
+ * magnitude among the other nodes, multiplied by scaling_fctr and with the sign
+ * given by the product of the signs of the other messages.
+ *
+ * \param[in,out] p                 Pointer to the decoder registers.
+ * \param[in]     i_layer           Index of the layer to update.
+ * \param[in]     this_pcm          Circular shifts of this layer, indexed by
+ *                                  (base graph) variable node.
+ * \param[in]     these_var_indices Indices of the variable nodes connected to
+ *                                  this layer. The list ends at the first -1 or
+ *                                  after MAX_CNCT entries, whichever comes first.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int update_ldpc_check_to_var_f(void*           p,
+                               int             i_layer,
+                               const uint16_t* this_pcm,
+                               const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  int i = 0;
+  int j = 0;
+
+  for (i = 0; i < vp->ls; i++) {
+    vp->prod_v2c[i] = 1;
+    // No minimum found yet: -1 never matches a valid message position, so the
+    // second pass never reads a stale or uninitialized index.
+    vp->min_v_index[i] = -1;
+    for (j = 0; j < 2; j++) {
+      vp->min_v2c[i][j] = INFINITY;
+    }
+  }
+
+  uint16_t shift      = 0;
+  int      index      = 0;
+  float    this_v2c   = NAN;
+  int      is_min     = 0;
+  int      i_v2c_base = 0;
+  int      i_v2c      = 0;
+
+  int8_t current_var_index = 0;
+
+  // First pass: two smallest magnitudes and sign product per lifted check node.
+  // The entry is read only after checking i < MAX_CNCT, so that a row without
+  // the -1 terminator is never read past its end.
+  for (i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = current_var_index * vp->ls;
+    // All extension-region nodes share the last ls entries of the segment.
+    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;
+    for (j = 0; j < vp->ls; j++) {
+      index    = (j + vp->ls - shift) % vp->ls;
+      i_v2c    = i_v2c_base + j;
+      this_v2c = fabsf(vp->var_to_check[i_v2c]);
+      is_min   = this_v2c < vp->min_v2c[index][0];
+      vp->min_v2c[index][1] =
+          (this_v2c >= vp->min_v2c[index][1]) ? vp->min_v2c[index][1] : (is_min ? vp->min_v2c[index][0] : this_v2c);
+      vp->min_v2c[index][0]  = is_min ? this_v2c : vp->min_v2c[index][0];
+      vp->min_v_index[index] = is_min ? i_v2c : vp->min_v_index[index];
+
+      vp->prod_v2c[index] *= (vp->var_to_check[i_v2c] >= 0) ? 1 : -1;
+    }
+  }
+
+  float* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);
+
+  // Second pass: write the outgoing messages.
+  for (i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = current_var_index * vp->ls;
+    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;
+    for (j = 0; j < vp->ls; j++) {
+      index = (j + vp->ls - shift) % vp->ls;
+      i_v2c = i_v2c_base + j;
+
+      // The node holding the minimum receives the second smallest magnitude.
+      this_check_to_var[i_v2c] = (i_v2c != vp->min_v_index[index]) ? vp->min_v2c[index][0] : vp->min_v2c[index][1];
+      this_check_to_var[i_v2c] *= vp->scaling_fctr;
+
+      // Multiplying by the own sign removes it from the overall sign product.
+      this_check_to_var[i_v2c] *= (float)vp->prod_v2c[index] * ((vp->var_to_check[i_v2c] >= 0) ? 1.F : -1.F);
+    }
+  }
+
+  return 0;
+}
+
+/*!
+ * Updates the soft bits of the variable nodes connected to one layer as the
+ * sum of the check-to-variable and variable-to-check messages of that layer.
+ *
+ * \param[in,out] p                 Pointer to the decoder registers.
+ * \param[in]     i_layer           Index of the layer.
+ * \param[in]     these_var_indices Indices of the variable nodes connected to
+ *                                  this layer. The list ends at the first -1 or
+ *                                  after MAX_CNCT entries, whichever comes first.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int update_ldpc_soft_bits_f(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs* vp = p;
+  if (p == NULL) {
+    return -1;
+  }
+
+  int    i_bit             = 0;
+  int    i_bit_tmp         = 0;
+  float* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);
+  float* this_var_to_check = vp->var_to_check;
+
+  int8_t current_var_index     = 0;
+  int    current_var_index_ext = 0;
+
+  // The entry is read only after checking i < MAX_CNCT, so that a row without
+  // the -1 terminator is never read past its end.
+  for (int i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    current_var_index_ext = current_var_index * vp->ls;
+    for (int j = 0; j < vp->ls; j++) {
+      i_bit = current_var_index_ext + j;
+      // Messages of extension-region nodes are stored right after the high-rate region.
+      i_bit_tmp = (current_var_index_ext <= vp->hrrN) ? i_bit : vp->hrrN + j;
+
+      vp->soft_bits[i_bit] = this_check_to_var[i_bit_tmp] + this_var_to_check[i_bit_tmp];
+    }
+  }
+
+  return 0;
+}
+
+/*!
+ * Takes hard decisions on the first \p liftK soft bits: a bit is 1 if the
+ * corresponding soft bit is negative, 0 otherwise.
+ *
+ * \param[in]  p       Pointer to the decoder registers.
+ * \param[out] message Decoded bits, one per byte (\p liftK entries are written).
+ * \param[in]  liftK   Number of bits to extract.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int extract_ldpc_message_f(void* p, uint8_t* message, uint16_t liftK)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  const struct ldpc_regs* regs = p;
+  const float*            soft = regs->soft_bits;
+
+  for (int k = 0; k < liftK; k++) {
+    message[k] = (soft[k] < 0) ? 1 : 0;
+  }
+
+  return 0;
+}
--- a/lib/src/phy/fec/ldpc/ldpc_dec_s.c
+++ b/lib/src/phy/fec/ldpc/ldpc_dec_s.c
@ -0,0 +1,364 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_s.c
+ * \brief Definition of the LDPC decoder inner functions working
+ *    with 16-bit integer-valued LLRs.
+ *
+ * Even if the inner representation is based on 16 bits, check-to-variable and
+ * variable-to-check messages are actually represented with 15 bits, the
+ * remaining bit is used to represent infinity.
+ *
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <strings.h>
+
+#include "ldpc_dec_all.h"
+#include "srslte/phy/fec/ldpc/base_graph.h"
+#include "srslte/phy/utils/vector.h"
+
+#define F2I 100 /*!< \brief Used for float to int conversion---float f is stored as (int)(f*F2I). */
+
+/*!
+ * \brief Maximum message magnitude.
+ * Messages use a 15-bit quantization, hence their magnitude is at most 2^14 - 1 = 16383.
+ * Soft bits use the remaining bit to denote infinity.
+ * NOTE(review): unlike infinity7 in the 8-bit flooded decoder, this constant is not
+ * declared static and thus has external linkage - confirm whether that is intended.
+ */
+const int16_t infinity15 = (1U << 14U) - 1;
+
+/*!
+ * \brief Inner registers for the LDPC decoder that works with 16-bit integer-valued LLRs.
+ */
+struct ldpc_regs_s {
+  int16_t* soft_bits;    /*!< \brief A-posteriori log-likelihood ratios. */
+  int16_t* check_to_var; /*!< \brief Check-to-variable messages, one segment of (hrrN + ls) entries per layer. */
+  int16_t* var_to_check; /*!< \brief Variable-to-check messages of the layer being processed ((hrrN + ls) entries). */
+  int16_t (*min_v2c)[2]; /*!< \brief Helper register: two smallest message magnitudes for each lifted check node. */
+  int* min_v_index;      /*!< \brief Helper register: position of the smallest magnitude for each lifted check node. */
+  int* prod_v2c;         /*!< \brief Helper register: product of the message signs for each lifted check node. */
+
+  uint16_t liftN;        /*!< \brief Total number of variable nodes (after lifting). */
+  uint16_t hrrN;         /*!< \brief Number of variable nodes in the high-rate region (after lifting). */
+  uint8_t  bgM;          /*!< \brief Number of check nodes (before lifting). */
+  uint16_t ls;           /*!< \brief Lifting size. */
+  int      scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm, stored multiplied by F2I. */
+};
+
+/*!
+ * Carries out the actual update of the variable-to-check messages. It basically
+ * consists in \f$ z = x - y \f$ (as vectors). However, first it checks whether
+ * \f$\lvert x[i] \rvert = 2^{15}-1 \f$ (our representation of infinity) to
+ * ensure it is properly propagated: in that case \f$ z[i] = \pm (2^{15}-1) \f$,
+ * with the sign of \f$ x[i] \f$ and irrespective of \f$ y[i] \f$. Otherwise,
+ * the subtraction is saturated between \f$- clip\f$ and \f$+ clip\f$.
+ * \param[in] x     Minuend: array we subtract from (in practice, the soft bits).
+ * \param[in] y     Subtrahend: array to be subtracted (in practice, the
+ *                  check-to-variable messages).
+ * \param[out] z    Resulting difference array (in practice, the updated
+ *                  variable-to-check messages).
+ * \param[in]  clip The saturation value.
+ * \param[in]  len  The length of the vectors.
+ */
+static void inner_var_to_check_s(const int16_t* x, const int16_t* y, int16_t* z, uint16_t clip, uint32_t len);
+
+/*!
+ * Allocates the registers of the LDPC decoder that works with 16-bit LLRs.
+ *
+ * \param[in] bgN          Number of variable nodes of the base graph (before lifting).
+ * \param[in] bgM          Number of check nodes of the base graph (before lifting).
+ * \param[in] ls           Lifting size.
+ * \param[in] scaling_fctr Scaling factor of the normalized min-sum algorithm; it is
+ *                         stored in fixed point as (int)(scaling_fctr * F2I).
+ * \return A pointer to the new registers, or NULL if any allocation fails (in
+ *         which case everything allocated so far is released).
+ */
+void* create_ldpc_dec_s(uint8_t bgN, uint8_t bgM, uint16_t ls, float scaling_fctr)
+{
+  uint8_t  bgK   = bgN - bgM;
+  uint16_t liftN = bgN * ls;
+  uint16_t hrrN  = (bgK + 4) * ls;
+
+  struct ldpc_regs_s* regs = malloc(sizeof(struct ldpc_regs_s));
+  if (regs == NULL) {
+    return NULL;
+  }
+
+  // Each failure jumps to the label that releases what was allocated before it.
+  regs->soft_bits = malloc(liftN * sizeof(int16_t));
+  if (regs->soft_bits == NULL) {
+    goto free_regs;
+  }
+
+  regs->check_to_var = malloc((hrrN + ls) * bgM * sizeof(int16_t));
+  if (regs->check_to_var == NULL) {
+    goto free_soft_bits;
+  }
+
+  // Only the messages of one layer are kept at a time.
+  regs->var_to_check = malloc((hrrN + ls) * sizeof(int16_t));
+  if (regs->var_to_check == NULL) {
+    goto free_check_to_var;
+  }
+
+  regs->min_v2c = malloc(ls * sizeof(int16_t[2]));
+  if (regs->min_v2c == NULL) {
+    goto free_var_to_check;
+  }
+
+  regs->min_v_index = srslte_vec_i32_malloc(ls);
+  if (regs->min_v_index == NULL) {
+    goto free_min_v2c;
+  }
+
+  regs->prod_v2c = srslte_vec_i32_malloc(ls);
+  if (regs->prod_v2c == NULL) {
+    goto free_min_v_index;
+  }
+
+  regs->bgM          = bgM;
+  regs->liftN        = liftN;
+  regs->hrrN         = hrrN;
+  regs->ls           = ls;
+  regs->scaling_fctr = (int)(scaling_fctr * F2I);
+
+  return regs;
+
+free_min_v_index:
+  free(regs->min_v_index);
+free_min_v2c:
+  free(regs->min_v2c);
+free_var_to_check:
+  free(regs->var_to_check);
+free_check_to_var:
+  free(regs->check_to_var);
+free_soft_bits:
+  free(regs->soft_bits);
+free_regs:
+  free(regs);
+  return NULL;
+}
+
+/*!
+ * Releases the registers created by create_ldpc_dec_s().
+ * \param[in] p Pointer to the registers; a NULL pointer is ignored.
+ */
+void delete_ldpc_dec_s(void* p)
+{
+  struct ldpc_regs_s* regs = p;
+
+  if (regs == NULL) {
+    return;
+  }
+
+  // Inner buffers first (reverse allocation order), then the container.
+  free(regs->prod_v2c);
+  free(regs->min_v_index);
+  free(regs->min_v2c);
+  free(regs->var_to_check);
+  free(regs->check_to_var);
+  free(regs->soft_bits);
+  free(regs);
+}
+
+/*!
+ * Loads a new set of LLRs into the decoder registers and clears all messages.
+ *
+ * The first 2 * \p ls soft bits are set to zero; the remaining ones, up to
+ * liftN, are copied from \p llrs.
+ *
+ * \param[in,out] p    Pointer to the decoder registers.
+ * \param[in]     llrs Input LLRs (at least liftN - 2 * \p ls values are read).
+ * \param[in]     ls   Lifting size.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int init_ldpc_dec_s(void* p, const int16_t* llrs, uint16_t ls)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs_s* regs = p;
+  int                 skip = 2 * ls;
+
+  // The first "skip" positions carry no channel information.
+  bzero(regs->soft_bits, skip * sizeof(int16_t));
+
+  for (int k = 0; k + skip < regs->liftN; k++) {
+    regs->soft_bits[k + skip] = llrs[k];
+  }
+
+  // No message has been exchanged yet.
+  bzero(regs->check_to_var, (regs->hrrN + regs->ls) * regs->bgM * sizeof(int16_t));
+  bzero(regs->var_to_check, (regs->hrrN + regs->ls) * sizeof(int16_t));
+
+  return 0;
+}
+
+/*!
+ * Updates the variable-to-check messages of one layer as the saturated
+ * difference between the soft bits and the check-to-variable messages of that
+ * layer (see inner_var_to_check_s()).
+ *
+ * \param[in,out] p       Pointer to the decoder registers.
+ * \param[in]     i_layer Index of the layer to update.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int update_ldpc_var_to_check_s(void* p, int i_layer)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  struct ldpc_regs_s* regs = p;
+
+  // Check-to-variable messages of this layer live in a segment of (hrrN + ls) entries.
+  int16_t* c2v = regs->check_to_var + i_layer * (regs->hrrN + regs->ls);
+
+  // High-rate region.
+  inner_var_to_check_s(regs->soft_bits, c2v, regs->var_to_check, infinity15, regs->hrrN);
+
+  if (i_layer < 4) {
+    return 0;
+  }
+
+  // Extension region: only layers from the fifth one on have it.
+  inner_var_to_check_s(regs->soft_bits + regs->hrrN + (i_layer - 4) * regs->ls,
+                       c2v + regs->hrrN,
+                       regs->var_to_check + regs->hrrN,
+                       infinity15,
+                       regs->ls);
+
+  return 0;
+}
+
+/*!
+ * Updates the check-to-variable messages of one layer according to the
+ * normalized min-sum rule.
+ *
+ * A first pass over the variable nodes connected to the layer records, for each
+ * of the \c ls lifted check nodes, the two smallest variable-to-check
+ * magnitudes, the position of the smallest one and the product of the message
+ * signs. A second pass writes, for every connected variable node, the smallest
+ * magnitude among the other nodes, scaled by scaling_fctr / F2I and with the
+ * sign given by the product of the signs of the other messages.
+ *
+ * \param[in,out] p                 Pointer to the decoder registers.
+ * \param[in]     i_layer           Index of the layer to update.
+ * \param[in]     this_pcm          Circular shifts of this layer, indexed by
+ *                                  (base graph) variable node.
+ * \param[in]     these_var_indices Indices of the variable nodes connected to
+ *                                  this layer. The list ends at the first -1 or
+ *                                  after MAX_CNCT entries, whichever comes first.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int update_ldpc_check_to_var_s(void*           p,
+                               int             i_layer,
+                               const uint16_t* this_pcm,
+                               const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_s* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  int i = 0;
+  int j = 0;
+
+  for (i = 0; i < vp->ls; i++) {
+    vp->prod_v2c[i] = 1;
+    // No minimum found yet: -1 never matches a valid message position, so the
+    // second pass never reads a stale or uninitialized index.
+    vp->min_v_index[i] = -1;
+    for (j = 0; j < 2; j++) {
+      vp->min_v2c[i][j] = INT16_MAX;
+    }
+  }
+
+  uint16_t shift      = 0;
+  int      index      = 0;
+  int16_t  this_v2c   = 0;
+  int      is_min     = 0;
+  int      i_v2c      = 0;
+  int      i_v2c_base = 0;
+
+  int8_t current_var_index = 0;
+
+  // First pass: two smallest magnitudes and sign product per lifted check node.
+  // The entry is read only after checking i < MAX_CNCT, so that a row without
+  // the -1 terminator is never read past its end.
+  for (i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = current_var_index * vp->ls;
+    // All extension-region nodes share the last ls entries of the segment.
+    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;
+    for (j = 0; j < vp->ls; j++) {
+      index    = (j + vp->ls - shift) % vp->ls;
+      i_v2c    = i_v2c_base + j;
+      this_v2c = abs(vp->var_to_check[i_v2c]);
+      is_min   = this_v2c < vp->min_v2c[index][0];
+      vp->min_v2c[index][1] =
+          (this_v2c >= vp->min_v2c[index][1]) ? vp->min_v2c[index][1] : (is_min ? vp->min_v2c[index][0] : this_v2c);
+      vp->min_v2c[index][0]  = is_min ? this_v2c : vp->min_v2c[index][0];
+      vp->min_v_index[index] = is_min ? i_v2c : vp->min_v_index[index];
+
+      vp->prod_v2c[index] *= (vp->var_to_check[i_v2c] >= 0) ? 1 : -1;
+    }
+  }
+
+  int16_t* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);
+
+  // Second pass: write the outgoing messages.
+  for (i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    shift      = this_pcm[current_var_index];
+    i_v2c_base = current_var_index * vp->ls;
+    i_v2c_base = (i_v2c_base <= vp->hrrN) ? i_v2c_base : vp->hrrN;
+    for (j = 0; j < vp->ls; j++) {
+      index = (j + vp->ls - shift) % vp->ls;
+      i_v2c = i_v2c_base + j;
+
+      // The node holding the minimum receives the second smallest magnitude.
+      this_check_to_var[i_v2c] = (i_v2c != vp->min_v_index[index]) ? vp->min_v2c[index][0] : vp->min_v2c[index][1];
+      this_check_to_var[i_v2c] = this_check_to_var[i_v2c] * vp->scaling_fctr / F2I;
+
+      // Multiplying by the own sign removes it from the overall sign product.
+      this_check_to_var[i_v2c] *= vp->prod_v2c[index] * ((vp->var_to_check[i_v2c] >= 0) ? 1 : -1);
+    }
+  }
+
+  return 0;
+}
+
+/*!
+ * Updates the soft bits of the variable nodes connected to one layer as the
+ * sum of the check-to-variable and variable-to-check messages of that layer.
+ *
+ * Sums whose magnitude exceeds infinity15 are replaced by \f$\pm\f$INT16_MAX,
+ * the 16-bit representation of infinity.
+ *
+ * \param[in,out] p                 Pointer to the decoder registers.
+ * \param[in]     i_layer           Index of the layer.
+ * \param[in]     these_var_indices Indices of the variable nodes connected to
+ *                                  this layer. The list ends at the first -1 or
+ *                                  after MAX_CNCT entries, whichever comes first.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int update_ldpc_soft_bits_s(void* p, int i_layer, const int8_t (*these_var_indices)[MAX_CNCT])
+{
+  struct ldpc_regs_s* vp = p;
+  if (p == NULL) {
+    return -1;
+  }
+
+  int      i_bit             = 0;
+  int      i_bit_tmp         = 0;
+  int16_t* this_check_to_var = vp->check_to_var + i_layer * (vp->hrrN + vp->ls);
+  int16_t* this_var_to_check = vp->var_to_check;
+
+  long tmp = 0;
+
+  int8_t current_var_index     = 0;
+  int    current_var_index_ext = 0;
+
+  // The entry is read only after checking i < MAX_CNCT, so that a row without
+  // the -1 terminator is never read past its end.
+  for (int i = 0; i < MAX_CNCT; i++) {
+    current_var_index = (*these_var_indices)[i];
+    if (current_var_index == -1) {
+      break;
+    }
+    current_var_index_ext = current_var_index * vp->ls;
+    for (int j = 0; j < vp->ls; j++) {
+      i_bit = current_var_index_ext + j;
+      // Messages of extension-region nodes are stored right after the high-rate region.
+      i_bit_tmp = (current_var_index_ext <= vp->hrrN) ? i_bit : vp->hrrN + j;
+
+      tmp = (long)this_check_to_var[i_bit_tmp] + this_var_to_check[i_bit_tmp];
+      if (tmp > infinity15) {
+        tmp = INT16_MAX;
+      }
+      if (tmp < -infinity15) {
+        tmp = -INT16_MAX;
+      }
+      vp->soft_bits[i_bit] = (int16_t)tmp;
+    }
+  }
+
+  return 0;
+}
+
+/*!
+ * Takes hard decisions on the first \p liftK soft bits: a bit is 1 if the
+ * corresponding soft bit is negative, 0 otherwise.
+ *
+ * \param[in]  p       Pointer to the decoder registers.
+ * \param[out] message Decoded bits, one per byte (\p liftK entries are written).
+ * \param[in]  liftK   Number of bits to extract.
+ * \return 0 on success, -1 if \p p is NULL.
+ */
+int extract_ldpc_message_s(void* p, uint8_t* message, uint16_t liftK)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  const struct ldpc_regs_s* regs = p;
+  const int16_t*            soft = regs->soft_bits;
+
+  for (int k = 0; k < liftK; k++) {
+    message[k] = (soft[k] < 0) ? 1 : 0;
+  }
+
+  return 0;
+}
+
+/*!
+ * Computes z = x - y entry by entry, saturating the result to [-clip, clip].
+ * Entries of x equal to +/-32767 or beyond (the 16-bit representation of
+ * infinity) are propagated as +/-32767 irrespective of y.
+ */
+void inner_var_to_check_s(const int16_t* x, const int16_t* y, int16_t* z, const uint16_t clip, const uint32_t len)
+{
+  const long inf16 = (1U << 15U) - 1; // Max positive value in 16-bit representation
+  const long bound = clip;
+
+  for (unsigned k = 0; k < len; k++) {
+    const long minuend = x[k];
+
+    if (minuend >= inf16) {
+      // Plus infinity stays plus infinity.
+      z[k] = (int16_t)inf16;
+    } else if (minuend <= -inf16) {
+      // Minus infinity stays minus infinity.
+      z[k] = (int16_t)(-inf16);
+    } else {
+      long diff = minuend - y[k];
+      if (diff > bound) {
+        diff = bound;
+      } else if (diff < -bound) {
+        diff = -bound;
+      }
+      z[k] = (int16_t)diff;
+    }
+  }
+}
--- a/lib/src/phy/fec/ldpc/ldpc_decoder.c
+++ b/lib/src/phy/fec/ldpc/ldpc_decoder.c
@ -0,0 +1,785 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_decoder.c
+ * \brief Definition of the LDPC decoder.
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <stdint.h>
+
+#include "../utils_avx2.h"
+#include "ldpc_dec_all.h"
+#include "srslte/phy/fec/ldpc/base_graph.h"
+#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
+#include "srslte/phy/utils/debug.h"
+#include "srslte/phy/utils/vector.h"
+
+#define MAX_ITERATIONS 10 /*!< \brief Iterations of the BP algorithm. */
+
+/*!
+ * Carries out the actual destruction of the memory allocated to the decoder, float-LLR case.
+ * \param[in,out] o The decoder (a srslte_ldpc_decoder_t); its var_indices, pcm and ptr
+ *                  members are released and reset to NULL.
+ */
+static void free_dec_f(void* o)
+{
+  srslte_ldpc_decoder_t* q = o;
+
+  // free(NULL) is a no-op, so no guards are needed. The pointers are cleared because init_f()
+  // calls this function on failure while q->free stays registered: without the reset, a later
+  // srslte_ldpc_decoder_free() would release the same buffers a second time.
+  free(q->var_indices);
+  q->var_indices = NULL;
+  free(q->pcm);
+  q->pcm = NULL;
+  delete_ldpc_dec_f(q->ptr);
+  q->ptr = NULL;
+}
+
+/*!
+ * Carries out the decoding with real-valued LLRs. Layers are processed one after the other,
+ * the soft bits being refreshed after each layer, for MAX_ITERATIONS iterations.
+ * \param[in]  o              The decoder (a srslte_ldpc_decoder_t).
+ * \param[in]  llrs           The input LLRs, loaded into the decoder registers.
+ * \param[out] message        The decoded message.
+ * \param[in]  cdwd_rm_length Length of the rate-matched codeword (adjusted internally, see below).
+ * \return Always 0.
+ */
+static int decode_f(void* o, const float* llrs, uint8_t* message, uint32_t cdwd_rm_length)
+{
+  srslte_ldpc_decoder_t* dec = o;
+
+  // Upper limit: the full codeword without the two punctured variable nodes.
+  const uint32_t max_len = dec->liftN - 2 * dec->ls;
+  // Lower limit: the high-rate region needs bgK + 4 variable nodes, 2 of which are
+  // systematically punctured by the encoder.
+  const uint32_t min_len = (dec->bgK + 2) * dec->ls;
+
+  uint32_t rm_len = cdwd_rm_length;
+  if (rm_len > max_len) {
+    rm_len = max_len;
+  }
+  if (rm_len < min_len) {
+    rm_len = min_len;
+  }
+  // Round up to the next multiple of the lifting size.
+  const uint32_t remainder = rm_len % dec->ls;
+  if (remainder != 0) {
+    rm_len += dec->ls - remainder;
+  }
+
+  init_ldpc_dec_f(dec->ptr, llrs, dec->ls);
+
+  // The standard always removes the first two variable nodes from the final codeword,
+  // hence the "+ 2" in the layer count.
+  uint8_t n_layers = rm_len / dec->ls - dec->bgK + 2;
+
+  for (int iter = 0; iter < MAX_ITERATIONS; iter++) {
+    for (int layer = 0; layer < n_layers; layer++) {
+      uint16_t* layer_pcm                  = dec->pcm + layer * dec->bgN;
+      int8_t(*layer_var_indices)[MAX_CNCT] = dec->var_indices + layer;
+
+      update_ldpc_var_to_check_f(dec->ptr, layer);
+      update_ldpc_check_to_var_f(dec->ptr, layer, layer_pcm, layer_var_indices);
+      update_ldpc_soft_bits_f(dec->ptr, layer, layer_var_indices);
+    }
+  }
+
+  extract_ldpc_message_f(dec->ptr, message, dec->liftK);
+
+  return 0;
+}
+
+/*!
+ * Initializes the decoder to work with real valued LLRs.
+ * \param[in,out] q The decoder to set up.
+ * \return 0 on success, -1 if the inner registers could not be created.
+ */
+static int init_f(srslte_ldpc_decoder_t* q)
+{
+  // The destructor is registered first; it also performs the cleanup on failure below.
+  q->free = free_dec_f;
+
+  q->ptr = create_ldpc_dec_f(q->bgN, q->bgM, q->ls, q->scaling_fctr);
+  if (q->ptr != NULL) {
+    q->decode_f = decode_f;
+    return 0;
+  }
+
+  ERROR("Create_ldpc_dec failed\n");
+  free_dec_f(q);
+  return -1;
+}
+
+/*!
+ * Carries out the actual destruction of the memory allocated to the decoder, 16-bit-LLR case.
+ * \param[in,out] o The decoder (a srslte_ldpc_decoder_t); its var_indices, pcm and ptr
+ *                  members are released and reset to NULL.
+ */
+static void free_dec_s(void* o)
+{
+  srslte_ldpc_decoder_t* q = o;
+
+  // free(NULL) is a no-op, so no guards are needed. The pointers are cleared because init_s()
+  // calls this function on failure while q->free stays registered: without the reset, a later
+  // srslte_ldpc_decoder_free() would release the same buffers a second time.
+  free(q->var_indices);
+  q->var_indices = NULL;
+  free(q->pcm);
+  q->pcm = NULL;
+  delete_ldpc_dec_s(q->ptr);
+  q->ptr = NULL;
+}
+
+/*!
+ * Carries out the decoding with 16-bit integer-valued LLRs. Layers are processed one after
+ * the other, the soft bits being refreshed after each layer, for MAX_ITERATIONS iterations.
+ * \param[in]  o              The decoder (a srslte_ldpc_decoder_t).
+ * \param[in]  llrs           The input LLRs, loaded into the decoder registers.
+ * \param[out] message        The decoded message.
+ * \param[in]  cdwd_rm_length Length of the rate-matched codeword (adjusted internally, see below).
+ * \return Always 0.
+ */
+static int decode_s(void* o, const int16_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
+{
+  srslte_ldpc_decoder_t* dec = o;
+
+  // Upper limit: the full codeword without the two punctured variable nodes.
+  const uint32_t max_len = dec->liftN - 2 * dec->ls;
+  // Lower limit: the high-rate region needs bgK + 4 variable nodes, 2 of which are
+  // systematically punctured by the encoder.
+  const uint32_t min_len = (dec->bgK + 2) * dec->ls;
+
+  uint32_t rm_len = cdwd_rm_length;
+  if (rm_len > max_len) {
+    rm_len = max_len;
+  }
+  if (rm_len < min_len) {
+    rm_len = min_len;
+  }
+  // Round up to the next multiple of the lifting size.
+  const uint32_t remainder = rm_len % dec->ls;
+  if (remainder != 0) {
+    rm_len += dec->ls - remainder;
+  }
+
+  init_ldpc_dec_s(dec->ptr, llrs, dec->ls);
+
+  // The standard always removes the first two variable nodes from the final codeword,
+  // hence the "+ 2" in the layer count.
+  uint8_t n_layers = rm_len / dec->ls - dec->bgK + 2;
+
+  for (int iter = 0; iter < MAX_ITERATIONS; iter++) {
+    for (int layer = 0; layer < n_layers; layer++) {
+      uint16_t* layer_pcm                  = dec->pcm + layer * dec->bgN;
+      int8_t(*layer_var_indices)[MAX_CNCT] = dec->var_indices + layer;
+
+      update_ldpc_var_to_check_s(dec->ptr, layer);
+      update_ldpc_check_to_var_s(dec->ptr, layer, layer_pcm, layer_var_indices);
+      update_ldpc_soft_bits_s(dec->ptr, layer, layer_var_indices);
+    }
+  }
+
+  extract_ldpc_message_s(dec->ptr, message, dec->liftK);
+
+  return 0;
+}
+
+/*!
+ * Initializes the decoder to work with 16-bit integer-valued LLRs.
+ * \param[in,out] q The decoder to set up.
+ * \return 0 on success, -1 if the inner registers could not be created.
+ */
+static int init_s(srslte_ldpc_decoder_t* q)
+{
+  // The destructor is registered first; it also performs the cleanup on failure below.
+  q->free = free_dec_s;
+
+  q->ptr = create_ldpc_dec_s(q->bgN, q->bgM, q->ls, q->scaling_fctr);
+  if (q->ptr != NULL) {
+    q->decode_s = decode_s;
+    return 0;
+  }
+
+  ERROR("Create_ldpc_dec failed\n");
+  free_dec_s(q);
+  return -1;
+}
+
+/*!
+ * Carries out the actual destruction of the memory allocated to the decoder, 8-bit-LLR case.
+ * \param[in,out] o The decoder (a srslte_ldpc_decoder_t); its var_indices, pcm and ptr
+ *                  members are released and reset to NULL.
+ */
+static void free_dec_c(void* o)
+{
+  srslte_ldpc_decoder_t* q = o;
+
+  // free(NULL) is a no-op, so no guards are needed. The pointers are cleared because init_c()
+  // calls this function on failure while q->free stays registered: without the reset, a later
+  // srslte_ldpc_decoder_free() would release the same buffers a second time.
+  free(q->var_indices);
+  q->var_indices = NULL;
+  free(q->pcm);
+  q->pcm = NULL;
+  delete_ldpc_dec_c(q->ptr);
+  q->ptr = NULL;
+}
+
+/*!
+ * Carries out the decoding with 8-bit integer-valued LLRs. Layers are processed one after
+ * the other, the soft bits being refreshed after each layer, for MAX_ITERATIONS iterations.
+ * \param[in]  o              The decoder (a srslte_ldpc_decoder_t).
+ * \param[in]  llrs           The input LLRs, loaded into the decoder registers.
+ * \param[out] message        The decoded message.
+ * \param[in]  cdwd_rm_length Length of the rate-matched codeword (adjusted internally, see below).
+ * \return Always 0.
+ */
+static int decode_c(void* o, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
+{
+  srslte_ldpc_decoder_t* dec = o;
+
+  // Upper limit: the full codeword without the two punctured variable nodes.
+  const uint32_t max_len = dec->liftN - 2 * dec->ls;
+  // Lower limit: the high-rate region needs bgK + 4 variable nodes, 2 of which are
+  // systematically punctured by the encoder.
+  const uint32_t min_len = (dec->bgK + 2) * dec->ls;
+
+  uint32_t rm_len = cdwd_rm_length;
+  if (rm_len > max_len) {
+    rm_len = max_len;
+  }
+  if (rm_len < min_len) {
+    rm_len = min_len;
+  }
+  // Round up to the next multiple of the lifting size.
+  const uint32_t remainder = rm_len % dec->ls;
+  if (remainder != 0) {
+    rm_len += dec->ls - remainder;
+  }
+
+  init_ldpc_dec_c(dec->ptr, llrs, dec->ls);
+
+  // The standard always removes the first two variable nodes from the final codeword,
+  // hence the "+ 2" in the layer count.
+  uint8_t n_layers = rm_len / dec->ls - dec->bgK + 2;
+
+  for (int iter = 0; iter < MAX_ITERATIONS; iter++) {
+    for (int layer = 0; layer < n_layers; layer++) {
+      uint16_t* layer_pcm                  = dec->pcm + layer * dec->bgN;
+      int8_t(*layer_var_indices)[MAX_CNCT] = dec->var_indices + layer;
+
+      update_ldpc_var_to_check_c(dec->ptr, layer);
+      update_ldpc_check_to_var_c(dec->ptr, layer, layer_pcm, layer_var_indices);
+      update_ldpc_soft_bits_c(dec->ptr, layer, layer_var_indices);
+    }
+  }
+
+  extract_ldpc_message_c(dec->ptr, message, dec->liftK);
+
+  return 0;
+}
+
+/*!
+ * Initializes the decoder to work with 8-bit integer-valued LLRs.
+ * \param[in,out] q The decoder to set up.
+ * \return 0 on success, -1 if the inner registers could not be created.
+ */
+static int init_c(srslte_ldpc_decoder_t* q)
+{
+  // The destructor is registered first; it also performs the cleanup on failure below.
+  q->free = free_dec_c;
+
+  q->ptr = create_ldpc_dec_c(q->bgN, q->bgM, q->ls, q->scaling_fctr);
+  if (q->ptr != NULL) {
+    q->decode_c = decode_c;
+    return 0;
+  }
+
+  ERROR("Create_ldpc_dec failed\n");
+  free_dec_c(q);
+  return -1;
+}
+
+/*!
+ * Carries out the actual destruction of the memory allocated to the decoder, 8-bit-LLR flooded case.
+ * \param[in,out] o The decoder (a srslte_ldpc_decoder_t); its var_indices, pcm and ptr
+ *                  members are released and reset to NULL.
+ */
+static void free_dec_c_flood(void* o)
+{
+  srslte_ldpc_decoder_t* q = o;
+
+  // free(NULL) is a no-op, so no guards are needed. The pointers are cleared because
+  // init_c_flood() calls this function on failure while q->free stays registered: without the
+  // reset, a later srslte_ldpc_decoder_free() would release the same buffers a second time.
+  free(q->var_indices);
+  q->var_indices = NULL;
+  free(q->pcm);
+  q->pcm = NULL;
+  delete_ldpc_dec_c_flood(q->ptr);
+  q->ptr = NULL;
+}
+
+/*!
+ * Carries out the decoding with 8-bit integer-valued LLRs, flooded scheduling: in each of the
+ * 2 * MAX_ITERATIONS iterations, all layers update their variable-to-check messages, then all
+ * layers update their check-to-variable messages, and finally the soft bits are refreshed once.
+ * \param[in]  o              The decoder (a srslte_ldpc_decoder_t).
+ * \param[in]  llrs           The input LLRs, loaded into the decoder registers.
+ * \param[out] message        The decoded message.
+ * \param[in]  cdwd_rm_length Length of the rate-matched codeword (adjusted internally, see below).
+ * \return Always 0.
+ */
+static int decode_c_flood(void* o, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
+{
+  srslte_ldpc_decoder_t* dec = o;
+
+  // Upper limit: the full codeword without the two punctured variable nodes.
+  const uint32_t max_len = dec->liftN - 2 * dec->ls;
+  // Lower limit: the high-rate region needs bgK + 4 variable nodes, 2 of which are
+  // systematically punctured by the encoder.
+  const uint32_t min_len = (dec->bgK + 2) * dec->ls;
+
+  uint32_t rm_len = cdwd_rm_length;
+  if (rm_len > max_len) {
+    rm_len = max_len;
+  }
+  if (rm_len < min_len) {
+    rm_len = min_len;
+  }
+  // Round up to the next multiple of the lifting size.
+  const uint32_t remainder = rm_len % dec->ls;
+  if (remainder != 0) {
+    rm_len += dec->ls - remainder;
+  }
+
+  init_ldpc_dec_c_flood(dec->ptr, llrs, dec->ls);
+
+  // The standard always removes the first two variable nodes from the final codeword,
+  // hence the "+ 2" in the layer count.
+  uint8_t n_layers = rm_len / dec->ls - dec->bgK + 2;
+
+  for (int iter = 0; iter < 2 * MAX_ITERATIONS; iter++) {
+    for (int layer = 0; layer < n_layers; layer++) {
+      update_ldpc_var_to_check_c_flood(dec->ptr, layer);
+    }
+
+    for (int layer = 0; layer < n_layers; layer++) {
+      uint16_t* layer_pcm                  = dec->pcm + layer * dec->bgN;
+      int8_t(*layer_var_indices)[MAX_CNCT] = dec->var_indices + layer;
+
+      update_ldpc_check_to_var_c_flood(dec->ptr, layer, layer_pcm, layer_var_indices);
+    }
+
+    update_ldpc_soft_bits_c_flood(dec->ptr, dec->var_indices);
+  }
+
+  extract_ldpc_message_c_flood(dec->ptr, message, dec->liftK);
+
+  return 0;
+}
+
+/*!
+ * Initializes the decoder to work with 8-bit integer-valued LLRs (flooded scheduling).
+ * \param[in,out] q The decoder to set up.
+ * \return 0 on success, -1 if the inner registers could not be created.
+ */
+static int init_c_flood(srslte_ldpc_decoder_t* q)
+{
+  // The destructor is registered first; it also performs the cleanup on failure below.
+  q->free = free_dec_c_flood;
+
+  q->ptr = create_ldpc_dec_c_flood(q->bgN, q->bgM, q->ls, q->scaling_fctr);
+  if (q->ptr != NULL) {
+    q->decode_c = decode_c_flood;
+    return 0;
+  }
+
+  ERROR("Create_ldpc_dec failed\n");
+  free_dec_c_flood(q);
+  return -1;
+}
+
+#ifdef LV_HAVE_AVX2
+/*!
+ * Carries out the actual destruction of the memory allocated to the decoder, 8-bit-LLR case
+ * (AVX2 implementation).
+ * \param[in,out] o The decoder (a srslte_ldpc_decoder_t); its var_indices, pcm and ptr
+ *                  members are released and reset to NULL.
+ */
+static void free_dec_c_avx2(void* o)
+{
+  srslte_ldpc_decoder_t* q = o;
+
+  // free(NULL) is a no-op, so no guards are needed. The pointers are cleared because
+  // init_c_avx2() calls this function on failure while q->free stays registered: without the
+  // reset, a later srslte_ldpc_decoder_free() would release the same buffers a second time.
+  free(q->var_indices);
+  q->var_indices = NULL;
+  free(q->pcm);
+  q->pcm = NULL;
+  delete_ldpc_dec_c_avx2(q->ptr);
+  q->ptr = NULL;
+}
+
+/*!
+ * Carries out the decoding with 8-bit integer-valued LLRs (AVX2 implementation). Layers are
+ * processed one after the other, the soft bits being refreshed after each layer, for
+ * MAX_ITERATIONS iterations.
+ * \param[in]  o              The decoder (a srslte_ldpc_decoder_t).
+ * \param[in]  llrs           The input LLRs, loaded into the decoder registers.
+ * \param[out] message        The decoded message.
+ * \param[in]  cdwd_rm_length Length of the rate-matched codeword (adjusted internally, see below).
+ * \return Always 0.
+ */
+static int decode_c_avx2(void* o, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
+{
+  srslte_ldpc_decoder_t* dec = o;
+
+  // Upper limit: the full codeword without the two punctured variable nodes.
+  const uint32_t max_len = dec->liftN - 2 * dec->ls;
+  // Lower limit: the high-rate region needs bgK + 4 variable nodes, 2 of which are
+  // systematically punctured by the encoder.
+  const uint32_t min_len = (dec->bgK + 2) * dec->ls;
+
+  uint32_t rm_len = cdwd_rm_length;
+  if (rm_len > max_len) {
+    rm_len = max_len;
+  }
+  if (rm_len < min_len) {
+    rm_len = min_len;
+  }
+  // Round up to the next multiple of the lifting size.
+  const uint32_t remainder = rm_len % dec->ls;
+  if (remainder != 0) {
+    rm_len += dec->ls - remainder;
+  }
+
+  init_ldpc_dec_c_avx2(dec->ptr, llrs, dec->ls);
+
+  // The standard always removes the first two variable nodes from the final codeword,
+  // hence the "+ 2" in the layer count.
+  uint8_t n_layers = rm_len / dec->ls - dec->bgK + 2;
+
+  for (int iter = 0; iter < MAX_ITERATIONS; iter++) {
+    for (int layer = 0; layer < n_layers; layer++) {
+      uint16_t* layer_pcm                  = dec->pcm + layer * dec->bgN;
+      int8_t(*layer_var_indices)[MAX_CNCT] = dec->var_indices + layer;
+
+      update_ldpc_var_to_check_c_avx2(dec->ptr, layer);
+      update_ldpc_check_to_var_c_avx2(dec->ptr, layer, layer_pcm, layer_var_indices);
+      update_ldpc_soft_bits_c_avx2(dec->ptr, layer, layer_var_indices);
+    }
+  }
+
+  extract_ldpc_message_c_avx2(dec->ptr, message, dec->liftK);
+
+  return 0;
+}
+
+/*!
+ * Initializes the decoder to work with 8-bit integer-valued LLRs (AVX2 implementation).
+ * \param[in,out] q The decoder to set up.
+ * \return 0 on success, -1 if the inner registers could not be created.
+ */
+static int init_c_avx2(srslte_ldpc_decoder_t* q)
+{
+  // The destructor is registered first; it also performs the cleanup on failure below.
+  q->free = free_dec_c_avx2;
+
+  q->ptr = create_ldpc_dec_c_avx2(q->bgN, q->bgM, q->ls, q->scaling_fctr);
+  if (q->ptr != NULL) {
+    q->decode_c = decode_c_avx2;
+    return 0;
+  }
+
+  ERROR("Create_ldpc_dec failed\n");
+  free_dec_c_avx2(q);
+  return -1;
+}
+
+/*!
+ * Carries out the actual destruction of the memory allocated to the decoder, 8-bit-LLR case
+ * (AVX2 implementation, large lifting size).
+ * \param[in,out] o The decoder (a srslte_ldpc_decoder_t); its var_indices, pcm and ptr
+ *                  members are released and reset to NULL.
+ */
+static void free_dec_c_avx2long(void* o)
+{
+  srslte_ldpc_decoder_t* q = o;
+
+  // free(NULL) is a no-op, so no guards are needed. The pointers are cleared because the init
+  // functions call this on failure while q->free stays registered: without the reset, a later
+  // srslte_ldpc_decoder_free() would release the same buffers a second time.
+  free(q->var_indices);
+  q->var_indices = NULL;
+  free(q->pcm);
+  q->pcm = NULL;
+  delete_ldpc_dec_c_avx2long(q->ptr);
+  q->ptr = NULL;
+}
+
+/*!
+ * Carries out the decoding with 8-bit integer-valued LLRs (AVX2 implementation, large lifting
+ * size). Layers are processed one after the other, the soft bits being refreshed after each
+ * layer, for MAX_ITERATIONS iterations.
+ * \param[in]  o              The decoder (a srslte_ldpc_decoder_t).
+ * \param[in]  llrs           The input LLRs, loaded into the decoder registers.
+ * \param[out] message        The decoded message.
+ * \param[in]  cdwd_rm_length Length of the rate-matched codeword (adjusted internally, see below).
+ * \return Always 0.
+ */
+static int decode_c_avx2long(void* o, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
+{
+  srslte_ldpc_decoder_t* dec = o;
+
+  // Upper limit: the full codeword without the two punctured variable nodes.
+  const uint32_t max_len = dec->liftN - 2 * dec->ls;
+  // Lower limit: the high-rate region needs bgK + 4 variable nodes, 2 of which are
+  // systematically punctured by the encoder.
+  const uint32_t min_len = (dec->bgK + 2) * dec->ls;
+
+  uint32_t rm_len = cdwd_rm_length;
+  if (rm_len > max_len) {
+    rm_len = max_len;
+  }
+  if (rm_len < min_len) {
+    rm_len = min_len;
+  }
+  // Round up to the next multiple of the lifting size.
+  const uint32_t remainder = rm_len % dec->ls;
+  if (remainder != 0) {
+    rm_len += dec->ls - remainder;
+  }
+
+  init_ldpc_dec_c_avx2long(dec->ptr, llrs, dec->ls);
+
+  // The standard always removes the first two variable nodes from the final codeword,
+  // hence the "+ 2" in the layer count.
+  uint8_t n_layers = rm_len / dec->ls - dec->bgK + 2;
+
+  for (int iter = 0; iter < MAX_ITERATIONS; iter++) {
+    for (int layer = 0; layer < n_layers; layer++) {
+      uint16_t* layer_pcm                  = dec->pcm + layer * dec->bgN;
+      int8_t(*layer_var_indices)[MAX_CNCT] = dec->var_indices + layer;
+
+      update_ldpc_var_to_check_c_avx2long(dec->ptr, layer);
+      update_ldpc_check_to_var_c_avx2long(dec->ptr, layer, layer_pcm, layer_var_indices);
+      update_ldpc_soft_bits_c_avx2long(dec->ptr, layer, layer_var_indices);
+    }
+  }
+
+  extract_ldpc_message_c_avx2long(dec->ptr, message, dec->liftK);
+
+  return 0;
+}
+
+/*!
+ * Initializes the decoder to work with 8-bit integer-valued LLRs (AVX2 implementation,
+ * large lifting size).
+ * \param[in,out] q The decoder to set up.
+ * \return 0 on success, -1 if the inner registers could not be created.
+ */
+static int init_c_avx2long(srslte_ldpc_decoder_t* q)
+{
+  // The destructor is registered first; it also performs the cleanup on failure below.
+  q->free = free_dec_c_avx2long;
+
+  q->ptr = create_ldpc_dec_c_avx2long(q->bgN, q->bgM, q->ls, q->scaling_fctr);
+  if (q->ptr != NULL) {
+    q->decode_c = decode_c_avx2long;
+    return 0;
+  }
+
+  ERROR("Create_ldpc_dec failed\n");
+  free_dec_c_avx2long(q);
+  return -1;
+}
+
+/*!
+ * Carries out the actual destruction of the memory allocated to the decoder, 8-bit-LLR case
+ * (AVX2 implementation, flooded scheduling).
+ * \param[in,out] o The decoder (a srslte_ldpc_decoder_t); its var_indices, pcm and ptr
+ *                  members are released and reset to NULL.
+ */
+static void free_dec_c_avx2_flood(void* o)
+{
+  srslte_ldpc_decoder_t* q = o;
+
+  // free(NULL) is a no-op, so no guards are needed. The pointers are cleared because
+  // init_c_avx2_flood() calls this on failure while q->free stays registered: without the
+  // reset, a later srslte_ldpc_decoder_free() would release the same buffers a second time.
+  free(q->var_indices);
+  q->var_indices = NULL;
+  free(q->pcm);
+  q->pcm = NULL;
+  delete_ldpc_dec_c_avx2_flood(q->ptr);
+  q->ptr = NULL;
+}
+
+/*!
+ * Carries out the decoding with 8-bit integer-valued LLRs (AVX2 implementation, flooded
+ * scheduling): in each of the 2 * MAX_ITERATIONS iterations, all layers update their
+ * variable-to-check messages, then all layers update their check-to-variable messages, and
+ * finally the soft bits are refreshed once.
+ * \param[in]  o              The decoder (a srslte_ldpc_decoder_t).
+ * \param[in]  llrs           The input LLRs, loaded into the decoder registers.
+ * \param[out] message        The decoded message.
+ * \param[in]  cdwd_rm_length Length of the rate-matched codeword (adjusted internally, see below).
+ * \return Always 0.
+ */
+static int decode_c_avx2_flood(void* o, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
+{
+  srslte_ldpc_decoder_t* dec = o;
+
+  // Upper limit: the full codeword without the two punctured variable nodes.
+  const uint32_t max_len = dec->liftN - 2 * dec->ls;
+  // Lower limit: the high-rate region needs bgK + 4 variable nodes, 2 of which are
+  // systematically punctured by the encoder.
+  const uint32_t min_len = (dec->bgK + 2) * dec->ls;
+
+  uint32_t rm_len = cdwd_rm_length;
+  if (rm_len > max_len) {
+    rm_len = max_len;
+  }
+  if (rm_len < min_len) {
+    rm_len = min_len;
+  }
+  // Round up to the next multiple of the lifting size.
+  const uint32_t remainder = rm_len % dec->ls;
+  if (remainder != 0) {
+    rm_len += dec->ls - remainder;
+  }
+
+  init_ldpc_dec_c_avx2_flood(dec->ptr, llrs, dec->ls);
+
+  // The standard always removes the first two variable nodes from the final codeword,
+  // hence the "+ 2" in the layer count.
+  uint8_t n_layers = rm_len / dec->ls - dec->bgK + 2;
+
+  for (int iter = 0; iter < 2 * MAX_ITERATIONS; iter++) {
+    for (int layer = 0; layer < n_layers; layer++) {
+      update_ldpc_var_to_check_c_avx2_flood(dec->ptr, layer);
+    }
+
+    for (int layer = 0; layer < n_layers; layer++) {
+      uint16_t* layer_pcm                  = dec->pcm + layer * dec->bgN;
+      int8_t(*layer_var_indices)[MAX_CNCT] = dec->var_indices + layer;
+
+      update_ldpc_check_to_var_c_avx2_flood(dec->ptr, layer, layer_pcm, layer_var_indices);
+    }
+
+    update_ldpc_soft_bits_c_avx2_flood(dec->ptr, dec->var_indices);
+  }
+
+  extract_ldpc_message_c_avx2_flood(dec->ptr, message, dec->liftK);
+
+  return 0;
+}
+
+/*!
+ * Initializes the decoder to work with 8-bit integer-valued LLRs (AVX2 implementation,
+ * flooded scheduling).
+ * \param[in,out] q The decoder to set up.
+ * \return 0 on success, -1 if the inner registers could not be created.
+ */
+static int init_c_avx2_flood(srslte_ldpc_decoder_t* q)
+{
+  // The destructor is registered first; it also performs the cleanup on failure below.
+  q->free = free_dec_c_avx2_flood;
+
+  q->ptr = create_ldpc_dec_c_avx2_flood(q->bgN, q->bgM, q->ls, q->scaling_fctr);
+  if (q->ptr != NULL) {
+    q->decode_c = decode_c_avx2_flood;
+    return 0;
+  }
+
+  ERROR("Create_ldpc_dec failed\n");
+  free_dec_c_avx2_flood(q);
+  return -1;
+}
+
+/*!
+ * Carries out the actual destruction of the memory allocated to the decoder, 8-bit-LLR case
+ * (flooded scheduling, AVX2 implementation, large lifting size).
+ * \param[in,out] o The decoder (a srslte_ldpc_decoder_t); its var_indices, pcm and ptr
+ *                  members are released and reset to NULL.
+ */
+static void free_dec_c_avx2long_flood(void* o)
+{
+  srslte_ldpc_decoder_t* q = o;
+
+  // free(NULL) is a no-op, so no guards are needed. The pointers are cleared so that a second
+  // invocation through the registered q->free callback (e.g. srslte_ldpc_decoder_free() after
+  // a failed initialization) cannot release the same buffers twice.
+  free(q->var_indices);
+  q->var_indices = NULL;
+  free(q->pcm);
+  q->pcm = NULL;
+  delete_ldpc_dec_c_avx2long_flood(q->ptr);
+  q->ptr = NULL;
+}
+
+/*!
+ * Carries out the decoding with 8-bit integer-valued LLRs (flooded scheduling, AVX2
+ * implementation, large lifting size): in each of the 2 * MAX_ITERATIONS iterations, all
+ * layers update their variable-to-check messages, then all layers update their
+ * check-to-variable messages, and finally the soft bits are refreshed once.
+ * \param[in]  o              The decoder (a srslte_ldpc_decoder_t).
+ * \param[in]  llrs           The input LLRs, loaded into the decoder registers.
+ * \param[out] message        The decoded message.
+ * \param[in]  cdwd_rm_length Length of the rate-matched codeword (adjusted internally, see below).
+ * \return Always 0.
+ */
+static int decode_c_avx2long_flood(void* o, const int8_t* llrs, uint8_t* message, uint32_t cdwd_rm_length)
+{
+  srslte_ldpc_decoder_t* dec = o;
+
+  // Upper limit: the full codeword without the two punctured variable nodes.
+  const uint32_t max_len = dec->liftN - 2 * dec->ls;
+  // Lower limit: the high-rate region needs bgK + 4 variable nodes, 2 of which are
+  // systematically punctured by the encoder.
+  const uint32_t min_len = (dec->bgK + 2) * dec->ls;
+
+  uint32_t rm_len = cdwd_rm_length;
+  if (rm_len > max_len) {
+    rm_len = max_len;
+  }
+  if (rm_len < min_len) {
+    rm_len = min_len;
+  }
+  // Round up to the next multiple of the lifting size.
+  const uint32_t remainder = rm_len % dec->ls;
+  if (remainder != 0) {
+    rm_len += dec->ls - remainder;
+  }
+
+  init_ldpc_dec_c_avx2long_flood(dec->ptr, llrs, dec->ls);
+
+  // The standard always removes the first two variable nodes from the final codeword,
+  // hence the "+ 2" in the layer count.
+  uint8_t n_layers = rm_len / dec->ls - dec->bgK + 2;
+
+  for (int iter = 0; iter < 2 * MAX_ITERATIONS; iter++) {
+    for (int layer = 0; layer < n_layers; layer++) {
+      update_ldpc_var_to_check_c_avx2long_flood(dec->ptr, layer);
+    }
+
+    for (int layer = 0; layer < n_layers; layer++) {
+      uint16_t* layer_pcm                  = dec->pcm + layer * dec->bgN;
+      int8_t(*layer_var_indices)[MAX_CNCT] = dec->var_indices + layer;
+
+      update_ldpc_check_to_var_c_avx2long_flood(dec->ptr, layer, layer_pcm, layer_var_indices);
+    }
+
+    update_ldpc_soft_bits_c_avx2long_flood(dec->ptr, dec->var_indices);
+  }
+
+  extract_ldpc_message_c_avx2long_flood(dec->ptr, message, dec->liftK);
+
+  return 0;
+}
+
+/*!
+ * Initializes the decoder to work with 8-bit integer-valued LLRs
+ * (flooded scheduling, AVX2 implementation, large lifting size).
+ * \param[in,out] q The decoder to set up.
+ * \return 0 on success, -1 if the inner registers could not be created.
+ */
+static int init_c_avx2long_flood(srslte_ldpc_decoder_t* q)
+{
+  q->free = free_dec_c_avx2long_flood;
+
+  if ((q->ptr = create_ldpc_dec_c_avx2long_flood(q->bgN, q->bgM, q->ls, q->scaling_fctr)) == NULL) {
+    ERROR("Create_ldpc_dec failed\n");
+    // Clean up with the destructor of this very variant (the layered-schedule one was
+    // called here before, which pairs the wrong delete function with these registers).
+    free_dec_c_avx2long_flood(q);
+    return -1;
+  }
+
+  q->decode_c = decode_c_avx2long_flood;
+
+  return 0;
+}
+#endif // LV_HAVE_AVX2
+
+/*!
+ * Initializes an LDPC decoder: fills in the code dimensions, builds the compact parity-check
+ * matrix and hands over to the type-specific initialization.
+ * \param[out] q            The decoder to initialize.
+ * \param[in]  type         The decoder implementation to use.
+ * \param[in]  bg           The base graph (BG1 or BG2).
+ * \param[in]  ls           The lifting size; must be one accepted by get_ls_index().
+ * \param[in]  scaling_fctr Scaling factor of the min-sum algorithm, in (0, 1].
+ * \return 0 on success, -1 otherwise. On the failure paths handled in this function the
+ *         buffers allocated here are released and the corresponding pointers reset to NULL.
+ */
+int srslte_ldpc_decoder_init(srslte_ldpc_decoder_t*     q,
+                             srslte_ldpc_decoder_type_t type,
+                             srslte_basegraph_t         bg,
+                             uint16_t                   ls,
+                             float                      scaling_fctr)
+{
+  int ls_index = get_ls_index(ls);
+
+  if (ls_index == VOID_LIFTSIZE) {
+    ERROR("Invalid lifting size %d\n", ls);
+    return -1;
+  }
+
+  switch (bg) {
+    case BG1:
+      q->bgN = BG1Nfull;
+      q->bgM = BG1M;
+      break;
+    case BG2:
+      q->bgN = BG2Nfull;
+      q->bgM = BG2M;
+      break;
+    default:
+      ERROR("Base Graph BG%d does not exist\n", bg + 1);
+      return -1;
+  }
+
+  // Check the scaling factor before allocating anything. The condition is written so that a
+  // NaN is rejected as well, and ERROR is used since errno is unrelated to this failure.
+  if (!((scaling_fctr > 0) && (scaling_fctr <= 1))) {
+    ERROR("The scaling factor of the min-sum algorithm should be larger than 0 and not larger than 1.\n");
+    return -1;
+  }
+  q->scaling_fctr = scaling_fctr;
+
+  q->bg  = bg;
+  q->bgK = q->bgN - q->bgM;
+
+  q->ls    = ls;
+  q->liftK = ls * q->bgK;
+  q->liftM = ls * q->bgM;
+  q->liftN = ls * q->bgN;
+
+  q->pcm         = srslte_vec_malloc(q->bgM * q->bgN * sizeof(uint16_t));
+  q->var_indices = srslte_vec_malloc(q->bgM * sizeof(int8_t[MAX_CNCT]));
+  if (!q->pcm || !q->var_indices) {
+    perror("malloc");
+    goto clean_exit;
+  }
+
+  if (create_compact_pcm(q->pcm, q->var_indices, q->bg, q->ls) != 0) {
+    perror("Create PCM");
+    goto clean_exit;
+  }
+
+  // From here on the type-specific initializers take care of their own cleanup on failure.
+  switch (type) {
+    case SRSLTE_LDPC_DECODER_F:
+      return init_f(q);
+    case SRSLTE_LDPC_DECODER_S:
+      return init_s(q);
+    case SRSLTE_LDPC_DECODER_C:
+      return init_c(q);
+    case SRSLTE_LDPC_DECODER_C_FLOOD:
+      return init_c_flood(q);
+#ifdef LV_HAVE_AVX2
+    case SRSLTE_LDPC_DECODER_C_AVX2:
+      if (ls <= SRSLTE_AVX2_B_SIZE) {
+        return init_c_avx2(q);
+      } else {
+        return init_c_avx2long(q);
+      }
+    case SRSLTE_LDPC_DECODER_C_AVX2_FLOOD:
+      if (ls <= SRSLTE_AVX2_B_SIZE) {
+        return init_c_avx2_flood(q);
+      } else {
+        return init_c_avx2long_flood(q);
+      }
+#endif // LV_HAVE_AVX2
+    default:
+      // No initializer ran, so the buffers allocated above must be released here.
+      ERROR("Unknown decoder.\n");
+      break;
+  }
+
+clean_exit:
+  free(q->var_indices);
+  q->var_indices = NULL;
+  free(q->pcm);
+  q->pcm = NULL;
+  return -1;
+}
+
+/*!
+ * Releases a decoder: invokes the destructor registered at initialization (if any) and then
+ * zeroes the whole structure. A NULL pointer is accepted and ignored.
+ * \param[in,out] q The decoder to release.
+ */
+void srslte_ldpc_decoder_free(srslte_ldpc_decoder_t* q)
+{
+  if (q == NULL) {
+    return;
+  }
+  if (q->free) {
+    q->free(q);
+  }
+  bzero(q, sizeof(srslte_ldpc_decoder_t));
+}
+
+/*!
+ * Decodes with real-valued LLRs by forwarding to the decoding function registered in \a q.
+ * \return The value returned by the registered function, or -1 if \a q is NULL or has no
+ *         float decoding function (as is the case after srslte_ldpc_decoder_free()).
+ */
+int srslte_ldpc_decoder_decode_f(srslte_ldpc_decoder_t* q, const float* llrs, uint8_t* message, uint32_t cdwd_rm_length)
+{
+  if ((q == NULL) || (q->decode_f == NULL)) {
+    return -1;
+  }
+  return q->decode_f(q, llrs, message, cdwd_rm_length);
+}
+
+/*!
+ * Decodes with 16-bit integer LLRs by forwarding to the decoding function registered in \a q.
+ * \return The value returned by the registered function, or -1 if \a q is NULL or has no
+ *         16-bit decoding function (as is the case after srslte_ldpc_decoder_free()).
+ */
+int srslte_ldpc_decoder_decode_s(srslte_ldpc_decoder_t* q,
+                                 const int16_t*         llrs,
+                                 uint8_t*               message,
+                                 uint32_t               cdwd_rm_length)
+{
+  if ((q == NULL) || (q->decode_s == NULL)) {
+    return -1;
+  }
+  return q->decode_s(q, llrs, message, cdwd_rm_length);
+}
+
+/*!
+ * Decodes with 8-bit integer LLRs by forwarding to the decoding function registered in \a q.
+ * \return The value returned by the registered function, or -1 if \a q is NULL or has no
+ *         8-bit decoding function (as is the case after srslte_ldpc_decoder_free()).
+ */
+int srslte_ldpc_decoder_decode_c(srslte_ldpc_decoder_t* q,
+                                 const int8_t*          llrs,
+                                 uint8_t*               message,
+                                 uint32_t               cdwd_rm_length)
+{
+  if ((q == NULL) || (q->decode_c == NULL)) {
+    return -1;
+  }
+  return q->decode_c(q, llrs, message, cdwd_rm_length);
+}
--- a/lib/src/phy/fec/ldpc/ldpc_enc_all.h
+++ b/lib/src/phy/fec/ldpc/ldpc_enc_all.h
@ -0,0 +1,211 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_enc_all.h
+ * \brief Declaration of the LDPC encoder inner functions.
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef SRSLTE_LDPCENC_ALL_H
+#define SRSLTE_LDPCENC_ALL_H
+#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
+
+/*! Computes the product between the first (K - 2) columns of the PCM and the systematic bits.
+ * \param[in,out] q     A pointer to an encoder.
+ * \param[in]     input The message to encode.
+ */
+void preprocess_systematic_bits(srslte_ldpc_encoder_t* q, const uint8_t* input);
+
+/*! Computes the high-rate parity bits for BG1 and ls_index in {0, 1, 2, 3, 4, 5, 7}.
+ * \param[in]  o      A pointer to an encoder.
+ * \param[out] output The resulting codeword.
+ */
+void encode_high_rate_case1(void* o, uint8_t* output);
+
+/*! Computes the high-rate parity bits for BG1 and ls_index in {6}.
+ * \param[in]  o      A pointer to an encoder.
+ * \param[out] output The resulting codeword.
+ */
+void encode_high_rate_case2(void* o, uint8_t* output);
+
+/*! Computes the high-rate parity bits for BG2 and ls_index in {0, 1, 2, 4, 5, 6}.
+ * \param[in]  o      A pointer to an encoder.
+ * \param[out] output The resulting codeword.
+ */
+void encode_high_rate_case3(void* o, uint8_t* output);
+
+/*! Computes the high-rate parity bits for BG2 and ls_index in {3, 7}.
+ * \param[in]  o      A pointer to an encoder.
+ * \param[out] output The resulting codeword.
+ */
+void encode_high_rate_case4(void* o, uint8_t* output);
+
+/*! Computes the extended-region parity bits.
+ * \param[in]  q      A pointer to an encoder.
+ * \param[out] output The resulting codeword.
+ * \param[in]  n_layers The number of layers to process (when doing rate matching not all
+ *                       layers are needed).
+ */
+void encode_ext_region(srslte_ldpc_encoder_t* q, uint8_t* output, uint8_t n_layers);
+
+/*!
+ * Creates the inner registers required by the optimized LDPC encoder (LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] q A pointer to an encoder.
+ * \return A pointer to the newly created structure of registers.
+ */
+void* create_ldpc_enc_avx2(srslte_ldpc_encoder_t* q);
+
+/*!
+ * Deletes the inner registers of an optimized LDPC encoder (LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in] p A pointer to the register structure.
+ */
+void delete_ldpc_enc_avx2(void* p);
+
+/*!
+ * Loads the message in the optimized encoder registers (LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in] p        The register structure.
+ * \param[in] input    The message to encode.
+ * \param[in] msg_len  Number of variable nodes in one message.
+ * \param[in] cdwd_len Number of variable nodes in one codeword.
+ * \param[in] ls       The lifting size.
+ * \return Error code: 0 if correct, -1 otherwise.
+ */
+int load_avx2(void* p, const uint8_t* input, uint8_t msg_len, uint8_t cdwd_len, uint16_t ls);
+
+/*! Extracts the final codeword from the optimized encoder registers (LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in]  p        The register structure.
+ * \param[out] output   The output codeword.
+ * \param[in]  cdwd_len The number of variable nodes (after rate-matching, if enabled).
+ * \param[in]  ls       The lifting size.
+ * \return Error code: 0 if correct, -1 otherwise.
+ */
+int return_codeword_avx2(void* p, uint8_t* output, uint8_t cdwd_len, uint16_t ls);
+
+/*! Computes the product between the first (K - 2) columns of the PCM and the
+ * systematic bits (SIMD-optimized version, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] q     A pointer to an encoder.
+ */
+void preprocess_systematic_bits_avx2(srslte_ldpc_encoder_t* q);
+
+/*! Computes the high-rate parity bits for BG1 and ls_index in {0, 1, 2, 3, 4, 5, 7}
+ * (SIMD-optimized version, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out]  o  A pointer to an encoder.
+ */
+void encode_high_rate_case1_avx2(void* o);
+
+/*! Computes the high-rate parity bits for BG1 and ls_index in {6} (SIMD-optimized version, LS <= \ref
+ * SRSLTE_AVX2_B_SIZE).
+ * \param[in,out]  o  A pointer to an encoder.
+ */
+void encode_high_rate_case2_avx2(void* o);
+
+/*! Computes the high-rate parity bits for BG2 and ls_index in {0, 1, 2, 4, 5, 6} (SIMD-optimized version, LS <= \ref
+ * SRSLTE_AVX2_B_SIZE).
+ * \param[in,out]  o  A pointer to an encoder.
+ */
+void encode_high_rate_case3_avx2(void* o);
+
+/*! Computes the high-rate parity bits for BG2 and ls_index in {3, 7} (SIMD-optimized version, LS <= \ref
+ * SRSLTE_AVX2_B_SIZE).
+ * \param[in,out]  o  A pointer to an encoder.
+ */
+void encode_high_rate_case4_avx2(void* o);
+
+/*! Computes the extended-region parity bits (SIMD-optimized version, LS <= \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out]  q      A pointer to an encoder.
+ * \param[in]  n_layers The number of layers to process (when doing rate matching not all
+ *                       layers are needed).
+ */
+void encode_ext_region_avx2(srslte_ldpc_encoder_t* q, uint8_t n_layers);
+
+/*!
+ * Creates the inner registers required by the optimized LDPC encoder (for LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] q A pointer to an encoder.
+ * \return A pointer to the newly created structure of registers.
+ */
+void* create_ldpc_enc_avx2long(srslte_ldpc_encoder_t* q);
+
+/*!
+ * Deletes the inner registers of an optimized LDPC encoder (LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in] p A pointer to the register structure.
+ */
+void delete_ldpc_enc_avx2long(void* p);
+
+/*!
+ * Loads the message in the optimized encoder registers (LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in] p        The register structure.
+ * \param[in] input    The message to encode.
+ * \param[in] msg_len  Number of variable nodes in one message.
+ * \param[in] cdwd_len Number of variable nodes in one codeword.
+ * \param[in] ls       The lifting size.
+ * \return Error code: 0 if correct, -1 otherwise.
+ */
+int load_avx2long(void* p, const uint8_t* input, uint8_t msg_len, uint8_t cdwd_len, uint16_t ls);
+
+/*! Extracts the final codeword from the optimized encoder registers (LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in]  p        The register structure.
+ * \param[out] output   The output codeword.
+ * \param[in]  cdwd_len The number of variable nodes (after rate-matching, if enabled).
+ * \param[in]  ls       The lifting size.
+ * \return Error code: 0 if correct, -1 otherwise.
+ */
+int return_codeword_avx2long(void* p, uint8_t* output, uint8_t cdwd_len, uint16_t ls);
+
+/*! Computes the product between the first (K - 2) columns of the PCM and the
+ * systematic bits (SIMD-optimized version, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out] q     A pointer to an encoder.
+ */
+void preprocess_systematic_bits_avx2long(srslte_ldpc_encoder_t* q);
+
+/*! Computes the high-rate parity bits for BG1 and ls_index in {0, 1, 2, 3, 4, 5, 7}
+ * (SIMD-optimized version, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out]  o  A pointer to an encoder.
+ */
+void encode_high_rate_case1_avx2long(void* o);
+
+/*! Computes the high-rate parity bits for BG1 and ls_index in {6} (SIMD-optimized version, LS > \ref
+ * SRSLTE_AVX2_B_SIZE).
+ * \param[in,out]  o  A pointer to an encoder.
+ */
+void encode_high_rate_case2_avx2long(void* o);
+
+/*! Computes the high-rate parity bits for BG2 and ls_index in {0, 1, 2, 4, 5, 6} (SIMD-optimized version, LS > \ref
+ * SRSLTE_AVX2_B_SIZE).
+ * \param[in,out]  o  A pointer to an encoder.
+ */
+void encode_high_rate_case3_avx2long(void* o);
+
+/*! Computes the high-rate parity bits for BG2 and ls_index in {3, 7} (SIMD-optimized version, LS > \ref
+ * SRSLTE_AVX2_B_SIZE).
+ * \param[in,out]  o  A pointer to an encoder.
+ */
+void encode_high_rate_case4_avx2long(void* o);
+
+/*! Computes the extended-region parity bits (SIMD-optimized version, LS > \ref SRSLTE_AVX2_B_SIZE).
+ * \param[in,out]  q      A pointer to an encoder.
+ * \param[in]  n_layers The number of layers to process (when doing rate matching not all
+ *                       layers are needed).
+ */
+void encode_ext_region_avx2long(srslte_ldpc_encoder_t* q, uint8_t n_layers);
+
+#endif // SRSLTE_LDPCENC_ALL_H
--- a/lib/src/phy/fec/ldpc/ldpc_enc_avx2.c
+++ b/lib/src/phy/fec/ldpc/ldpc_enc_avx2.c
@ -0,0 +1,442 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_enc_avx2.c
+ * \brief Definition of the LDPC encoder inner functions (AVX2 version, small lifting size).
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <stdint.h>
+
+#include "../utils_avx2.h"
+#include "ldpc_enc_all.h"
+#include "srslte/phy/fec/ldpc/base_graph.h"
+#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
+#include "srslte/phy/utils/debug.h"
+#include "srslte/phy/utils/vector.h"
+
+#ifdef LV_HAVE_AVX2
+
+#include <immintrin.h>
+
+#include "ldpc_avx2_consts.h"
+
+/*!
+ * \brief Represents a node of the base factor graph.
+ *
+ * The union gives access to the same storage either char by char (member \c c)
+ * or as a single AVX2 register (member \c v).
+ */
+typedef union bg_node_t {
+  uint8_t c[SRSLTE_AVX2_B_SIZE]; /*!< Each base node may contain up to \ref SRSLTE_AVX2_B_SIZE lifted nodes. */
+  __m256i v;                     /*!< All the lifted nodes of the current base node as a 256-bit line. */
+} bg_node_t;
+
+/*!
+ * \brief Inner registers for the optimized LDPC encoder.
+ *
+ * Both buffers are allocated by create_ldpc_enc_avx2() (bgN nodes for the codeword,
+ * bgM entries for the auxiliary register) and released by delete_ldpc_enc_avx2().
+ */
+struct ldpc_enc_avx2 {
+  bg_node_t* codeword; /*!< \brief Contains the entire codeword, before puncturing. */
+  __m256i*   aux;      /*!< \brief Auxiliary register. */
+};
+
+/*!
+ * Rotate the content of an __m256i vector (first input) towards the left by
+ * the number of chars specified by the second input (i.e., the \b imm * 8 most
+ * significant bits wrap around and become the \b imm * 8 least significant bits).
+ * \param[in]  a    Vector to circularly shift.
+ * \param[in]  imm  The shift order in chars. NOTE(review): the implementation uses
+ *                  imm / 8 to index a table of four entries, so it appears to
+ *                  require 0 <= imm < 32 - confirm against callers.
+ * \return          The shifted vector.
+ */
+static __m256i _mm256_rotatelli_si256(__m256i a, int imm);
+
+/*!
+ * Rotate the content of an __m256i vector (first input) towards the right by
+ * the number of chars specified by the second input (i.e., the \b imm * 8 least
+ * significant bits wrap around and become the \b imm * 8 most significant bits).
+ * \param[in]  a    Vector to circularly shift.
+ * \param[in]  imm  The shift order in chars. NOTE(review): the implementation uses
+ *                  imm / 8 to index a table of four entries, so it appears to
+ *                  require 0 <= imm < 32 - confirm against callers.
+ * \return          The shifted vector.
+ */
+static __m256i _mm256_rotaterli_si256(__m256i a, int imm);
+
+/*!
+ * Rotate the contents of a node towards the left by \b imm chars, that is the
+ * \b imm * 8 most significant bits become the least significant ones.
+ * The rotation is circular over the \b ls chars of the node, not over the whole
+ * 256-bit register.
+ * \param[in]  a    The node to rotate.
+ * \param[in]  imm  The order of the rotation in number of chars (used to index the
+ *                  mask tables; presumably 0 <= imm < ls - confirm against callers).
+ * \param[in]  ls   The size of the node (lifting size).
+ * \return     The rotated node.
+ */
+static __m256i rotate_node_left(__m256i a, int imm, uint16_t ls);
+
+/*!
+ * Rotate the contents of a node towards the right by \b imm chars, that is the
+ * \b imm * 8 least significant bits become the most significant ones.
+ * The rotation is circular over the \b ls chars of the node, not over the whole
+ * 256-bit register.
+ * \param[in]  a    The node to rotate.
+ * \param[in]  imm  The order of the rotation in number of chars (ls - imm is used to
+ *                  index the mask tables; presumably 0 <= imm < ls - confirm against callers).
+ * \param[in]  ls   The size of the node (lifting size).
+ * \return     The rotated node.
+ */
+static __m256i rotate_node_right(__m256i a, int imm, uint16_t ls);
+
+/*!
+ * Allocates the inner registers of the AVX2 encoder (small lifting size): q->bgN
+ * codeword nodes and q->bgM auxiliary-register entries.
+ * \param[in] q  A pointer to an encoder (only bgN and bgM are read).
+ * \return A pointer to the new structure, to be released with
+ *         delete_ldpc_enc_avx2(), or NULL if any allocation fails.
+ */
+void* create_ldpc_enc_avx2(srslte_ldpc_encoder_t* q)
+{
+  struct ldpc_enc_avx2* enc = malloc(sizeof(struct ldpc_enc_avx2));
+  if (enc == NULL) {
+    return NULL;
+  }
+
+  // one node per variable node of the base graph
+  enc->codeword = srslte_vec_malloc(q->bgN * sizeof(bg_node_t));
+  if (enc->codeword == NULL) {
+    free(enc);
+    return NULL;
+  }
+
+  // one auxiliary entry per check node of the base graph
+  enc->aux = srslte_vec_malloc(q->bgM * sizeof(__m256i));
+  if (enc->aux == NULL) {
+    free(enc->codeword);
+    free(enc);
+    return NULL;
+  }
+
+  return enc;
+}
+
+/*!
+ * Releases the inner registers created by create_ldpc_enc_avx2().
+ * \param[in] p  A pointer to the structure to release; NULL is accepted and ignored.
+ */
+void delete_ldpc_enc_avx2(void* p)
+{
+  struct ldpc_enc_avx2* enc = p;
+
+  if (enc == NULL) {
+    return;
+  }
+
+  free(enc->aux);
+  free(enc->codeword);
+  free(enc);
+}
+
+/*!
+ * Copies the message into the codeword register, one node per group of \b ls input
+ * chars, zero-padding each node up to \ref SRSLTE_AVX2_B_SIZE chars. The remaining
+ * (cdwd_len - msg_len) nodes are cleared.
+ * \param[in,out] p         A pointer to the inner registers (struct ldpc_enc_avx2).
+ * \param[in]     input     The message, msg_len * ls chars.
+ * \param[in]     msg_len   The number of message nodes.
+ * \param[in]     cdwd_len  The total number of codeword nodes to initialize.
+ * \param[in]     ls        The lifting size (chars per node).
+ * \return 0 on success, -1 if \b p is NULL.
+ */
+int load_avx2(void* p, const uint8_t* input, const uint8_t msg_len, const uint8_t cdwd_len, const uint16_t ls)
+{
+  struct ldpc_enc_avx2* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  const uint8_t* src  = input;
+  bg_node_t*     node = vp->codeword;
+
+  for (int n = 0; n < msg_len; n++) {
+    // the first ls chars of the node carry the message...
+    int b = 0;
+    while (b < ls) {
+      node->c[b] = src[b];
+      b++;
+    }
+    // ... and the rest of the node is padding
+    bzero(&(node->c[b]), (SRSLTE_AVX2_B_SIZE - b) * sizeof(uint8_t));
+
+    src += ls;
+    node++;
+  }
+
+  // node now points to the first non-message node
+  bzero(node, (cdwd_len - msg_len) * sizeof(__m256i));
+
+  return 0;
+}
+
+/*!
+ * Copies the codeword from the inner register to \b output, skipping the first two
+ * nodes and keeping only the first \b ls chars of each remaining node.
+ * \param[in]  p         A pointer to the inner registers (struct ldpc_enc_avx2).
+ * \param[out] output    The destination buffer, (cdwd_len - 2) * ls chars.
+ * \param[in]  cdwd_len  The number of codeword nodes, including the two skipped ones.
+ * \param[in]  ls        The lifting size (chars per node).
+ * \return 0 on success, -1 if \b p is NULL.
+ */
+int return_codeword_avx2(void* p, uint8_t* output, const uint8_t cdwd_len, const uint16_t ls)
+{
+  struct ldpc_enc_avx2* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  uint8_t* dst = output;
+  for (int n = 2; n < cdwd_len; n++) {
+    const bg_node_t* node = &vp->codeword[n];
+    for (int b = 0; b < ls; b++) {
+      *dst++ = node->c[b];
+    }
+  }
+  return 0;
+}
+
+/*!
+ * Computes the extended-region parity nodes for layers 4, ..., n_layers - 1.
+ * Each parity node is the auxiliary-register entry of its layer XORed with the
+ * rotated high-rate parity nodes connected to that layer.
+ * \param[in,out] q         A pointer to an encoder.
+ * \param[in]     n_layers  The number of layers to process.
+ */
+void encode_ext_region_avx2(srslte_ldpc_encoder_t* q, uint8_t n_layers)
+{
+  struct ldpc_enc_avx2* vp = q->ptr;
+
+  // In case of puncturing or IR-HARQ, one could process only specific check nodes
+  // instead of all of them from layer 4 to layer n_layers - 1.
+  for (int layer = 4; layer < n_layers; layer++) {
+    // PCM entries linking this layer to the four high-rate parity nodes
+    const uint16_t* shifts = q->pcm + q->bgK + layer * q->bgN;
+
+    // contribution of the systematic part (already computed)
+    __m256i parity = vp->aux[layer];
+
+    // contribution of the high-rate region, with the proper circular shifts
+    for (int col = 0; col < 4; col++) {
+      const uint16_t shift = shifts[col];
+      if (shift == NO_CNCT) {
+        continue;
+      }
+      parity = _mm256_xor_si256(parity, rotate_node_right(vp->codeword[q->bgK + col].v, shift, q->ls));
+    }
+
+    vp->codeword[q->bgK + layer].v = parity;
+  }
+}
+
+/*!
+ * Accumulates, for every check node, the XOR of the systematic nodes connected to
+ * it (each rotated by the corresponding PCM entry) into the auxiliary register.
+ * \param[in,out] q  A pointer to an encoder.
+ */
+void preprocess_systematic_bits_avx2(srslte_ldpc_encoder_t* q)
+{
+  struct ldpc_enc_avx2* vp = q->ptr;
+
+  const int       n_cols   = q->bgN;
+  const int       n_sys    = q->bgK;
+  const int       n_checks = q->bgM;
+  const int       ls       = q->ls;
+  const uint16_t* pcm      = q->pcm;
+
+  // every check node starts from an all-zero accumulator
+  bzero(vp->aux, n_checks * sizeof(__m256i));
+
+  // for all the K chunks (of ls bits each) of the input message
+  for (int col = 0; col < n_sys; col++) {
+    const __m256i chunk = vp->codeword[col].v;
+
+    // for all check nodes
+    // NB: if looking for performance, this loop could be restricted to the high-rate
+    // region of the PCM (rows 0, 1, 2, 3) and to the check nodes that result in a
+    // transmitted coded bit after puncturing or IR-HARQ (see Deliverable D1 Section 3.4).
+    for (int row = 0; row < n_checks; row++) {
+      // PCM entry for the current input chunk and the current check node
+      const uint16_t shift = pcm[col + row * n_cols];
+
+      // nothing to add if check node and variable node are not connected
+      if (shift == NO_CNCT) {
+        continue;
+      }
+
+      __m256i rotated = rotate_node_right(chunk, shift, ls);
+      rotated         = _mm256_and_si256(rotated, one_epi8);
+      vp->aux[row]    = _mm256_xor_si256(vp->aux[row], rotated);
+    }
+  }
+}
+
+/*!
+ * Computes the four high-rate parity nodes from the first four entries of the
+ * auxiliary register (variant that rotates the first parity node to the right by one).
+ * \param[in,out] o  A pointer to an encoder (srslte_ldpc_encoder_t).
+ */
+void encode_high_rate_case1_avx2(void* o)
+{
+  srslte_ldpc_encoder_t* q  = o;
+  struct ldpc_enc_avx2*  vp = q->ptr;
+
+  const int      ls     = q->ls;
+  const __m256i* aux    = vp->aux;
+  bg_node_t*     parity = &vp->codeword[q->bgK];
+
+  // first chunk of parity bits: sum of the four auxiliary entries
+  __m256i sum = _mm256_xor_si256(aux[0], aux[1]);
+  sum         = _mm256_xor_si256(sum, aux[2]);
+  sum         = _mm256_xor_si256(sum, aux[3]);
+  parity[0].v = sum;
+
+  const __m256i rotated = rotate_node_right(parity[0].v, 1, ls);
+
+  // second chunk of parity bits
+  parity[1].v = _mm256_xor_si256(aux[0], rotated);
+  // fourth chunk of parity bits
+  parity[3].v = _mm256_xor_si256(aux[3], rotated);
+  // third chunk of parity bits (needs the fourth one)
+  parity[2].v = _mm256_xor_si256(aux[2], parity[3].v);
+}
+
+/*!
+ * Computes the four high-rate parity nodes from the first four entries of the
+ * auxiliary register (variant whose first parity node is the sum of the auxiliary
+ * entries rotated to the left by 105 % ls).
+ * \param[in,out] o  A pointer to an encoder (srslte_ldpc_encoder_t).
+ */
+void encode_high_rate_case2_avx2(void* o)
+{
+  srslte_ldpc_encoder_t* q  = o;
+  struct ldpc_enc_avx2*  vp = q->ptr;
+
+  const int      ls     = q->ls;
+  const __m256i* aux    = vp->aux;
+  bg_node_t*     parity = &vp->codeword[q->bgK];
+
+  // first chunk of parity bits: rotated sum of the four auxiliary entries
+  __m256i sum = _mm256_xor_si256(aux[0], aux[1]);
+  sum         = _mm256_xor_si256(sum, aux[2]);
+  sum         = _mm256_xor_si256(sum, aux[3]);
+  parity[0].v = rotate_node_left(sum, 105 % ls, ls);
+
+  // second chunk of parity bits
+  parity[1].v = _mm256_xor_si256(aux[0], parity[0].v);
+  // fourth chunk of parity bits
+  parity[3].v = _mm256_xor_si256(aux[3], parity[0].v);
+  // third chunk of parity bits (needs the fourth one)
+  parity[2].v = _mm256_xor_si256(aux[2], parity[3].v);
+}
+
+/*!
+ * Computes the four high-rate parity nodes from the first four entries of the
+ * auxiliary register (variant whose first parity node is the sum of the auxiliary
+ * entries rotated to the left by one).
+ * \param[in,out] o  A pointer to an encoder (srslte_ldpc_encoder_t).
+ */
+void encode_high_rate_case3_avx2(void* o)
+{
+  srslte_ldpc_encoder_t* q  = o;
+  struct ldpc_enc_avx2*  vp = q->ptr;
+
+  const int      ls     = q->ls;
+  const __m256i* aux    = vp->aux;
+  bg_node_t*     parity = &vp->codeword[q->bgK];
+
+  // first chunk of parity bits: rotated sum of the four auxiliary entries
+  __m256i sum = _mm256_xor_si256(aux[0], aux[1]);
+  sum         = _mm256_xor_si256(sum, aux[2]);
+  sum         = _mm256_xor_si256(sum, aux[3]);
+  parity[0].v = rotate_node_left(sum, 1, ls);
+
+  // second chunk of parity bits
+  parity[1].v = _mm256_xor_si256(aux[0], parity[0].v);
+  // third chunk of parity bits (needs the second one)
+  parity[2].v = _mm256_xor_si256(aux[1], parity[1].v);
+  // fourth chunk of parity bits
+  parity[3].v = _mm256_xor_si256(aux[3], parity[0].v);
+}
+
+/*!
+ * Computes the four high-rate parity nodes from the first four entries of the
+ * auxiliary register (variant that rotates the first parity node to the right by one).
+ * \param[in,out] o  A pointer to an encoder (srslte_ldpc_encoder_t).
+ */
+void encode_high_rate_case4_avx2(void* o)
+{
+  srslte_ldpc_encoder_t* q  = o;
+  struct ldpc_enc_avx2*  vp = q->ptr;
+
+  const int      ls     = q->ls;
+  const __m256i* aux    = vp->aux;
+  bg_node_t*     parity = &vp->codeword[q->bgK];
+
+  // first chunk of parity bits: sum of the four auxiliary entries
+  __m256i sum = _mm256_xor_si256(aux[0], aux[1]);
+  sum         = _mm256_xor_si256(sum, aux[2]);
+  sum         = _mm256_xor_si256(sum, aux[3]);
+  parity[0].v = sum;
+
+  const __m256i rotated = rotate_node_right(parity[0].v, 1, ls);
+
+  // second chunk of parity bits
+  parity[1].v = _mm256_xor_si256(aux[0], rotated);
+  // third chunk of parity bits (needs the second one)
+  parity[2].v = _mm256_xor_si256(aux[1], parity[1].v);
+  // fourth chunk of parity bits
+  parity[3].v = _mm256_xor_si256(aux[3], rotated);
+}
+
+static __m256i _mm256_rotatelli_si256(__m256i a, int imm)
+{
+  // The rotation is split into a coarse step (whole 64-bit blocks) and a fine step
+  // (chars inside each 64-bit block, with carry-over from the neighboring block).
+
+  // by_blocks[i] is a rotated left by i 64-bit blocks
+  __m256i by_blocks[4];
+  by_blocks[3] = _mm256_permute4x64_epi64(a, 57);  // 1 - 2 - 3 - 0
+  by_blocks[2] = _mm256_permute4x64_epi64(a, 78);  // 2 - 3 - 0 - 1
+  by_blocks[1] = _mm256_permute4x64_epi64(a, 147); // 3 - 0 - 1 - 2
+  by_blocks[0] = a;                                // blocks 0 - 1 - 2 - 3
+
+  // coarse step: number of whole blocks
+  const int n_blocks = imm / 8;
+  // fine step: remaining chars
+  const int n_chars = imm % 8;
+  // rotation that provides the chars carried over into each block
+  const int carry_idx = (n_blocks + 1) % 4;
+
+  // shift left each block by the remaining chars
+  const __m256i shifted = _mm256_slli_epi64(by_blocks[n_blocks], n_chars * 8);
+  // chars pushed out of the neighboring block, moved to the bottom of each block
+  const __m256i carry = _mm256_srli_epi64(by_blocks[carry_idx], (8 - n_chars) * 8);
+
+  return _mm256_xor_si256(shifted, carry);
+}
+
+static __m256i _mm256_rotaterli_si256(__m256i a, int imm)
+{
+  // The rotation is split into a coarse step (whole 64-bit blocks) and a fine step
+  // (chars inside each 64-bit block, with carry-over from the neighboring block).
+
+  // by_blocks[i] is a rotated right by i 64-bit blocks
+  __m256i by_blocks[4];
+  by_blocks[3] = _mm256_permute4x64_epi64(a, 147); // 3 - 0 - 1 - 2
+  by_blocks[2] = _mm256_permute4x64_epi64(a, 78);  // 2 - 3 - 0 - 1
+  by_blocks[1] = _mm256_permute4x64_epi64(a, 57);  // 1 - 2 - 3 - 0
+  by_blocks[0] = a;                                // blocks 0 - 1 - 2 - 3
+
+  // coarse step: number of whole blocks
+  const int n_blocks = imm / 8;
+  // fine step: remaining chars
+  const int n_chars = imm % 8;
+  // rotation that provides the chars carried over into each block
+  const int carry_idx = (n_blocks + 1) % 4;
+
+  // shift right each block by the remaining chars
+  const __m256i shifted = _mm256_srli_epi64(by_blocks[n_blocks], n_chars * 8);
+  // chars pushed out of the neighboring block, moved to the top of each block
+  const __m256i carry = _mm256_slli_epi64(by_blocks[carry_idx], (8 - n_chars) * 8);
+
+  return _mm256_xor_si256(shifted, carry);
+}
+
+static __m256i rotate_node_left(__m256i a, int imm, uint16_t ls)
+{
+  // nothing to rotate
+  if (imm == 0) {
+    return a;
+  }
+
+  // chars that simply move towards the most significant positions
+  __m256i upper = _mm256_rotatelli_si256(a, imm);
+
+  // a node that fills the whole register needs no correction
+  if (ls == SRSLTE_AVX2_B_SIZE) {
+    return upper;
+  }
+
+  // chars that wrap around the ls-char node boundary
+  __m256i lower = _mm256_rotaterli_si256(a, ls - imm);
+
+  // keep from each rotation only the chars it is responsible for and merge them
+  upper = _mm256_and_si256(upper, mask_most_epi8[imm]);
+  lower = _mm256_and_si256(lower, mask_least_epi8[imm]);
+
+  return _mm256_xor_si256(upper, lower);
+}
+
+static __m256i rotate_node_right(__m256i a, int imm, uint16_t ls)
+{
+  // nothing to rotate
+  if (imm == 0) {
+    return a;
+  }
+
+  // chars that simply move towards the least significant positions
+  __m256i lower = _mm256_rotaterli_si256(a, imm);
+
+  // a node that fills the whole register needs no correction
+  if (ls == SRSLTE_AVX2_B_SIZE) {
+    return lower;
+  }
+
+  // chars that wrap around the ls-char node boundary
+  __m256i upper = _mm256_rotatelli_si256(a, ls - imm);
+
+  // keep from each rotation only the chars it is responsible for and merge them
+  lower = _mm256_and_si256(lower, mask_least_epi8[ls - imm]);
+  upper = _mm256_and_si256(upper, mask_most_epi8[ls - imm]);
+
+  return _mm256_xor_si256(lower, upper);
+}
+
+#endif // LV_HAVE_AVX2
--- a/lib/src/phy/fec/ldpc/ldpc_enc_avx2long.c
+++ b/lib/src/phy/fec/ldpc/ldpc_enc_avx2long.c
@ -0,0 +1,403 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_enc_avx2long.c
+ * \brief Definition of the LDPC encoder inner functions (AVX2 version, large lifting size).
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <stdint.h>
+
+#include "../utils_avx2.h"
+#include "ldpc_enc_all.h"
+#include "srslte/phy/fec/ldpc/base_graph.h"
+#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
+#include "srslte/phy/utils/debug.h"
+#include "srslte/phy/utils/vector.h"
+
+#ifdef LV_HAVE_AVX2
+
+#include <immintrin.h>
+
+#include "ldpc_avx2_consts.h"
+
+/*!
+ * \brief Represents a node of the base factor graph.
+ *
+ * The union gives access to the same storage either char by char (member \c c)
+ * or as a single AVX2 register (member \c v). In this file a base node spans
+ * several consecutive entries of this type (see n_subnodes in struct ldpc_enc_avx2long).
+ */
+typedef union bg_node_t {
+  uint8_t c[SRSLTE_AVX2_B_SIZE]; /*!< Each base node may contain up to \ref SRSLTE_AVX2_B_SIZE lifted nodes. */
+  __m256i v;                     /*!< All the lifted nodes of the current base node as a 256-bit line. */
+} bg_node_t;
+
+/*!
+ * \brief Inner registers for the optimized LDPC encoder.
+ *
+ * All buffers are allocated by create_ldpc_enc_avx2long() and released by
+ * delete_ldpc_enc_avx2long(). Each base-graph node occupies n_subnodes
+ * consecutive 256-bit entries of \c codeword and \c aux.
+ */
+struct ldpc_enc_avx2long {
+  bg_node_t* codeword;     /*!< \brief Contains the entire codeword, before puncturing. */
+  __m256i*   aux;          /*!< \brief Auxiliary register. */
+  __m256i*   rotated_node; /*!< \brief To store rotated versions of the nodes. */
+
+  uint8_t n_subnodes; /*!< \brief Number of subnodes. */
+};
+
+/*!
+ * Rotate the contents of a node towards the right by \b shift chars, that is the
+ * \b shift * 8 least significant bits become the most significant ones (the
+ * rotation is circular over the \b ls chars of the node).
+ * The function returns nothing: the rotated node is written to \b out.
+ * \param[in]  in_256i    The node to rotate.
+ * \param[out] out        The rotated node. NOTE(review): the only rotation buffer
+ *                        allocated in this file has n_subnodes + 1 entries - confirm
+ *                        how many entries this function may write.
+ * \param[in]  shift      The order of the rotation in number of chars.
+ * \param[in]  ls         The size of the node (lifting size).
+ * \param[in]  n_subnodes The number of subnodes in each node.
+ */
+static void rotate_node_right(const __m256i* in_256i, __m256i* out, uint16_t shift, uint16_t ls, int8_t n_subnodes);
+
+/*!
+ * Allocates the inner registers of the AVX2 encoder (large lifting size). Each
+ * base-graph node is split into n_subnodes = ceil(ls / SRSLTE_AVX2_B_SIZE) blocks.
+ * \param[in] q  A pointer to an encoder (only ls, bgN and bgM are read).
+ * \return A pointer to the new structure, to be released with
+ *         delete_ldpc_enc_avx2long(), or NULL if any allocation fails.
+ */
+void* create_ldpc_enc_avx2long(srslte_ldpc_encoder_t* q)
+{
+  struct ldpc_enc_avx2long* enc = malloc(sizeof(struct ldpc_enc_avx2long));
+  if (enc == NULL) {
+    return NULL;
+  }
+
+  // a partially filled block counts as one more subnode
+  int left_out    = q->ls % SRSLTE_AVX2_B_SIZE;
+  enc->n_subnodes = q->ls / SRSLTE_AVX2_B_SIZE + (left_out > 0);
+
+  enc->codeword = srslte_vec_malloc(q->bgN * enc->n_subnodes * sizeof(bg_node_t));
+  if (enc->codeword == NULL) {
+    free(enc);
+    return NULL;
+  }
+
+  enc->aux = srslte_vec_malloc(q->bgM * enc->n_subnodes * sizeof(__m256i));
+  if (enc->aux == NULL) {
+    free(enc->codeword);
+    free(enc);
+    return NULL;
+  }
+
+  // One spare block is allocated on purpose: without it, the software has been
+  // observed to stop with a segmentation fault when ls is a multiple of 32.
+  enc->rotated_node = srslte_vec_malloc((enc->n_subnodes + 1) * sizeof(__m256i));
+  if (enc->rotated_node == NULL) {
+    free(enc->aux);
+    free(enc->codeword);
+    free(enc);
+    return NULL;
+  }
+
+  return enc;
+}
+
+/*!
+ * Releases the inner registers created by create_ldpc_enc_avx2long().
+ * \param[in] p  A pointer to the structure to release; NULL is accepted and ignored.
+ */
+void delete_ldpc_enc_avx2long(void* p)
+{
+  struct ldpc_enc_avx2long* enc = p;
+
+  if (enc == NULL) {
+    return;
+  }
+
+  free(enc->rotated_node);
+  free(enc->aux);
+  free(enc->codeword);
+  free(enc);
+}
+
+/*!
+ * Copies the message into the codeword register. Each group of \b ls input chars
+ * fills the n_subnodes blocks of one node; the unused tail of the last block is
+ * zero-padded. The remaining (cdwd_len - msg_len) nodes are cleared.
+ * \param[in,out] p         A pointer to the inner registers (struct ldpc_enc_avx2long).
+ * \param[in]     input     The message, msg_len * ls chars.
+ * \param[in]     msg_len   The number of message nodes.
+ * \param[in]     cdwd_len  The total number of codeword nodes to initialize.
+ * \param[in]     ls        The lifting size (chars per node).
+ * \return 0 on success, -1 if \b p is NULL.
+ */
+int load_avx2long(void* p, const uint8_t* input, const uint8_t msg_len, const uint8_t cdwd_len, const uint16_t ls)
+{
+  struct ldpc_enc_avx2long* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  const int n_sub = vp->n_subnodes;
+
+  for (int node = 0; node < msg_len; node++) {
+    const uint8_t* src = input + node * ls;
+    bg_node_t*     dst = vp->codeword + node * n_sub;
+
+    // all the subnodes but the last one are completely filled
+    int sub = 0;
+    for (; sub < n_sub - 1; sub++) {
+      for (int c = 0; c < SRSLTE_AVX2_B_SIZE; c++) {
+        dst[sub].c[c] = src[sub * SRSLTE_AVX2_B_SIZE + c];
+      }
+    }
+
+    // sub now indexes the last subnode, which takes the chars still left...
+    const int n_left = ls - sub * SRSLTE_AVX2_B_SIZE;
+    int       b      = 0;
+    for (; b < n_left; b++) {
+      dst[sub].c[b] = src[sub * SRSLTE_AVX2_B_SIZE + b];
+    }
+    // ... and is padded with zeros
+    bzero(&(dst[sub].c[b]), (SRSLTE_AVX2_B_SIZE - b) * sizeof(uint8_t));
+  }
+
+  // clear all the nodes after the message
+  bzero(vp->codeword + msg_len * vp->n_subnodes, (cdwd_len - msg_len) * vp->n_subnodes * sizeof(__m256i));
+  return 0;
+}
+
+/*!
+ * Copies the codeword from the inner register to \b output, skipping the first two
+ * nodes and keeping only the first \b ls chars of each remaining node.
+ * \param[in]  p         A pointer to the inner registers (struct ldpc_enc_avx2long).
+ * \param[out] output    The destination buffer, (cdwd_len - 2) * ls chars.
+ * \param[in]  cdwd_len  The number of codeword nodes, including the two skipped ones.
+ * \param[in]  ls        The lifting size (chars per node).
+ * \return 0 on success, -1 if \b p is NULL.
+ */
+int return_codeword_avx2long(void* p, uint8_t* output, const uint8_t cdwd_len, const uint16_t ls)
+{
+  struct ldpc_enc_avx2long* vp = p;
+
+  if (p == NULL) {
+    return -1;
+  }
+
+  const int n_sub = vp->n_subnodes;
+
+  // sub is deliberately shared by all iterations, so that it always holds the
+  // index reached by the last executed inner loop
+  int sub = 0;
+  for (int node = 0; node < cdwd_len - 2; node++) {
+    uint8_t*         dst = output + node * ls;
+    const bg_node_t* src = vp->codeword + (node + 2) * n_sub;
+
+    // all the subnodes but the last one are copied entirely
+    for (sub = 0; sub < n_sub - 1; sub++) {
+      for (int c = 0; c < SRSLTE_AVX2_B_SIZE; c++) {
+        dst[sub * SRSLTE_AVX2_B_SIZE + c] = src[sub].c[c];
+      }
+    }
+
+    // sub now indexes the last subnode: copy only the chars that belong to the node
+    const int n_left = ls - sub * SRSLTE_AVX2_B_SIZE;
+    for (int b = 0; b < n_left; b++) {
+      dst[sub * SRSLTE_AVX2_B_SIZE + b] = src[sub].c[b];
+    }
+  }
+  return 0;
+}
+
+/*!
+ * Computes the extended-region parity nodes for layers 4, ..., n_layers - 1.
+ * Each parity node is the auxiliary-register entry of its layer XORed with the
+ * rotated high-rate parity nodes connected to that layer.
+ * \param[in,out] q         A pointer to an encoder.
+ * \param[in]     n_layers  The number of layers to process.
+ */
+void encode_ext_region_avx2long(srslte_ldpc_encoder_t* q, uint8_t n_layers)
+{
+  struct ldpc_enc_avx2long* vp = q->ptr;
+
+  const int n_sub = vp->n_subnodes;
+
+  // In case of puncturing or IR-HARQ, one could process only specific check nodes
+  // instead of all of them from layer 4 to layer n_layers - 1.
+  for (int layer = 4; layer < n_layers; layer++) {
+    bg_node_t*      parity  = vp->codeword + (q->bgK + layer) * n_sub;
+    const __m256i*  sys_sum = vp->aux + layer * n_sub;
+    const uint16_t* shifts  = q->pcm + q->bgK + layer * q->bgN;
+
+    // contribution of the systematic part (already computed)
+    for (int s = 0; s < n_sub; s++) {
+      parity[s].v = sys_sum[s];
+    }
+
+    // contribution of the high-rate region, with the proper circular shifts
+    for (int col = 0; col < 4; col++) {
+      // skip the high-rate parity nodes that are not connected to this check node
+      if (shifts[col] == NO_CNCT) {
+        continue;
+      }
+
+      rotate_node_right(
+          &(vp->codeword[(q->bgK + col) * n_sub].v), vp->rotated_node, shifts[col], q->ls, vp->n_subnodes);
+      for (int s = 0; s < n_sub; s++) {
+        parity[s].v = _mm256_xor_si256(parity[s].v, vp->rotated_node[s]);
+      }
+    }
+  }
+}
+
+/*!
+ * Accumulates, for every check node, the XOR of the systematic nodes connected to
+ * it (each rotated by the corresponding PCM entry) into the auxiliary register.
+ * \param[in,out] q  A pointer to an encoder.
+ */
+void preprocess_systematic_bits_avx2long(srslte_ldpc_encoder_t* q)
+{
+  struct ldpc_enc_avx2long* vp = q->ptr;
+
+  const int       n_cols   = q->bgN;
+  const int       n_sys    = q->bgK;
+  const int       n_checks = q->bgM;
+  const int       ls       = q->ls;
+  const uint16_t* pcm      = q->pcm;
+
+  // every check node starts from an all-zero accumulator
+  bzero(vp->aux, n_checks * vp->n_subnodes * sizeof(__m256i));
+
+  // for all the K chunks (of ls bits each) of the input message
+  for (int col = 0; col < n_sys; col++) {
+    // for all check nodes
+    // NB: if looking for performance, this loop could be restricted to the high-rate
+    // region of the PCM (rows 0, 1, 2, 3) and to the check nodes that result in a
+    // transmitted coded bit after puncturing or IR-HARQ (see Deliverable D1 Section 3.4).
+    for (int row = 0; row < n_checks; row++) {
+      // PCM entry for the current input chunk and the current check node
+      const uint16_t shift = pcm[col + row * n_cols];
+
+      // nothing to add if check node and variable node are not connected
+      if (shift == NO_CNCT) {
+        continue;
+      }
+
+      rotate_node_right(&(vp->codeword[col * vp->n_subnodes].v), vp->rotated_node, shift, ls, vp->n_subnodes);
+
+      __m256i* acc = vp->aux + row * vp->n_subnodes;
+      for (int s = 0; s < vp->n_subnodes; s++) {
+        const __m256i masked = _mm256_and_si256(vp->rotated_node[s], one_epi8);
+        acc[s]               = _mm256_xor_si256(acc[s], masked);
+      }
+    }
+  }
+}
+
+/*!
+ * Computes the four high-rate parity nodes from the first four entries of the
+ * auxiliary register (variant that rotates the first parity node to the right by one).
+ * \param[in,out] o  A pointer to an encoder (srslte_ldpc_encoder_t).
+ */
+void encode_high_rate_case1_avx2long(void* o)
+{
+  srslte_ldpc_encoder_t*    q  = o;
+  struct ldpc_enc_avx2long* vp = q->ptr;
+
+  const int ls    = q->ls;
+  const int n_sub = vp->n_subnodes;
+
+  // the first four entries of the auxiliary register
+  const __m256i* aux0 = vp->aux;
+  const __m256i* aux1 = aux0 + n_sub;
+  const __m256i* aux2 = aux1 + n_sub;
+  const __m256i* aux3 = aux2 + n_sub;
+
+  // the four high-rate parity nodes
+  bg_node_t* par0 = vp->codeword + q->bgK * n_sub;
+  bg_node_t* par1 = vp->codeword + (q->bgK + 1) * n_sub;
+  bg_node_t* par2 = vp->codeword + (q->bgK + 2) * n_sub;
+  bg_node_t* par3 = vp->codeword + (q->bgK + 3) * n_sub;
+
+  // first chunk of parity bits: sum of the four auxiliary entries
+  for (int s = 0; s < n_sub; s++) {
+    __m256i sum = _mm256_xor_si256(aux0[s], aux1[s]);
+    sum         = _mm256_xor_si256(sum, aux2[s]);
+    sum         = _mm256_xor_si256(sum, aux3[s]);
+    par0[s].v   = sum;
+  }
+
+  rotate_node_right(&(par0[0].v), vp->rotated_node, 1, ls, vp->n_subnodes);
+
+  for (int s = 0; s < n_sub; s++) {
+    // second chunk of parity bits
+    par1[s].v = _mm256_xor_si256(aux0[s], vp->rotated_node[s]);
+    // fourth chunk of parity bits
+    par3[s].v = _mm256_xor_si256(aux3[s], vp->rotated_node[s]);
+    // third chunk of parity bits (needs the fourth one)
+    par2[s].v = _mm256_xor_si256(aux2[s], par3[s].v);
+  }
+}
+
+/*!
+ * Computes the four high-rate parity nodes from the first four entries of the
+ * auxiliary register (variant whose first parity node is the sum of the auxiliary
+ * entries rotated to the right by ls - 105 % ls).
+ * \param[in,out] o  A pointer to an encoder (srslte_ldpc_encoder_t).
+ */
+void encode_high_rate_case2_avx2long(void* o)
+{
+  srslte_ldpc_encoder_t*    q  = o;
+  struct ldpc_enc_avx2long* vp = q->ptr;
+
+  const int ls    = q->ls;
+  const int n_sub = vp->n_subnodes;
+
+  // the first four entries of the auxiliary register
+  const __m256i* aux0 = vp->aux;
+  const __m256i* aux1 = aux0 + n_sub;
+  const __m256i* aux2 = aux1 + n_sub;
+  const __m256i* aux3 = aux2 + n_sub;
+
+  // the four high-rate parity nodes
+  bg_node_t* par0 = vp->codeword + q->bgK * n_sub;
+  bg_node_t* par1 = vp->codeword + (q->bgK + 1) * n_sub;
+  bg_node_t* par2 = vp->codeword + (q->bgK + 2) * n_sub;
+  bg_node_t* par3 = vp->codeword + (q->bgK + 3) * n_sub;
+
+  // first chunk of parity bits: the sum of the auxiliary entries is built in the
+  // rotation buffer and then rotated directly into the codeword
+  for (int s = 0; s < n_sub; s++) {
+    __m256i sum         = _mm256_xor_si256(aux0[s], aux1[s]);
+    sum                 = _mm256_xor_si256(sum, aux2[s]);
+    sum                 = _mm256_xor_si256(sum, aux3[s]);
+    vp->rotated_node[s] = sum;
+  }
+  rotate_node_right(vp->rotated_node, &(par0[0].v), ls - 105 % ls, ls, vp->n_subnodes);
+
+  for (int s = 0; s < n_sub; s++) {
+    // second chunk of parity bits
+    par1[s].v = _mm256_xor_si256(aux0[s], par0[s].v);
+    // fourth chunk of parity bits
+    par3[s].v = _mm256_xor_si256(aux3[s], par0[s].v);
+    // third chunk of parity bits (needs the fourth one)
+    par2[s].v = _mm256_xor_si256(aux2[s], par3[s].v);
+  }
+}
+
+/*!
+ * Computes the four high-rate parity nodes from the first four entries of the
+ * auxiliary register (variant whose first parity node is the sum of the auxiliary
+ * entries rotated to the right by ls - 1).
+ * \param[in,out] o  A pointer to an encoder (srslte_ldpc_encoder_t).
+ */
+void encode_high_rate_case3_avx2long(void* o)
+{
+  srslte_ldpc_encoder_t*    q  = o;
+  struct ldpc_enc_avx2long* vp = q->ptr;
+
+  const int ls    = q->ls;
+  const int n_sub = vp->n_subnodes;
+
+  // the first four entries of the auxiliary register
+  const __m256i* aux0 = vp->aux;
+  const __m256i* aux1 = aux0 + n_sub;
+  const __m256i* aux2 = aux1 + n_sub;
+  const __m256i* aux3 = aux2 + n_sub;
+
+  // the four high-rate parity nodes
+  bg_node_t* par0 = vp->codeword + q->bgK * n_sub;
+  bg_node_t* par1 = vp->codeword + (q->bgK + 1) * n_sub;
+  bg_node_t* par2 = vp->codeword + (q->bgK + 2) * n_sub;
+  bg_node_t* par3 = vp->codeword + (q->bgK + 3) * n_sub;
+
+  // first chunk of parity bits: the sum of the auxiliary entries is built in the
+  // rotation buffer and then rotated directly into the codeword
+  for (int s = 0; s < n_sub; s++) {
+    __m256i sum         = _mm256_xor_si256(aux0[s], aux1[s]);
+    sum                 = _mm256_xor_si256(sum, aux2[s]);
+    sum                 = _mm256_xor_si256(sum, aux3[s]);
+    vp->rotated_node[s] = sum;
+  }
+  rotate_node_right(vp->rotated_node, &(par0[0].v), ls - 1, ls, vp->n_subnodes);
+
+  for (int s = 0; s < n_sub; s++) {
+    // second chunk of parity bits
+    par1[s].v = _mm256_xor_si256(aux0[s], par0[s].v);
+    // third chunk of parity bits (needs the second one)
+    par2[s].v = _mm256_xor_si256(aux1[s], par1[s].v);
+    // fourth chunk of parity bits
+    par3[s].v = _mm256_xor_si256(aux3[s], par0[s].v);
+  }
+}
+
+/*!
+ * Computes the four high-rate parity nodes from the first four entries of the
+ * auxiliary register (variant that rotates the first parity node to the right by one).
+ * \param[in,out] o  A pointer to an encoder (srslte_ldpc_encoder_t).
+ */
+void encode_high_rate_case4_avx2long(void* o)
+{
+  srslte_ldpc_encoder_t*    q  = o;
+  struct ldpc_enc_avx2long* vp = q->ptr;
+
+  const int ls    = q->ls;
+  const int n_sub = vp->n_subnodes;
+
+  // the first four entries of the auxiliary register
+  const __m256i* aux0 = vp->aux;
+  const __m256i* aux1 = aux0 + n_sub;
+  const __m256i* aux2 = aux1 + n_sub;
+  const __m256i* aux3 = aux2 + n_sub;
+
+  // the four high-rate parity nodes
+  bg_node_t* par0 = vp->codeword + q->bgK * n_sub;
+  bg_node_t* par1 = vp->codeword + (q->bgK + 1) * n_sub;
+  bg_node_t* par2 = vp->codeword + (q->bgK + 2) * n_sub;
+  bg_node_t* par3 = vp->codeword + (q->bgK + 3) * n_sub;
+
+  // first chunk of parity bits: sum of the four auxiliary entries
+  for (int s = 0; s < n_sub; s++) {
+    __m256i sum = _mm256_xor_si256(aux0[s], aux1[s]);
+    sum         = _mm256_xor_si256(sum, aux2[s]);
+    sum         = _mm256_xor_si256(sum, aux3[s]);
+    par0[s].v   = sum;
+  }
+
+  rotate_node_right(&(par0[0].v), vp->rotated_node, 1, ls, vp->n_subnodes);
+
+  for (int s = 0; s < n_sub; s++) {
+    // second chunk of parity bits
+    par1[s].v = _mm256_xor_si256(aux0[s], vp->rotated_node[s]);
+    // third chunk of parity bits (needs the second one)
+    par2[s].v = _mm256_xor_si256(aux1[s], par1[s].v);
+    // fourth chunk of parity bits
+    par3[s].v = _mm256_xor_si256(aux3[s], vp->rotated_node[s]);
+  }
+}
+
+/*
+ * Circular right rotation of a node made of n_subnodes 256-bit blocks: output
+ * char p (0 <= p < ls) is input char (p + shift) mod ls. The chars of the last
+ * output block beyond position ls are padding and carry no information.
+ *
+ * All reads stay inside the n_subnodes blocks of the input node and exactly
+ * n_subnodes output blocks are written. (An earlier formulation loaded 32 chars
+ * starting "gap" chars before the input node and could store one block past the
+ * output when shift was 0 and ls a multiple of SRSLTE_AVX2_B_SIZE.)
+ *
+ * Expects 0 <= shift <= ls and non-overlapping input and output.
+ */
+static void rotate_node_right(const __m256i* in_256i, __m256i* out, uint16_t shift, uint16_t ls, int8_t n_subnodes)
+{
+  const uint8_t* in = (const uint8_t*)in_256i;
+
+  // chars in[shift], ..., in[ls - 1] go to the beginning of the output
+  const int head_len = ls - shift;
+  // output blocks completely filled with chars of the head
+  const int n_head_blocks = head_len / SRSLTE_AVX2_B_SIZE;
+  // chars of the head that spill into the following ("mixed") output block
+  const int gap = head_len % SRSLTE_AVX2_B_SIZE;
+
+  int i = 0;
+
+  // blocks taken entirely from the head of the rotation
+  for (; (i < n_head_blocks) && (i < n_subnodes); i++) {
+    out[i] = _mm256_loadu_si256((const __m256i*)(in + shift + i * SRSLTE_AVX2_B_SIZE));
+  }
+  if (i >= n_subnodes) {
+    // the head alone fills the whole node (e.g., shift == 0 and ls a multiple of the block size)
+    return;
+  }
+
+  // mixed block: the last gap chars of the input followed by its first chars.
+  // It is assembled char by char so that no load crosses the boundaries of the node.
+  if (gap > 0) {
+    uint8_t mixed[SRSLTE_AVX2_B_SIZE];
+    int     b = 0;
+    for (; b < gap; b++) {
+      mixed[b] = in[ls - gap + b];
+    }
+    for (; b < SRSLTE_AVX2_B_SIZE; b++) {
+      mixed[b] = in[b - gap];
+    }
+    out[i] = _mm256_loadu_si256((const __m256i*)mixed);
+    i++;
+  }
+
+  // remaining blocks: output char p is input char p - head_len
+  for (; i < n_subnodes; i++) {
+    out[i] = _mm256_loadu_si256((const __m256i*)(in + i * SRSLTE_AVX2_B_SIZE - head_len));
+  }
+}
+
+#endif // LV_HAVE_AVX2
--- a/lib/src/phy/fec/ldpc/ldpc_enc_c.c
+++ b/lib/src/phy/fec/ldpc/ldpc_enc_c.c
@ -0,0 +1,221 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_enc_c.c
+ * \brief Definition of the LDPC encoder inner functions (not optimized).
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <stdint.h>
+
+#include "srslte/phy/fec/ldpc/base_graph.h"
+#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
+#include "srslte/phy/utils/debug.h"
+
+/*!
+ * Computes the parity bits of the extended region, i.e. those generated by the check nodes
+ * (layers) 4, ..., n_layers - 1.
+ *
+ * For each layer, the contribution of the systematic bits is read from the auxiliary buffer
+ * q->ptr and is combined (XOR) with circularly shifted copies of the four high-rate parity
+ * chunks already stored in output.
+ *
+ * \param[in]     q        The LDPC encoder.
+ * \param[in,out] output   The codeword under construction (first two variable nodes removed).
+ * \param[in]     n_layers Number of layers to process; nothing is done if it is not larger than 4.
+ */
+void encode_ext_region(srslte_ldpc_encoder_t* q, uint8_t* output, uint8_t n_layers)
+{
+  uint8_t(*aux)[q->ls] = q->ptr;
+
+  const int ls = q->ls;
+
+  // In case of puncturing or IR-HARQ, one could process only specific check nodes
+  // instead of all of them.
+  for (int layer = 4; layer < n_layers; layer++) {
+    // chunk of the codeword generated by this layer
+    uint8_t* parity = output + (q->bgK + layer - 2) * ls;
+    // PCM entries that connect this layer to the four variable nodes of the high-rate region
+    const uint16_t* hr_shifts = q->pcm + q->bgK + layer * q->bgN;
+
+    for (int bit = 0; bit < ls; bit++) {
+      // start from the systematic part, which has already been computed
+      uint8_t acc = aux[layer][bit];
+
+      // add the high-rate parity chunks, each with its own circular shift
+      for (int node = 0; node < 4; node++) {
+        const uint16_t shift = hr_shifts[node];
+        if (shift == NO_CNCT) {
+          continue;
+        }
+        const uint8_t* hr_chunk = output + (q->bgK - 2 + node) * ls;
+        acc ^= hr_chunk[(bit + shift) % ls];
+      }
+      parity[bit] = acc;
+    }
+  }
+}
+
+/*!
+ * Computes, for every check node, the contribution of the systematic bits and stores it in
+ * the auxiliary buffer q->ptr (one row of ls entries per check node).
+ *
+ * \param[in,out] q     The LDPC encoder; its auxiliary buffer is overwritten.
+ * \param[in]     input The message, seen as q->bgK chunks of q->ls entries each. Only the least
+ *                      significant bit of each entry is used (this removes the filler bit flag).
+ */
+void preprocess_systematic_bits(srslte_ldpc_encoder_t* q, const uint8_t* input)
+{
+  uint8_t(*aux)[q->ls] = q->ptr;
+
+  const int       n_var   = q->bgN;
+  const int       n_sys   = q->bgK;
+  const int       n_check = q->bgM;
+  const int       ls      = q->ls;
+  const uint16_t* pcm     = q->pcm;
+
+  bzero(aux, n_check * ls * sizeof(uint8_t));
+
+  // NB: if looking for performance, the loop over the check nodes could be restricted to the
+  // high-rate region of the PCM (layers 0, 1, 2, 3) and to the check nodes that result in a
+  // transmitted coded bit after puncturing or IR-HARQ (see Deliverable D1 Section 3.4).
+  for (int node = 0; node < n_sys; node++) {
+    const uint8_t* chunk = input + node * ls;
+
+    for (int layer = 0; layer < n_check; layer++) {
+      // PCM entry for the current input chunk and the current check node
+      const uint16_t shift = pcm[node + layer * n_var];
+      if (shift == NO_CNCT) {
+        // variable node and check node are not connected: nothing to add
+        continue;
+      }
+
+      // accumulate a circularly shifted copy of the chunk
+      for (int bit = 0; bit < ls; bit++) {
+        aux[layer][bit] ^= 1U & chunk[(bit + shift) % ls];
+      }
+    }
+  }
+}
+
+/*!
+ * Computes the four parity chunks of the high-rate region (case 1).
+ *
+ * The first chunk is the XOR of the first four rows of the auxiliary buffer; the other chunks
+ * combine one auxiliary row with the first chunk circularly shifted by one position.
+ *
+ * \param[in]  q_     The LDPC encoder (a srslte_ldpc_encoder_t).
+ * \param[out] output The codeword under construction; chunks bgK - 2, ..., bgK + 1 are written.
+ */
+void encode_high_rate_case1(void* q_, uint8_t* output)
+{
+  srslte_ldpc_encoder_t* q = (srslte_ldpc_encoder_t*)q_;
+  uint8_t(*aux)[q->ls]     = q->ptr;
+
+  const int ls = q->ls;
+
+  uint8_t* parity0 = output + (q->bgK - 2) * ls;
+  uint8_t* parity1 = output + (q->bgK - 1) * ls;
+  uint8_t* parity2 = output + q->bgK * ls;
+  uint8_t* parity3 = output + (q->bgK + 1) * ls;
+
+  // first chunk of parity bits
+  for (int n = 0; n < ls; n++) {
+    parity0[n] = aux[0][n] ^ aux[1][n] ^ aux[2][n] ^ aux[3][n];
+  }
+
+  for (int n = 0; n < ls; n++) {
+    const uint8_t shifted0 = parity0[(n + 1) % ls];
+    // second chunk of parity bits
+    parity1[n] = aux[0][n] ^ shifted0;
+    // fourth chunk of parity bits
+    parity3[n] = aux[3][n] ^ shifted0;
+    // third chunk of parity bits (needs the fourth one)
+    parity2[n] = aux[2][n] ^ parity3[n];
+  }
+}
+
+/*!
+ * Computes the four parity chunks of the high-rate region (case 2).
+ *
+ * The first chunk is the XOR of the first four rows of the auxiliary buffer, read with a
+ * circular offset of 105 positions; the other chunks combine one auxiliary row with the
+ * (unshifted) first chunk.
+ *
+ * \param[in]  q      The LDPC encoder.
+ * \param[out] output The codeword under construction; chunks bgK - 2, ..., bgK + 1 are written.
+ */
+void encode_high_rate_case2(srslte_ldpc_encoder_t* q, uint8_t* output)
+{
+  uint8_t(*aux)[q->ls] = q->ptr;
+
+  const int ls = q->ls;
+
+  uint8_t* parity0 = output + (q->bgK - 2) * ls;
+  uint8_t* parity1 = output + (q->bgK - 1) * ls;
+  uint8_t* parity2 = output + q->bgK * ls;
+  uint8_t* parity3 = output + (q->bgK + 1) * ls;
+
+  for (int n = 0; n < ls; n++) {
+    // index n - 105, wrapped into [0, ls)
+    int src = (n - 105) % ls;
+    if (src < 0) {
+      src += ls;
+    }
+
+    // first chunk of parity bits
+    const uint8_t first = aux[0][src] ^ aux[1][src] ^ aux[2][src] ^ aux[3][src];
+    parity0[n]          = first;
+    // second chunk of parity bits
+    parity1[n] = aux[0][n] ^ first;
+    // fourth chunk of parity bits
+    parity3[n] = aux[3][n] ^ first;
+    // third chunk of parity bits (needs the fourth one)
+    parity2[n] = aux[2][n] ^ parity3[n];
+  }
+}
+
+/*!
+ * Computes the four parity chunks of the high-rate region (case 3).
+ *
+ * The first chunk is the XOR of the first four rows of the auxiliary buffer, read with a
+ * circular offset of one position; the other chunks combine one auxiliary row with a
+ * previously computed chunk.
+ *
+ * \param[in]  q      The LDPC encoder.
+ * \param[out] output The codeword under construction; chunks bgK - 2, ..., bgK + 1 are written.
+ */
+void encode_high_rate_case3(srslte_ldpc_encoder_t* q, uint8_t* output)
+{
+  uint8_t(*aux)[q->ls] = q->ptr;
+
+  const int ls = q->ls;
+
+  uint8_t* parity0 = output + (q->bgK - 2) * ls;
+  uint8_t* parity1 = output + (q->bgK - 1) * ls;
+  uint8_t* parity2 = output + q->bgK * ls;
+  uint8_t* parity3 = output + (q->bgK + 1) * ls;
+
+  for (int n = 0; n < ls; n++) {
+    // index n - 1, wrapped into [0, ls)
+    int src = (n - 1) % ls;
+    if (src < 0) {
+      src += ls;
+    }
+
+    // first chunk of parity bits
+    const uint8_t first = aux[0][src] ^ aux[1][src] ^ aux[2][src] ^ aux[3][src];
+    parity0[n]          = first;
+    // second chunk of parity bits
+    parity1[n] = aux[0][n] ^ first;
+    // third chunk of parity bits (needs the second one)
+    parity2[n] = aux[1][n] ^ parity1[n];
+    // fourth chunk of parity bits
+    parity3[n] = aux[3][n] ^ first;
+  }
+}
+
+/*!
+ * Computes the four parity chunks of the high-rate region (case 4).
+ *
+ * The first chunk is the XOR of the first four rows of the auxiliary buffer; the second and
+ * fourth chunks combine one auxiliary row with the first chunk circularly shifted by one
+ * position, while the third chunk is obtained from the second one.
+ *
+ * \param[in]  q      The LDPC encoder.
+ * \param[out] output The codeword under construction; chunks bgK - 2, ..., bgK + 1 are written.
+ */
+void encode_high_rate_case4(srslte_ldpc_encoder_t* q, uint8_t* output)
+{
+  uint8_t(*aux)[q->ls] = q->ptr;
+
+  const int ls = q->ls;
+
+  uint8_t* parity0 = output + (q->bgK - 2) * ls;
+  uint8_t* parity1 = output + (q->bgK - 1) * ls;
+  uint8_t* parity2 = output + q->bgK * ls;
+  uint8_t* parity3 = output + (q->bgK + 1) * ls;
+
+  // first chunk of parity bits
+  for (int n = 0; n < ls; n++) {
+    parity0[n] = aux[0][n] ^ aux[1][n] ^ aux[2][n] ^ aux[3][n];
+  }
+
+  for (int n = 0; n < ls; n++) {
+    const uint8_t shifted0 = parity0[(n + 1) % ls];
+    // second chunk of parity bits
+    parity1[n] = aux[0][n] ^ shifted0;
+    // third chunk of parity bits (needs the second one)
+    parity2[n] = aux[1][n] ^ parity1[n];
+    // fourth chunk of parity bits
+    parity3[n] = aux[3][n] ^ shifted0;
+  }
+}
--- a/lib/src/phy/fec/ldpc/ldpc_encoder.c
+++ b/lib/src/phy/fec/ldpc/ldpc_encoder.c
@ -0,0 +1,390 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_encoder.c
+ * \brief Definition of the LDPC encoder.
+ * \author David Gregoratti (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <stdint.h>
+
+#include "../utils_avx2.h"
+#include "ldpc_enc_all.h"
+#include "srslte/phy/fec/ldpc/base_graph.h"
+#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
+#include "srslte/phy/utils/debug.h"
+#include "srslte/phy/utils/vector.h"
+
+/*!
+ * Carries out the actual destruction of the memory allocated to the (non-optimized) encoder.
+ *
+ * Both the parity-check matrix (q->pcm) and the auxiliary buffer (q->ptr) are released and the
+ * pointers are reset to NULL, so that calling this function twice on the same encoder (e.g.,
+ * once from a failed initialization and once from srslte_ldpc_encoder_free()) is harmless.
+ *
+ * \param[in,out] o Pointer to the encoder (a srslte_ldpc_encoder_t); NULL is ignored.
+ */
+static void free_enc_c(void* o)
+{
+  srslte_ldpc_encoder_t* q = o;
+  if (q == NULL) {
+    return;
+  }
+
+  // free(NULL) is a no-op: no need to test the pointers first
+  free(q->pcm);
+  q->pcm = NULL;
+
+  free(q->ptr);
+  q->ptr = NULL;
+}
+
+/*!
+ * Carries out the actual encoding with a non-optimized encoder.
+ *
+ * \param[in]  o              Pointer to the encoder (a srslte_ldpc_encoder_t).
+ * \param[in]  input          The message to encode, q->bgK chunks of q->ls entries each.
+ * \param[out] output         The codeword, without the first two (punctured) variable nodes.
+ * \param[in]  input_length   Length of the message: it must satisfy input_length / bgK == ls.
+ * \param[in]  cdwd_rm_length Requested codeword length. It is clamped to the interval
+ *                            [(bgK + 2) * ls, liftN - 2 * ls] and rounded up to a multiple of ls.
+ * \return 0 on success, -1 if the input length does not match the encoder dimensions.
+ */
+static int encode_c(void* o, const uint8_t* input, uint8_t* output, uint32_t input_length, uint32_t cdwd_rm_length)
+{
+  srslte_ldpc_encoder_t* q = o;
+
+  if (input_length / q->bgK != q->ls) {
+    // not a system error: errno is meaningless here, hence ERROR instead of perror
+    ERROR("Dimension mismatch.\n");
+    return -1;
+  }
+  // it must be smaller than the codeword size
+  if (cdwd_rm_length > q->liftN - 2 * q->ls) {
+    cdwd_rm_length = q->liftN - 2 * q->ls;
+  }
+  // We need at least q->bgK + 4 variable nodes to cover the high-rate region. However,
+  // 2 variable nodes are systematically punctured by the encoder.
+  if (cdwd_rm_length < (q->bgK + 2) * q->ls) {
+    cdwd_rm_length = (q->bgK + 2) * q->ls;
+  }
+  // round up to the next multiple of the lifting size
+  if (cdwd_rm_length % q->ls) {
+    cdwd_rm_length = (cdwd_rm_length / q->ls + 1) * q->ls;
+  }
+
+  // systematic bits: plain copy of the message, skipping the first two chunks
+  int skip_in = 2 * q->ls;
+  for (int k = 0; k < (q->bgK - 2) * q->ls; k++) {
+    output[k] = input[skip_in + k];
+  }
+
+  preprocess_systematic_bits(q, input);
+
+  q->encode_high_rate(q, output);
+
+  // When computing the number of layers, we need to recall that the standard always removes
+  // the first two variable nodes from the final codeword.
+  uint8_t n_layers = cdwd_rm_length / q->ls - q->bgK + 2;
+
+  encode_ext_region(q, output, n_layers);
+
+  return 0;
+}
+
+/*!
+ * Initializes a non-optimized encoder: selects the high-rate encoding function that
+ * corresponds to the base graph and lifting size, and allocates the auxiliary buffer
+ * (q->bgM rows of q->ls entries).
+ *
+ * On failure, all the memory owned by the encoder (including q->pcm, which is allocated by
+ * the caller) is released and the pointers are reset, so that a subsequent call to
+ * srslte_ldpc_encoder_free() neither leaks nor frees twice.
+ *
+ * \param[in,out] q The encoder; bg, ls, bgM and pcm must already be set.
+ * \return 0 on success, -1 otherwise.
+ */
+static int init_c(srslte_ldpc_encoder_t* q)
+{
+  // register the destructor before anything can fail
+  q->ptr  = NULL;
+  q->free = free_enc_c;
+
+  int ls_index = get_ls_index(q->ls);
+
+  if (ls_index == VOID_LIFTSIZE) {
+    ERROR("Invalid lifting size %d\n", q->ls);
+    goto clean_exit;
+  }
+
+  if (q->bg == BG1 && ls_index != 6) {
+    q->encode_high_rate = encode_high_rate_case1;
+  } else if (q->bg == BG1 && ls_index == 6) {
+    q->encode_high_rate = encode_high_rate_case2;
+  } else if (q->bg == BG2 && ls_index != 3 && ls_index != 7) {
+    q->encode_high_rate = encode_high_rate_case3;
+  } else if (q->bg == BG2 && (ls_index == 3 || ls_index == 7)) {
+    q->encode_high_rate = encode_high_rate_case4;
+  } else {
+    ERROR("Invalid lifting size %d and/or Base Graph %d\n", q->ls, q->bg + 1);
+    goto clean_exit;
+  }
+
+  q->ptr = srslte_vec_malloc(q->bgM * q->ls * sizeof(uint8_t));
+  if (!q->ptr) {
+    perror("malloc");
+    goto clean_exit;
+  }
+
+  q->encode = encode_c;
+
+  return 0;
+
+clean_exit:
+  free_enc_c(q);
+  // make sure no dangling pointer survives, whatever the destructor does
+  q->pcm = NULL;
+  q->ptr = NULL;
+  return -1;
+}
+
+#ifdef LV_HAVE_AVX2
+/*!
+ * Carries out the actual destruction of the memory allocated to the AVX2 encoder.
+ *
+ * The parity-check matrix (q->pcm) and the inner AVX2 encoder (q->ptr) are released and the
+ * pointers are reset to NULL, so that calling this function twice on the same encoder (e.g.,
+ * once from a failed initialization and once from srslte_ldpc_encoder_free()) is harmless.
+ *
+ * \param[in,out] o Pointer to the encoder (a srslte_ldpc_encoder_t); NULL is ignored.
+ */
+static void free_enc_avx2(void* o)
+{
+  srslte_ldpc_encoder_t* q = o;
+  if (q == NULL) {
+    return;
+  }
+
+  // free(NULL) is a no-op: no need to test the pointer first
+  free(q->pcm);
+  q->pcm = NULL;
+
+  if (q->ptr) {
+    delete_ldpc_enc_avx2(q->ptr);
+    q->ptr = NULL;
+  }
+}
+
+/*!
+ * Carries out the actual encoding with an optimized (AVX2) encoder.
+ *
+ * \param[in]  o              Pointer to the encoder (a srslte_ldpc_encoder_t).
+ * \param[in]  input          The message to encode, q->bgK chunks of q->ls entries each.
+ * \param[out] output         The codeword, as returned by return_codeword_avx2().
+ * \param[in]  input_length   Length of the message: it must satisfy input_length / bgK == ls.
+ * \param[in]  cdwd_rm_length Requested codeword length. It is clamped to the interval
+ *                            [(bgK + 2) * ls, liftN - 2 * ls] and rounded up to a multiple of ls.
+ * \return 0 on success, -1 if the input length does not match the encoder dimensions.
+ */
+static int encode_avx2(void* o, const uint8_t* input, uint8_t* output, uint32_t input_length, uint32_t cdwd_rm_length)
+{
+  srslte_ldpc_encoder_t* q = o;
+
+  if (input_length / q->bgK != q->ls) {
+    // not a system error: errno is meaningless here, hence ERROR instead of perror
+    ERROR("Dimension mismatch.\n");
+    return -1;
+  }
+
+  // it must be smaller than the codeword size
+  if (cdwd_rm_length > q->liftN - 2 * q->ls) {
+    cdwd_rm_length = q->liftN - 2 * q->ls;
+  }
+  // We need at least q->bgK + 4 variable nodes to cover the high-rate region. However,
+  // 2 variable nodes are systematically punctured by the encoder.
+  if (cdwd_rm_length < (q->bgK + 2) * q->ls) {
+    cdwd_rm_length = (q->bgK + 2) * q->ls;
+  }
+  // round up to the next multiple of the lifting size
+  if (cdwd_rm_length % q->ls) {
+    cdwd_rm_length = (cdwd_rm_length / q->ls + 1) * q->ls;
+  }
+
+  // copy the message into the inner encoder
+  load_avx2(q->ptr, input, q->bgK, q->bgN, q->ls);
+
+  preprocess_systematic_bits_avx2(q);
+
+  q->encode_high_rate_avx2(q);
+
+  // When computing the number of layers, we need to recall that the standard always removes
+  // the first two variable nodes from the final codeword.
+  uint8_t n_layers = cdwd_rm_length / q->ls - q->bgK + 2;
+
+  encode_ext_region_avx2(q, n_layers);
+
+  // copy the codeword (systematic part plus n_layers parity nodes) to the output buffer
+  return_codeword_avx2(q->ptr, output, n_layers + q->bgK, q->ls);
+
+  return 0;
+}
+
+/*!
+ * Initializes an optimized (AVX2) encoder: selects the high-rate encoding function that
+ * corresponds to the base graph and lifting size, and creates the inner AVX2 encoder.
+ *
+ * On failure, all the memory owned by the encoder (including q->pcm, which is allocated by
+ * the caller) is released and the pointers are reset, so that a subsequent call to
+ * srslte_ldpc_encoder_free() neither leaks nor frees twice.
+ *
+ * \param[in,out] q The encoder; bg, ls and pcm must already be set.
+ * \return 0 on success, -1 otherwise.
+ */
+static int init_avx2(srslte_ldpc_encoder_t* q)
+{
+  // register the destructor before anything can fail
+  q->ptr  = NULL;
+  q->free = free_enc_avx2;
+
+  int ls_index = get_ls_index(q->ls);
+
+  if (ls_index == VOID_LIFTSIZE) {
+    ERROR("Invalid lifting size %d\n", q->ls);
+    goto clean_exit;
+  }
+
+  if (q->bg == BG1 && ls_index != 6) {
+    q->encode_high_rate_avx2 = encode_high_rate_case1_avx2;
+  } else if (q->bg == BG1 && ls_index == 6) {
+    q->encode_high_rate_avx2 = encode_high_rate_case2_avx2;
+  } else if (q->bg == BG2 && ls_index != 3 && ls_index != 7) {
+    q->encode_high_rate_avx2 = encode_high_rate_case3_avx2;
+  } else if (q->bg == BG2 && (ls_index == 3 || ls_index == 7)) {
+    q->encode_high_rate_avx2 = encode_high_rate_case4_avx2;
+  } else {
+    ERROR("Invalid lifting size %d and/or Base Graph %d\n", q->ls, q->bg + 1);
+    goto clean_exit;
+  }
+
+  if ((q->ptr = create_ldpc_enc_avx2(q)) == NULL) {
+    perror("Create_ldpc_enc\n");
+    goto clean_exit;
+  }
+
+  q->encode = encode_avx2;
+
+  return 0;
+
+clean_exit:
+  free_enc_avx2(q);
+  // make sure no dangling pointer survives, whatever the destructor does
+  q->pcm = NULL;
+  q->ptr = NULL;
+  return -1;
+}
+
+/*!
+ * Carries out the actual destruction of the memory allocated to the AVX2 encoder
+ * (version for lifting sizes larger than one AVX2 register).
+ *
+ * The parity-check matrix (q->pcm) and the inner AVX2 encoder (q->ptr) are released and the
+ * pointers are reset to NULL, so that calling this function twice on the same encoder (e.g.,
+ * once from a failed initialization and once from srslte_ldpc_encoder_free()) is harmless.
+ *
+ * \param[in,out] o Pointer to the encoder (a srslte_ldpc_encoder_t); NULL is ignored.
+ */
+static void free_enc_avx2long(void* o)
+{
+  srslte_ldpc_encoder_t* q = o;
+  if (q == NULL) {
+    return;
+  }
+
+  // free(NULL) is a no-op: no need to test the pointer first
+  free(q->pcm);
+  q->pcm = NULL;
+
+  if (q->ptr) {
+    delete_ldpc_enc_avx2long(q->ptr);
+    q->ptr = NULL;
+  }
+}
+
+/*!
+ * Carries out the actual encoding with an optimized (AVX2) encoder, version for lifting
+ * sizes larger than one AVX2 register.
+ *
+ * \param[in]  o              Pointer to the encoder (a srslte_ldpc_encoder_t).
+ * \param[in]  input          The message to encode, q->bgK chunks of q->ls entries each.
+ * \param[out] output         The codeword, as returned by return_codeword_avx2long().
+ * \param[in]  input_length   Length of the message: it must satisfy input_length / bgK == ls.
+ * \param[in]  cdwd_rm_length Requested codeword length. It is clamped to the interval
+ *                            [(bgK + 2) * ls, liftN - 2 * ls] and rounded up to a multiple of ls.
+ * \return 0 on success, -1 if the input length does not match the encoder dimensions.
+ */
+static int
+encode_avx2long(void* o, const uint8_t* input, uint8_t* output, uint32_t input_length, uint32_t cdwd_rm_length)
+{
+  srslte_ldpc_encoder_t* q = o;
+
+  if (input_length / q->bgK != q->ls) {
+    // not a system error: errno is meaningless here, hence ERROR instead of perror
+    ERROR("Dimension mismatch.\n");
+    return -1;
+  }
+
+  // it must be smaller than the codeword size
+  if (cdwd_rm_length > q->liftN - 2 * q->ls) {
+    cdwd_rm_length = q->liftN - 2 * q->ls;
+  }
+  // We need at least q->bgK + 4 variable nodes to cover the high-rate region. However,
+  // 2 variable nodes are systematically punctured by the encoder.
+  if (cdwd_rm_length < (q->bgK + 2) * q->ls) {
+    cdwd_rm_length = (q->bgK + 2) * q->ls;
+  }
+  // round up to the next multiple of the lifting size
+  if (cdwd_rm_length % q->ls) {
+    cdwd_rm_length = (cdwd_rm_length / q->ls + 1) * q->ls;
+  }
+
+  // copy the message into the inner encoder
+  load_avx2long(q->ptr, input, q->bgK, q->bgN, q->ls);
+
+  preprocess_systematic_bits_avx2long(q);
+
+  q->encode_high_rate_avx2(q);
+
+  // When computing the number of layers, we need to recall that the standard always removes
+  // the first two variable nodes from the final codeword.
+  uint8_t n_layers = cdwd_rm_length / q->ls - q->bgK + 2;
+
+  encode_ext_region_avx2long(q, n_layers);
+
+  // copy the codeword (systematic part plus n_layers parity nodes) to the output buffer
+  return_codeword_avx2long(q->ptr, output, n_layers + q->bgK, q->ls);
+
+  return 0;
+}
+
+/*!
+ * Initializes an optimized (AVX2) encoder, version for lifting sizes larger than one AVX2
+ * register: selects the high-rate encoding function that corresponds to the base graph and
+ * lifting size, and creates the inner AVX2 encoder.
+ *
+ * On failure, all the memory owned by the encoder (including q->pcm, which is allocated by
+ * the caller) is released and the pointers are reset, so that a subsequent call to
+ * srslte_ldpc_encoder_free() neither leaks nor frees twice.
+ *
+ * \param[in,out] q The encoder; bg, ls and pcm must already be set.
+ * \return 0 on success, -1 otherwise.
+ */
+static int init_avx2long(srslte_ldpc_encoder_t* q)
+{
+  // register the destructor before anything can fail
+  q->ptr  = NULL;
+  q->free = free_enc_avx2long;
+
+  int ls_index = get_ls_index(q->ls);
+
+  if (ls_index == VOID_LIFTSIZE) {
+    ERROR("Invalid lifting size %d\n", q->ls);
+    goto clean_exit;
+  }
+
+  if (q->bg == BG1 && ls_index != 6) {
+    q->encode_high_rate_avx2 = encode_high_rate_case1_avx2long;
+  } else if (q->bg == BG1 && ls_index == 6) {
+    q->encode_high_rate_avx2 = encode_high_rate_case2_avx2long;
+  } else if (q->bg == BG2 && ls_index != 3 && ls_index != 7) {
+    q->encode_high_rate_avx2 = encode_high_rate_case3_avx2long;
+  } else if (q->bg == BG2 && (ls_index == 3 || ls_index == 7)) {
+    q->encode_high_rate_avx2 = encode_high_rate_case4_avx2long;
+  } else {
+    ERROR("Invalid lifting size %d and/or Base Graph %d\n", q->ls, q->bg + 1);
+    goto clean_exit;
+  }
+
+  if ((q->ptr = create_ldpc_enc_avx2long(q)) == NULL) {
+    perror("Create_ldpc_enc\n");
+    goto clean_exit;
+  }
+
+  q->encode = encode_avx2long;
+
+  return 0;
+
+clean_exit:
+  free_enc_avx2long(q);
+  // make sure no dangling pointer survives, whatever the destructor does
+  q->pcm = NULL;
+  q->ptr = NULL;
+  return -1;
+}
+
+#endif
+
+/*!
+ * Initializes an LDPC encoder: sets the code dimensions according to the base graph and the
+ * lifting size, creates the compact parity-check matrix and hands over to the initialization
+ * function of the requested encoder type.
+ *
+ * \param[out] q    The encoder to initialize.
+ * \param[in]  type Encoder type. The AVX2 type is available only if LV_HAVE_AVX2 is defined; the
+ *                  long variant is selected when ls is larger than SRSLTE_AVX2_B_SIZE.
+ * \param[in]  bg   Base graph (BG1 or BG2).
+ * \param[in]  ls   Lifting size.
+ * \return 0 on success, -1 otherwise. The parity-check matrix allocated here is released on
+ * every failure path of this function.
+ */
+int srslte_ldpc_encoder_init(srslte_ldpc_encoder_t*     q,
+                             srslte_ldpc_encoder_type_t type,
+                             srslte_basegraph_t         bg,
+                             uint16_t                   ls)
+{
+  if (q == NULL) {
+    return -1;
+  }
+
+  // start from a known state, so that srslte_ldpc_encoder_free() is safe even after a failure
+  q->pcm  = NULL;
+  q->ptr  = NULL;
+  q->free = NULL;
+
+  switch (bg) {
+    case BG1:
+      q->bgN = BG1Nfull;
+      q->bgM = BG1M;
+      break;
+    case BG2:
+      q->bgN = BG2Nfull;
+      q->bgM = BG2M;
+      break;
+    default:
+      ERROR("Base Graph BG%d does not exist\n", bg + 1);
+      return -1;
+  }
+  q->bg  = bg;
+  q->bgK = q->bgN - q->bgM;
+
+  q->ls    = ls;
+  q->liftK = ls * q->bgK;
+  q->liftM = ls * q->bgM;
+  q->liftN = ls * q->bgN;
+
+  q->pcm = srslte_vec_malloc(q->bgM * q->bgN * sizeof(uint16_t));
+  if (!q->pcm) {
+    perror("malloc");
+    return -1;
+  }
+  if (create_compact_pcm(q->pcm, NULL, q->bg, q->ls) != 0) {
+    // not a system error: errno is meaningless here, hence ERROR instead of perror
+    ERROR("Create PCM\n");
+    free(q->pcm);
+    q->pcm = NULL;
+    return -1;
+  }
+
+  switch (type) {
+    case SRSLTE_LDPC_ENCODER_C:
+      return init_c(q);
+#ifdef LV_HAVE_AVX2
+    case SRSLTE_LDPC_ENCODER_AVX2:
+      if (ls <= SRSLTE_AVX2_B_SIZE) {
+        return init_avx2(q);
+      } else {
+        return init_avx2long(q);
+      }
+#endif // LV_HAVE_AVX2
+    default:
+      // unknown (or not compiled) encoder type: no destructor has been registered,
+      // the parity-check matrix must be released here
+      free(q->pcm);
+      q->pcm = NULL;
+      return -1;
+  }
+}
+
+/*!
+ * Releases the memory owned by the encoder, through the destructor registered at
+ * initialization (if any), and clears the whole structure.
+ *
+ * \param[in,out] q The encoder to destroy; NULL is ignored.
+ */
+void srslte_ldpc_encoder_free(srslte_ldpc_encoder_t* q)
+{
+  if (q == NULL) {
+    return;
+  }
+  if (q->free) {
+    q->free(q);
+  }
+  bzero(q, sizeof(srslte_ldpc_encoder_t));
+}
+
+/*!
+ * Encodes a message by calling the encoding function registered in the encoder (q->encode).
+ *
+ * \param[in]  q              The encoder; q->encode must have been set (it is assigned by the
+ *                            initialization functions of this file).
+ * \param[in]  input          The message to encode.
+ * \param[out] output         The resulting codeword.
+ * \param[in]  input_length   Length of the message.
+ * \param[in]  cdwd_rm_length Requested codeword length.
+ * \return The value returned by the registered encoding function.
+ */
+int srslte_ldpc_encoder_encode(srslte_ldpc_encoder_t* q,
+                               const uint8_t*         input,
+                               uint8_t*               output,
+                               uint32_t               input_length,
+                               uint32_t               cdwd_rm_length)
+{
+  return q->encode(q, input, output, input_length, cdwd_rm_length);
+}
--- a/lib/src/phy/fec/ldpc/ldpc_rm.c
+++ b/lib/src/phy/fec/ldpc/ldpc_rm.c
@ -0,0 +1,696 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_rm.c
+ * \brief Definition of the LDPC Rate Matcher and Rate Dematcher (float-valued, int16_t and int8_t)
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "srslte/phy/fec/ldpc/ldpc_common.h" //FILLER_BIT definition
+#include "srslte/phy/fec/ldpc/ldpc_rm.h"
+#include "srslte/phy/utils/vector.h"
+
+#include "srslte/phy/utils/debug.h"
+
+//#define debug
+/*!
+ * \brief Look-up table: k0 indices
+ *
+ * Indexed as BASEK0[rv][bg]: for each redundancy version rv (rows), the entry for each of
+ * the two base graphs (columns). The starting position k0 is obtained from this value and
+ * the lifting size (see init_rm()).
+ */
+static const uint32_t BASEK0[4][2] = {{0, 0}, {17, 13}, {33, 25}, {56, 43}};
+
+/*!
+ * \brief Look-up table: base codeword lengths N/LS, indexed by base graph
+ *
+ */
+static const uint32_t BASEN[2] = {66, 50};
+
+/*!
+ * \brief Look-up table: base codeblock lengths K/LS, indexed by base graph
+ *
+ */
+static const uint32_t BASEK[2] = {22, 10};
+
+/*!
+ * \brief Look-up table: Returns the modulation order associated to a mod_type_t
+ *
+ */
+static const uint32_t MODORD[5] = {1, 2, 4, 6, 8};
+
+/*!
+ * \brief Maximum number of coded bits available for transmission in a transport block.
+ *
+ * If there is only one user in the system and it uses the maximum bandwidth, the number of
+ * coded bits cannot exceed nRB * nDS * nSC_RB * modOrd * nLayers, where
+ *  - nLayers is the number of layers (<= 4),
+ *  - modOrd is the modulation order (<= 8),
+ *  - nSC_RB is the number of subcarriers per resource block (RB) (<= 12),
+ *  - nDS is the number of symbols used to transmit data in a RB (13), and
+ *  - nRB is the maximum number of resource blocks in the system. It depends on the channel
+ *    bandwidth and on the subcarrier spacing: according to Table 5.3.2-1 (Maximum transmission
+ *    bandwidth configuration NRB: FR1), it is not larger than 273 (i.e., for subcarrier
+ *    spacing 30 kHz and bandwidth 100 MHz).
+ */
+static const uint32_t MAXE = 273 * 13 * 12 * 8 * 4;
+
+/*!
+ * \brief Describes a rate matcher.
+ */
+struct pRM_tx {
+  uint8_t* tmp_rm_codeword; /*!< \brief Pointer to a temporary buffer between bit-selection and interleaver. */
+};
+
+/*!
+ * \brief Describes a rate dematcher (float version).
+ */
+struct pRM_rx_f {
+  float*    tmp_rm_symbol; /*!< \brief Pointer to a temporary buffer between bit-selection and interleaver. */
+  uint32_t* indices;       /*!< \brief Pointer to a temporary buffer with the indices for bit-selection. */
+};
+
+/*!
+ * \brief Describes a rate dematcher (short version).
+ */
+struct pRM_rx_s {
+  int16_t*  tmp_rm_symbol; /*!< \brief Pointer to a temporary buffer between bit-selection and interleaver. */
+  uint32_t* indices;       /*!< \brief Pointer to a temporary buffer with the indices for bit-selection. */
+};
+
+/*!
+ * \brief Describes a rate dematcher (char version).
+ */
+struct pRM_rx_c {
+  int8_t*   tmp_rm_symbol; /*!< \brief Pointer to a temporary buffer between bit-selection and interleaver. */
+  uint32_t* indices;       /*!< \brief Pointer to a temporary buffer with the indices for bit-selection. */
+};
+
+/*!
+ * Initializes the rate-matching parameters.
+ *
+ * \param[out] p        The rate matcher/dematcher whose parameters are set.
+ * \param[in]  E        Length of the rate-matched codeword: not larger than MAXE and a multiple
+ *                      of the modulation order.
+ * \param[in]  F        Number of filler bits.
+ * \param[in]  bg       Base graph index (0 for BG1, 1 for BG2).
+ * \param[in]  ls       Lifting size.
+ * \param[in]  rv       Redundancy version (0, 1, 2 or 3).
+ * \param[in]  mod_type Modulation type, used as an index into MODORD.
+ * \param[in]  Nref     Size of the limited buffer: the circular buffer length Ncb is the
+ *                      minimum between Nref and the codeword length N.
+ * \return 0 on success, -1 if any parameter is invalid.
+ */
+static int init_rm(srslte_ldpc_rm_t* p,
+                   const uint32_t    E,
+                   const uint32_t    F,
+                   const uint32_t    bg,
+                   const uint32_t    ls,
+                   const uint8_t     rv,
+                   const mod_type_t  mod_type,
+                   const uint32_t    Nref)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  // validate all look-up table indices before using them
+  if (bg >= sizeof(BASEN) / sizeof(BASEN[0])) {
+    ERROR("Wrong base graph index = %u. It must be 0 (BG1) or 1 (BG2)\n", bg);
+    return -1;
+  }
+  if (rv >= sizeof(BASEK0) / sizeof(BASEK0[0])) {
+    ERROR("Wrong redundancy version (rv) = %d. It must be smaller than 4\n", rv);
+    return -1;
+  }
+  if ((uint32_t)mod_type >= sizeof(MODORD) / sizeof(MODORD[0])) {
+    ERROR("Wrong modulation type = %u\n", (uint32_t)mod_type);
+    return -1;
+  }
+
+  uint32_t basek0    = BASEK0[rv][bg];
+  uint32_t mod_order = MODORD[mod_type];
+  uint32_t N         = ls * BASEN[bg];
+  uint32_t K         = ls * BASEK[bg];
+
+  // check E smaller than MAXE
+  if (E > MAXE) {
+    ERROR("Wrong RM codeword length (E) = %d. It must be smaller than %d for base graph %d\n", E, MAXE, bg + 1);
+    return -1;
+  }
+
+  // check E is a multiple of mod_order (required by the bit interleaver)
+  if ((E % mod_order) != 0) {
+    ERROR("Wrong RM codeword length (E) = %d. It must be a multiple of modulation order = %d\n", E, mod_order);
+    return -1;
+  }
+
+  // Set parameters
+  p->N         = N;
+  p->E         = E;
+  p->K         = K;
+  p->F         = F;
+  p->ls        = ls;
+  p->mod_order = mod_order;
+  p->bg        = bg;
+  // Determine Ncb and k0
+  if (N <= Nref) {
+    p->Ncb = N;
+    p->k0  = ls * basek0;
+  } else {
+    // limited buffer: k0 is scaled accordingly and kept a multiple of the lifting size
+    p->Ncb = Nref;
+    p->k0  = ls * ((basek0 * Nref) / N);
+  }
+
+  return 0;
+}
+
+/*!
+ * Bit selection for the rate-matching block. Reads the circular buffer (the first Ncb entries
+ * of input) starting from position k0 and copies entries to output until out_len bits have
+ * been selected. Filler bits are ignored.
+ *
+ * \param[in]  input   The codeword.
+ * \param[out] output  The selected bits.
+ * \param[in]  out_len Number of bits to select.
+ * \param[in]  k0      Starting position inside the circular buffer.
+ * \param[in]  Ncb     Length of the circular buffer.
+ */
+static void bit_selection_rm_tx(const uint8_t* input,
+                                uint8_t*       output,
+                                const uint32_t out_len,
+                                const uint32_t k0,
+                                const uint32_t Ncb)
+{
+  uint32_t n_selected = 0;
+
+  for (uint32_t offset = 0; n_selected < out_len; offset++) {
+    const uint8_t bit = input[(k0 + offset) % Ncb];
+    if (bit == FILLER_BIT) {
+      continue;
+    }
+    output[n_selected] = bit;
+    n_selected++;
+  }
+}
+
+/*!
+ * Undoes bit selection for the rate-dematching block (float version).
+ * The output has the codeword length. Filler bits are set to INFINITY (to indicate a very
+ * reliable 0 bit), missing symbols are set to 0 (completely unknown bit) and repeated
+ * symbols are added together.
+ *
+ * \param[in]  input       The received soft bits.
+ * \param[in]  in_len      Number of received soft bits.
+ * \param[out] output      The reconstructed codeword (soft bits).
+ * \param[in]  out_len     Length of the codeword.
+ * \param[out] indices     Working buffer (in_len entries): codeword position of each input.
+ * \param[in]  ini_exclude First position of the filler bits.
+ * \param[in]  end_exclude Position following the last filler bit.
+ * \param[in]  k0          Starting position inside the circular buffer.
+ * \param[in]  Ncb         Length of the circular buffer.
+ */
+static void bit_selection_rm_rx(const float*   input,
+                                const uint32_t in_len,
+                                float*         output,
+                                const uint32_t out_len,
+                                uint32_t*      indices,
+                                const uint32_t ini_exclude,
+                                const uint32_t end_exclude,
+                                const uint32_t k0,
+                                const uint32_t Ncb)
+{
+  // find the codeword position of each received symbol, skipping the filler bits
+  uint32_t n_mapped = 0;
+  for (uint32_t offset = 0; n_mapped < in_len; offset++) {
+    const uint32_t pos = (k0 + offset) % Ncb;
+    if (pos < ini_exclude || pos >= end_exclude) {
+      indices[n_mapped] = pos;
+      n_mapped++;
+    }
+  }
+
+  // all symbols are unknown until proven otherwise
+  bzero(output, out_len * sizeof(float));
+
+  // filler bits are known
+  for (uint32_t pos = ini_exclude; pos < end_exclude; pos++) {
+    output[pos] = INFINITY;
+  }
+
+  // accumulate the received soft bits (repetitions end up in the same position)
+  for (uint32_t n = 0; n < in_len; n++) {
+    output[indices[n]] += input[n];
+  }
+}
+
+/*!
+ * Undoes bit selection for the rate-dematching block (int16_t version).
+ * The output has the codeword length. Filler bits are set to the maximum positive 16-bit
+ * value (to indicate a very reliable 0 bit), missing symbols are set to 0 (completely unknown
+ * bit) and repeated symbols are added together, saturating at +/-(2^14 - 1).
+ *
+ * \param[in]  input       The received soft bits.
+ * \param[in]  in_len      Number of received soft bits.
+ * \param[out] output      The reconstructed codeword (soft bits).
+ * \param[in]  out_len     Length of the codeword.
+ * \param[out] indices     Working buffer (in_len entries): codeword position of each input.
+ * \param[in]  ini_exclude First position of the filler bits.
+ * \param[in]  end_exclude Position following the last filler bit.
+ * \param[in]  k0          Starting position inside the circular buffer.
+ * \param[in]  Ncb         Length of the circular buffer.
+ */
+static void bit_selection_rm_rx_s(const int16_t* input,
+                                  const uint32_t in_len,
+                                  int16_t*       output,
+                                  const uint32_t out_len,
+                                  uint32_t*      indices,
+                                  const uint32_t ini_exclude,
+                                  const uint32_t end_exclude,
+                                  const uint32_t k0,
+                                  const uint32_t Ncb)
+{
+  // Max positive value in 16-bit representation: reserved for filler bits
+  const long filler_value = (1U << 15U) - 1;
+  // Messages use a 15-bit quantization. Soft bits use the remaining bit to denote infinity.
+  const int16_t soft_limit = (1U << 14U) - 1;
+
+  // find the codeword position of each received symbol, skipping the filler bits
+  uint32_t n_mapped = 0;
+  for (uint32_t offset = 0; n_mapped < in_len; offset++) {
+    const uint32_t pos = (k0 + offset) % Ncb;
+    if (pos < ini_exclude || pos >= end_exclude) {
+      indices[n_mapped] = pos;
+      n_mapped++;
+    }
+  }
+
+  // all symbols are unknown until proven otherwise
+  bzero(output, out_len * sizeof(int16_t));
+
+  // filler bits are known
+  for (uint32_t pos = ini_exclude; pos < end_exclude; pos++) {
+    output[pos] = filler_value;
+  }
+
+  // accumulate the received soft bits (repetitions end up in the same position); the input
+  // is assumed to be quantized between -soft_limit and soft_limit
+  for (uint32_t n = 0; n < in_len; n++) {
+    long sum = (long)output[indices[n]] + input[n];
+    if (sum > soft_limit) {
+      sum = soft_limit;
+    } else if (sum < -soft_limit) {
+      sum = -soft_limit;
+    }
+    output[indices[n]] = (int16_t)sum;
+  }
+}
+
+/*!
+ * Undoes bit selection for the rate-dematching block (int8_t version).
+ * The output has the codeword length. Filler bits are set to the maximum positive 8-bit
+ * value (to indicate a very reliable 0 bit), missing symbols are set to 0 (completely unknown
+ * bit) and repeated symbols are added together, saturating at +/-(2^6 - 1).
+ *
+ * \param[in]  input       The received soft bits.
+ * \param[in]  in_len      Number of received soft bits.
+ * \param[out] output      The reconstructed codeword (soft bits).
+ * \param[in]  out_len     Length of the codeword.
+ * \param[out] indices     Working buffer (in_len entries): codeword position of each input.
+ * \param[in]  ini_exclude First position of the filler bits.
+ * \param[in]  end_exclude Position following the last filler bit.
+ * \param[in]  k0          Starting position inside the circular buffer.
+ * \param[in]  Ncb         Length of the circular buffer.
+ */
+static void bit_selection_rm_rx_c(const int8_t*  input,
+                                  const uint32_t in_len,
+                                  int8_t*        output,
+                                  const uint32_t out_len,
+                                  uint32_t*      indices,
+                                  const uint32_t ini_exclude,
+                                  const uint32_t end_exclude,
+                                  const uint32_t k0,
+                                  const uint32_t Ncb)
+{
+  // Max positive value in 8-bit representation: reserved for filler bits
+  const long filler_value = (1U << 7U) - 1;
+  // Soft bits saturate at 2^6 - 1, so that the filler value stands out as "infinity"
+  const int16_t soft_limit = (1U << 6U) - 1;
+
+  // find the codeword position of each received symbol, skipping the filler bits
+  uint32_t n_mapped = 0;
+  for (uint32_t offset = 0; n_mapped < in_len; offset++) {
+    const uint32_t pos = (k0 + offset) % Ncb;
+    if (pos < ini_exclude || pos >= end_exclude) {
+      indices[n_mapped] = pos;
+      n_mapped++;
+    }
+  }
+
+  // all symbols are unknown until proven otherwise
+  bzero(output, out_len * sizeof(int8_t));
+
+  // filler bits are known
+  for (uint32_t pos = ini_exclude; pos < end_exclude; pos++) {
+    output[pos] = filler_value;
+  }
+
+  // accumulate the received soft bits (repetitions end up in the same position)
+  for (uint32_t n = 0; n < in_len; n++) {
+    long sum = (long)output[indices[n]] + input[n];
+    if (sum > soft_limit) {
+      sum = soft_limit;
+    } else if (sum < -soft_limit) {
+      sum = -soft_limit;
+    }
+    output[indices[n]] = (int8_t)sum;
+  }
+}
+
+/*!
+ * Bit interleaver: the input is seen as a matrix with mod_order rows, stored row by row,
+ * and is copied to the output column by column.
+ *
+ * \param[in]  input      The bits to interleave.
+ * \param[out] output     The interleaved bits.
+ * \param[in]  in_out_len Number of bits (a multiple of mod_order).
+ * \param[in]  mod_order  Modulation order.
+ */
+static void
+bit_interleaver_rm_tx(const uint8_t* input, uint8_t* output, const uint32_t in_out_len, const uint32_t mod_order)
+{
+  const uint32_t n_rows = mod_order;
+  const uint32_t n_cols = in_out_len / n_rows;
+
+  uint32_t write_idx = 0;
+  for (uint32_t col = 0; col < n_cols; col++) {
+    for (uint32_t row = 0; row < n_rows; row++) {
+      output[write_idx] = input[row * n_cols + col];
+      write_idx++;
+    }
+  }
+}
+
+/*!
+ * Bit deinterleaver (float): the input is read sequentially, as the columns of a matrix with
+ * mod_order rows, and the matrix is stored in the output row by row.
+ *
+ * \param[in]  input      The soft bits to deinterleave.
+ * \param[out] output     The deinterleaved soft bits.
+ * \param[in]  in_out_len Number of soft bits (a multiple of mod_order).
+ * \param[in]  mod_order  Modulation order.
+ */
+static void
+bit_interleaver_rm_rx(const float* input, float* output, const uint32_t in_out_len, const uint32_t mod_order)
+{
+  const uint32_t n_rows = mod_order;
+  const uint32_t n_cols = in_out_len / n_rows;
+
+  uint32_t read_idx = 0;
+  for (uint32_t col = 0; col < n_cols; col++) {
+    for (uint32_t row = 0; row < n_rows; row++) {
+      output[row * n_cols + col] = input[read_idx];
+      read_idx++;
+    }
+  }
+}
+
+/*!
+ * Bit deinterleaver (short): the input is read sequentially, as the columns of a matrix with
+ * mod_order rows, and the matrix is stored in the output row by row.
+ *
+ * \param[in]  input      The soft bits to deinterleave.
+ * \param[out] output     The deinterleaved soft bits.
+ * \param[in]  in_out_len Number of soft bits (a multiple of mod_order).
+ * \param[in]  mod_order  Modulation order.
+ */
+static void
+bit_interleaver_rm_rx_s(const int16_t* input, int16_t* output, const uint32_t in_out_len, const uint32_t mod_order)
+{
+  const uint32_t n_rows = mod_order;
+  const uint32_t n_cols = in_out_len / n_rows;
+
+  uint32_t read_idx = 0;
+  for (uint32_t col = 0; col < n_cols; col++) {
+    for (uint32_t row = 0; row < n_rows; row++) {
+      output[row * n_cols + col] = input[read_idx];
+      read_idx++;
+    }
+  }
+}
+
+/*!
+ * Bit deinterleaver (char): the input is read sequentially, as the columns of a matrix with
+ * mod_order rows, and the matrix is stored in the output row by row.
+ *
+ * \param[in]  input      The soft bits to deinterleave.
+ * \param[out] output     The deinterleaved soft bits.
+ * \param[in]  in_out_len Number of soft bits (a multiple of mod_order).
+ * \param[in]  mod_order  Modulation order.
+ */
+static void
+bit_interleaver_rm_rx_c(const int8_t* input, int8_t* output, const uint32_t in_out_len, const uint32_t mod_order)
+{
+  const uint32_t n_rows = mod_order;
+  const uint32_t n_cols = in_out_len / n_rows;
+
+  uint32_t read_idx = 0;
+  for (uint32_t col = 0; col < n_cols; col++) {
+    for (uint32_t row = 0; row < n_rows; row++) {
+      output[row * n_cols + col] = input[read_idx];
+      read_idx++;
+    }
+  }
+}
+
+/*!
+ * Allocates the internal state of a transmitter-side rate matcher.
+ *
+ * \param[out] p Rate-matcher object. On success p->ptr points to the newly
+ *               allocated state; on failure p->ptr is set to NULL.
+ * \return 0 on success, -1 if \a p is NULL or a memory allocation fails.
+ */
+int srslte_ldpc_rm_tx_init(srslte_ldpc_rm_t* p)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  // make sure no dangling pointer is left behind if anything below fails
+  p->ptr = NULL;
+
+  // allocate memory to the rate-matcher instance
+  struct pRM_tx* pp = malloc(sizeof(*pp));
+  if (pp == NULL) {
+    return -1;
+  }
+
+  // allocate memory to the rm_codeword after bit selection.
+  pp->tmp_rm_codeword = srslte_vec_u8_malloc(MAXE);
+  if (pp->tmp_rm_codeword == NULL) {
+    free(pp);
+    return -1;
+  }
+
+  // publish the state only once it is fully built
+  p->ptr = pp;
+
+  return 0;
+}
+
+/*!
+ * Allocates the internal state of a receiver-side rate dematcher working with
+ * float-valued symbols.
+ *
+ * \param[out] p Rate-dematcher object. On success p->ptr points to the newly
+ *               allocated state; on failure p->ptr is set to NULL.
+ * \return 0 on success, -1 if \a p is NULL or a memory allocation fails.
+ */
+int srslte_ldpc_rm_rx_init_f(srslte_ldpc_rm_t* p)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  // make sure no dangling pointer is left behind if anything below fails
+  p->ptr = NULL;
+
+  // allocate memory to the rate-dematcher instance
+  struct pRM_rx_f* pp = malloc(sizeof(*pp));
+  if (pp == NULL) {
+    return -1;
+  }
+
+  // allocate memory to the temporary symbol buffer
+  pp->tmp_rm_symbol = srslte_vec_f_malloc(MAXE);
+  if (pp->tmp_rm_symbol == NULL) {
+    free(pp);
+    return -1;
+  }
+
+  // allocate memory to the index buffer
+  pp->indices = srslte_vec_u32_malloc(MAXE);
+  if (pp->indices == NULL) {
+    free(pp->tmp_rm_symbol);
+    free(pp);
+    return -1;
+  }
+
+  // publish the state only once it is fully built
+  p->ptr = pp;
+
+  return 0;
+}
+
+/*!
+ * Allocates the internal state of a receiver-side rate dematcher working with
+ * 16-bit integer symbols.
+ *
+ * \param[out] p Rate-dematcher object. On success p->ptr points to the newly
+ *               allocated state; on failure p->ptr is set to NULL.
+ * \return 0 on success, -1 if \a p is NULL or a memory allocation fails.
+ */
+int srslte_ldpc_rm_rx_init_s(srslte_ldpc_rm_t* p)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  // make sure no dangling pointer is left behind if anything below fails
+  p->ptr = NULL;
+
+  // allocate memory to the rate-dematcher instance
+  struct pRM_rx_s* pp = malloc(sizeof(*pp));
+  if (pp == NULL) {
+    return -1;
+  }
+
+  // allocate memory to the temporary symbol buffer
+  pp->tmp_rm_symbol = srslte_vec_i16_malloc(MAXE);
+  if (pp->tmp_rm_symbol == NULL) {
+    free(pp);
+    return -1;
+  }
+
+  // allocate memory to the index buffer
+  pp->indices = srslte_vec_u32_malloc(MAXE);
+  if (pp->indices == NULL) {
+    free(pp->tmp_rm_symbol);
+    free(pp);
+    return -1;
+  }
+
+  // publish the state only once it is fully built
+  p->ptr = pp;
+
+  return 0;
+}
+/*!
+ * Allocates the internal state of a receiver-side rate dematcher working with
+ * 8-bit integer symbols.
+ *
+ * \param[out] p Rate-dematcher object. On success p->ptr points to the newly
+ *               allocated state; on failure p->ptr is set to NULL.
+ * \return 0 on success, -1 if \a p is NULL or a memory allocation fails.
+ */
+int srslte_ldpc_rm_rx_init_c(srslte_ldpc_rm_t* p)
+{
+  if (p == NULL) {
+    return -1;
+  }
+
+  // make sure no dangling pointer is left behind if anything below fails
+  p->ptr = NULL;
+
+  // allocate memory to the rate-dematcher instance
+  struct pRM_rx_c* pp = malloc(sizeof(*pp));
+  if (pp == NULL) {
+    return -1;
+  }
+
+  // allocate memory to the temporary symbol buffer
+  pp->tmp_rm_symbol = srslte_vec_i8_malloc(MAXE);
+  if (pp->tmp_rm_symbol == NULL) {
+    free(pp);
+    return -1;
+  }
+
+  // allocate memory to the index buffer
+  pp->indices = srslte_vec_u32_malloc(MAXE);
+  if (pp->indices == NULL) {
+    free(pp->tmp_rm_symbol);
+    free(pp);
+    return -1;
+  }
+
+  // publish the state only once it is fully built
+  p->ptr = pp;
+
+  return 0;
+}
+
+/*!
+ * Releases the internal state of a transmitter-side rate matcher.
+ *
+ * Does nothing if \a q or its internal state pointer is NULL. The state
+ * pointer is reset to NULL so that a repeated call is harmless.
+ *
+ * \param[in,out] q Rate-matcher object whose state is released.
+ */
+void srslte_ldpc_rm_tx_free(srslte_ldpc_rm_t* q)
+{
+  if (q == NULL || q->ptr == NULL) {
+    return;
+  }
+
+  struct pRM_tx* qq = q->ptr;
+  free(qq->tmp_rm_codeword);
+  free(qq);
+  q->ptr = NULL;
+}
+
+/*!
+ * Releases the internal state of a float-valued rate dematcher.
+ *
+ * Does nothing if \a q or its internal state pointer is NULL. The state
+ * pointer is reset to NULL so that a repeated call is harmless.
+ *
+ * \param[in,out] q Rate-dematcher object whose state is released.
+ */
+void srslte_ldpc_rm_rx_free_f(srslte_ldpc_rm_t* q)
+{
+  if (q == NULL || q->ptr == NULL) {
+    return;
+  }
+
+  struct pRM_rx_f* qq = q->ptr;
+  free(qq->tmp_rm_symbol);
+  free(qq->indices);
+  free(qq);
+  q->ptr = NULL;
+}
+
+/*!
+ * Releases the internal state of a 16-bit rate dematcher.
+ *
+ * Does nothing if \a q or its internal state pointer is NULL. The state
+ * pointer is reset to NULL so that a repeated call is harmless.
+ *
+ * \param[in,out] q Rate-dematcher object whose state is released.
+ */
+void srslte_ldpc_rm_rx_free_s(srslte_ldpc_rm_t* q)
+{
+  if (q == NULL || q->ptr == NULL) {
+    return;
+  }
+
+  struct pRM_rx_s* qq = q->ptr;
+  free(qq->tmp_rm_symbol);
+  free(qq->indices);
+  free(qq);
+  q->ptr = NULL;
+}
+
+/*!
+ * Releases the internal state of an 8-bit rate dematcher.
+ *
+ * Does nothing if \a q or its internal state pointer is NULL. The state
+ * pointer is reset to NULL so that a repeated call is harmless.
+ *
+ * \param[in,out] q Rate-dematcher object whose state is released.
+ */
+void srslte_ldpc_rm_rx_free_c(srslte_ldpc_rm_t* q)
+{
+  if (q == NULL || q->ptr == NULL) {
+    return;
+  }
+
+  struct pRM_rx_c* qq = q->ptr;
+  free(qq->tmp_rm_symbol);
+  free(qq->indices);
+  free(qq);
+  q->ptr = NULL;
+}
+
+/*!
+ * Rate matcher (transmitter side): configures \a q for the given parameters,
+ * runs the bit selection on \a input and, unless the modulation order is 1,
+ * interleaves the selected bits before writing them to \a output.
+ *
+ * The process is terminated with exit(-1) if the configuration step fails.
+ *
+ * \return 0 always (when it returns).
+ */
+int srslte_ldpc_rm_tx(srslte_ldpc_rm_t*        q,
+                      const uint8_t*           input,
+                      uint8_t*                 output,
+                      const uint32_t           E,
+                      const srslte_basegraph_t bg,
+                      const uint32_t           ls,
+                      const uint8_t            rv,
+                      const mod_type_t         mod_type,
+                      const uint32_t           Nref)
+
+{
+  // the number of filler bits is irrelevant at the transmitter, pass 0
+  const int init_status = init_rm(q, E, 0, bg, ls, rv, mod_type, Nref);
+  if (init_status != 0) {
+    perror("rate matcher init");
+    exit(-1);
+  }
+
+  struct pRM_tx* state   = q->ptr;
+  uint8_t*       scratch = state->tmp_rm_codeword;
+
+  if (q->mod_order == 1) {
+    // nothing to interleave: select straight into the output buffer
+    bit_selection_rm_tx(input, output, q->E, q->k0, q->Ncb);
+    return 0;
+  }
+
+  // select into the scratch buffer, then interleave into the output
+  bit_selection_rm_tx(input, scratch, q->E, q->k0, q->Ncb);
+  bit_interleaver_rm_tx(scratch, output, q->E, q->mod_order);
+
+  return 0;
+}
+
+/*!
+ * Rate dematcher for float-valued symbols: configures \a q for the given
+ * parameters, deinterleaves \a input (unless the modulation order is 1) and
+ * runs the receiver bit selection into \a output.
+ *
+ * The process is terminated with exit(-1) if the configuration step fails.
+ *
+ * \return 0 always (when it returns).
+ */
+int srslte_ldpc_rm_rx_f(srslte_ldpc_rm_t*        q,
+                        const float*             input,
+                        float*                   output,
+                        const uint32_t           E,
+                        const uint32_t           F,
+                        const srslte_basegraph_t bg,
+                        const uint32_t           ls,
+                        const uint8_t            rv,
+                        const mod_type_t         mod_type,
+                        const uint32_t           Nref)
+{
+  const int init_status = init_rm(q, E, F, bg, ls, rv, mod_type, Nref);
+  if (init_status != 0) {
+    perror("rate matcher init");
+    exit(-1);
+  }
+
+  struct pRM_rx_f* state   = q->ptr;
+  float*           scratch = state->tmp_rm_symbol;
+  uint32_t*        idx     = state->indices;
+
+  // range handed to the bit selection as [exclude_begin, exclude_end)
+  // (presumably the filler-bit positions; see bit_selection_rm_rx)
+  const uint32_t exclude_end   = q->K - 2 * q->ls;
+  const uint32_t exclude_begin = exclude_end - q->F;
+
+  // with modulation order 1 the deinterleaver is skipped and the input is used directly
+  const float* selected_in = input;
+  if (q->mod_order != 1) {
+    bit_interleaver_rm_rx(input, scratch, q->E, q->mod_order);
+    selected_in = scratch;
+  }
+
+  bit_selection_rm_rx(selected_in, q->E, output, q->N, idx, exclude_begin, exclude_end, q->k0, q->Ncb);
+  return 0;
+}
+
+/*!
+ * Rate dematcher for 16-bit integer symbols: configures \a q for the given
+ * parameters, deinterleaves \a input (unless the modulation order is 1) and
+ * runs the receiver bit selection into \a output.
+ *
+ * The object must have been initialized with srslte_ldpc_rm_rx_init_s().
+ * The process is terminated with exit(-1) if the configuration step fails.
+ *
+ * \return 0 always (when it returns).
+ */
+int srslte_ldpc_rm_rx_s(srslte_ldpc_rm_t*        q,
+                        const int16_t*           input,
+                        int16_t*                 output,
+                        const uint32_t           E,
+                        const uint32_t           F,
+                        const srslte_basegraph_t bg,
+                        const uint32_t           ls,
+                        const uint8_t            rv,
+                        const mod_type_t         mod_type,
+                        const uint32_t           Nref)
+{
+
+  if (init_rm(q, E, F, bg, ls, rv, mod_type, Nref) != 0) {
+    perror("rate matcher init");
+    exit(-1);
+  }
+
+  // the state was allocated as a struct pRM_rx_s by srslte_ldpc_rm_rx_init_s():
+  // access it through that type instead of the float variant plus a pointer cast
+  struct pRM_rx_s* pp            = q->ptr;
+  int16_t*         tmp_rm_symbol = pp->tmp_rm_symbol;
+  uint32_t*        indices       = pp->indices;
+  uint32_t         end_exclude   = q->K - 2 * q->ls;
+  uint32_t         ini_exclude   = end_exclude - q->F;
+
+  if (q->mod_order == 1) { // interleaver can be skipped
+    bit_selection_rm_rx_s(input, q->E, output, q->N, indices, ini_exclude, end_exclude, q->k0, q->Ncb);
+  } else {
+    bit_interleaver_rm_rx_s(input, tmp_rm_symbol, q->E, q->mod_order);
+    bit_selection_rm_rx_s(tmp_rm_symbol, q->E, output, q->N, indices, ini_exclude, end_exclude, q->k0, q->Ncb);
+  }
+
+  return 0;
+}
+
+/*!
+ * Rate dematcher for 8-bit integer symbols: configures \a q for the given
+ * parameters, deinterleaves \a input (unless the modulation order is 1) and
+ * runs the receiver bit selection into \a output.
+ *
+ * The process is terminated with exit(-1) if the configuration step fails.
+ *
+ * \return 0 always (when it returns).
+ */
+int srslte_ldpc_rm_rx_c(srslte_ldpc_rm_t*        q,
+                        const int8_t*            input,
+                        int8_t*                  output,
+                        const uint32_t           E,
+                        const uint32_t           F,
+                        const srslte_basegraph_t bg,
+                        const uint32_t           ls,
+                        const uint8_t            rv,
+                        const mod_type_t         mod_type,
+                        const uint32_t           Nref)
+{
+  const int init_status = init_rm(q, E, F, bg, ls, rv, mod_type, Nref);
+  if (init_status != 0) {
+    perror("rate matcher init");
+    exit(-1);
+  }
+
+  struct pRM_rx_c* state   = q->ptr;
+  int8_t*          scratch = state->tmp_rm_symbol;
+  uint32_t*        idx     = state->indices;
+
+  // range handed to the bit selection as [exclude_begin, exclude_end)
+  // (presumably the filler-bit positions; see bit_selection_rm_rx_c)
+  const uint32_t exclude_end   = q->K - 2 * q->ls;
+  const uint32_t exclude_begin = exclude_end - q->F;
+
+  // with modulation order 1 the deinterleaver is skipped and the input is used directly
+  const int8_t* selected_in = input;
+  if (q->mod_order != 1) {
+    bit_interleaver_rm_rx_c(input, scratch, q->E, q->mod_order);
+    selected_in = scratch;
+  }
+
+  bit_selection_rm_rx_c(selected_in, q->E, output, q->N, idx, exclude_begin, exclude_end, q->k0, q->Ncb);
+
+  return 0;
+}
--- a/lib/src/phy/fec/ldpc/test/CMakeLists.txt
+++ b/lib/src/phy/fec/ldpc/test/CMakeLists.txt
@ -0,0 +1,193 @@
+#
+# Copyright 2013-2020 Software Radio Systems Limited
+#
+# This file is part of srsLTE
+#
+# srsLTE is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as
+# published by the Free Software Foundation, either version 3 of
+# the License, or (at your option) any later version.
+#
+# srsLTE is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# A copy of the GNU Affero General Public License can be found in
+# the LICENSE file in the top-level directory of this distribution
+# and at http://www.gnu.org/licenses/.
+#
+
+# Test executables, one per source file, all linked against srslte_phy.
+add_executable(ldpc_enc_test ldpc_enc_test.c)
+target_link_libraries(ldpc_enc_test srslte_phy)
+
+add_executable(ldpc_dec_test ldpc_dec_test.c)
+target_link_libraries(ldpc_dec_test srslte_phy)
+
+add_executable(ldpc_dec_s_test ldpc_dec_s_test.c)
+target_link_libraries(ldpc_dec_s_test srslte_phy)
+
+add_executable(ldpc_dec_c_test ldpc_dec_c_test.c)
+target_link_libraries(ldpc_dec_c_test srslte_phy)
+
+add_executable(ldpc_chain_test ldpc_chain_test.c)
+target_link_libraries(ldpc_chain_test srslte_phy)
+
+add_executable(ldpc_rm_test ldpc_rm_test.c)
+target_link_libraries(ldpc_rm_test srslte_phy)
+
+add_executable(ldpc_rm_chain_test ldpc_rm_chain_test.c)
+target_link_libraries(ldpc_rm_chain_test srslte_phy)
+
+# Place the test binaries in <source tree>/tests/ldpc, next to the reference data files.
+set_target_properties(ldpc_enc_test ldpc_dec_test ldpc_dec_s_test ldpc_dec_c_test ldpc_chain_test ldpc_rm_test ldpc_rm_chain_test
+  PROPERTIES
+  RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/tests/ldpc"
+)
+
+# The AVX2 tests are only built when SIMD support is not disabled.
+if(NOT DISABLE_SIMD)
+  add_executable(ldpc_enc_avx2_test ldpc_enc_avx2_test.c)
+  target_link_libraries(ldpc_enc_avx2_test srslte_phy)
+
+  add_executable(ldpc_dec_avx2_test ldpc_dec_avx2_test.c)
+  target_link_libraries(ldpc_dec_avx2_test srslte_phy)
+
+  set_target_properties(ldpc_dec_avx2_test ldpc_enc_avx2_test
+    PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/tests/ldpc"
+  )
+endif(NOT DISABLE_SIMD)
+
+# Copy the reference example files from this directory into the directory the
+# tests run from. NOTE(review): relies on the external "cp" command and on shell
+# expansion of the "?" wildcard, so it is presumably Unix-only.
+add_custom_command(
+  OUTPUT ${PROJECT_SOURCE_DIR}/tests/ldpc/examplesBG1.dat
+         ${PROJECT_SOURCE_DIR}/tests/ldpc/examplesBG2.dat
+  COMMAND cp examplesBG?.dat ${PROJECT_SOURCE_DIR}/tests/ldpc
+  DEPENDS examplesBG1.dat examplesBG2.dat
+  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+  COMMENT "Copying LDPC test reference files"
+  VERBATIM
+)
+
+# Target wrapping the copy step so that executables can depend on it.
+add_custom_target(ldpc_test_files
+  DEPENDS ${PROJECT_SOURCE_DIR}/tests/ldpc/examplesBG1.dat
+          ${PROJECT_SOURCE_DIR}/tests/ldpc/examplesBG2.dat
+)
+
+# Building any of these targets triggers the copy of the reference files.
+add_dependencies(ldpc_dec_test ldpc_test_files)
+add_dependencies(ldpc_enc_test ldpc_test_files)
+add_dependencies(ldpc_rm_test ldpc_test_files)
+
+### Test LDPC libs
+# Registers one test per lifting size given as argument. The test name prefix
+# and the base command are read from the caller's test_name and test_command
+# variables; the lifting size is appended as "-LS<size>" / "-l<size>".
+function(ldpc_unit_tests)
+  foreach(lifting_size IN LISTS ARGN)
+    add_test(
+      NAME ${test_name}-LS${lifting_size}
+      COMMAND ${test_command} -l${lifting_size}
+      WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/ldpc
+    )
+  endforeach()
+endfunction()
+
+# Lifting sizes passed to the unit tests, one row per family
+# (each row doubles its first value: 2, 3, 5, 7, 9, 11, 13, 15).
+set(lifting_sizes
+        2 4 8 16 32 64 128 256
+        3 6 12 24 48 96 192 384
+        5 10 20 40 80 160 320
+        7 14 28 56 112 224
+        9 18 36 72 144 288
+        11 22 44 88 176 352
+        13 26 52 104 208
+        15 30 60 120 240
+        )
+
+# Encoder and decoder unit tests: one test per base graph and lifting size.
+# ldpc_unit_tests() picks up test_name and test_command from this scope.
+set(test_name LDPC-ENC-BG1)
+set(test_command ldpc_enc_test -b1)
+ldpc_unit_tests(${lifting_sizes})
+
+set(test_name LDPC-ENC-BG2)
+set(test_command ldpc_enc_test -b2)
+ldpc_unit_tests(${lifting_sizes})
+
+set(test_name LDPC-DEC-BG1)
+set(test_command ldpc_dec_test -b1)
+ldpc_unit_tests(${lifting_sizes})
+
+set(test_name LDPC-DEC-BG2)
+set(test_command ldpc_dec_test -b2)
+ldpc_unit_tests(${lifting_sizes})
+
+# End-to-end encoder/decoder chain, run with its default options.
+add_test(NAME LDPC-chain COMMAND ldpc_chain_test)
+
+### Test LDPC Rate Matching UNIT tests
+# NOTE(review): mod_order is not referenced by ldpc_rm_unit_tests(), which
+# defines its own lists of modulation indices and orders.
+set(mod_order
+        1 2 4 6 8
+        )
+# Registers the rate-matching unit tests for every lifting size given as
+# argument. The test name prefix and the base command are read from the caller's
+# test_name and test_command variables.
+#
+# For each lifting size, base graph, redundancy version and modulation, three
+# rate-matched lengths E are tested (about N, N/2 and 2N, always a multiple of
+# the modulation order), each one with -M set to N and to N/2.
+function(ldpc_rm_unit_tests)
+  set(listMod  0  1  2  3  4)
+  set(listModord 1 2 4 6 8)
+  set(listrv 0 1 2 3)
+  set(listbg 1 2)
+  set(listbaseN 66 50)
+  set(listbaseK 22 10)
+  list(LENGTH listMod modlen)
+  list(LENGTH listrv rvlen)
+  list(LENGTH listbg bglen)
+  # foreach(... RANGE n) iterates from 0 to n inclusive: use the last valid index
+  math(EXPR modlen "${modlen} - 1")
+  math(EXPR rvlen "${rvlen} - 1")
+  math(EXPR bglen "${bglen} - 1")
+  foreach(i IN LISTS ARGN)
+
+    foreach(numbg RANGE ${bglen}) #bg
+      foreach(numrv RANGE ${rvlen}) #rv
+        foreach(nummod RANGE ${modlen})
+          list(GET listbaseN ${numbg} baseNval)
+          list(GET listbaseK ${numbg} baseKval)
+          list(GET listbg ${numbg} bgval)
+          math(EXPR N "${i} * ${baseNval}")
+          math(EXPR K "${i} * ${baseKval}")
+
+          list(GET listMod ${nummod} Modval)
+          list(GET listModord ${nummod} Ordval)
+          list(GET listrv ${numrv} rvval)
+
+          math(EXPR Div "${Ordval}")
+          math(EXPR tmpN "${N} - (${N} % ${Div})")
+          math(EXPR E "${Ordval}*(${tmpN})/${Div}") #same rate (E close to N)
+
+          add_test(NAME ${test_name}-b${bgval}-l${i}-e${E}-f10-m${Modval}-r${rvval}-M${N} COMMAND ${test_command} -b${bgval} -l${i} -e${E} -f10 -m${Modval} -r${rvval} -M${N}
+                  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/ldpc
+                  )
+          math(EXPR M "${N} / 2")
+          # Half size buffer
+          add_test(NAME ${test_name}-b${bgval}-l${i}-e${E}-f10-m${Modval}-r${rvval}-M${M} COMMAND ${test_command} -b${bgval} -l${i} -e${E} -f10 -m${Modval} -r${rvval} -M${M}
+                  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/ldpc
+                  )
+          math(EXPR Div "2*${Ordval}")
+          math(EXPR tmpN "${N} - (${N} % ${Div})")
+          math(EXPR E "${Ordval}*(${tmpN})/${Div}") #twice the rate
+          add_test(NAME ${test_name}-b${bgval}-l${i}-e${E}-f10-m${Modval}-r${rvval}-M${N} COMMAND ${test_command} -b${bgval} -l${i} -e${E} -f10 -m${Modval} -r${rvval} -M${N}
+                  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/ldpc
+                  )
+          math(EXPR M "${N}/ 2")
+          # Half size buffer
+          add_test(NAME ${test_name}-b${bgval}-l${i}-e${E}-f10-m${Modval}-r${rvval}-M${M} COMMAND ${test_command} -b${bgval} -l${i} -e${E} -f10 -m${Modval} -r${rvval} -M${M}
+                  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/ldpc
+                  )
+
+          math(EXPR Div "${Ordval}")
+          math(EXPR tmpN "2*${N} - (2*${N} % ${Div})") #Half the rate
+          math(EXPR E "${Ordval}*(${tmpN})/${Div}")
+          add_test(NAME ${test_name}-b${bgval}-l${i}-e${E}-f10-m${Modval}-r${rvval}-M${N} COMMAND ${test_command} -b${bgval} -l${i} -e${E} -f10 -m${Modval} -r${rvval} -M${N}
+
+                  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/ldpc
+                  )
+          math(EXPR M "${N}/ 2")
+          # Half size buffer
+          add_test(NAME ${test_name}-b${bgval}-l${i}-e${E}-f10-m${Modval}-r${rvval}-M${M} COMMAND ${test_command} -b${bgval} -l${i} -e${E} -f10 -m${Modval} -r${rvval} -M${M}
+                  WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/ldpc
+                  )
+        endforeach()
+      endforeach()
+    endforeach()
+  endforeach()
+endfunction()
+# Rate-matching unit tests for all lifting sizes; ldpc_rm_unit_tests() picks up
+# test_name and test_command from this scope.
+set(test_name LDPC-RM-BG1)
+set(test_command ldpc_rm_test)
+ldpc_rm_unit_tests(${lifting_sizes})
+
+# End-to-end rate-matching chain, run with its default options.
+add_test(NAME LDPC-RM-chain COMMAND ldpc_rm_chain_test)
--- a/lib/src/phy/fec/ldpc/test/examplesBG1.dat
+++ b/lib/src/phy/fec/ldpc/test/examplesBG1.dat
--- a/lib/src/phy/fec/ldpc/test/examplesBG2.dat
+++ b/lib/src/phy/fec/ldpc/test/examplesBG2.dat
--- a/lib/src/phy/fec/ldpc/test/ldpc_chain_test.c
+++ b/lib/src/phy/fec/ldpc/test/ldpc_chain_test.c
@ -0,0 +1,526 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_chain_test.c
+ * \brief End-to-end test for LDPC encoder and decoder.
+ *
+ * A batch of example messages is randomly generated, encoded, 2-PAM modulated,
+ * sent over an AWGN channel and, finally, decoded by all three types of
+ * decoder. Transmitted and received messages are compared to estimate the WER.
+ * Multiple batches are simulated if the number of errors is not significant
+ * enough.
+ *
+ * A fixed number of filler bits (F) equal to encoder.bgK - 5 is added to the message.
+ * If the test is called with -e0 (no rate matching), rm_length is set to finalN - F,
+ * so that after rate-dematching (which includes filler bits) the input to the decoder has length N.
+ *
+ * Basic rate-matching simulations can be carried out by setting the codeword
+ * length to a value smaller than the base one.
+ *
+ *
+ * Synopsis: **ldpc_chain_test [options]**
+ *
+ * Options:
+ *  - **-b \<number\>** Base Graph (1 or 2. Default 1).
+ *  - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
+ *  - **-e \<number\>** Codeword length after rate matching (set to 0 [default] for full rate).
+ *  - **-s \<number\>** SNR in dB (Default 0 dB).
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "srslte/phy/channel/ch_awgn.h"
+#include "srslte/phy/fec/ldpc/ldpc_common.h"
+#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
+#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
+#include "srslte/phy/utils/debug.h"
+#include "srslte/phy/utils/random.h"
+#include "srslte/phy/utils/vector.h"
+
+srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2); the -b option stores its value minus 1. */
+int                lift_size  = 2;   /*!< \brief Lifting Size. */
+int                rm_length  = 0;   /*!< \brief Codeword length after rate matching (0 selects finalN - F). */
+int                finalK;           /*!< \brief Number of uncoded bits (message length). */
+int                finalN;           /*!< \brief Number of coded bits (codeword length). */
+float              snr = 0;          /*!< \brief Signal-to-Noise Ratio [dB]. */
+
+#define BATCH_SIZE 100    /*!< \brief Number of codewords in a batch. */
+#define MAX_N_BATCH 10000 /*!< \brief Max number of simulated batches. */
+#define REQ_ERRORS 100    /*!< \brief Minimum number of errors for a significant simulation. */
+#define MS_SF 0.75f       /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
+
+/*!
+ * \brief Prints the command-line synopsis of the test, including the current
+ * default value of each option.
+ *
+ * \param[in] prog Name of the program, as typed on the command line.
+ */
+void usage(char* prog)
+{
+  // the base graph is stored zero-based, the user-facing number is one-based
+  const int shown_bg = base_graph + 1;
+
+  printf("Usage: %s [-bX] [-lX] [-eX] [-sX]\n", prog);
+  printf("\t-b Base Graph [(1 or 2) Default %d]\n", shown_bg);
+  printf("\t-l Lifting Size [Default %d]\n", lift_size);
+  printf("\t-e Word length after rate matching [Default %d (no rate matching, only filler-bits are extracted)]\n",
+         rm_length);
+  printf("\t-s SNR [dB, Default %.1f dB]\n", snr);
+}
+
+/*!
+ * \brief Reads the command-line options into the global test parameters.
+ *
+ * Any unrecognized option prints the usage message and terminates the program
+ * with exit code -1.
+ */
+void parse_args(int argc, char** argv)
+{
+  for (int option = getopt(argc, argv, "b:l:e:s:"); option != -1; option = getopt(argc, argv, "b:l:e:s:")) {
+    switch (option) {
+      case 'b':
+        // user-facing base graph numbers are 1 and 2, the stored value is zero-based
+        base_graph = (int)strtol(optarg, NULL, 10) - 1;
+        break;
+      case 'l':
+        lift_size = (int)strtol(optarg, NULL, 10);
+        break;
+      case 'e':
+        rm_length = (int)strtol(optarg, NULL, 10);
+        break;
+      case 's':
+        snr = strtod(optarg, NULL);
+        break;
+      default:
+        usage(argv[0]);
+        exit(-1);
+    }
+  }
+}
+
+/*!
+ * \brief Prints decoder statistics.
+ */
+void print_decoder(char* title, int n_batches, int n_errors, double elapsed_time);
+
+/*!
+ * \brief Main test function.
+ *
+ * Creates the encoder and all decoders, then simulates batches of BATCH_SIZE
+ * random messages (encoding, 2-PAM modulation, AWGN, LLR computation and
+ * decoding) until the floating-point, 16-bit and 8-bit decoders have each
+ * accumulated REQ_ERRORS word errors or MAX_N_BATCH batches have been run.
+ *
+ * The program exits with -1 if an initialization or allocation fails, if the
+ * requested rate-matched length does not fit the codeword, or if a fixed-point
+ * decoder shows more than ten times the word errors of the floating-point one.
+ *
+ * \return 0 on success.
+ */
+int main(int argc, char** argv)
+{
+  uint8_t* messages_true          = NULL;
+  uint8_t* messages_sim_f         = NULL;
+  uint8_t* messages_sim_s         = NULL;
+  uint8_t* messages_sim_c         = NULL;
+  uint8_t* messages_sim_c_flood   = NULL;
+  uint8_t* messages_sim_avx       = NULL;
+  uint8_t* messages_sim_avx_flood = NULL;
+  uint8_t* codewords              = NULL;
+  float*   symbols_rm             = NULL;
+  float*   symbols                = NULL;
+  int16_t* symbols_s              = NULL;
+  int8_t*  symbols_c              = NULL;
+
+  int i = 0;
+  int j = 0;
+
+  parse_args(argc, argv);
+
+  // create an LDPC encoder
+  srslte_ldpc_encoder_t encoder;
+#ifdef LV_HAVE_AVX2
+  if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_AVX2, base_graph, lift_size) != 0) {
+    perror("encoder init");
+    exit(-1);
+  }
+#else  // no AVX2
+  if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_C, base_graph, lift_size) != 0) {
+    perror("encoder init");
+    exit(-1);
+  }
+#endif // LV_HAVE_AVX2
+
+  // create an LDPC decoder (float)
+  srslte_ldpc_decoder_t decoder_f;
+  if (srslte_ldpc_decoder_init(&decoder_f, SRSLTE_LDPC_DECODER_F, base_graph, lift_size, MS_SF) != 0) {
+    perror("decoder init");
+    exit(-1);
+  }
+  // create an LDPC decoder (16 bit)
+  srslte_ldpc_decoder_t decoder_s;
+  if (srslte_ldpc_decoder_init(&decoder_s, SRSLTE_LDPC_DECODER_S, base_graph, lift_size, MS_SF) != 0) {
+    perror("decoder init");
+    exit(-1);
+  }
+  // create an LDPC decoder (8 bit)
+  srslte_ldpc_decoder_t decoder_c;
+  if (srslte_ldpc_decoder_init(&decoder_c, SRSLTE_LDPC_DECODER_C, base_graph, lift_size, MS_SF) != 0) {
+    perror("decoder init");
+    exit(-1);
+  }
+  // create an LDPC decoder (8 bit, flooded)
+  srslte_ldpc_decoder_t decoder_c_flood;
+  if (srslte_ldpc_decoder_init(&decoder_c_flood, SRSLTE_LDPC_DECODER_C_FLOOD, base_graph, lift_size, MS_SF) != 0) {
+    perror("decoder init");
+    exit(-1);
+  }
+#ifdef LV_HAVE_AVX2
+  // create an LDPC decoder (8 bit, AVX2 version)
+  srslte_ldpc_decoder_t decoder_avx;
+  if (srslte_ldpc_decoder_init(&decoder_avx, SRSLTE_LDPC_DECODER_C_AVX2, base_graph, lift_size, MS_SF) != 0) {
+    perror("decoder init");
+    exit(-1);
+  }
+
+  // create an LDPC decoder (8 bit, flooded scheduling, AVX2 version)
+  srslte_ldpc_decoder_t decoder_avx_flood;
+  if (srslte_ldpc_decoder_init(&decoder_avx_flood, SRSLTE_LDPC_DECODER_C_AVX2_FLOOD, base_graph, lift_size, MS_SF) !=
+      0) {
+    perror("decoder init");
+    exit(-1);
+  }
+#endif // LV_HAVE_AVX2
+
+  // create a random generator
+  srslte_random_t random_gen = srslte_random_init(0);
+
+  uint32_t F = encoder.bgK - 5; // This value is arbitrary
+
+  // the message and codeword lengths must be known before the default
+  // rate-matched length can be derived from them
+  finalK = encoder.liftK;
+  finalN = encoder.liftN - 2 * lift_size;
+
+  if (rm_length == 0) {
+    rm_length = finalN - F;
+  }
+
+  // rm_length + F symbols are written into buffers of finalN entries per codeword
+  if (rm_length < 1 || (uint32_t)rm_length + F > (uint32_t)finalN) {
+    fprintf(stderr,
+            "Invalid codeword length after rate matching (%d): it must be between 1 and %d\n",
+            rm_length,
+            (int)(finalN - F));
+    exit(-1);
+  }
+
+  printf("Test LDPC chain:\n");
+  printf("  Base Graph      -> BG%d\n", encoder.bg + 1);
+  printf("  Lifting Size    -> %d\n", encoder.ls);
+  printf("  Protograph      -> M = %d, N = %d, K = %d\n", encoder.bgM, encoder.bgN, encoder.bgK);
+  printf("  Lifted graph    -> M = %d, N = %d, K = %d\n", encoder.liftM, encoder.liftN, encoder.liftK);
+  printf("  Base code rate -> K/(N-2) = %d/%d = 1/%d\n",
+         encoder.liftK,
+         encoder.liftN - 2 * lift_size,
+         encoder.bg == BG1 ? 3 : 5);
+  printf("\n  Codeword length after rate matching -> E = %d\n", rm_length);
+  printf("  Final code rate -> (K-F)/E = (%d - %d)/%d = %.3f\n",
+         encoder.liftK,
+         F,
+         rm_length,
+         1.0 * (encoder.liftK - F) / rm_length);
+  printf("\n  Signal-to-Noise Ratio -> %.2f dB\n", snr);
+
+  messages_true          = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  messages_sim_f         = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  messages_sim_s         = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  messages_sim_c         = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  messages_sim_c_flood   = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  messages_sim_avx       = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  messages_sim_avx_flood = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  codewords              = malloc(finalN * BATCH_SIZE * sizeof(uint8_t));
+  symbols_rm             = malloc((rm_length + F) * BATCH_SIZE * sizeof(float));
+  symbols                = malloc(finalN * BATCH_SIZE * sizeof(float));
+  symbols_s              = malloc(finalN * BATCH_SIZE * sizeof(int16_t));
+  symbols_c              = malloc(finalN * BATCH_SIZE * sizeof(int8_t));
+  if (!messages_true || !messages_sim_f || !messages_sim_s || !messages_sim_c || //
+      !messages_sim_avx || !messages_sim_c_flood || !messages_sim_avx_flood ||   //
+      !codewords || !symbols_rm || !symbols || !symbols_s || !symbols_c) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  int            i_bit   = 0;
+  int            i_batch = 0;
+  struct timeval t[3];
+  double         elapsed_time_enc           = 0;
+  double         elapsed_time_dec_f         = 0;
+  double         elapsed_time_dec_s         = 0;
+  double         elapsed_time_dec_c         = 0;
+  double         elapsed_time_dec_c_flood   = 0;
+  double         elapsed_time_dec_avx       = 0;
+  double         elapsed_time_dec_avx_flood = 0;
+  int            n_error_words_f            = 0;
+  int            n_error_words_s            = 0;
+  int            n_error_words_c            = 0;
+  int            n_error_words_c_flood      = 0;
+  int            n_error_words_avx          = 0;
+  int            n_error_words_avx_flood    = 0;
+
+  float noise_std_dev = srslte_convert_dB_to_amplitude(-snr);
+
+  int16_t inf15  = (1U << 14U) - 1;
+  float   gain_s = inf15 * noise_std_dev / 20 / (1 / noise_std_dev + 2);
+
+  int8_t inf7   = (1U << 6U) - 1;
+  float  gain_c = inf7 * noise_std_dev / 8 / (1 / noise_std_dev + 2);
+
+  // compute the number of symbols that we need to encode/decode: closest multiple of
+  // the lifting size that is larger than rm_length
+  // Extra F bits are added since filler-bits are not part of the rm_length
+  // (the value does not change between batches, so it is computed and printed once)
+  int n_useful_symbols =
+      (rm_length + F) % lift_size ? ((rm_length + F) / lift_size + 1) * lift_size : (rm_length + F);
+
+  printf("n_useful_symbols = %d\n", n_useful_symbols);
+
+  printf("\nBatch:\n  ");
+
+  while (((n_error_words_f < REQ_ERRORS) || (n_error_words_s < REQ_ERRORS) || (n_error_words_c < REQ_ERRORS)) &&
+         (i_batch < MAX_N_BATCH)) {
+    i_batch++;
+
+    if (!(i_batch % 10)) {
+      printf("%8d", i_batch);
+      if (!(i_batch % 90)) {
+        printf("\n  ");
+      }
+    }
+
+    /* generate data_tx */
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK - F; j++) {
+        messages_true[i * finalK + j] = srslte_random_uniform_int_dist(random_gen, 0, 1);
+      }
+      for (; j < finalK; j++) {
+        messages_true[i * finalK + j] = FILLER_BIT;
+      }
+    }
+
+    // Encode messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_encoder_encode(
+          &encoder, messages_true + j * finalK, codewords + j * finalN, finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_enc += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    // Modulate codewords and match rate (puncturing)
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < rm_length + F; j++) {
+        symbols_rm[i * (rm_length + F) + j] =
+            (codewords[i * finalN + j] == FILLER_BIT) ? INFINITY : 1 - 2 * codewords[i * finalN + j];
+      }
+    }
+
+    // Apply AWGN
+    srslte_ch_awgn_f(symbols_rm, symbols_rm, noise_std_dev, BATCH_SIZE * (rm_length + F));
+
+    // Convert symbols into LLRs
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < rm_length + F; j++) { // +F because filler bits were already considered when modulating
+        symbols[i * finalN + j] = symbols_rm[i * (rm_length + F) + j] * 2 / (noise_std_dev * noise_std_dev);
+      }
+      // the rest of symbols are undetermined, set LLR to 0
+      for (; j < finalN; j++) {
+        symbols[i * finalN + j] = 0;
+      }
+    }
+
+    //////// Floating point
+    // Recover messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_decoder_decode_f(&decoder_f, symbols + j * finalN, messages_sim_f + j * finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_dec_f += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK; j++) {
+        i_bit = i * finalK + j;
+        if (messages_sim_f[i_bit] != (1U & messages_true[i_bit])) {
+          n_error_words_f++;
+          break;
+        }
+      }
+    }
+
+    //////// Fixed point - 16 bit
+    // Quantize LLRs with 16 bits
+    srslte_vec_quant_fs(symbols, symbols_s, gain_s, 0, inf15, BATCH_SIZE * finalN);
+
+    // Recover messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_decoder_decode_s(&decoder_s, symbols_s + j * finalN, messages_sim_s + j * finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_dec_s += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK; j++) {
+        i_bit = i * finalK + j;
+        if (messages_sim_s[i_bit] != (1U & messages_true[i_bit])) {
+          n_error_words_s++;
+          break;
+        }
+      }
+    }
+
+    //////// Fixed point - 8 bit
+    // Quantize LLRs with 8 bits
+    srslte_vec_quant_fc(symbols, symbols_c, gain_c, 0, inf7, BATCH_SIZE * finalN);
+
+    // Recover messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_decoder_decode_c(&decoder_c, symbols_c + j * finalN, messages_sim_c + j * finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_dec_c += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK; j++) {
+        i_bit = i * finalK + j;
+        if (messages_sim_c[i_bit] != (1U & messages_true[i_bit])) {
+          n_error_words_c++;
+          break;
+        }
+      }
+    }
+
+    //////// Fixed point - 8 bit, flooded scheduling
+
+    // Recover messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_decoder_decode_c(
+          &decoder_c_flood, symbols_c + j * finalN, messages_sim_c_flood + j * finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_dec_c_flood += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK; j++) {
+        i_bit = i * finalK + j;
+        if (messages_sim_c_flood[i_bit] != (1U & messages_true[i_bit])) {
+          n_error_words_c_flood++;
+          break;
+        }
+      }
+    }
+
+#ifdef LV_HAVE_AVX2
+    //////// Fixed point - 8 bit - AVX2 version
+
+    // Recover messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_decoder_decode_c(
+          &decoder_avx, symbols_c + j * finalN, messages_sim_avx + j * finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_dec_avx += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK; j++) {
+        i_bit = i * finalK + j;
+        if (messages_sim_avx[i_bit] != (1U & messages_true[i_bit])) {
+          n_error_words_avx++;
+          break;
+        }
+      }
+    }
+
+    //////// Fixed point - 8 bit, flooded scheduling - AVX2 version
+
+    // Recover messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_decoder_decode_c(
+          &decoder_avx_flood, symbols_c + j * finalN, messages_sim_avx_flood + j * finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_dec_avx_flood += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK; j++) {
+        i_bit = i * finalK + j;
+        if (messages_sim_avx_flood[i_bit] != (1U & messages_true[i_bit])) {
+          n_error_words_avx_flood++;
+          break;
+        }
+      }
+    }
+#endif // LV_HAVE_AVX2
+  }
+
+  // word count kept as a double: the products with finalK / finalN do not fit an int
+  const double n_words = (double)i_batch * BATCH_SIZE;
+
+  printf("\nEstimated throughput encoder:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+         n_words / elapsed_time_enc,
+         n_words * finalK / elapsed_time_enc,
+         n_words * finalN / elapsed_time_enc);
+
+  print_decoder("FLOATING POINT", i_batch, n_error_words_f, elapsed_time_dec_f);
+  print_decoder("FIXED POINT (16 bits)", i_batch, n_error_words_s, elapsed_time_dec_s);
+  print_decoder("FIXED POINT (8 bits)", i_batch, n_error_words_c, elapsed_time_dec_c);
+  print_decoder("FIXED POINT (8 bits, flooded scheduling)", i_batch, n_error_words_c_flood, elapsed_time_dec_c_flood);
+
+#ifdef LV_HAVE_AVX2
+  print_decoder("FIXED POINT (8 bits - AVX2)", i_batch, n_error_words_avx, elapsed_time_dec_avx);
+  print_decoder(
+      "FIXED POINT (8 bits, flooded scheduling - AVX2)", i_batch, n_error_words_avx_flood, elapsed_time_dec_avx_flood);
+#endif // LV_HAVE_AVX2
+
+  if (n_error_words_s > 10 * n_error_words_f) {
+    perror("16-bit performance too low!");
+    exit(-1);
+  }
+  if (n_error_words_c > 10 * n_error_words_f) {
+    perror("8-bit performance too low!");
+    exit(-1);
+  }
+  printf("\nTest completed successfully!\n\n");
+
+  free(symbols_c);
+  free(symbols_s);
+  free(symbols);
+  free(symbols_rm);
+  free(codewords);
+  free(messages_sim_avx_flood);
+  free(messages_sim_avx);
+  free(messages_sim_c_flood);
+  free(messages_sim_c);
+  free(messages_sim_s);
+  free(messages_sim_f);
+  free(messages_true);
+  srslte_random_free(random_gen);
+#ifdef LV_HAVE_AVX2
+  srslte_ldpc_decoder_free(&decoder_avx_flood);
+  srslte_ldpc_decoder_free(&decoder_avx);
+#endif // LV_HAVE_AVX2
+  srslte_ldpc_decoder_free(&decoder_c_flood);
+  srslte_ldpc_decoder_free(&decoder_c);
+  srslte_ldpc_decoder_free(&decoder_s);
+  srslte_ldpc_decoder_free(&decoder_f);
+  srslte_ldpc_encoder_free(&encoder);
+
+  return 0;
+}
+
+/*!
+ * Prints the estimated word error rate and throughput of one decoder.
+ *
+ * \param[in] title        Label printed in the section header.
+ * \param[in] n_batches    Number of simulated batches (of BATCH_SIZE words each).
+ * \param[in] n_errors     Number of wrongly decoded words.
+ * \param[in] elapsed_time Total decoding time in seconds.
+ */
+void print_decoder(char* title, int n_batches, int n_errors, double elapsed_time)
+{
+  // word count kept as a double: n_batches * BATCH_SIZE * finalK (or finalN)
+  // exceeds the range of int for long simulations with large lifting sizes
+  const double n_words = (double)n_batches * BATCH_SIZE;
+
+  printf("\n**** %s ****", title);
+  printf("\nEstimated word error rate:\n  %e (%d errors)\n", (double)n_errors / n_batches / BATCH_SIZE, n_errors);
+
+  printf("Estimated throughput decoder:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+         n_words / elapsed_time,
+         n_words * finalK / elapsed_time,
+         n_words * finalN / elapsed_time);
+}
--- a/lib/src/phy/fec/ldpc/test/ldpc_dec_avx2_test.c
+++ b/lib/src/phy/fec/ldpc/test/ldpc_dec_avx2_test.c
@ -0,0 +1,236 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_avx2_test.c
+ * \brief Unit test for the LDPC decoder working with 8-bit integer-valued LLRs (AVX2 implementation).
+ *
+ * It decodes a batch of example codewords and compares the resulting messages
+ * with the expected ones. Reference messages and codewords are provided in
+ * files **examplesBG1.dat** and **examplesBG2.dat**.
+ *
+ * Synopsis: **ldpc_dec_avx2_test [options]**
+ *
+ * Options:
+ *  - **-b \<number\>** Base Graph (1 or 2. Default 1).
+ *  - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
+ *  - **-x \<number\>** Scheduling (0 for layered, 1 for flooded. Default 0).
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "srslte/phy/fec/ldpc/ldpc_common.h"
+#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
+#include "srslte/phy/utils/debug.h"
+
+srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2). */
+int                lift_size  = 2;   /*!< \brief Lifting Size. */
+int                finalK;           /*!< \brief Number of uncoded bits (message length). */
+int                finalN;           /*!< \brief Number of coded bits (codeword length). */
+int                scheduling = 0;   /*!< \brief Message scheduling (0 for layered, 1 for flooded). */
+
+#define NOF_MESSAGES 10 /*!< \brief Number of codewords in the test. */
+
+/*!
+ * \brief Prints test help when a wrong parameter is passed as input.
+ *
+ * The values shown as defaults are the current values of the global test
+ * parameters (the base graph is displayed 1-based).
+ *
+ * \param[in] prog Program name shown in the usage line.
+ */
+void usage(char* prog)
+{
+  printf("Usage: %s [-bX] [-lX] [-xX]\n", prog);
+  printf("\t-b Base Graph [(1 or 2) Default %d]\n", base_graph + 1);
+  printf("\t-l Lifting Size [Default %d]\n", lift_size);
+  // scheduling is an integer flag: print it with %d (with %c a value of 0 would emit a NUL byte)
+  printf("\t-x Scheduling [(0 for layered, 1 for flooded) Default %d]\n", scheduling);
+}
+
+/*!
+ * \brief Parses the command line, updating the global test parameters.
+ *
+ * Recognized options are -b (base graph, given 1-based and stored 0-based),
+ * -l (lifting size) and -x (scheduling). Any other option prints the usage
+ * message and terminates the program with exit code -1.
+ */
+void parse_args(int argc, char** argv)
+{
+  int flag;
+
+  while ((flag = getopt(argc, argv, "b:l:x:")) != -1) {
+    if (flag == 'b') {
+      base_graph = (int)strtol(optarg, NULL, 10) - 1;
+    } else if (flag == 'l') {
+      lift_size = (int)strtol(optarg, NULL, 10);
+    } else if (flag == 'x') {
+      scheduling = (int)strtol(optarg, NULL, 10);
+    } else {
+      usage(argv[0]);
+      exit(-1);
+    }
+  }
+}
+
+/*!
+ * \brief Reads the example file.
+ *
+ * Skips forward in \a ex_file to the header line "ls<lift_size>msgs", reads
+ * NOF_MESSAGES messages of finalK characters each, checks that the following
+ * line is "ls<lift_size>cwds" and then reads NOF_MESSAGES codewords of finalN
+ * characters each. A '-' character is stored as FILLER_BIT, any other
+ * character c as c - '0'. The program exits with -1 if the file ends
+ * prematurely or the codeword header is not found.
+ *
+ * \param[out] messages  Buffer receiving NOF_MESSAGES * finalK message entries.
+ * \param[out] codewords Buffer receiving NOF_MESSAGES * finalN codeword entries.
+ * \param[in]  ex_file   Example file, already open for reading.
+ */
+void get_examples(uint8_t* messages, //
+                  uint8_t* codewords,
+                  FILE*    ex_file)
+{
+  char mstr[15]; // message header string
+  char cstr[15]; // codeword header string
+  char tmp[15];
+  int  rc = 0;
+  int  i  = 0;
+  int  j  = 0;
+
+  snprintf(mstr, sizeof(mstr), "ls%dmsgs", lift_size);
+  snprintf(cstr, sizeof(cstr), "ls%dcwds", lift_size);
+
+  // look for the message header
+  do {
+    // header lines start with 'l': skip everything up to the next one
+    do {
+      rc = fgetc(ex_file);
+    } while (rc != 'l' && rc != EOF);
+    if (rc == EOF) {
+      goto read_error; // header not in file: stop instead of looping forever
+    }
+    tmp[0] = 'l';
+    tmp[1] = '\0';
+    // the field width keeps the read inside tmp (1 + 13 characters + terminator)
+    if (fscanf(ex_file, "%13[^\n]", tmp + 1) == EOF) {
+      goto read_error;
+    }
+    // discard the rest of the line, newline included
+    do {
+      rc = fgetc(ex_file);
+    } while (rc != '\n' && rc != EOF);
+  } while (strcmp(tmp, mstr) != 0);
+
+  // read messages
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    for (i = 0; i < finalK; i++) {
+      rc = fgetc(ex_file);
+      if (rc == EOF) {
+        goto read_error;
+      }
+      messages[j * finalK + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
+    }
+    fgetc(ex_file); // discard newline
+  }
+
+  // the codeword header must follow the messages
+  tmp[0] = '\0';
+  if (fscanf(ex_file, "%14[^\n]", tmp) != 1 || strcmp(tmp, cstr) != 0) {
+    goto read_error;
+  }
+  fgetc(ex_file); // discard newline
+
+  // read codewords
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    for (i = 0; i < finalN; i++) {
+      rc = fgetc(ex_file);
+      if (rc == EOF) {
+        goto read_error;
+      }
+      codewords[j * finalN + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
+    }
+    fgetc(ex_file); // discard newline
+  }
+  return;
+
+read_error:
+  printf("Something went wrong while reading example file.\n");
+  exit(-1);
+}
+
+/*!
+ * \brief Main test function.
+ *
+ * Creates the 8-bit AVX2 decoder selected by the command-line options (layered
+ * or flooded scheduling), reads the reference messages and codewords from the
+ * example file, maps every codeword entry to an LLR (-2 for a 1, +2 otherwise),
+ * decodes all codewords and compares the least significant bit of each decoded
+ * message entry with the reference. The program exits with -1 on any failure.
+ */
+int main(int argc, char** argv)
+{
+  uint8_t* messages_true = NULL;
+  uint8_t* messages_sim  = NULL;
+  uint8_t* codewords     = NULL;
+  int8_t*  symbols       = NULL;
+  int      i             = 0;
+  int      j             = 0;
+
+  FILE* ex_file = NULL;
+  char  file_name[1000];
+
+  parse_args(argc, argv);
+
+  srslte_ldpc_decoder_type_t dectype =
+      (scheduling == 0) ? SRSLTE_LDPC_DECODER_C_AVX2 : SRSLTE_LDPC_DECODER_C_AVX2_FLOOD;
+
+  // create an LDPC decoder
+  srslte_ldpc_decoder_t decoder;
+  if (srslte_ldpc_decoder_init(&decoder, dectype, base_graph, lift_size, 1) != 0) {
+    perror("decoder init");
+    exit(-1);
+  }
+
+  printf("Test LDPC decoder:\n");
+  printf("  Base Graph      -> BG%d\n", decoder.bg + 1);
+  printf("  Lifting Size    -> %d\n", decoder.ls);
+  printf("  Protograph      -> M = %d, N = %d, K = %d\n", decoder.bgM, decoder.bgN, decoder.bgK);
+  printf("  Lifted graph    -> M = %d, N = %d, K = %d\n", decoder.liftM, decoder.liftN, decoder.liftK);
+  printf("  Final code rate -> K/(N-2) = %d/%d = 1/%d\n",
+         decoder.liftK,
+         decoder.liftN - 2 * lift_size,
+         decoder.bg == BG1 ? 3 : 5);
+  printf("  Scheduling: %s\n", scheduling ? "flooded" : "layered");
+
+  // message and codeword lengths used by the example file
+  finalK = decoder.liftK;
+  finalN = decoder.liftN - 2 * lift_size;
+
+  messages_true = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
+  messages_sim  = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
+  codewords     = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
+  symbols       = malloc(finalN * NOF_MESSAGES * sizeof(int8_t));
+  if (!messages_true || !messages_sim || !codewords || !symbols) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  sprintf(file_name, "examplesBG%d.dat", base_graph + 1);
+  printf("\nReading example file %s...\n", file_name);
+  ex_file = fopen(file_name, "re");
+  if (ex_file == NULL) {
+    perror("fopen");
+    exit(-1);
+  }
+
+  get_examples(messages_true, codewords, ex_file);
+
+  fclose(ex_file);
+
+  // noiseless LLRs: negative for a 1, positive for anything else
+  for (i = 0; i < NOF_MESSAGES * finalN; i++) {
+    symbols[i] = codewords[i] == 1 ? -2 : 2;
+  }
+
+  printf("\nDecoding test messages...\n");
+  struct timeval t[3];
+  gettimeofday(&t[1], NULL);
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    printf("  codeword %d\n", j);
+    srslte_ldpc_decoder_decode_c(&decoder, symbols + j * finalN, messages_sim + j * finalK, finalN);
+  }
+  gettimeofday(&t[2], NULL);
+  get_time_interval(t);
+  double elapsed_time = t[0].tv_sec + 1e-6 * t[0].tv_usec;
+  printf("Elapsed time: %e s\n", elapsed_time);
+
+  printf("\nVerifing results...\n");
+  for (i = 0; i < NOF_MESSAGES * finalK; i++) {
+    if ((1U & messages_sim[i]) != (1U & messages_true[i])) {
+      // a decoding mismatch is not a system error: report it directly instead
+      // of through perror(), which would append an unrelated errno description
+      fprintf(stderr, "wrong!! (message %d, bit %d)\n", i / finalK, i % finalK);
+      exit(-1);
+    }
+  }
+
+  printf("Estimated throughput:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+         NOF_MESSAGES / elapsed_time,
+         NOF_MESSAGES * finalK / elapsed_time,
+         NOF_MESSAGES * finalN / elapsed_time);
+
+  printf("\nTest completed successfully!\n\n");
+
+  free(symbols);
+  free(codewords);
+  free(messages_sim);
+  free(messages_true);
+  srslte_ldpc_decoder_free(&decoder);
+}
--- a/lib/src/phy/fec/ldpc/test/ldpc_dec_c_test.c
+++ b/lib/src/phy/fec/ldpc/test/ldpc_dec_c_test.c
@ -0,0 +1,235 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_c_test.c
+ * \brief Unit test for the LDPC decoder working with 8-bit integer-valued LLRs.
+ *
+ * It decodes a batch of example codewords and compares the resulting messages
+ * with the expected ones. Reference messages and codewords are provided in
+ * files **examplesBG1.dat** and **examplesBG2.dat**.
+ *
+ * Synopsis: **ldpc_dec_c_test [options]**
+ *
+ * Options:
+ *  - **-b \<number\>** Base Graph (1 or 2. Default 1).
+ *  - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
+ *  - **-x \<number\>** Scheduling (0 for layered, 1 for flooded. Default 0).
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "srslte/phy/fec/ldpc/ldpc_common.h"
+#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
+#include "srslte/phy/utils/debug.h"
+
+srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2). */
+int                lift_size  = 2;   /*!< \brief Lifting Size. */
+int                finalK;           /*!< \brief Number of uncoded bits (message length). */
+int                finalN;           /*!< \brief Number of coded bits (codeword length). */
+int                scheduling = 0;   /*!< \brief Message scheduling (0 for layered, 1 for flooded). */
+
+#define NOF_MESSAGES 10 /*!< \brief Number of codewords in the test. */
+
+/*!
+ * \brief Prints test help when a wrong parameter is passed as input.
+ *
+ * The values shown as defaults are the current values of the global test
+ * parameters (the base graph is displayed 1-based).
+ *
+ * \param[in] prog Program name shown in the usage line.
+ */
+void usage(char* prog)
+{
+  printf("Usage: %s [-bX] [-lX] [-xX]\n", prog);
+  printf("\t-b Base Graph [(1 or 2) Default %d]\n", base_graph + 1);
+  printf("\t-l Lifting Size [Default %d]\n", lift_size);
+  // scheduling is an integer flag: print it with %d (with %c a value of 0 would emit a NUL byte)
+  printf("\t-x Scheduling [(0 for layered, 1 for flooded) Default %d]\n", scheduling);
+}
+
+/*!
+ * \brief Parses the command line, updating the global test parameters.
+ *
+ * Recognized options are -b (base graph, given 1-based and stored 0-based),
+ * -l (lifting size) and -x (scheduling). Any other option prints the usage
+ * message and terminates the program with exit code -1.
+ */
+void parse_args(int argc, char** argv)
+{
+  int flag;
+
+  while ((flag = getopt(argc, argv, "b:l:x:")) != -1) {
+    if (flag == 'b') {
+      base_graph = (int)strtol(optarg, NULL, 10) - 1;
+    } else if (flag == 'l') {
+      lift_size = (int)strtol(optarg, NULL, 10);
+    } else if (flag == 'x') {
+      scheduling = (int)strtol(optarg, NULL, 10);
+    } else {
+      usage(argv[0]);
+      exit(-1);
+    }
+  }
+}
+
+/*!
+ * \brief Reads the example file.
+ *
+ * Skips forward in \a ex_file to the header line "ls<lift_size>msgs", reads
+ * NOF_MESSAGES messages of finalK characters each, checks that the following
+ * line is "ls<lift_size>cwds" and then reads NOF_MESSAGES codewords of finalN
+ * characters each. A '-' character is stored as FILLER_BIT, any other
+ * character c as c - '0'. The program exits with -1 if the file ends
+ * prematurely or the codeword header is not found.
+ *
+ * \param[out] messages  Buffer receiving NOF_MESSAGES * finalK message entries.
+ * \param[out] codewords Buffer receiving NOF_MESSAGES * finalN codeword entries.
+ * \param[in]  ex_file   Example file, already open for reading.
+ */
+void get_examples(uint8_t* messages, //
+                  uint8_t* codewords,
+                  FILE*    ex_file)
+{
+  char mstr[15]; // message header string
+  char cstr[15]; // codeword header string
+  char tmp[15];
+  int  rc = 0;
+  int  i  = 0;
+  int  j  = 0;
+
+  snprintf(mstr, sizeof(mstr), "ls%dmsgs", lift_size);
+  snprintf(cstr, sizeof(cstr), "ls%dcwds", lift_size);
+
+  // look for the message header
+  do {
+    // header lines start with 'l': skip everything up to the next one
+    do {
+      rc = fgetc(ex_file);
+    } while (rc != 'l' && rc != EOF);
+    if (rc == EOF) {
+      goto read_error; // header not in file: stop instead of looping forever
+    }
+    tmp[0] = 'l';
+    tmp[1] = '\0';
+    // the field width keeps the read inside tmp (1 + 13 characters + terminator)
+    if (fscanf(ex_file, "%13[^\n]", tmp + 1) == EOF) {
+      goto read_error;
+    }
+    // discard the rest of the line, newline included
+    do {
+      rc = fgetc(ex_file);
+    } while (rc != '\n' && rc != EOF);
+  } while (strcmp(tmp, mstr) != 0);
+
+  // read messages
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    for (i = 0; i < finalK; i++) {
+      rc = fgetc(ex_file);
+      if (rc == EOF) {
+        goto read_error;
+      }
+      messages[j * finalK + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
+    }
+    fgetc(ex_file); // discard newline
+  }
+
+  // the codeword header must follow the messages
+  tmp[0] = '\0';
+  if (fscanf(ex_file, "%14[^\n]", tmp) != 1 || strcmp(tmp, cstr) != 0) {
+    goto read_error;
+  }
+  fgetc(ex_file); // discard newline
+
+  // read codewords
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    for (i = 0; i < finalN; i++) {
+      rc = fgetc(ex_file);
+      if (rc == EOF) {
+        goto read_error;
+      }
+      codewords[j * finalN + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
+    }
+    fgetc(ex_file); // discard newline
+  }
+  return;
+
+read_error:
+  printf("Something went wrong while reading example file.\n");
+  exit(-1);
+}
+
+/*!
+ * \brief Main test function.
+ *
+ * Creates the 8-bit decoder selected by the command-line options (layered or
+ * flooded scheduling), reads the reference messages and codewords from the
+ * example file, maps every codeword entry to an LLR (-2 for a 1, +2 otherwise),
+ * decodes all codewords and compares the least significant bit of each decoded
+ * message entry with the reference. The program exits with -1 on any failure.
+ */
+int main(int argc, char** argv)
+{
+  uint8_t* messages_true = NULL;
+  uint8_t* messages_sim  = NULL;
+  uint8_t* codewords     = NULL;
+  int8_t*  symbols       = NULL;
+  int      i             = 0;
+  int      j             = 0;
+
+  FILE* ex_file = NULL;
+  char  file_name[1000];
+
+  parse_args(argc, argv);
+
+  srslte_ldpc_decoder_type_t dectype = (scheduling == 0) ? SRSLTE_LDPC_DECODER_C : SRSLTE_LDPC_DECODER_C_FLOOD;
+
+  // create an LDPC decoder
+  srslte_ldpc_decoder_t decoder;
+  if (srslte_ldpc_decoder_init(&decoder, dectype, base_graph, lift_size, 1) != 0) {
+    perror("decoder init");
+    exit(-1);
+  }
+
+  printf("Test LDPC decoder:\n");
+  printf("  Base Graph      -> BG%d\n", decoder.bg + 1);
+  printf("  Lifting Size    -> %d\n", decoder.ls);
+  printf("  Protograph      -> M = %d, N = %d, K = %d\n", decoder.bgM, decoder.bgN, decoder.bgK);
+  printf("  Lifted graph    -> M = %d, N = %d, K = %d\n", decoder.liftM, decoder.liftN, decoder.liftK);
+  printf("  Final code rate -> K/(N-2) = %d/%d = 1/%d\n",
+         decoder.liftK,
+         decoder.liftN - 2 * lift_size,
+         decoder.bg == BG1 ? 3 : 5);
+  printf("  Scheduling: %s\n", scheduling ? "flooded" : "layered");
+
+  // message and codeword lengths used by the example file
+  finalK = decoder.liftK;
+  finalN = decoder.liftN - 2 * lift_size;
+
+  messages_true = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
+  messages_sim  = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
+  codewords     = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
+  symbols       = malloc(finalN * NOF_MESSAGES * sizeof(int8_t));
+  if (!messages_true || !messages_sim || !codewords || !symbols) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  sprintf(file_name, "examplesBG%d.dat", base_graph + 1);
+  printf("\nReading example file %s...\n", file_name);
+  ex_file = fopen(file_name, "re");
+  if (ex_file == NULL) {
+    perror("fopen");
+    exit(-1);
+  }
+
+  get_examples(messages_true, codewords, ex_file);
+
+  fclose(ex_file);
+
+  // noiseless LLRs: negative for a 1, positive for anything else
+  for (i = 0; i < NOF_MESSAGES * finalN; i++) {
+    symbols[i] = codewords[i] == 1 ? -2 : 2;
+  }
+
+  printf("\nDecoding test messages...\n");
+  struct timeval t[3];
+  gettimeofday(&t[1], NULL);
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    printf("  codeword %d\n", j);
+    srslte_ldpc_decoder_decode_c(&decoder, symbols + j * finalN, messages_sim + j * finalK, finalN);
+  }
+  gettimeofday(&t[2], NULL);
+  get_time_interval(t);
+  double elapsed_time = t[0].tv_sec + 1e-6 * t[0].tv_usec;
+  printf("Elapsed time: %e s\n", elapsed_time);
+
+  printf("\nVerifing results...\n");
+  for (i = 0; i < NOF_MESSAGES * finalK; i++) {
+    if ((1U & messages_sim[i]) != (1U & messages_true[i])) {
+      // a decoding mismatch is not a system error: report it directly instead
+      // of through perror(), which would append an unrelated errno description
+      fprintf(stderr, "wrong!! (message %d, bit %d)\n", i / finalK, i % finalK);
+      exit(-1);
+    }
+  }
+
+  printf("Estimated throughput:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+         NOF_MESSAGES / elapsed_time,
+         NOF_MESSAGES * finalK / elapsed_time,
+         NOF_MESSAGES * finalN / elapsed_time);
+
+  printf("\nTest completed successfully!\n\n");
+
+  free(symbols);
+  free(codewords);
+  free(messages_sim);
+  free(messages_true);
+  srslte_ldpc_decoder_free(&decoder);
+}
--- a/lib/src/phy/fec/ldpc/test/ldpc_dec_s_test.c
+++ b/lib/src/phy/fec/ldpc/test/ldpc_dec_s_test.c
@ -0,0 +1,227 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_s_test.c
+ * \brief Unit test for the LDPC decoder working with 16-bit integer-valued LLRs.
+ *
+ * It decodes a batch of example codewords and compares the resulting messages
+ * with the expected ones. Reference messages and codewords are provided in
+ * files **examplesBG1.dat** and **examplesBG2.dat**.
+ *
+ * Synopsis: **ldpc_dec_s_test [options]**
+ *
+ * Options:
+ *  - **-b \<number\>** Base Graph (1 or 2. Default 1).
+ *  - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "srslte/phy/fec/ldpc/ldpc_common.h"
+#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
+#include "srslte/phy/utils/debug.h"
+
+srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2). */
+int                lift_size  = 2;   /*!< \brief Lifting Size. */
+int                finalK;           /*!< \brief Number of uncoded bits (message length). */
+int                finalN;           /*!< \brief Number of coded bits (codeword length). */
+
+#define NOF_MESSAGES 10 /*!< \brief Number of codewords in the test. */
+
+/*!
+ * \brief Prints a short help text listing the accepted options together with
+ * the current values of the corresponding test parameters.
+ *
+ * \param[in] prog Program name shown in the usage line.
+ */
+void usage(char* prog)
+{
+  const int bg_number = base_graph + 1; // base graphs are displayed 1-based
+
+  printf("Usage: %s [-bX] [-lX]\n"
+         "\t-b Base Graph [(1 or 2) Default %d]\n"
+         "\t-l Lifting Size [Default %d]\n",
+         prog,
+         bg_number,
+         lift_size);
+}
+
+/*!
+ * \brief Parses the command line, updating the global test parameters.
+ *
+ * Recognized options are -b (base graph, given 1-based and stored 0-based)
+ * and -l (lifting size). Any other option prints the usage message and
+ * terminates the program with exit code -1.
+ */
+void parse_args(int argc, char** argv)
+{
+  int flag;
+
+  while ((flag = getopt(argc, argv, "b:l:")) != -1) {
+    if (flag == 'b') {
+      base_graph = (int)strtol(optarg, NULL, 10) - 1;
+    } else if (flag == 'l') {
+      lift_size = (int)strtol(optarg, NULL, 10);
+    } else {
+      usage(argv[0]);
+      exit(-1);
+    }
+  }
+}
+
+/*!
+ * \brief Reads the example file.
+ *
+ * Skips forward in \a ex_file to the header line "ls<lift_size>msgs", reads
+ * NOF_MESSAGES messages of finalK characters each, checks that the following
+ * line is "ls<lift_size>cwds" and then reads NOF_MESSAGES codewords of finalN
+ * characters each. A '-' character is stored as FILLER_BIT, any other
+ * character c as c - '0'. The program exits with -1 if the file ends
+ * prematurely or the codeword header is not found.
+ *
+ * \param[out] messages  Buffer receiving NOF_MESSAGES * finalK message entries.
+ * \param[out] codewords Buffer receiving NOF_MESSAGES * finalN codeword entries.
+ * \param[in]  ex_file   Example file, already open for reading.
+ */
+void get_examples(uint8_t* messages, //
+                  uint8_t* codewords,
+                  FILE*    ex_file)
+{
+  char mstr[15]; // message header string
+  char cstr[15]; // codeword header string
+  char tmp[15];
+  int  rc = 0;
+  int  i  = 0;
+  int  j  = 0;
+
+  snprintf(mstr, sizeof(mstr), "ls%dmsgs", lift_size);
+  snprintf(cstr, sizeof(cstr), "ls%dcwds", lift_size);
+
+  // look for the message header
+  do {
+    // header lines start with 'l': skip everything up to the next one
+    do {
+      rc = fgetc(ex_file);
+    } while (rc != 'l' && rc != EOF);
+    if (rc == EOF) {
+      goto read_error; // header not in file: stop instead of looping forever
+    }
+    tmp[0] = 'l';
+    tmp[1] = '\0';
+    // the field width keeps the read inside tmp (1 + 13 characters + terminator)
+    if (fscanf(ex_file, "%13[^\n]", tmp + 1) == EOF) {
+      goto read_error;
+    }
+    // discard the rest of the line, newline included
+    do {
+      rc = fgetc(ex_file);
+    } while (rc != '\n' && rc != EOF);
+  } while (strcmp(tmp, mstr) != 0);
+
+  // read messages
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    for (i = 0; i < finalK; i++) {
+      rc = fgetc(ex_file);
+      if (rc == EOF) {
+        goto read_error;
+      }
+      messages[j * finalK + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
+    }
+    fgetc(ex_file); // discard newline
+  }
+
+  // the codeword header must follow the messages
+  tmp[0] = '\0';
+  if (fscanf(ex_file, "%14[^\n]", tmp) != 1 || strcmp(tmp, cstr) != 0) {
+    goto read_error;
+  }
+  fgetc(ex_file); // discard newline
+
+  // read codewords
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    for (i = 0; i < finalN; i++) {
+      rc = fgetc(ex_file);
+      if (rc == EOF) {
+        goto read_error;
+      }
+      codewords[j * finalN + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
+    }
+    fgetc(ex_file); // discard newline
+  }
+  return;
+
+read_error:
+  printf("Something went wrong while reading example file.\n");
+  exit(-1);
+}
+
+/*!
+ * \brief Main test function.
+ *
+ * Creates the 16-bit decoder for the requested base graph and lifting size,
+ * reads the reference messages and codewords from the example file, maps every
+ * codeword entry to an LLR (-50 for a 1, +50 otherwise), decodes all codewords
+ * and compares the least significant bit of each decoded message entry with
+ * the reference. The program exits with -1 on any failure.
+ */
+int main(int argc, char** argv)
+{
+  uint8_t* messages_true = NULL;
+  uint8_t* messages_sim  = NULL;
+  uint8_t* codewords     = NULL;
+  int16_t* symbols       = NULL;
+  int      i             = 0;
+  int      j             = 0;
+
+  FILE* ex_file = NULL;
+  char  file_name[1000];
+
+  parse_args(argc, argv);
+
+  // create an LDPC decoder
+  srslte_ldpc_decoder_t decoder;
+  if (srslte_ldpc_decoder_init(&decoder, SRSLTE_LDPC_DECODER_S, base_graph, lift_size, 1) != 0) {
+    perror("decoder init");
+    exit(-1);
+  }
+
+  printf("Test LDPC decoder:\n");
+  printf("  Base Graph      -> BG%d\n", decoder.bg + 1);
+  printf("  Lifting Size    -> %d\n", decoder.ls);
+  printf("  Protograph      -> M = %d, N = %d, K = %d\n", decoder.bgM, decoder.bgN, decoder.bgK);
+  printf("  Lifted graph    -> M = %d, N = %d, K = %d\n", decoder.liftM, decoder.liftN, decoder.liftK);
+  printf("  Final code rate -> K/(N-2) = %d/%d = 1/%d\n",
+         decoder.liftK,
+         decoder.liftN - 2 * lift_size,
+         decoder.bg == BG1 ? 3 : 5);
+
+  // message and codeword lengths used by the example file
+  finalK = decoder.liftK;
+  finalN = decoder.liftN - 2 * lift_size;
+
+  messages_true = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
+  messages_sim  = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
+  codewords     = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
+  symbols       = malloc(finalN * NOF_MESSAGES * sizeof(int16_t));
+  if (!messages_true || !messages_sim || !codewords || !symbols) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  sprintf(file_name, "examplesBG%d.dat", base_graph + 1);
+  printf("\nReading example file %s...\n", file_name);
+  ex_file = fopen(file_name, "re");
+  if (ex_file == NULL) {
+    perror("fopen");
+    exit(-1);
+  }
+
+  get_examples(messages_true, codewords, ex_file);
+
+  fclose(ex_file);
+
+  // noiseless LLRs: negative for a 1, positive for anything else
+  for (i = 0; i < NOF_MESSAGES * finalN; i++) {
+    symbols[i] = codewords[i] == 1 ? -50 : 50;
+  }
+
+  printf("\nDecoding test messages...\n");
+  struct timeval t[3];
+  gettimeofday(&t[1], NULL);
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    printf("  codeword %d\n", j);
+    srslte_ldpc_decoder_decode_s(&decoder, symbols + j * finalN, messages_sim + j * finalK, finalN);
+  }
+  gettimeofday(&t[2], NULL);
+  get_time_interval(t);
+  double elapsed_time = t[0].tv_sec + 1e-6 * t[0].tv_usec;
+  printf("Elapsed time: %e s\n", elapsed_time);
+
+  printf("\nVerifing results...\n");
+  for (i = 0; i < NOF_MESSAGES * finalK; i++) {
+    if ((1U & messages_sim[i]) != (1U & messages_true[i])) {
+      // a decoding mismatch is not a system error: report it directly instead
+      // of through perror(), which would append an unrelated errno description
+      fprintf(stderr, "wrong!! (message %d, bit %d)\n", i / finalK, i % finalK);
+      exit(-1);
+    }
+  }
+
+  printf("Estimated throughput:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+         NOF_MESSAGES / elapsed_time,
+         NOF_MESSAGES * finalK / elapsed_time,
+         NOF_MESSAGES * finalN / elapsed_time);
+
+  printf("\nTest completed successfully!\n\n");
+
+  free(symbols);
+  free(codewords);
+  free(messages_sim);
+  free(messages_true);
+  srslte_ldpc_decoder_free(&decoder);
+}
--- a/lib/src/phy/fec/ldpc/test/ldpc_dec_test.c
+++ b/lib/src/phy/fec/ldpc/test/ldpc_dec_test.c
@ -0,0 +1,227 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_dec_test.c
+ * \brief Unit test for the LDPC decoder working with float-valued LLRs.
+ *
+ * It decodes a batch of example codewords and compares the resulting messages
+ * with the expected ones. Reference messages and codewords are provided in
+ * files **examplesBG1.dat** and **examplesBG2.dat**.
+ *
+ * Synopsis: **ldpc_dec_test [options]**
+ *
+ * Options:
+ *  - **-b \<number\>** Base Graph (1 or 2. Default 1).
+ *  - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "srslte/phy/fec/ldpc/ldpc_common.h"
+#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
+#include "srslte/phy/utils/debug.h"
+
+srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2). */
+int                lift_size  = 2;   /*!< \brief Lifting Size. */
+int                finalK;           /*!< \brief Number of uncoded bits (message length). */
+int                finalN;           /*!< \brief Number of coded bits (codeword length). */
+
+#define NOF_MESSAGES 10 /*!< \brief Number of codewords in the test. */
+
+/*!
+ * \brief Prints a short help text listing the accepted options together with
+ * the current values of the corresponding test parameters.
+ *
+ * \param[in] prog Program name shown in the usage line.
+ */
+void usage(char* prog)
+{
+  const int bg_number = base_graph + 1; // base graphs are displayed 1-based
+
+  printf("Usage: %s [-bX] [-lX]\n"
+         "\t-b Base Graph [(1 or 2) Default %d]\n"
+         "\t-l Lifting Size [Default %d]\n",
+         prog,
+         bg_number,
+         lift_size);
+}
+
+/*!
+ * \brief Parses the command line, updating the global test parameters.
+ *
+ * Recognized options are -b (base graph, given 1-based and stored 0-based)
+ * and -l (lifting size). Any other option prints the usage message and
+ * terminates the program with exit code -1.
+ */
+void parse_args(int argc, char** argv)
+{
+  int flag;
+
+  while ((flag = getopt(argc, argv, "b:l:")) != -1) {
+    if (flag == 'b') {
+      base_graph = (int)strtol(optarg, NULL, 10) - 1;
+    } else if (flag == 'l') {
+      lift_size = (int)strtol(optarg, NULL, 10);
+    } else {
+      usage(argv[0]);
+      exit(-1);
+    }
+  }
+}
+
+/*!
+ * \brief Reads the example file.
+ *
+ * Skips forward in \a ex_file to the header line "ls<lift_size>msgs", reads
+ * NOF_MESSAGES messages of finalK characters each, checks that the following
+ * line is "ls<lift_size>cwds" and then reads NOF_MESSAGES codewords of finalN
+ * characters each. A '-' character is stored as FILLER_BIT, any other
+ * character c as c - '0'. The program exits with -1 if the file ends
+ * prematurely or the codeword header is not found.
+ *
+ * \param[out] messages  Buffer receiving NOF_MESSAGES * finalK message entries.
+ * \param[out] codewords Buffer receiving NOF_MESSAGES * finalN codeword entries.
+ * \param[in]  ex_file   Example file, already open for reading.
+ */
+void get_examples(uint8_t* messages, //
+                  uint8_t* codewords,
+                  FILE*    ex_file)
+{
+  char mstr[15]; // message header string
+  char cstr[15]; // codeword header string
+  char tmp[15];
+  int  rc = 0;
+  int  i  = 0;
+  int  j  = 0;
+
+  snprintf(mstr, sizeof(mstr), "ls%dmsgs", lift_size);
+  snprintf(cstr, sizeof(cstr), "ls%dcwds", lift_size);
+
+  // look for the message header
+  do {
+    // header lines start with 'l': skip everything up to the next one
+    do {
+      rc = fgetc(ex_file);
+    } while (rc != 'l' && rc != EOF);
+    if (rc == EOF) {
+      goto read_error; // header not in file: stop instead of looping forever
+    }
+    tmp[0] = 'l';
+    tmp[1] = '\0';
+    // the field width keeps the read inside tmp (1 + 13 characters + terminator)
+    if (fscanf(ex_file, "%13[^\n]", tmp + 1) == EOF) {
+      goto read_error;
+    }
+    // discard the rest of the line, newline included
+    do {
+      rc = fgetc(ex_file);
+    } while (rc != '\n' && rc != EOF);
+  } while (strcmp(tmp, mstr) != 0);
+
+  // read messages
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    for (i = 0; i < finalK; i++) {
+      rc = fgetc(ex_file);
+      if (rc == EOF) {
+        goto read_error;
+      }
+      messages[j * finalK + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
+    }
+    fgetc(ex_file); // discard newline
+  }
+
+  // the codeword header must follow the messages
+  tmp[0] = '\0';
+  if (fscanf(ex_file, "%14[^\n]", tmp) != 1 || strcmp(tmp, cstr) != 0) {
+    goto read_error;
+  }
+  fgetc(ex_file); // discard newline
+
+  // read codewords
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    for (i = 0; i < finalN; i++) {
+      rc = fgetc(ex_file);
+      if (rc == EOF) {
+        goto read_error;
+      }
+      codewords[j * finalN + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
+    }
+    fgetc(ex_file); // discard newline
+  }
+  return;
+
+read_error:
+  printf("Something went wrong while reading example file.\n");
+  exit(-1);
+}
+
+/*!
+ * \brief Main test function.
+ *
+ * Creates the float decoder for the requested base graph and lifting size,
+ * reads the reference messages and codewords from the example file, maps every
+ * codeword entry to an LLR (-50 for a 1, +50 otherwise), decodes all codewords
+ * and compares the least significant bit of each decoded message entry with
+ * the reference. The program exits with -1 on any failure.
+ */
+int main(int argc, char** argv)
+{
+  uint8_t* messages_true = NULL;
+  uint8_t* messages_sim  = NULL;
+  uint8_t* codewords     = NULL;
+  float*   symbols       = NULL;
+  int      i             = 0;
+  int      j             = 0;
+
+  FILE* ex_file = NULL;
+  char  file_name[1000];
+
+  parse_args(argc, argv);
+
+  // create an LDPC decoder
+  srslte_ldpc_decoder_t decoder;
+  if (srslte_ldpc_decoder_init(&decoder, SRSLTE_LDPC_DECODER_F, base_graph, lift_size, 1) != 0) {
+    perror("decoder init");
+    exit(-1);
+  }
+
+  printf("Test LDPC decoder:\n");
+  printf("  Base Graph      -> BG%d\n", decoder.bg + 1);
+  printf("  Lifting Size    -> %d\n", decoder.ls);
+  printf("  Protograph      -> M = %d, N = %d, K = %d\n", decoder.bgM, decoder.bgN, decoder.bgK);
+  printf("  Lifted graph    -> M = %d, N = %d, K = %d\n", decoder.liftM, decoder.liftN, decoder.liftK);
+  printf("  Final code rate -> K/(N-2) = %d/%d = 1/%d\n",
+         decoder.liftK,
+         decoder.liftN - 2 * lift_size,
+         decoder.bg == BG1 ? 3 : 5);
+
+  // message and codeword lengths used by the example file
+  finalK = decoder.liftK;
+  finalN = decoder.liftN - 2 * lift_size;
+
+  messages_true = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
+  messages_sim  = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
+  codewords     = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
+  symbols       = malloc(finalN * NOF_MESSAGES * sizeof(float));
+  if (!messages_true || !messages_sim || !codewords || !symbols) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  sprintf(file_name, "examplesBG%d.dat", base_graph + 1);
+  printf("\nReading example file %s...\n", file_name);
+  ex_file = fopen(file_name, "re");
+  if (ex_file == NULL) {
+    perror("fopen");
+    exit(-1);
+  }
+
+  get_examples(messages_true, codewords, ex_file);
+
+  fclose(ex_file);
+
+  // noiseless LLRs: negative for a 1, positive for anything else
+  for (i = 0; i < NOF_MESSAGES * finalN; i++) {
+    symbols[i] = codewords[i] == 1 ? -50 : 50;
+  }
+
+  printf("\nDecoding test messages...\n");
+  struct timeval t[3];
+  gettimeofday(&t[1], NULL);
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    printf("  codeword %d\n", j);
+    srslte_ldpc_decoder_decode_f(&decoder, symbols + j * finalN, messages_sim + j * finalK, finalN);
+  }
+  gettimeofday(&t[2], NULL);
+  get_time_interval(t);
+  double elapsed_time = t[0].tv_sec + 1e-6 * t[0].tv_usec;
+  printf("Elapsed time: %e s\n", elapsed_time);
+
+  printf("\nVerifing results...\n");
+  for (i = 0; i < NOF_MESSAGES * finalK; i++) {
+    if ((1U & messages_sim[i]) != (1U & messages_true[i])) {
+      // a decoding mismatch is not a system error: report it directly instead
+      // of through perror(), which would append an unrelated errno description
+      fprintf(stderr, "wrong!! (message %d, bit %d)\n", i / finalK, i % finalK);
+      exit(-1);
+    }
+  }
+
+  printf("Estimated throughput:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+         NOF_MESSAGES / elapsed_time,
+         NOF_MESSAGES * finalK / elapsed_time,
+         NOF_MESSAGES * finalN / elapsed_time);
+
+  printf("\nTest completed successfully!\n\n");
+
+  free(symbols);
+  free(codewords);
+  free(messages_sim);
+  free(messages_true);
+  srslte_ldpc_decoder_free(&decoder);
+}
--- a/lib/src/phy/fec/ldpc/test/ldpc_enc_avx2_test.c
+++ b/lib/src/phy/fec/ldpc/test/ldpc_enc_avx2_test.c
@ -0,0 +1,226 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_enc_avx2_test.c
+ * \brief Unit test for the LDPC encoder (SIMD-optimized version).
+ *
+ * It encodes a batch of example messages and compares the resulting codewords
+ * with the expected ones. Reference messages and codewords are provided in
+ * files **examplesBG1.dat** and **examplesBG2.dat**.
+ *
+ * Synopsis: **ldpc_enc_avx2_test [options]**
+ *
+ * Options:
+ *  - **-b \<number\>** Base Graph (1 or 2. Default 1).
+ *  - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "srslte/phy/fec/ldpc/ldpc_common.h"
+#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
+#include "srslte/phy/utils/debug.h"
+
+srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2). */
+int                lift_size  = 2;   /*!< \brief Lifting Size. */
+int                finalK;           /*!< \brief Number of uncoded bits (message length). */
+int                finalN;           /*!< \brief Number of coded bits (codeword length). */
+
+#define NOF_MESSAGES 10 /*!< \brief Number of codewords in the test. */
+#define NOF_REPS 1000   /*!< \brief Number of times tests are repeated (for computing throughput). */
+
+/*!
+ * \brief Prints the command-line synopsis of the test.
+ *
+ * The values shown as defaults are the current contents of the global settings
+ * \c base_graph (printed one-based) and \c lift_size.
+ *
+ * \param[in] prog Program name shown in the usage line.
+ */
+void usage(char* prog)
+{
+  printf("Usage: %s [-bX] [-lX]\n"
+         "\t-b Base Graph [(1 or 2) Default %d]\n"
+         "\t-l Lifting Size [Default %d]\n",
+         prog,
+         base_graph + 1,
+         lift_size);
+}
+
+/*!
+ * \brief Parses the command line and updates the global test settings.
+ *
+ * Recognized options:
+ *  - **-b** base graph, given as 1 or 2 and stored zero-based in \c base_graph;
+ *  - **-l** lifting size, stored in \c lift_size.
+ *
+ * Option values must be plain decimal integers. Non-numeric text, trailing
+ * characters, a base graph other than 1 or 2, or a non-positive lifting size
+ * print the usage message and terminate the program with exit code -1, exactly
+ * as an unknown option does.
+ *
+ * \param[in] argc Number of command-line arguments.
+ * \param[in] argv Command-line arguments.
+ */
+void parse_args(int argc, char** argv)
+{
+  int opt = 0;
+  while ((opt = getopt(argc, argv, "b:l:")) != -1) {
+    char* end   = NULL;
+    long  value = 0;
+    switch (opt) {
+      case 'b':
+        value = strtol(optarg, &end, 10);
+        // strtol() returns 0 for garbage: inspect the end pointer so that bad input is rejected
+        if (end == optarg || *end != '\0' || (value != 1 && value != 2)) {
+          usage(argv[0]);
+          exit(-1);
+        }
+        base_graph = (int)value - 1;
+        break;
+      case 'l':
+        value = strtol(optarg, &end, 10);
+        // the last test rejects values that do not survive the narrowing to int
+        if (end == optarg || *end != '\0' || value <= 0 || (long)(int)value != value) {
+          usage(argv[0]);
+          exit(-1);
+        }
+        lift_size = (int)value;
+        break;
+      default:
+        usage(argv[0]);
+        exit(-1);
+    }
+  }
+}
+
+/*!
+ * \brief Aborts the test after a malformed or truncated example file is detected.
+ */
+static void examples_error(void)
+{
+  printf("Something went wrong while reading example file.\n");
+  exit(-1);
+}
+
+/*!
+ * \brief Reads the remainder of the current line of the example file.
+ *
+ * Characters are consumed up to and including the next newline (or up to EOF).
+ * At most \c size - 1 of them are stored in \c buffer, which is always
+ * null-terminated; the excess characters are discarded.
+ *
+ * \param[in]  ex_file Example file, already open for reading.
+ * \param[out] buffer  Destination string.
+ * \param[in]  size    Capacity of \c buffer (must be at least 1).
+ */
+static void read_line_tail(FILE* ex_file, char* buffer, int size)
+{
+  int len = 0;
+  int rc  = fgetc(ex_file);
+  while (rc != EOF && rc != '\n') {
+    if (len < size - 1) {
+      buffer[len++] = (char)rc;
+    }
+    rc = fgetc(ex_file);
+  }
+  buffer[len] = '\0';
+}
+
+/*!
+ * \brief Reads NOF_MESSAGES rows of \c row_length bits each.
+ *
+ * Every character of a row is stored as one byte: '-' becomes FILLER_BIT, any
+ * other character \c c becomes \c c - '0'. One extra character (the newline) is
+ * discarded after each row. The test is aborted if the file ends inside a row.
+ *
+ * \param[in]  ex_file    Example file, positioned at the first row.
+ * \param[out] bits       Destination array with NOF_MESSAGES * row_length entries.
+ * \param[in]  row_length Number of bits in each row.
+ */
+static void read_rows(FILE* ex_file, uint8_t* bits, int row_length)
+{
+  for (int j = 0; j < NOF_MESSAGES; j++) {
+    for (int i = 0; i < row_length; i++) {
+      int rc = fgetc(ex_file);
+      if (rc == EOF) {
+        examples_error();
+      }
+      bits[j * row_length + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
+    }
+    fgetc(ex_file); // discard newline
+  }
+}
+
+/*!
+ * \brief Reads the example file.
+ *
+ * Looks for the section header "ls<lift_size>msgs", reads NOF_MESSAGES messages
+ * of \c finalK bits, checks that the header "ls<lift_size>cwds" follows and
+ * reads NOF_MESSAGES codewords of \c finalN bits. The program is terminated
+ * with exit code -1 if a header is missing or the file ends prematurely.
+ *
+ * \param[out] messages  Array receiving NOF_MESSAGES * finalK message bits.
+ * \param[out] codewords Array receiving NOF_MESSAGES * finalN codeword bits.
+ * \param[in]  ex_file   Example file, already open for reading.
+ */
+void get_examples(uint8_t* messages, //
+                  uint8_t* codewords,
+                  FILE*    ex_file)
+{
+  // 32 bytes hold the longest possible header ("ls", an int, "msgs") with room to spare, so a
+  // line that had to be truncated is longer than any header and can never compare equal to one
+  char mstr[32]; // header of the message section
+  char cstr[32]; // header of the codeword section
+  char line[32]; // candidate header read from the file
+
+  snprintf(mstr, sizeof(mstr), "ls%dmsgs", lift_size);
+  snprintf(cstr, sizeof(cstr), "ls%dcwds", lift_size);
+
+  // look for the message header: skip to the next 'l', then compare the rest of that line
+  do {
+    int rc = 0;
+    do {
+      rc = fgetc(ex_file);
+      if (rc == EOF) {
+        examples_error(); // header not in the file: stop instead of spinning forever
+      }
+    } while (rc != 'l');
+    line[0] = 'l';
+    read_line_tail(ex_file, line + 1, (int)sizeof(line) - 1);
+  } while (strcmp(line, mstr) != 0);
+
+  // read messages
+  read_rows(ex_file, messages, finalK);
+
+  // the codeword header must immediately follow the last message
+  read_line_tail(ex_file, line, (int)sizeof(line));
+  if (strcmp(line, cstr) != 0) {
+    examples_error();
+  }
+
+  // read codewords
+  read_rows(ex_file, codewords, finalN);
+}
+
+/*!
+ * \brief Main test function.
+ *
+ * Creates an AVX2 LDPC encoder for the selected base graph and lifting size,
+ * loads NOF_MESSAGES reference messages and codewords from the example file
+ * (examplesBG1.dat or examplesBG2.dat), encodes every message NOF_REPS times
+ * while measuring the elapsed time, and compares the resulting codewords with
+ * the reference ones.
+ *
+ * \param[in] argc Number of command-line arguments.
+ * \param[in] argv Command-line arguments (see usage()).
+ * \return 0 if all codewords match. Any failure terminates the program with
+ * exit code -1.
+ */
+int main(int argc, char** argv)
+{
+  uint8_t* messages       = NULL;
+  uint8_t* codewords_true = NULL;
+  uint8_t* codewords_sim  = NULL;
+
+  int i = 0;
+  int j = 0;
+  int l = 0;
+
+  FILE* ex_file = NULL;
+  char  file_name[1000];
+
+  parse_args(argc, argv);
+
+  // create an LDPC encoder
+  srslte_ldpc_encoder_t encoder;
+  if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_AVX2, base_graph, lift_size) != 0) {
+    perror("encoder init");
+    exit(-1);
+  }
+
+  printf("Test LDPC encoder:\n");
+  printf("  Base Graph      -> BG%d\n", encoder.bg + 1);
+  printf("  Lifting Size    -> %d\n", encoder.ls);
+  printf("  Protograph      -> M = %d, N = %d, K = %d\n", encoder.bgM, encoder.bgN, encoder.bgK);
+  printf("  Lifted graph    -> M = %d, N = %d, K = %d\n", encoder.liftM, encoder.liftN, encoder.liftK);
+  printf("  Final code rate -> K/(N-2) = %d/%d = 1/%d\n",
+         encoder.liftK,
+         encoder.liftN - 2 * lift_size,
+         encoder.bg == BG1 ? 3 : 5);
+
+  // message and codeword lengths used by the test (the codeword drops 2 * lift_size bits)
+  finalK = encoder.liftK;
+  finalN = encoder.liftN - 2 * lift_size;
+
+  messages       = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
+  codewords_true = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
+  codewords_sim  = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
+  if (!messages || !codewords_true || !codewords_sim) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  snprintf(file_name, sizeof(file_name), "examplesBG%d.dat", base_graph + 1);
+  printf("\nReading example file %s...\n", file_name);
+  ex_file = fopen(file_name, "re");
+  if (ex_file == NULL) {
+    perror("fopen");
+    exit(-1);
+  }
+
+  get_examples(messages, codewords_true, ex_file);
+
+  fclose(ex_file);
+
+  printf("\nEncoding test messages...\n");
+  // t[1] and t[2] are the start/stop timestamps; the interval is read from t[0] after
+  // get_time_interval() (presumably t[2] - t[1] -- confirm against its definition)
+  struct timeval t[3];
+  double         elapsed_time = 0;
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    printf("  codeword %d\n", j);
+    gettimeofday(&t[1], NULL);
+    for (l = 0; l < NOF_REPS; l++) {
+      srslte_ldpc_encoder_encode(&encoder, messages + j * finalK, codewords_sim + j * finalN, finalK, finalN);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+  }
+  // elapsed_time covers NOF_REPS encodings of every message: normalize to a single pass
+  printf("Elapsed time: %e s\n", elapsed_time / NOF_REPS);
+
+  printf("\nVerifing results...\n");
+  for (i = 0; i < NOF_MESSAGES * finalN; i++) {
+    if (codewords_sim[i] != codewords_true[i]) {
+      // not a system-call failure: perror() would append an unrelated errno description
+      fprintf(stderr, "wrong!! (codeword %d, bit %d)\n", i / finalN, i % finalN);
+      exit(-1);
+    }
+  }
+
+  printf("Estimated throughput:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+         NOF_MESSAGES / (elapsed_time / NOF_REPS),
+         NOF_MESSAGES * finalK / (elapsed_time / NOF_REPS),
+         NOF_MESSAGES * finalN / (elapsed_time / NOF_REPS));
+
+  printf("\nTest completed successfully!\n\n");
+
+  free(codewords_sim);
+  free(codewords_true);
+  free(messages);
+  srslte_ldpc_encoder_free(&encoder);
+
+  return 0;
+}
--- a/lib/src/phy/fec/ldpc/test/ldpc_enc_test.c
+++ b/lib/src/phy/fec/ldpc/test/ldpc_enc_test.c
@ -0,0 +1,226 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_enc_test.c
+ * \brief Unit test for the LDPC encoder.
+ *
+ * It encodes a batch of example messages and compares the resulting codewords
+ * with the expected ones. Reference messages and codewords are provided in
+ * files **examplesBG1.dat** and **examplesBG2.dat**.
+ *
+ * Synopsis: **ldpc_enc_test [options]**
+ *
+ * Options:
+ *  - **-b \<number\>** Base Graph (1 or 2. Default 1).
+ *  - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "srslte/phy/fec/ldpc/ldpc_common.h"
+#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
+#include "srslte/phy/utils/debug.h"
+
+srslte_basegraph_t base_graph = BG1; /*!< \brief Base Graph (BG1 or BG2). */
+int                lift_size  = 2;   /*!< \brief Lifting Size. */
+int                finalK;           /*!< \brief Number of uncoded bits (message length). */
+int                finalN;           /*!< \brief Number of coded bits (codeword length). */
+
+#define NOF_MESSAGES 10 /*!< \brief Number of codewords in the test. */
+#define NOF_REPS 1000   /*!< \brief Number of times tests are repeated (for computing throughput). */
+
+/*!
+ * \brief Prints the command-line synopsis of the test.
+ *
+ * The values shown as defaults are the current contents of the global settings
+ * \c base_graph (printed one-based) and \c lift_size.
+ *
+ * \param[in] prog Program name shown in the usage line.
+ */
+void usage(char* prog)
+{
+  printf("Usage: %s [-bX] [-lX]\n"
+         "\t-b Base Graph [(1 or 2) Default %d]\n"
+         "\t-l Lifting Size [Default %d]\n",
+         prog,
+         base_graph + 1,
+         lift_size);
+}
+
+/*!
+ * \brief Parses the command line and updates the global test settings.
+ *
+ * Recognized options:
+ *  - **-b** base graph, given as 1 or 2 and stored zero-based in \c base_graph;
+ *  - **-l** lifting size, stored in \c lift_size.
+ *
+ * Option values must be plain decimal integers. Non-numeric text, trailing
+ * characters, a base graph other than 1 or 2, or a non-positive lifting size
+ * print the usage message and terminate the program with exit code -1, exactly
+ * as an unknown option does.
+ *
+ * \param[in] argc Number of command-line arguments.
+ * \param[in] argv Command-line arguments.
+ */
+void parse_args(int argc, char** argv)
+{
+  int opt = 0;
+  while ((opt = getopt(argc, argv, "b:l:")) != -1) {
+    char* end   = NULL;
+    long  value = 0;
+    switch (opt) {
+      case 'b':
+        value = strtol(optarg, &end, 10);
+        // strtol() returns 0 for garbage: inspect the end pointer so that bad input is rejected
+        if (end == optarg || *end != '\0' || (value != 1 && value != 2)) {
+          usage(argv[0]);
+          exit(-1);
+        }
+        base_graph = (int)value - 1;
+        break;
+      case 'l':
+        value = strtol(optarg, &end, 10);
+        // the last test rejects values that do not survive the narrowing to int
+        if (end == optarg || *end != '\0' || value <= 0 || (long)(int)value != value) {
+          usage(argv[0]);
+          exit(-1);
+        }
+        lift_size = (int)value;
+        break;
+      default:
+        usage(argv[0]);
+        exit(-1);
+    }
+  }
+}
+
+/*!
+ * \brief Aborts the test after a malformed or truncated example file is detected.
+ */
+static void examples_error(void)
+{
+  printf("Something went wrong while reading example file.\n");
+  exit(-1);
+}
+
+/*!
+ * \brief Reads the remainder of the current line of the example file.
+ *
+ * Characters are consumed up to and including the next newline (or up to EOF).
+ * At most \c size - 1 of them are stored in \c buffer, which is always
+ * null-terminated; the excess characters are discarded.
+ *
+ * \param[in]  ex_file Example file, already open for reading.
+ * \param[out] buffer  Destination string.
+ * \param[in]  size    Capacity of \c buffer (must be at least 1).
+ */
+static void read_line_tail(FILE* ex_file, char* buffer, int size)
+{
+  int len = 0;
+  int rc  = fgetc(ex_file);
+  while (rc != EOF && rc != '\n') {
+    if (len < size - 1) {
+      buffer[len++] = (char)rc;
+    }
+    rc = fgetc(ex_file);
+  }
+  buffer[len] = '\0';
+}
+
+/*!
+ * \brief Reads NOF_MESSAGES rows of \c row_length bits each.
+ *
+ * Every character of a row is stored as one byte: '-' becomes FILLER_BIT, any
+ * other character \c c becomes \c c - '0'. One extra character (the newline) is
+ * discarded after each row. The test is aborted if the file ends inside a row.
+ *
+ * \param[in]  ex_file    Example file, positioned at the first row.
+ * \param[out] bits       Destination array with NOF_MESSAGES * row_length entries.
+ * \param[in]  row_length Number of bits in each row.
+ */
+static void read_rows(FILE* ex_file, uint8_t* bits, int row_length)
+{
+  for (int j = 0; j < NOF_MESSAGES; j++) {
+    for (int i = 0; i < row_length; i++) {
+      int rc = fgetc(ex_file);
+      if (rc == EOF) {
+        examples_error();
+      }
+      bits[j * row_length + i] = (uint8_t)(rc == '-' ? FILLER_BIT : rc - '0');
+    }
+    fgetc(ex_file); // discard newline
+  }
+}
+
+/*!
+ * \brief Reads the example file.
+ *
+ * Looks for the section header "ls<lift_size>msgs", reads NOF_MESSAGES messages
+ * of \c finalK bits, checks that the header "ls<lift_size>cwds" follows and
+ * reads NOF_MESSAGES codewords of \c finalN bits. The program is terminated
+ * with exit code -1 if a header is missing or the file ends prematurely.
+ *
+ * \param[out] messages  Array receiving NOF_MESSAGES * finalK message bits.
+ * \param[out] codewords Array receiving NOF_MESSAGES * finalN codeword bits.
+ * \param[in]  ex_file   Example file, already open for reading.
+ */
+void get_examples(uint8_t* messages, //
+                  uint8_t* codewords,
+                  FILE*    ex_file)
+{
+  // 32 bytes hold the longest possible header ("ls", an int, "msgs") with room to spare, so a
+  // line that had to be truncated is longer than any header and can never compare equal to one
+  char mstr[32]; // header of the message section
+  char cstr[32]; // header of the codeword section
+  char line[32]; // candidate header read from the file
+
+  snprintf(mstr, sizeof(mstr), "ls%dmsgs", lift_size);
+  snprintf(cstr, sizeof(cstr), "ls%dcwds", lift_size);
+
+  // look for the message header: skip to the next 'l', then compare the rest of that line
+  do {
+    int rc = 0;
+    do {
+      rc = fgetc(ex_file);
+      if (rc == EOF) {
+        examples_error(); // header not in the file: stop instead of spinning forever
+      }
+    } while (rc != 'l');
+    line[0] = 'l';
+    read_line_tail(ex_file, line + 1, (int)sizeof(line) - 1);
+  } while (strcmp(line, mstr) != 0);
+
+  // read messages
+  read_rows(ex_file, messages, finalK);
+
+  // the codeword header must immediately follow the last message
+  read_line_tail(ex_file, line, (int)sizeof(line));
+  if (strcmp(line, cstr) != 0) {
+    examples_error();
+  }
+
+  // read codewords
+  read_rows(ex_file, codewords, finalN);
+}
+
+/*!
+ * \brief Main test function.
+ *
+ * Creates a generic (plain C) LDPC encoder for the selected base graph and
+ * lifting size, loads NOF_MESSAGES reference messages and codewords from the
+ * example file (examplesBG1.dat or examplesBG2.dat), encodes every message
+ * NOF_REPS times while measuring the elapsed time, and compares the resulting
+ * codewords with the reference ones.
+ *
+ * \param[in] argc Number of command-line arguments.
+ * \param[in] argv Command-line arguments (see usage()).
+ * \return 0 if all codewords match. Any failure terminates the program with
+ * exit code -1.
+ */
+int main(int argc, char** argv)
+{
+  uint8_t* messages       = NULL;
+  uint8_t* codewords_true = NULL;
+  uint8_t* codewords_sim  = NULL;
+
+  int i = 0;
+  int j = 0;
+  int l = 0;
+
+  FILE* ex_file = NULL;
+  char  file_name[1000];
+
+  parse_args(argc, argv);
+
+  // create an LDPC encoder
+  srslte_ldpc_encoder_t encoder;
+  if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_C, base_graph, lift_size) != 0) {
+    perror("encoder init");
+    exit(-1);
+  }
+
+  printf("Test LDPC encoder:\n");
+  printf("  Base Graph      -> BG%d\n", encoder.bg + 1);
+  printf("  Lifting Size    -> %d\n", encoder.ls);
+  printf("  Protograph      -> M = %d, N = %d, K = %d\n", encoder.bgM, encoder.bgN, encoder.bgK);
+  printf("  Lifted graph    -> M = %d, N = %d, K = %d\n", encoder.liftM, encoder.liftN, encoder.liftK);
+  printf("  Final code rate -> K/(N-2) = %d/%d = 1/%d\n",
+         encoder.liftK,
+         encoder.liftN - 2 * lift_size,
+         encoder.bg == BG1 ? 3 : 5);
+
+  // message and codeword lengths used by the test (the codeword drops 2 * lift_size bits)
+  finalK = encoder.liftK;
+  finalN = encoder.liftN - 2 * lift_size;
+
+  messages       = malloc(finalK * NOF_MESSAGES * sizeof(uint8_t));
+  codewords_true = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
+  codewords_sim  = malloc(finalN * NOF_MESSAGES * sizeof(uint8_t));
+  if (!messages || !codewords_true || !codewords_sim) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  snprintf(file_name, sizeof(file_name), "examplesBG%d.dat", base_graph + 1);
+  printf("\nReading example file %s...\n", file_name);
+  ex_file = fopen(file_name, "re");
+  if (ex_file == NULL) {
+    perror("fopen");
+    exit(-1);
+  }
+
+  get_examples(messages, codewords_true, ex_file);
+
+  fclose(ex_file);
+
+  printf("\nEncoding test messages...\n");
+  // t[1] and t[2] are the start/stop timestamps; the interval is read from t[0] after
+  // get_time_interval() (presumably t[2] - t[1] -- confirm against its definition)
+  struct timeval t[3];
+  double         elapsed_time = 0;
+  for (j = 0; j < NOF_MESSAGES; j++) {
+    printf("  codeword %d\n", j);
+    gettimeofday(&t[1], NULL);
+    for (l = 0; l < NOF_REPS; l++) {
+      srslte_ldpc_encoder_encode(&encoder, messages + j * finalK, codewords_sim + j * finalN, finalK, finalN);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+  }
+  // elapsed_time covers NOF_REPS encodings of every message: normalize to a single pass
+  printf("Elapsed time: %e s\n", elapsed_time / NOF_REPS);
+
+  printf("\nVerifing results...\n");
+  for (i = 0; i < NOF_MESSAGES * finalN; i++) {
+    if (codewords_sim[i] != codewords_true[i]) {
+      // not a system-call failure: perror() would append an unrelated errno description
+      fprintf(stderr, "wrong!! (codeword %d, bit %d)\n", i / finalN, i % finalN);
+      exit(-1);
+    }
+  }
+
+  printf("Estimated throughput:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+         NOF_MESSAGES / (elapsed_time / NOF_REPS),
+         NOF_MESSAGES * finalK / (elapsed_time / NOF_REPS),
+         NOF_MESSAGES * finalN / (elapsed_time / NOF_REPS));
+
+  printf("\nTest completed successfully!\n\n");
+
+  free(codewords_sim);
+  free(codewords_true);
+  free(messages);
+  srslte_ldpc_encoder_free(&encoder);
+
+  return 0;
+}
--- a/lib/src/phy/fec/ldpc/test/ldpc_rm_chain_test.c
+++ b/lib/src/phy/fec/ldpc/test/ldpc_rm_chain_test.c
@ -0,0 +1,646 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_rm_chain_test.c
+ * \brief End-to-end test for LDPC encoder, rate-matcher, rate-dematcher and decoder.
+ *
+ * A batch of example messages is randomly generated, encoded, rate-matched, 2-PAM modulated,
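+ * sent over an AWGN channel and, finally, rate-dematched and decoded by all the available
+ * decoder types (float, 16-bit and 8-bit, the latter also with flooded scheduling and, if
+ * supported, in its AVX2 versions). Transmitted and received messages are compared to estimate the WER.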
+ * Multiple batches are simulated if the number of errors is not significant
+ * enough.
+ *
+ *
+ * Synopsis: **ldpc_rm_chain_test [options]**
+ *
+ * Options:
+ *  - **-b \<number\>** Base Graph (1 or 2. Default 1).
+ *  - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 2).
+ *  - **-e \<number\>** Codeword length after rate matching (set to 0 [default] for full rate).
+ *  - **-f \<number\>** Number of filler bits (Default 17).
+ *  - **-r \<number\>** Redundancy version {0-3}.
+ *  - **-m \<number\>** Modulation type BPSK = 0, QPSK =1, QAM16 = 2, QAM64 = 3, QAM256 = 4.
+ *  - **-M \<number\>** Limited buffer size.
+ *  - **-s \<number\>** SNR in dB (Default 0 dB).
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "srslte/phy/channel/ch_awgn.h"
+#include "srslte/phy/fec/ldpc/ldpc_common.h"
+#include "srslte/phy/fec/ldpc/ldpc_decoder.h"
+#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
+#include "srslte/phy/fec/ldpc/ldpc_rm.h"
+#include "srslte/phy/utils/debug.h"
+#include "srslte/phy/utils/random.h"
+#include "srslte/phy/utils/vector.h"
+
+srslte_basegraph_t base_graph = BG1;    /*!< \brief Base Graph (BG1 or BG2). */
+uint32_t           lift_size  = 2;      /*!< \brief Lifting Size. */
+uint32_t           rm_length  = 0;      /*!< \brief Codeword length after rate matching. */
+uint32_t           F          = 22 - 5; /*!< \brief Number of filler bits in each CBS. */
+uint32_t           E          = 14000;  /*!< \brief Rate-matched Codeword size. */
+uint8_t            rv         = 0;      /*!< \brief Redundancy version {0-3}. */
+mod_type_t         mod_type   = BPSK;   /*!< \brief Modulation type: BPSK, QPSK, QAM16, QAM64, QAM256 = 4 */
+uint32_t           Nref       = 0;      /*!< \brief Limited buffer size. */
+float              snr        = 0;      /*!< \brief Signal-to-Noise Ratio [dB]. */
+
+int finalK = 0; /*!< \brief Number of uncoded bits (message length, including punctured and filler bits). */
+int finalN = 0; /*!< \brief Number of coded bits (codeword length). */
+
+#define BATCH_SIZE 100    /*!< \brief Number of codewords in a batch. */
+#define MAX_N_BATCH 10000 /*!< \brief Max number of simulated batches. */
+#define REQ_ERRORS 100    /*!< \brief Minimum number of errors for a significant simulation. */
+#define MS_SF 0.75f       /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
+
+/*!
+ * \brief Prints test help when wrong parameter is passed as input.
+ *
+ * The values shown as defaults are the current contents of the global test
+ * settings (\c base_graph is printed one-based).
+ *
+ * \param[in] prog Program name shown in the usage line.
+ */
+void usage(char* prog)
+{
+
+  printf("Usage: %s [-bX] [-lX] [-eX] [-fX] [-rX] [-mX] [-MX] [-sX]\n", prog);
+  printf("\t-b Base Graph [(1 or 2) Default %d]\n", base_graph + 1);
+  // the uint32_t settings are printed with an unsigned conversion (cast for portability)
+  printf("\t-l Lifting Size [Default %u]\n", (unsigned int)lift_size);
+  printf("\t-e Word length after rate matching [Default %u (no rate matching i.e. E = N - F)]\n",
+         (unsigned int)rm_length);
+  printf("\t-f Filler bits size (F) [Default %u]\n", (unsigned int)F);
+  printf("\t-r Redundancy version (rv) [Default %d]\n", rv);
+  printf("\t-m Modulation_type BPSK=0, QPSK=1, 16QAM=2, 64QAM=3, 256QAM = 4 [Default %d]\n", (int)mod_type);
+  printf("\t-M Limited buffer size (Nref) [Default = %u (normal buffer Nref = N)]\n", (unsigned int)Nref);
+  printf("\t-s SNR [dB, Default %.1f dB]\n", snr);
+}
+
+/*!
+ * \brief Converts an option argument into an integer within a given range.
+ *
+ * The argument must be a plain decimal integer between \c min_value and
+ * \c max_value (both included). Otherwise the usage message is printed and the
+ * program is terminated with exit code -1.
+ *
+ * \param[in] prog      Program name, forwarded to usage().
+ * \param[in] text      Option argument to convert.
+ * \param[in] min_value Smallest accepted value.
+ * \param[in] max_value Largest accepted value.
+ * \return The converted value.
+ */
+static long parse_int_option(char* prog, const char* text, long min_value, long max_value)
+{
+  char* end   = NULL;
+  long  value = strtol(text, &end, 10);
+  // strtol() returns 0 for garbage: inspect the end pointer so that bad input is rejected
+  if (end == text || *end != '\0' || value < min_value || value > max_value) {
+    usage(prog);
+    exit(-1);
+  }
+  return value;
+}
+
+/*!
+ * \brief Parses the input line.
+ *
+ * Updates the global test settings from the options **-b**, **-l**, **-e**,
+ * **-f**, **-r**, **-m**, **-M** and **-s**. The base graph is given as 1 or 2
+ * and stored zero-based; the redundancy version must be in {0-3} and the
+ * modulation type in {0-4}; the remaining integer options must not be negative
+ * (the lifting size must be positive). Malformed or out-of-range values, as
+ * well as unknown options, print the usage message and terminate the program
+ * with exit code -1.
+ *
+ * \param[in] argc Number of command-line arguments.
+ * \param[in] argv Command-line arguments.
+ */
+void parse_args(int argc, char** argv)
+{
+  int opt = 0;
+  while ((opt = getopt(argc, argv, "b:l:e:f:r:m:M:s:")) != -1) {
+    char* end = NULL;
+    switch (opt) {
+      case 'b':
+        base_graph = (int)parse_int_option(argv[0], optarg, 1, 2) - 1;
+        break;
+      case 'l':
+        lift_size = (uint32_t)parse_int_option(argv[0], optarg, 1, INT32_MAX);
+        break;
+      case 'e':
+        // 0 is allowed: it selects the full-rate codeword length
+        rm_length = (uint32_t)parse_int_option(argv[0], optarg, 0, INT32_MAX);
+        break;
+      case 'f':
+        F = (uint32_t)parse_int_option(argv[0], optarg, 0, INT32_MAX);
+        break;
+      case 'r':
+        rv = (uint8_t)parse_int_option(argv[0], optarg, 0, 3);
+        break;
+      case 'm':
+        mod_type = (mod_type_t)parse_int_option(argv[0], optarg, 0, 4);
+        break;
+      case 'M':
+        // 0 is allowed: it selects the normal (unlimited) buffer
+        Nref = (uint32_t)parse_int_option(argv[0], optarg, 0, INT32_MAX);
+        break;
+      case 's':
+        snr = strtod(optarg, &end);
+        if (end == optarg || *end != '\0') {
+          usage(argv[0]);
+          exit(-1);
+        }
+        break;
+      default:
+        usage(argv[0]);
+        exit(-1);
+    }
+  }
+}
+
+/*!
+ * \brief Prints decoder statistics.
+ */
+void print_decoder(char* title, int n_batches, int n_errors, double elapsed_time);
+
+/*!
+ * \brief Main test function.
+ */
+int main(int argc, char** argv)
+{
+
+  uint8_t* messages_true          = NULL;
+  uint8_t* messages_sim_f         = NULL;
+  uint8_t* messages_sim_s         = NULL;
+  uint8_t* messages_sim_c         = NULL;
+  uint8_t* messages_sim_c_flood   = NULL;
+  uint8_t* messages_sim_avx       = NULL;
+  uint8_t* messages_sim_avx_flood = NULL;
+  uint8_t* codewords              = NULL;
+  uint8_t* rm_codewords           = NULL;
+  float*   rm_symbols             = NULL;
+  int16_t* rm_symbols_s           = NULL;
+  int8_t*  rm_symbols_c           = NULL;
+  float*   symbols                = NULL; // unrm_symbols
+  int16_t* symbols_s              = NULL; // unrm_symbols
+  int8_t*  symbols_c              = NULL; // unrm_symbols
+
+  int i = 0;
+  int j = 0;
+
+  parse_args(argc, argv);
+
+  // create an LDPC encoder
+  srslte_ldpc_encoder_t encoder;
+#ifdef LV_HAVE_AVX2
+  if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_AVX2, base_graph, lift_size) != 0) {
+    perror("encoder init");
+    exit(-1);
+  }
+#else  // no AVX2
+  if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_C, base_graph, lift_size) != 0) {
+    perror("encoder init");
+    exit(-1);
+  }
+#endif // LV_HAVE_AVX2
+
+  // create a LDPC rate DeMatcher
+  finalK = encoder.liftK;
+  finalN = encoder.liftN - 2 * lift_size;
+  if (rm_length == 0) {
+    rm_length = finalN - F;
+  }
+  if (Nref == 0) {
+    Nref = finalN;
+  }
+
+  // create a LDPC rate Matcher
+  srslte_ldpc_rm_t rm_tx;
+  if (srslte_ldpc_rm_tx_init(&rm_tx) != 0) {
+    perror("rate matcher init");
+    exit(-1);
+  }
+
+  // create a LDPC rate DeMatcher
+  srslte_ldpc_rm_t rm_rx;
+  if (srslte_ldpc_rm_rx_init_f(&rm_rx) != 0) {
+    perror("rate dematcher init");
+    exit(-1);
+  }
+
+  // create a LDPC rate DeMatcher (int16_t)
+  srslte_ldpc_rm_t rm_rx_s;
+  if (srslte_ldpc_rm_rx_init_s(&rm_rx_s) != 0) {
+    perror("rate dematcher init (int16_t)");
+    exit(-1);
+  }
+
+  // create a LDPC rate DeMatcher (int8_t)
+  srslte_ldpc_rm_t rm_rx_c;
+  if (srslte_ldpc_rm_rx_init_c(&rm_rx_c) != 0) {
+    perror("rate dematcher init (int8_t)");
+    exit(-1);
+  }
+
+  // create an LDPC decoder (float)
+  srslte_ldpc_decoder_t decoder_f;
+  if (srslte_ldpc_decoder_init(&decoder_f, SRSLTE_LDPC_DECODER_F, base_graph, lift_size, MS_SF) != 0) {
+    perror("decoder init");
+    exit(-1);
+  }
+  // create an LDPC decoder (16 bit)
+  srslte_ldpc_decoder_t decoder_s;
+  if (srslte_ldpc_decoder_init(&decoder_s, SRSLTE_LDPC_DECODER_S, base_graph, lift_size, MS_SF) != 0) {
+    perror("decoder init (int16_t)");
+    exit(-1);
+  }
+  // create an LDPC decoder (8 bit)
+  srslte_ldpc_decoder_t decoder_c;
+  if (srslte_ldpc_decoder_init(&decoder_c, SRSLTE_LDPC_DECODER_C, base_graph, lift_size, MS_SF) != 0) {
+    perror("decoder init (int8_t)");
+    exit(-1);
+  }
+  // create an LDPC decoder (8 bit, flooded)
+  srslte_ldpc_decoder_t decoder_c_flood;
+  if (srslte_ldpc_decoder_init(&decoder_c_flood, SRSLTE_LDPC_DECODER_C_FLOOD, base_graph, lift_size, MS_SF) != 0) {
+    perror("decoder init");
+    exit(-1);
+  }
+#ifdef LV_HAVE_AVX2
+  // create an LDPC decoder (8 bit, AVX2 version)
+  srslte_ldpc_decoder_t decoder_avx;
+  if (srslte_ldpc_decoder_init(&decoder_avx, SRSLTE_LDPC_DECODER_C_AVX2, base_graph, lift_size, MS_SF) != 0) {
+    perror("decoder init");
+    exit(-1);
+  }
+
+  // create an LDPC decoder (8 bit, flooded scheduling, AVX2 version)
+  srslte_ldpc_decoder_t decoder_avx_flood;
+  if (srslte_ldpc_decoder_init(&decoder_avx_flood, SRSLTE_LDPC_DECODER_C_AVX2_FLOOD, base_graph, lift_size, MS_SF) !=
+      0) {
+    perror("decoder init");
+    exit(-1);
+  }
+#endif // LV_HAVE_AVX2
+
+  // create a random generator
+  srslte_random_t random_gen = srslte_random_init(0);
+
+  printf("Test LDPC chain:\n");
+  printf("  Base Graph      -> BG%d\n", encoder.bg + 1);
+  printf("  Lifting Size    -> %d\n", encoder.ls);
+  printf("  Protograph      -> M = %d, N = %d, K = %d\n", encoder.bgM, encoder.bgN, encoder.bgK);
+  printf("  Lifted graph    -> M = %d, N = %d, K = %d\n", encoder.liftM, encoder.liftN, encoder.liftK);
+  printf("  Base code rate -> K/(N-2) = %d/%d = 1/%d\n",
+         encoder.liftK,
+         encoder.liftN - 2 * lift_size,
+         encoder.bg == BG1 ? 3 : 5);
+  printf("\n");
+  printf("  Codeblock length             ->   K = %d\n", finalK);
+  printf("  Codeword length              ->   N = %d\n", finalN);
+  printf("  Rate matched codeword length ->   E = %d\n", rm_length);
+  printf("  Number of filler bits        ->   F = %d\n", F);
+  printf("  Redundancy version           ->  rv = %d\n", rv);
+  printf("  Final code rate  -> (K-F)/E = (%d - %d)/%d = %.3f\n",
+         encoder.liftK,
+         F,
+         rm_length,
+         1.0 * (encoder.liftK - F) / rm_length);
+  printf("\n  Signal-to-Noise Ratio -> %.2f dB\n", snr);
+
+  messages_true          = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  messages_sim_f         = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  messages_sim_s         = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  messages_sim_c         = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  messages_sim_c_flood   = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  messages_sim_avx       = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  messages_sim_avx_flood = malloc(finalK * BATCH_SIZE * sizeof(uint8_t));
+  codewords              = malloc(finalN * BATCH_SIZE * sizeof(uint8_t));
+  rm_codewords           = malloc(rm_length * BATCH_SIZE * sizeof(uint8_t));
+  rm_symbols             = malloc(rm_length * BATCH_SIZE * sizeof(float));
+  rm_symbols_s           = malloc(rm_length * BATCH_SIZE * sizeof(uint16_t));
+  rm_symbols_c           = malloc(rm_length * BATCH_SIZE * sizeof(uint8_t));
+
+  symbols   = malloc(finalN * BATCH_SIZE * sizeof(float));
+  symbols_s = malloc(finalN * BATCH_SIZE * sizeof(int16_t));
+  symbols_c = malloc(finalN * BATCH_SIZE * sizeof(int8_t));
+  if (!messages_true || !messages_sim_f || !messages_sim_s || !messages_sim_c || //
+      !messages_sim_avx || !messages_sim_c_flood || !messages_sim_avx_flood ||   //
+      !codewords || !rm_codewords || !rm_symbols || !rm_symbols_s || !rm_symbols_c || !symbols || !symbols_s ||
+      !symbols_c) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  int            i_bit   = 0;
+  int            i_batch = 0;
+  struct timeval t[3];
+  double         elapsed_time_enc           = 0;
+  double         elapsed_time_dec_f         = 0;
+  double         elapsed_time_dec_s         = 0;
+  double         elapsed_time_dec_c         = 0;
+  double         elapsed_time_dec_c_flood   = 0;
+  double         elapsed_time_dec_avx       = 0;
+  double         elapsed_time_dec_avx_flood = 0;
+  int            n_error_words_f            = 0;
+  int            n_error_words_s            = 0;
+  int            n_error_words_c            = 0;
+  int            n_error_words_c_flood      = 0;
+  int            n_error_words_avx          = 0;
+  int            n_error_words_avx_flood    = 0;
+
+  float noise_std_dev = srslte_convert_dB_to_amplitude(-snr);
+
+  int16_t inf15  = (1U << 14U) - 1;
+  float   gain_s = inf15 * noise_std_dev / 20 / (1 / noise_std_dev + 2);
+
+  int8_t inf7   = (1U << 6U) - 1;
+  float  gain_c = inf7 * noise_std_dev / 8 / (1 / noise_std_dev + 2);
+
+  printf("\nBatch:\n  ");
+
+  while (((n_error_words_f < REQ_ERRORS) || (n_error_words_s < REQ_ERRORS) || (n_error_words_c < REQ_ERRORS)) &&
+         (i_batch < MAX_N_BATCH)) {
+    i_batch++;
+
+    if (!(i_batch % 10)) {
+      printf("%8d", i_batch);
+      if (!(i_batch % 90)) {
+        printf("\n  ");
+      }
+    }
+
+    /* generate data_tx */
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK - F; j++) {
+        messages_true[i * finalK + j] = srslte_random_uniform_int_dist(random_gen, 0, 1);
+      }
+      for (; j < finalK; j++) {
+        messages_true[i * finalK + j] = FILLER_BIT;
+      }
+    }
+
+    // lDPC Encoding
+    // compute the number of symbols that we need to encode/decode: at least (E + F) if E+F < N,
+    unsigned int n_useful_symbols = (E + F);
+
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_encoder_encode(
+          &encoder, messages_true + j * finalK, codewords + j * finalN, finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_enc += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    // rate matching
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_rm_tx(&rm_tx,
+                        codewords + j * finalN,
+                        rm_codewords + j * rm_length,
+                        rm_length,
+                        base_graph,
+                        lift_size,
+                        rv,
+                        mod_type,
+                        Nref);
+    }
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < rm_length; j++) {
+        rm_symbols[i * rm_length + j] = 1 - 2 * rm_codewords[i * rm_length + j];
+      }
+    }
+
+    // Apply AWGN
+    srslte_ch_awgn_f(rm_symbols, rm_symbols, noise_std_dev, BATCH_SIZE * rm_length);
+
+    // Convert symbols into LLRs
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < rm_length; j++) {
+        rm_symbols[i * rm_length + j] = rm_symbols[i * rm_length + j] * 2 / (noise_std_dev * noise_std_dev);
+      }
+    }
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      if (srslte_ldpc_rm_rx_f(&rm_rx,
+                              rm_symbols + i * rm_length,
+                              symbols + i * finalN,
+                              rm_length,
+                              F,
+                              base_graph,
+                              lift_size,
+                              rv,
+                              mod_type,
+                              Nref)) {
+        exit(-1);
+      }
+    }
+
+    //////// Floating point
+    // Recover messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_decoder_decode_f(&decoder_f, symbols + j * finalN, messages_sim_f + j * finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_dec_f += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK; j++) {
+        i_bit = i * finalK + j;
+        if (messages_sim_f[i_bit] != (1U & messages_true[i_bit])) {
+          n_error_words_f++;
+          break;
+        }
+      }
+    }
+
+    //////// Fixed point - 16 bit
+
+    // Quantize LLRs with 16 bits
+    srslte_vec_quant_fs(rm_symbols, rm_symbols_s, gain_s, 0, inf15, BATCH_SIZE * rm_length);
+
+    // Rate dematcher
+    for (i = 0; i < BATCH_SIZE; i++) {
+      if (srslte_ldpc_rm_rx_s(&rm_rx_s,
+                              rm_symbols_s + i * rm_length,
+                              symbols_s + i * finalN,
+                              rm_length,
+                              F,
+                              base_graph,
+                              lift_size,
+                              rv,
+                              mod_type,
+                              Nref)) {
+        exit(-1);
+      }
+    }
+
+    // Recover messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_decoder_decode_s(&decoder_s, symbols_s + j * finalN, messages_sim_s + j * finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_dec_s += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK; j++) {
+        i_bit = i * finalK + j;
+        if (messages_sim_s[i_bit] != (1U & messages_true[i_bit])) {
+          n_error_words_s++;
+          break;
+        }
+      }
+    }
+
+    //////// Fixed point - 8 bit
+    // Quantize LLRs with 8 bits
+    srslte_vec_quant_fc(rm_symbols, rm_symbols_c, gain_c, 0, inf7, BATCH_SIZE * rm_length);
+
+    // Rate dematcher
+    for (i = 0; i < BATCH_SIZE; i++) {
+      if (srslte_ldpc_rm_rx_c(&rm_rx_c,
+                              rm_symbols_c + i * rm_length,
+                              symbols_c + i * finalN,
+                              rm_length,
+                              F,
+                              base_graph,
+                              lift_size,
+                              rv,
+                              mod_type,
+                              Nref)) {
+        exit(-1);
+      }
+    }
+
+    // Recover messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_decoder_decode_c(&decoder_c, symbols_c + j * finalN, messages_sim_c + j * finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_dec_c += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK; j++) {
+        i_bit = i * finalK + j;
+        if (messages_sim_c[i_bit] != (1U & messages_true[i_bit])) {
+          n_error_words_c++;
+          break;
+        }
+      }
+    }
+
+    //////// Fixed point - 8 bit, flooded scheduling
+
+    // Recover messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_decoder_decode_c(
+          &decoder_c_flood, symbols_c + j * finalN, messages_sim_c_flood + j * finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_dec_c_flood += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK; j++) {
+        i_bit = i * finalK + j;
+        if (messages_sim_c_flood[i_bit] != (1U & messages_true[i_bit])) {
+          n_error_words_c_flood++;
+          break;
+        }
+      }
+    }
+
+#ifdef LV_HAVE_AVX2
+    //////// Fixed point - 8 bit - AVX2 version
+
+    // Recover messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_decoder_decode_c(
+          &decoder_avx, symbols_c + j * finalN, messages_sim_avx + j * finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_dec_avx += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK; j++) {
+        i_bit = i * finalK + j;
+        if (messages_sim_avx[i_bit] != (1U & messages_true[i_bit])) {
+          n_error_words_avx++;
+          break;
+        }
+      }
+    }
+
+    //////// Fixed point - 8 bit, flooded scheduling - AVX2 version
+
+    // Recover messages
+    gettimeofday(&t[1], NULL);
+    for (j = 0; j < BATCH_SIZE; j++) {
+      srslte_ldpc_decoder_decode_c(
+          &decoder_avx_flood, symbols_c + j * finalN, messages_sim_avx_flood + j * finalK, n_useful_symbols);
+    }
+    gettimeofday(&t[2], NULL);
+    get_time_interval(t);
+    elapsed_time_dec_avx_flood += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+    for (i = 0; i < BATCH_SIZE; i++) {
+      for (j = 0; j < finalK; j++) {
+        i_bit = i * finalK + j;
+        if (messages_sim_avx_flood[i_bit] != (1U & messages_true[i_bit])) {
+          n_error_words_avx_flood++;
+          break;
+        }
+      }
+    }
+#endif // LV_HAVE_AVX2
+  }
+
+  printf("\nEstimated throughput encoder:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+         i_batch * BATCH_SIZE / elapsed_time_enc,
+         i_batch * BATCH_SIZE * finalK / elapsed_time_enc,
+         i_batch * BATCH_SIZE * finalN / elapsed_time_enc);
+
+  print_decoder("FLOATING POINT", i_batch, n_error_words_f, elapsed_time_dec_f);
+  print_decoder("FIXED POINT (16 bits)", i_batch, n_error_words_s, elapsed_time_dec_s);
+  print_decoder("FIXED POINT (8 bits)", i_batch, n_error_words_c, elapsed_time_dec_c);
+  print_decoder("FIXED POINT (8 bits, flooded scheduling)", i_batch, n_error_words_c_flood, elapsed_time_dec_c_flood);
+
+#ifdef LV_HAVE_AVX2
+  print_decoder("FIXED POINT (8 bits - AVX2)", i_batch, n_error_words_avx, elapsed_time_dec_avx);
+  print_decoder(
+      "FIXED POINT (8 bits, flooded scheduling - AVX2)", i_batch, n_error_words_avx_flood, elapsed_time_dec_avx_flood);
+#endif // LV_HAVE_AVX2
+
+  if (n_error_words_s > 10 * n_error_words_f) {
+    perror("16-bit performance too low!");
+    exit(-1);
+  }
+  if (n_error_words_c > 10 * n_error_words_f) {
+    perror("8-bit performance too low!");
+    exit(-1);
+  }
+  printf("\nTest completed successfully!\n\n");
+
+  free(symbols);
+  free(symbols_s);
+  free(symbols_c);
+  free(rm_symbols);
+  free(rm_symbols_s);
+  free(rm_symbols_c);
+  free(rm_codewords);
+  free(codewords);
+  free(messages_sim_avx);
+  free(messages_sim_c_flood);
+  free(messages_sim_c);
+  free(messages_sim_s);
+  free(messages_sim_f);
+  free(messages_true);
+  srslte_random_free(random_gen);
+#ifdef LV_HAVE_AVX2
+  srslte_ldpc_decoder_free(&decoder_avx);
+#endif // LV_HAVE_AVX2
+  srslte_ldpc_decoder_free(&decoder_c_flood);
+  srslte_ldpc_decoder_free(&decoder_c);
+  srslte_ldpc_decoder_free(&decoder_s);
+  srslte_ldpc_decoder_free(&decoder_f);
+  srslte_ldpc_encoder_free(&encoder);
+  srslte_ldpc_rm_tx_free(&rm_tx);
+  srslte_ldpc_rm_rx_free_f(&rm_rx);
+  srslte_ldpc_rm_rx_free_s(&rm_rx_s);
+  srslte_ldpc_rm_rx_free_c(&rm_rx_c);
+}
+
+/*!
+ * \brief Prints the estimated word error rate and throughput of one decoder.
+ * \param[in] title        Label printed in the banner line.
+ * \param[in] n_batches    Number of simulated batches (BATCH_SIZE words each).
+ * \param[in] n_errors     Number of words decoded with at least one wrong bit.
+ * \param[in] elapsed_time Accumulated decoding time (the caller accumulates tv_sec + 1e-6 * tv_usec, i.e. seconds).
+ */
+void print_decoder(char* title, int n_batches, int n_errors, double elapsed_time)
+{
+  // Count the words in double precision: the integer products n_batches * BATCH_SIZE * finalK
+  // (and * finalN) can exceed the range of a 32-bit integer before the division takes place.
+  double n_words = (double)n_batches * BATCH_SIZE;
+
+  printf("\n**** %s ****", title);
+  printf("\nEstimated word error rate:\n  %e (%d errors)\n", (double)n_errors / n_batches / BATCH_SIZE, n_errors);
+
+  printf("Estimated throughput decoder:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+         n_words / elapsed_time,
+         n_words * finalK / elapsed_time,
+         n_words * finalN / elapsed_time);
+}
--- a/lib/src/phy/fec/ldpc/test/ldpc_rm_test.c
+++ b/lib/src/phy/fec/ldpc/test/ldpc_rm_test.c
@ -0,0 +1,355 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file ldpc_rm_test.c
+ * \brief Unit test for the LDPC RateMatcher and RateDematcher.
+ *
+ * A batch of example messages is randomly generated, encoded, rate-matched, 2-PAM modulated
+ * and, finally, rate-dematched by all three types of rate dematchers
+ * (float, int16_t, int8_t).
+ * The rate-dematched codeword is compared against the transmitted codeword.
+ *
+ * Synopsis: **ldpc_rm_test [options]**
+ *
+ * Options:
+ *  - **-b \<number\>** Base Graph (1 or 2. Default 2).
+ *  - **-l \<number\>** Lifting Size (according to 5GNR standard. Default 208).
+ *  - **-e \<number\>** Codeword length after rate matching (set to 0 [default] for full rate).
+ *  - **-f \<number\>** Number of filler bits (Default 10).
+ *  - **-r \<number\>** Redundancy version {0-3}.
+ *  - **-m \<number\>** Modulation type BPSK = 0, QPSK =1, QAM16 = 2, QAM64 = 3, QAM256 = 4.
+ *  - **-M \<number\>** Limited buffer size.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "srslte/phy/fec/ldpc/ldpc_common.h"
+#include "srslte/phy/fec/ldpc/ldpc_encoder.h"
+#include "srslte/phy/fec/ldpc/ldpc_rm.h"
+#include "srslte/phy/utils/debug.h"
+#include "srslte/phy/utils/random.h"
+
+srslte_basegraph_t base_graph = BG2;  /*!< \brief Base Graph (BG1 or BG2). */
+uint32_t           lift_size  = 208;  /*!< \brief Lifting Size. */
+uint32_t           C          = 2;    /*!< \brief Number of code block segments (CBS). */
+uint32_t           F          = 10;   /*!< \brief Number of filler bits in each CBS. */
+uint32_t           E          = 0;    /*!< \brief Rate-matched codeword size (E = 0 is replaced by N - F in main). */
+uint8_t            rv         = 0;    /*!< \brief Redundancy version {0-3}. */
+mod_type_t         mod_type   = QPSK; /*!< \brief Modulation type: BPSK, QPSK, QAM16, QAM64, QAM256. */
+uint32_t           Nref       = 0;    /*!< \brief Limited buffer size (Nref = 0 is replaced by N in main). */
+
+uint32_t N = 0; /*!< \brief Codeword size (set in main to the lifted codeword length minus 2 * lift_size). */
+uint32_t K = 0; /*!< \brief Codeblock size, filler bits included (set in main to the encoder's liftK). */
+
+/*!
+ * \brief Prints test help when a wrong parameter is passed as input.
+ *
+ * The defaults shown are the current values of the global test parameters.
+ * \param[in] prog Program name shown in the synopsis line.
+ */
+void usage(char* prog)
+{
+  // uint32_t parameters are printed with %u; enum values are converted to int explicitly for %d.
+  printf("Usage: %s [-bX] [-lX] [-eX] [-fX] [-rX] [-mX] [-MX]\n", prog);
+  printf("\t-b Base Graph [(1 or 2) Default %d]\n", (int)base_graph + 1);
+  printf("\t-l Lifting Size [Default %u]\n", lift_size);
+  printf("\t-e Word length after rate matching [Default %u (no rate matching i.e. E = N - F)]\n", E);
+  printf("\t-f Filler bits size (F) [Default %u]\n", F);
+  printf("\t-r Redundancy version (rv) [Default %d]\n", rv);
+  printf("\t-m Modulation_type BPSK=0, QPSK=1, 16QAM=2, 64QAM=3, 256QAM = 4 [Default %d]\n", (int)mod_type);
+  printf("\t-M Limited buffer size (Nref) [Default = %u (normal buffer Nref = N)]\n", Nref);
+}
+
+/*! Largest value accepted for the options that are stored in a uint32_t. */
+static const unsigned long parse_arg_max_u32 = 0xFFFFFFFFUL;
+
+/*!
+ * \brief Converts the argument of one option to an unsigned integer and validates it.
+ *
+ * Prints an error message and the usage text, then terminates the program, if the argument
+ * is not a plain decimal number or lies outside [min_value, max_value].
+ * \param[in] prog      Program name (forwarded to usage()).
+ * \param[in] opt       Option letter, used in the error message.
+ * \param[in] arg       Argument text as provided by getopt.
+ * \param[in] min_value Smallest accepted value.
+ * \param[in] max_value Largest accepted value.
+ * \return The parsed value.
+ */
+static uint32_t
+parse_uint_arg(char* prog, int opt, const char* arg, unsigned long min_value, unsigned long max_value)
+{
+  char* end   = NULL;
+  long  value = strtol(arg, &end, 10);
+
+  // Reject empty/non-numeric text, trailing garbage, negative numbers and out-of-range values.
+  if (end == arg || *end != '\0' || value < 0 || (unsigned long)value < min_value ||
+      (unsigned long)value > max_value) {
+    fprintf(stderr, "Invalid value \"%s\" for option -%c\n", arg, opt);
+    usage(prog);
+    exit(-1);
+  }
+  return (uint32_t)value;
+}
+
+/*!
+ * \brief Parses the input line.
+ *
+ * Overwrites the global test parameters with the values given on the command line.
+ * An unknown option or an invalid argument prints the usage text and terminates the program.
+ * \param[in] argc Number of command line arguments.
+ * \param[in] argv Command line arguments.
+ */
+void parse_args(int argc, char** argv)
+{
+  int opt = 0;
+  while ((opt = getopt(argc, argv, "b:l:e:f:r:m:M:")) != -1) {
+    switch (opt) {
+      case 'b':
+        // The command line uses 1/2, the base graph variable stores value - 1 (see usage()).
+        base_graph = (srslte_basegraph_t)(parse_uint_arg(argv[0], opt, optarg, 1, 2) - 1);
+        break;
+      case 'l':
+        lift_size = parse_uint_arg(argv[0], opt, optarg, 0, parse_arg_max_u32);
+        break;
+      case 'e':
+        E = parse_uint_arg(argv[0], opt, optarg, 0, parse_arg_max_u32);
+        break;
+      case 'f':
+        F = parse_uint_arg(argv[0], opt, optarg, 0, parse_arg_max_u32);
+        break;
+      case 'r':
+        // Redundancy version is documented as {0-3}; larger values would be silently truncated to uint8_t.
+        rv = (uint8_t)parse_uint_arg(argv[0], opt, optarg, 0, 3);
+        break;
+      case 'm':
+        mod_type = (mod_type_t)parse_uint_arg(argv[0], opt, optarg, 0, parse_arg_max_u32);
+        break;
+      case 'M':
+        Nref = parse_uint_arg(argv[0], opt, optarg, 0, parse_arg_max_u32);
+        break;
+      default:
+        usage(argv[0]);
+        exit(-1);
+    }
+  }
+}
+
+/*!
+ * \brief Main test function.
+ *
+ * For each of the C code block segments: generates random bits followed by F filler bits,
+ * encodes them, rate-matches the codeword, maps the bits to +1/-1 symbols and runs the three
+ * rate dematchers (float, int16_t, int8_t). The float output is checked against the codeword
+ * and the two fixed-point outputs are checked against the float one.
+ *
+ * \return 0 if every segment passes all checks; otherwise the code of the last failed check
+ * (-1 float, -2 int16_t, -3 int8_t) of the last failing segment.
+ */
+int main(int argc, char** argv)
+{
+  uint8_t* codeblocks     = NULL; /*  codeblocks including filler bits  */
+  uint8_t* codewords      = NULL;
+  uint8_t* rm_codewords   = NULL;
+  float*   rm_symbols     = NULL;
+  int16_t* rm_symbols_s   = NULL;
+  int8_t*  rm_symbols_c   = NULL;
+  float*   unrm_symbols   = NULL;
+  int16_t* unrm_symbols_s = NULL;
+  int8_t*  unrm_symbols_c = NULL;
+
+  uint32_t i     = 0;
+  uint32_t r     = 0;
+  int      error = 0; // overall result: must survive from one code block segment to the next
+
+  // Values the fixed-point outputs are checked against where the float output is INFINITY.
+  const int16_t inf16 = (1U << 15U) - 1;
+  const int8_t  inf8  = (1U << 7U) - 1;
+
+  parse_args(argc, argv);
+
+  srslte_random_t random_gen = srslte_random_init(0);
+
+  // create an LDPC encoder
+  srslte_ldpc_encoder_t encoder;
+  if (srslte_ldpc_encoder_init(&encoder, SRSLTE_LDPC_ENCODER_C, base_graph, lift_size) != 0) {
+    perror("encoder init");
+    exit(-1);
+  }
+
+  K = encoder.liftK;
+  N = encoder.liftN - 2 * lift_size;
+
+  // Each codeblock carries K - F message bits: with F > K the unsigned subtraction would wrap
+  // around and the loops below would write far beyond the allocated buffers.
+  if (F > K) {
+    fprintf(stderr, "Invalid number of filler bits: F = %u is larger than K = %u\n", F, K);
+    exit(-1);
+  }
+
+  if (E == 0) {
+    E = N - F;
+  }
+  if (Nref == 0) {
+    Nref = N;
+  }
+
+  // create a LDPC rate Matcher
+  srslte_ldpc_rm_t rm_tx;
+  if (srslte_ldpc_rm_tx_init(&rm_tx) != 0) {
+    perror("rate matcher init");
+    exit(-1);
+  }
+
+  // create a LDPC rate DeMatcher
+  srslte_ldpc_rm_t rm_rx;
+  if (srslte_ldpc_rm_rx_init_f(&rm_rx) != 0) {
+    perror("rate dematcher init");
+    exit(-1);
+  }
+
+  // create a LDPC rate DeMatcher (int16_t)
+  srslte_ldpc_rm_t rm_rx_s;
+  if (srslte_ldpc_rm_rx_init_s(&rm_rx_s) != 0) {
+    perror("rate dematcher init (int16_t)");
+    exit(-1);
+  }
+
+  // create a LDPC rate DeMatcher (int8_t)
+  srslte_ldpc_rm_t rm_rx_c;
+  if (srslte_ldpc_rm_rx_init_c(&rm_rx_c) != 0) {
+    perror("rate dematcher init (int8_t)");
+    exit(-1);
+  }
+
+  printf("Test LDPC chain:\n");
+  printf("  Base Graph      -> BG%d\n", encoder.bg + 1);
+  printf("  Lifting Size    -> %d\n", encoder.ls);
+  printf("  Protograph      -> M = %d, N = %d, K = %d\n", encoder.bgM, encoder.bgN, encoder.bgK);
+  printf("  Lifted graph    -> M = %d, N = %d, K = %d\n", encoder.liftM, encoder.liftN, encoder.liftK);
+  printf("  Base code rate -> K/(N-2) = %d/%d = 1/%d\n",
+         encoder.liftK,
+         encoder.liftN - 2 * lift_size,
+         encoder.bg == BG1 ? 3 : 5);
+  printf("\n");
+  printf("  Codeblock length             ->   K = %u\n", K);
+  printf("  Codeword length              ->   N = %u\n", N);
+  printf("  Rate matched codeword length ->   E = %u\n", E);
+  printf("  Number of filler bits        ->   F = %u\n", F);
+  printf("  Redundancy version           ->  rv = %d\n", rv);
+  printf("  Final code rate  -> (K-F)/E = (%u - %u)/%u = %.3f\n", K, F, E, 1.0 * (K - F) / E);
+  printf("\n");
+
+  codeblocks     = malloc(C * K * sizeof(uint8_t));
+  codewords      = malloc(C * N * sizeof(uint8_t));
+  rm_codewords   = malloc(C * E * sizeof(uint8_t));
+  rm_symbols     = malloc(C * E * sizeof(float));
+  rm_symbols_s   = malloc(C * E * sizeof(int16_t));
+  rm_symbols_c   = malloc(C * E * sizeof(int8_t));
+  unrm_symbols   = malloc(C * N * sizeof(float));
+  unrm_symbols_s = malloc(C * N * sizeof(int16_t));
+  unrm_symbols_c = malloc(C * N * sizeof(int8_t));
+  if (!codeblocks || !codewords || !rm_codewords || !rm_symbols || !rm_symbols_s || !rm_symbols_c || !unrm_symbols ||
+      !unrm_symbols_s || !unrm_symbols_c) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  // Generate random bits
+  for (r = 0; r < C; r++) {
+    for (i = 0; i < K - F; i++) {
+      codeblocks[r * K + i] = srslte_random_uniform_int_dist(random_gen, 0, 1);
+    }
+    for (; i < K; i++) { // add filler bits
+      codeblocks[r * K + i] = FILLER_BIT;
+    }
+  }
+
+  // LDPC Encoding
+  // number of symbols requested from the encoder: E + F
+  unsigned int n_useful_symbols = (E + F);
+
+  for (r = 0; r < C; r++) {
+    int seg_error = 0; // result of the checks of this segment only
+
+    // Encode messages
+    if (srslte_ldpc_encoder_encode(&encoder, codeblocks + r * K, codewords + r * N, K, n_useful_symbols)) {
+      exit(-1);
+    }
+
+    // LDPC rate matching
+    if (srslte_ldpc_rm_tx(
+            &rm_tx, codewords + r * N, rm_codewords + r * E, E, base_graph, lift_size, rv, mod_type, Nref)) {
+      exit(-1);
+    }
+
+    // Modulate codewords: bit 1 -> -1, bit 0 -> +1 (same mapping for the three symbol types)
+    for (i = 0; i < E; i++) {
+      rm_symbols[r * E + i]   = rm_codewords[r * E + i] ? -1 : 1;
+      rm_symbols_s[r * E + i] = rm_codewords[r * E + i] ? -1 : 1;
+      rm_symbols_c[r * E + i] = rm_codewords[r * E + i] ? -1 : 1;
+    }
+
+    if (srslte_ldpc_rm_rx_f(
+            &rm_rx, rm_symbols + r * E, unrm_symbols + r * N, E, F, base_graph, lift_size, rv, mod_type, Nref)) {
+      exit(-1);
+    }
+    if (srslte_ldpc_rm_rx_s(
+            &rm_rx_s, rm_symbols_s + r * E, unrm_symbols_s + r * N, E, F, base_graph, lift_size, rv, mod_type, Nref)) {
+      exit(-1);
+    }
+    if (srslte_ldpc_rm_rx_c(
+            &rm_rx_c, rm_symbols_c + r * E, unrm_symbols_c + r * N, E, F, base_graph, lift_size, rv, mod_type, Nref)) {
+      exit(-1);
+    }
+
+    // Check self correctness for the float version
+    for (i = 0; i < N; i++) {
+      const float   llr = unrm_symbols[i + r * N];
+      const uint8_t bit = codewords[i + r * N];
+      if (((llr == 0) && (bit != FILLER_BIT)) || ((llr == INFINITY) && (bit == FILLER_BIT)) ||
+          ((llr > 0) && (bit == 0)) || ((llr < 0) && bit)) {
+        continue; // any of these cases are ok
+      }
+      seg_error = -1;
+      break;
+    }
+
+    if (seg_error < 0) {
+      // i holds the index of the first mismatch
+      printf("Error in rate-matching block at code segment: %u\n unrm_symb[%u] = %2.1f\n codeword[%u] = %d\n",
+             r,
+             i,
+             unrm_symbols[i + r * N],
+             i,
+             codewords[i + r * N]);
+    } else {
+      printf(" No errors in rate-matching block\n");
+    }
+
+    // check int16_t version against float implementation
+    for (i = 0; i < N; i++) {
+      const float   llr   = unrm_symbols[i + r * N];
+      const int16_t llr_s = unrm_symbols_s[i + r * N];
+      // Non-finite values must be handled before the cast: converting them to an integer type
+      // is undefined behavior.
+      if (isfinite(llr) ? ((int16_t)llr == llr_s) : ((llr == INFINITY) && (llr_s == inf16))) {
+        continue;
+      }
+      seg_error = -2;
+      break;
+    }
+    if (seg_error == -2) {
+      printf(
+          "Error in rate-matching block (int16_t) at code segment: %u\n unrm_symb[%u] = %2.1f\n unrm_symb_s[%u] = %d\n",
+          r,
+          i,
+          unrm_symbols[i + r * N],
+          i,
+          unrm_symbols_s[i + r * N]);
+    } else {
+      printf(" No errors in rate-matching block (int16_t)\n");
+    }
+
+    // check int8_t version against float implementation
+    for (i = 0; i < N; i++) {
+      const float  llr   = unrm_symbols[i + r * N];
+      const int8_t llr_c = unrm_symbols_c[i + r * N];
+      if (isfinite(llr) ? ((int8_t)llr == llr_c) : ((llr == INFINITY) && (llr_c == inf8))) {
+        continue;
+      }
+      seg_error = -3;
+      break;
+    }
+    if (seg_error == -3) {
+      printf(
+          "Error in rate-matching block (int8_t) at code segment: %u\n unrm_symb[%u] = %2.1f\n unrm_symb_c[%u] = %d\n",
+          r,
+          i,
+          unrm_symbols[i + r * N],
+          i,
+          unrm_symbols_c[i + r * N]);
+    } else {
+      printf(" No errors in rate-matching block: (int8_t)\n");
+    }
+
+    // A failure in any segment makes the whole test fail, even if later segments are fine.
+    if (seg_error != 0) {
+      error = seg_error;
+    }
+
+  } // codeblocks r
+
+  free(unrm_symbols);
+  free(unrm_symbols_s);
+  free(unrm_symbols_c);
+  free(rm_symbols);
+  free(rm_symbols_s);
+  free(rm_symbols_c);
+  free(rm_codewords);
+  free(codewords);
+  free(codeblocks);
+  srslte_random_free(random_gen);
+  srslte_ldpc_encoder_free(&encoder);
+  srslte_ldpc_rm_tx_free(&rm_tx);
+  srslte_ldpc_rm_rx_free_f(&rm_rx);
+  srslte_ldpc_rm_rx_free_s(&rm_rx_s);
+  srslte_ldpc_rm_rx_free_c(&rm_rx_c);
+  return error;
+}
--- a/lib/src/phy/fec/polar/CMakeLists.txt
+++ b/lib/src/phy/fec/polar/CMakeLists.txt
@ -0,0 +1,21 @@
+#
+# Project: 5GCoding-SRS
+# Author: Jesus Gomez (CTTC)
+# Copyright: Software Radio Systems Limited
+#
+
+# Append the polar encoder/decoder sources to the FEC_SOURCES list and publish the
+# updated list to the parent directory scope (PARENT_SCOPE).
+set(FEC_SOURCES ${FEC_SOURCES}
+        polar/polar_encoder.c
+        polar/polar_encoder_pipelined.c
+        polar/polar_encoder_avx2.c
+        polar/polar_decoder.c
+        polar/polar_decoder_ssc_all.c
+        polar/polar_decoder_ssc_f.c
+        polar/polar_decoder_ssc_s.c
+        polar/polar_decoder_ssc_c.c
+        polar/polar_decoder_ssc_c_avx2.c
+        polar/polar_decoder_vector.c
+        polar/polar_decoder_vector_avx2.c
+        PARENT_SCOPE)
+
+# Process the CMakeLists.txt of the test subdirectory as well.
+add_subdirectory(test)
--- a/lib/src/phy/fec/polar/polar_decoder.c
+++ b/lib/src/phy/fec/polar/polar_decoder.c
@ -0,0 +1,233 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder.c
+ * \brief Definition of the polar decoder.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ * 5G uses a polar decoder with maximum sizes \f$2^n\f$ with \f$n = 5,...,10\f$.
+ *
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <math.h>
+#include <string.h>
+
+#include "polar_decoder_ssc_c.h"
+#include "polar_decoder_ssc_c_avx2.h"
+#include "polar_decoder_ssc_f.h"
+#include "polar_decoder_ssc_s.h"
+#include "srslte/phy/fec/polar/polar_decoder.h"
+#include "srslte/phy/utils/debug.h"
+
+/*!
+ * Decoding callback of the SSC polar decoder with float LLR inputs.
+ * \param[in]  o       The srslte_polar_decoder_t object, passed as a generic pointer.
+ * \param[in]  symbols Input LLRs.
+ * \param[out] data    Buffer handed to the inner decoder for the decoded bits.
+ * \return Always 0 (the results of the two inner calls are not inspected).
+ */
+static int decode_ssc_f(void* o, const float* symbols, uint8_t* data)
+{
+  srslte_polar_decoder_t* dec = (srslte_polar_decoder_t*)o;
+
+  // Two-step decoding: per-codeword initialization first, then the decoding pass itself.
+  init_polar_decoder_ssc_f(dec->ptr, symbols, data);
+  polar_decoder_ssc_f(dec->ptr, data);
+  return 0;
+}
+
+/*!
+ * Decoding callback of the SSC polar decoder with int16_t LLR inputs.
+ * \param[in]  o       The srslte_polar_decoder_t object, passed as a generic pointer.
+ * \param[in]  symbols Input LLRs.
+ * \param[out] data    Buffer handed to the inner decoder for the decoded bits.
+ * \return Always 0 (the results of the two inner calls are not inspected).
+ */
+static int decode_ssc_s(void* o, const int16_t* symbols, uint8_t* data)
+{
+  srslte_polar_decoder_t* dec = (srslte_polar_decoder_t*)o;
+
+  // Two-step decoding: per-codeword initialization first, then the decoding pass itself.
+  init_polar_decoder_ssc_s(dec->ptr, symbols, data);
+  polar_decoder_ssc_s(dec->ptr, data);
+  return 0;
+}
+
+/*!
+ * Decoding callback of the SSC polar decoder with int8_t LLR inputs.
+ * \param[in]  o       The srslte_polar_decoder_t object, passed as a generic pointer.
+ * \param[in]  symbols Input LLRs.
+ * \param[out] data    Buffer handed to the inner decoder for the decoded bits.
+ * \return Always 0 (the results of the two inner calls are not inspected).
+ */
+static int decode_ssc_c(void* o, const int8_t* symbols, uint8_t* data)
+{
+  srslte_polar_decoder_t* dec = (srslte_polar_decoder_t*)o;
+
+  // Two-step decoding: per-codeword initialization first, then the decoding pass itself.
+  init_polar_decoder_ssc_c(dec->ptr, symbols, data);
+  polar_decoder_ssc_c(dec->ptr, data);
+  return 0;
+}
+
+#ifdef LV_HAVE_AVX2
+/*!
+ * Decoding callback of the AVX2 SSC polar decoder with int8_t LLR inputs.
+ * \param[in]  o       The srslte_polar_decoder_t object, passed as a generic pointer.
+ * \param[in]  symbols Input LLRs.
+ * \param[out] data    Buffer handed to the inner decoder for the decoded bits.
+ * \return Always 0 (the results of the two inner calls are not inspected).
+ */
+static int decode_ssc_c_avx2(void* o, const int8_t* symbols, uint8_t* data)
+{
+  srslte_polar_decoder_t* dec = (srslte_polar_decoder_t*)o;
+
+  // Two-step decoding: per-codeword initialization first, then the decoding pass itself.
+  init_polar_decoder_ssc_c_avx2(dec->ptr, symbols, data);
+  polar_decoder_ssc_c_avx2(dec->ptr, data);
+  return 0;
+}
+#endif // LV_HAVE_AVX2
+
+/*!
+ * Destructor callback of a (float) SSC polar decoder.
+ * \param[in] o The srslte_polar_decoder_t object whose inner decoder (its ptr field) is deleted.
+ */
+static void free_ssc_f(void* o)
+{
+  delete_polar_decoder_ssc_f(((srslte_polar_decoder_t*)o)->ptr);
+}
+
+/*!
+ * Destructor callback of an (int16_t) SSC polar decoder.
+ * \param[in] o The srslte_polar_decoder_t object whose inner decoder (its ptr field) is deleted.
+ */
+static void free_ssc_s(void* o)
+{
+  delete_polar_decoder_ssc_s(((srslte_polar_decoder_t*)o)->ptr);
+}
+
+/*!
+ * Destructor callback of an (int8_t) SSC polar decoder.
+ * \param[in] o The srslte_polar_decoder_t object whose inner decoder (its ptr field) is deleted.
+ */
+static void free_ssc_c(void* o)
+{
+  delete_polar_decoder_ssc_c(((srslte_polar_decoder_t*)o)->ptr);
+}
+
+#ifdef LV_HAVE_AVX2
+/*!
+ * Destructor callback of an (int8_t, AVX2) SSC polar decoder.
+ * \param[in] o The srslte_polar_decoder_t object whose inner decoder (its ptr field) is deleted.
+ */
+static void free_ssc_c_avx2(void* o)
+{
+  delete_polar_decoder_ssc_c_avx2(((srslte_polar_decoder_t*)o)->ptr);
+}
+#endif
+
+/*!
+ * Sets up a polar decoder object to use the SSC algorithm with float LLR inputs.
+ * \param[out] q               Decoder object: receives the decode/free callbacks and the inner decoder.
+ * \param[in]  frozen_set      Forwarded to create_polar_decoder_ssc_f().
+ * \param[in]  code_size_log   Forwarded to create_polar_decoder_ssc_f().
+ * \param[in]  frozen_set_size Forwarded to create_polar_decoder_ssc_f().
+ * \return 0 on success, -1 if the inner decoder could not be created.
+ */
+static int init_ssc_f(srslte_polar_decoder_t* q, uint16_t* frozen_set, uint16_t code_size_log, uint16_t frozen_set_size)
+{
+  q->decode_f = decode_ssc_f;
+  q->free     = free_ssc_f;
+  q->ptr      = create_polar_decoder_ssc_f(frozen_set, code_size_log, frozen_set_size);
+
+  if (q->ptr != NULL) {
+    return 0;
+  }
+
+  ERROR("create_polar_decoder_ssc_f failed\n");
+  free_ssc_f(q);
+  return -1;
+}
+
+/*!
+ * Sets up a polar decoder object to use the SSC algorithm with int16_t LLR inputs.
+ * \param[out] q               Decoder object: receives the decode/free callbacks and the inner decoder.
+ * \param[in]  frozen_set      Forwarded to create_polar_decoder_ssc_s().
+ * \param[in]  code_size_log   Forwarded to create_polar_decoder_ssc_s().
+ * \param[in]  frozen_set_size Forwarded to create_polar_decoder_ssc_s().
+ * \return 0 on success, -1 if the inner decoder could not be created.
+ */
+static int init_ssc_s(srslte_polar_decoder_t* q, uint16_t* frozen_set, uint16_t code_size_log, uint16_t frozen_set_size)
+{
+  q->decode_s = decode_ssc_s;
+  q->free     = free_ssc_s;
+  q->ptr      = create_polar_decoder_ssc_s(frozen_set, code_size_log, frozen_set_size);
+
+  if (q->ptr != NULL) {
+    return 0;
+  }
+
+  ERROR("create_polar_decoder_ssc_s failed\n");
+  free_ssc_s(q);
+  return -1;
+}
+
+/*!
+ * Sets up a polar decoder object to use the SSC algorithm with int8_t LLR inputs.
+ * \param[out] q               Decoder object: receives the decode/free callbacks and the inner decoder.
+ * \param[in]  frozen_set      Forwarded to create_polar_decoder_ssc_c().
+ * \param[in]  code_size_log   Forwarded to create_polar_decoder_ssc_c().
+ * \param[in]  frozen_set_size Forwarded to create_polar_decoder_ssc_c().
+ * \return 0 on success, -1 if the inner decoder could not be created.
+ */
+static int init_ssc_c(srslte_polar_decoder_t* q, uint16_t* frozen_set, uint16_t code_size_log, uint16_t frozen_set_size)
+{
+  q->decode_c = decode_ssc_c;
+  q->free     = free_ssc_c;
+  q->ptr      = create_polar_decoder_ssc_c(frozen_set, code_size_log, frozen_set_size);
+
+  if (q->ptr != NULL) {
+    return 0;
+  }
+
+  ERROR("create_polar_decoder_ssc_c failed\n");
+  free_ssc_c(q);
+  return -1;
+}
+
+#ifdef LV_HAVE_AVX2
+/*!
+ * Initializes a polar decoder structure to use the SSC polar decoder algorithm with int8_t LLR inputs and AVX2
+ * instructions.
+ * \param[out] q               Decoder object: receives the decode/free callbacks and the inner decoder.
+ * \param[in]  frozen_set      Forwarded to create_polar_decoder_ssc_c_avx2().
+ * \param[in]  code_size_log   Forwarded to create_polar_decoder_ssc_c_avx2().
+ * \param[in]  frozen_set_size Forwarded to create_polar_decoder_ssc_c_avx2().
+ * \return 0 on success, -1 if the inner decoder could not be created.
+ */
+static int
+init_ssc_c_avx2(srslte_polar_decoder_t* q, uint16_t* frozen_set, uint16_t code_size_log, uint16_t frozen_set_size)
+{
+  q->decode_c = decode_ssc_c_avx2;
+  q->free     = free_ssc_c_avx2;
+
+  if ((q->ptr = create_polar_decoder_ssc_c_avx2(frozen_set, code_size_log, frozen_set_size)) == NULL) {
+    // Name the function that actually failed (the message used to point at the non-AVX2 variant).
+    ERROR("create_polar_decoder_ssc_c_avx2 failed\n");
+    free_ssc_c_avx2(q);
+    return -1;
+  }
+  return 0;
+}
+#endif
+
+/*!
+ * Initializes a polar decoder object with the implementation selected by \a type.
+ * \param[out] q               The decoder object to initialize.
+ * \param[in]  type            Requested decoder implementation.
+ * \param[in]  code_size_log   Forwarded to the selected implementation.
+ * \param[in]  frozen_set      Forwarded to the selected implementation.
+ * \param[in]  frozen_set_size Forwarded to the selected implementation.
+ * \return 0 on success, -1 if \a q is NULL, the type is not implemented or the initialization fails.
+ */
+int srslte_polar_decoder_init(srslte_polar_decoder_t*     q,
+                              srslte_polar_decoder_type_t type,
+                              uint16_t                    code_size_log,
+                              uint16_t*                   frozen_set,
+                              uint16_t                    frozen_set_size)
+{
+  if (q == NULL) {
+    ERROR("Invalid polar decoder object\n");
+    return -1;
+  }
+
+  // Each implementation sets only its own decode callback. Clear the object first so the
+  // remaining callbacks (and the free callback on the "not implemented" path) are NULL
+  // rather than indeterminate.
+  memset(q, 0, sizeof(srslte_polar_decoder_t));
+
+  switch (type) {
+    case SRSLTE_POLAR_DECODER_SSC_F:
+      return init_ssc_f(q, frozen_set, code_size_log, frozen_set_size);
+    case SRSLTE_POLAR_DECODER_SSC_S:
+      return init_ssc_s(q, frozen_set, code_size_log, frozen_set_size);
+    case SRSLTE_POLAR_DECODER_SSC_C:
+      return init_ssc_c(q, frozen_set, code_size_log, frozen_set_size);
+#ifdef LV_HAVE_AVX2
+    case SRSLTE_POLAR_DECODER_SSC_C_AVX2:
+      return init_ssc_c_avx2(q, frozen_set, code_size_log, frozen_set_size);
+#endif
+    default:
+      ERROR("Decoder not implemented\n");
+      return -1;
+  }
+}
+
+/*!
+ * Releases the resources of a polar decoder object and clears it.
+ *
+ * Calls the implementation-specific free callback, if one is set, and then zeroes the whole
+ * object. A NULL pointer is accepted and ignored.
+ * \param[in, out] q The decoder object to release.
+ */
+void srslte_polar_decoder_free(srslte_polar_decoder_t* q)
+{
+  if (q == NULL) {
+    return;
+  }
+  if (q->free) {
+    q->free(q);
+  }
+  memset(q, 0, sizeof(srslte_polar_decoder_t));
+}
+
+/*!
+ * Decodes a codeword given as float LLRs with the implementation stored in \a q.
+ * \param[in]  q            The decoder object.
+ * \param[in]  llr          Input LLRs.
+ * \param[out] data_decoded Buffer passed to the decoder for the decoded bits.
+ * \return The value returned by the decode callback, or -1 if \a q is NULL or has no float callback
+ * (e.g. the object was created for another LLR type or has already been freed).
+ */
+int srslte_polar_decoder_decode_f(srslte_polar_decoder_t* q, const float* llr, uint8_t* data_decoded)
+{
+  if (q == NULL || q->decode_f == NULL) {
+    ERROR("Polar decoder not initialized for float LLRs\n");
+    return -1;
+  }
+  return q->decode_f(q, llr, data_decoded);
+}
+
+/*!
+ * Decodes a codeword given as int16_t LLRs with the implementation stored in \a q.
+ * \param[in]  q            The decoder object.
+ * \param[in]  llr          Input LLRs.
+ * \param[out] data_decoded Buffer passed to the decoder for the decoded bits.
+ * \return The value returned by the decode callback, or -1 if \a q is NULL or has no int16_t callback
+ * (e.g. the object was created for another LLR type or has already been freed).
+ */
+int srslte_polar_decoder_decode_s(srslte_polar_decoder_t* q, const int16_t* llr, uint8_t* data_decoded)
+{
+  if (q == NULL || q->decode_s == NULL) {
+    ERROR("Polar decoder not initialized for int16_t LLRs\n");
+    return -1;
+  }
+  return q->decode_s(q, llr, data_decoded);
+}
+
+/*!
+ * Decodes a codeword given as int8_t LLRs with the implementation stored in \a q.
+ * \param[in]  q            The decoder object.
+ * \param[in]  llr          Input LLRs.
+ * \param[out] data_decoded Buffer passed to the decoder for the decoded bits.
+ * \return The value returned by the decode callback, or -1 if \a q is NULL or has no int8_t callback
+ * (e.g. the object was created for another LLR type or has already been freed).
+ */
+int srslte_polar_decoder_decode_c(srslte_polar_decoder_t* q, const int8_t* llr, uint8_t* data_decoded)
+{
+  if (q == NULL || q->decode_c == NULL) {
+    ERROR("Polar decoder not initialized for int8_t LLRs\n");
+    return -1;
+  }
+  return q->decode_c(q, llr, data_decoded);
+}
--- a/lib/src/phy/fec/polar/polar_decoder_ssc_all.c
+++ b/lib/src/phy/fec/polar/polar_decoder_ssc_all.c
@ -0,0 +1,102 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_ssc_all.c
+ * \brief Definition of the SSC polar decoder functions common to all implementations
+ *
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+#include "polar_decoder_ssc_all.h"
+#include "../utils_avx2.h"
+
+/*!
+ * Computes the type of every node at every stage and stores it in param->node_type.
+ *
+ * At stage 0 a node is rate-0 (0) if its index belongs to the frozen set and rate-1 (3)
+ * otherwise. At each following stage, node j combines nodes 2j and 2j+1 of the previous
+ * stage: it is rate-0 (0) if both are rate-0, rate-1 (3) if both are rate-1 and rate-r (2)
+ * otherwise.
+ *
+ * \param[in] frozen_set The position of the frozen bits in the codeword
+ * (param->frozen_set_size entries).
+ * \param[in, out] param Reads code_size_log, code_stage_size and frozen_set_size; writes node_type.
+ * \return 0 on success, -1 if a frozen position lies outside the codeword or if the memory
+ * allocation fails.
+ */
+int init_node_type(const uint16_t* frozen_set, struct Params* param)
+{
+  uint16_t code_size = param->code_stage_size[param->code_size_log];
+
+  // Frozen positions are used as array indices below: reject any that fall outside the codeword.
+  for (uint16_t i = 0; i < param->frozen_set_size; i++) {
+    if (frozen_set[i] >= code_size) {
+      return -1;
+    }
+  }
+
+  // A single allocation holds both flag vectors: is_not_rate_0 in the first half and
+  // is_rate_1 in the second half.
+  uint8_t* is_not_rate_0 = aligned_alloc(SRSLTE_AVX2_B_SIZE, 2 * code_size * sizeof(uint8_t));
+  if (!is_not_rate_0) {
+    perror("malloc");
+    return -1;
+  }
+  uint8_t* is_rate_1 = is_not_rate_0 + code_size;
+
+  // Start with every position marked as an information bit, then clear the frozen ones.
+  memset(is_not_rate_0, 1, code_size);
+  memset(is_rate_1, 1, code_size);
+  for (uint16_t i = 0; i < param->frozen_set_size; i++) {
+    is_not_rate_0[frozen_set[i]] = 0;
+    is_rate_1[frozen_set[i]]     = 0;
+  }
+
+  // Stage 0: single-bit nodes are either rate-0 or rate-1.
+  for (uint16_t j = 0; j < code_size; j++) {
+    param->node_type[0][j] = 3 * is_not_rate_0[j]; //  0 if rate-0; 3 if rate 1
+  }
+
+  for (uint8_t s = 1; s < param->code_size_log + 1; s++) {
+    uint16_t stage_size = param->code_stage_size[param->code_size_log - s];
+    for (uint16_t j = 0; j < stage_size; j++) {
+      // The update is done in place: entry j is written only after entries 2j and 2j+1
+      // (never smaller than j) have been read.
+      is_not_rate_0[j]       = is_not_rate_0[2 * j] | is_not_rate_0[2 * j + 1]; // bitor
+      is_rate_1[j]           = is_rate_1[2 * j] & is_rate_1[2 * j + 1];         // bitand
+      param->node_type[s][j] = 2 * is_not_rate_0[j] + is_rate_1[j]; //  0 if rate-0; 2 if rate-r; 3 if rate 1
+    }
+  }
+
+  free(is_not_rate_0);
+
+  return 0;
+}
--- a/lib/src/phy/fec/polar/polar_decoder_ssc_all.h
+++ b/lib/src/phy/fec/polar/polar_decoder_ssc_all.h
@ -0,0 +1,78 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_ssc_all.h
+ * \brief Declaration of the SSC polar decoder functions common to all implementations
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef POLAR_DECODER_SSC_ALL_H
+#define POLAR_DECODER_SSC_ALL_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*!
+ * \brief Types of node in an SSC decoder.
+ *
+ * The numeric values match the ones init_node_type() stores in Params::node_type:
+ * 2 * (node is not rate-0) + (node is rate-1).
+ */
+typedef enum {
+  RATE_0 = 0, /*!< \brief See function rate_0_node(). */
+  RATE_R = 2, /*!< \brief See function rate_r_node(). */
+  RATE_1 = 3, /*!< \brief See function rate_1_node(). */
+} node_rate;
+
+/*!
+ * \brief Stores constants.
+ */
+struct Params {
+  uint8_t   code_size_log;   /*!< \brief \f$log_2\f$ of code size. */
+  uint16_t* code_stage_size; /*!< \brief Number of bits of the encoder input/output vector at a given stage
+                                (entry code_size_log is the full code size). */
+  uint16_t  frozen_set_size; /*!< \brief Number of frozen bits. */
+  uint8_t** node_type;       /*!< \brief Node type indicator, indexed as [stage][node]: 3 (rate-1), 2 (rate-r),
+                                0 (rate-0). */
+};
+
+/*!
+ * \brief Describes the state of an SSC polar decoder.
+ */
+struct State {
+  uint8_t stage;         /*!< \brief Current stage [0 - code_size_log] of the decoding algorithm. */
+  bool    flag_finished; /*!< \brief True if the last bit is decoded. False otherwise. */
+  uint16_t*
+      active_node_per_stage; /*!< \brief Indicates the active node in each stage of the algorithm at a given moment. */
+};
+
+/*!
+ * Computes the node types at all stages and stores them in the node_type field of struct Params.
+ * \param[in] frozen_set The position of the frozen bits in the codeword.
+ * \param[in, out] param A struct Params: code_size_log, code_stage_size and frozen_set_size are read,
+ * node_type is written.
+ * \return 0 on success, -1 on failure.
+ */
+int init_node_type(const uint16_t* frozen_set, struct Params* param);
+
+#endif // POLAR_DECODER_SSC_ALL_H
--- a/lib/src/phy/fec/polar/polar_decoder_ssc_c.c
+++ b/lib/src/phy/fec/polar/polar_decoder_ssc_c.c
@ -0,0 +1,422 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_ssc_c.c
+ * \brief Definition of the SSC polar decoder inner functions working with
+ * 8-bit integer-valued LLRs.
+ *
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+// IMPORTANT: polar_decoder_ssc_c.c is exactly the same as polar_decoder_ssc_f.c except for:
+// (1) #include "polar_decoder_ssc_c.h"
+// (2) the naming of the external functions, which finish with _c instead of _f
+// (3) the initialization of the set of function pointers in create_polar_decoder_ssc_c
+//  pp->f        = srslte_vec_function_f_ccc;
+//  pp->g        = srslte_vec_function_g_bccc;
+//  pp->xor      = srslte_vec_xor_bbb;
+//  pp->hard_bit = srslte_vec_hard_bit_cc;
+
+#include "polar_decoder_ssc_c.h"
+#include "../utils_avx2.h"
+#include "polar_decoder_vector.h"
+#include "srslte/phy/fec/polar/polar_encoder.h"
+#include "srslte/phy/utils/vector.h"
+
+/*!
+ * \brief Describes an SSC polar decoder (8-bit version).
+ */
+struct pSSC_c {
+  int8_t**                llr0;    /*!< \brief Pointers to the upper half of LLRs values at all stages. */
+  int8_t**                llr1;    /*!< \brief Pointers to the lower half of LLRs values at all stages. */
+  uint8_t*                est_bit; /*!< \brief Pointer to the temporary estimated bits. */
+  struct Params*          param;   /*!< \brief Pointer to a Params structure. */
+  struct State*           state;   /*!< \brief Pointer to a State. */
+  srslte_polar_encoder_t* enc;     /*!< \brief Pointer to a srslte_polar_encoder_t. */
+  void (*f)(const int8_t* x, const int8_t* y, int8_t* z, const uint16_t len); /*!< \brief Pointer to the function-f. */
+  void (*g)(const uint8_t* b,
+            const int8_t*  x,
+            const int8_t*  y,
+            int8_t*        z,
+            const uint16_t len); /*!< \brief Pointer to the function-g. */
+  void (*xor)(const uint8_t* x,
+              const uint8_t* y,
+              uint8_t*       z,
+              const uint32_t len);                                   /*!< \brief Pointer to the xor function. */
+  void (*hard_bit)(const int8_t* x, uint8_t* z, const uint16_t len); /*!< \brief Pointer to the hard-bit function. */
+};
+
+/*!
+ * Switches between the different types of node (::RATE_1, ::RATE_0, ::RATE_R) for the SSC algorithm.
+ * Nodes in the decoding tree at stage \f$ s\f$ get the \f$2^s\f$ LLRs from the parent node and
+ * return the associated \f$2^s\f$ estimated bits.
+ *
+ * The function moves the decoder state one stage down before dispatching on the type of the
+ * active node of that stage, and moves it back up before returning.
+ *
+ * \param[in, out] p A pointer to the decoder (a struct pSSC_c).
+ * \param[out] message The decoded message bits, forwarded to the node functions.
+ */
+static void simplified_node(void* p, uint8_t* message);
+
+/*!
+ * All decoded bits below a ::RATE_0 node are 0. The function updates the \a p->state->active_node_per_stage
+ * entries to point to the next active node, or raises \a p->state->flag_finished if the active bit
+ * is the last one of the codeword. It is assumed that message bits are initialized to 0.
+ *
+ * \param[in, out] p A pointer to the decoder (a struct pSSC_c).
+ */
+static void rate_0_node(void* p);
+
+/*!
+ * ::RATE_1 nodes at stage \f$ s \f$ return the associated \f$2^s\f$ estimated bits by
+ * making a hard decision on them.
+ * ::RATE_1 nodes also update message bits vector.
+ *
+ * \param[in, out] p A pointer to the decoder (a struct pSSC_c).
+ * \param[out] message The decoded message bits.
+ */
+static void rate_1_node(void* p, uint8_t* message);
+
+/*!
+ * ::RATE_R nodes at stage \f$ s \f$ return the associated \f$2^s\f$ decoded bits by calling
+ * the child nodes to the left and right of the decoding tree and then polar encoding (xor) their output.
+ * At stage \f$ s \f$, this function runs the decoder's function-f and function-g (pSSC_c::f and pSSC_c::g)
+ * with vector size \f$2^{ s - 1}\f$ and updates the \a llr0 memory space for stage \f$(s - 1)\f$.
+ * This function also runs the decoder's xor function (pSSC_c::xor) with vector size \f$2^{s-1}\f$ and
+ * stores the result over the first half of the estimated bits of this node.
+ *
+ * \param[in, out] p A pointer to the decoder (a struct pSSC_c).
+ * \param[out] message The decoded message bits, forwarded to the child nodes.
+ */
+static void rate_r_node(void* p, uint8_t* message);
+
+/*!
+ * Initializes an (8-bit) SSC polar decoder before processing a new codeword: clears the output
+ * and the estimated-bit buffers, loads the input LLRs into the last stage and resets the state
+ * of the decoding tree.
+ *
+ * \param[in, out] p A pointer to the decoder (a struct pSSC_c).
+ * \param[in] input_llr LLRs for the new codeword (one value per codeword bit).
+ * \param[out] data_decoded Pointer to the decoded message (one byte per codeword bit is cleared).
+ * \return An integer: 0 if the function executes correctly, -1 if any of the pointers is NULL.
+ */
+int init_polar_decoder_ssc_c(void* p, const int8_t* input_llr, uint8_t* data_decoded)
+{
+  struct pSSC_c* pp = p;
+
+  if (p == NULL || input_llr == NULL || data_decoded == NULL) {
+    return -1;
+  }
+
+  uint8_t code_size_log = pp->param->code_size_log;
+  // stage sizes are stored as uint16_t: keep them unsigned so that large sizes cannot turn negative
+  uint16_t code_size = pp->param->code_stage_size[code_size_log];
+
+  // Initializes the data_decoded_vector to all zeros
+  memset(data_decoded, 0, code_size);
+
+  // Initialize est_bit vector to all zeros
+  memset(pp->est_bit, 0, code_size);
+
+  // Initializes LLR buffer for the last stage/level with the input LLRs values
+  if (code_size_log == 0) {
+    // single-bit codeword: there is no lower half, and no smaller stage size to look up
+    pp->llr0[0][0] = input_llr[0];
+  } else {
+    uint16_t code_half_size = pp->param->code_stage_size[code_size_log - 1];
+    memcpy(pp->llr0[code_size_log], input_llr, code_half_size * sizeof(int8_t));
+    memcpy(pp->llr1[code_size_log], input_llr + code_half_size, code_half_size * sizeof(int8_t));
+  }
+
+  // Initializes the state of the decoding tree
+  pp->state->stage = code_size_log + 1; // start from the only one node at the last stage + 1.
+  for (uint16_t i = 0; i < code_size_log + 1; i++) {
+    pp->state->active_node_per_stage[i] = 0;
+  }
+  pp->state->flag_finished = false;
+
+  return 0;
+}
+
+/*!
+ * Runs the SSC decoding algorithm from the root of the decoding tree. The decoder must have
+ * been prepared with init_polar_decoder_ssc_c() for the codeword to decode.
+ *
+ * \param[in, out] p A pointer to the decoder.
+ * \param[out] data_decoded The decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 if \a p is NULL.
+ */
+int polar_decoder_ssc_c(void* p, uint8_t* data_decoded)
+{
+  if (p != NULL) {
+    simplified_node(p, data_decoded);
+    return 0;
+  }
+
+  return -1;
+}
+
+/*!
+ * Releases every buffer owned by an (8-bit) SSC polar decoder and the decoder itself.
+ * Nothing is done if \a p is NULL.
+ *
+ * \param[in, out] p A pointer to the decoder to dismantle.
+ */
+void delete_polar_decoder_ssc_c(void* p)
+{
+  if (p == NULL) {
+    return;
+  }
+
+  struct pSSC_c* dec    = p;
+  struct Params* params = dec->param;
+  struct State*  status = dec->state;
+
+  // LLR storage: one shared buffer plus the two per-stage pointer tables
+  free(dec->llr0[0]);
+  free(dec->llr0);
+  free(dec->llr1);
+
+  // node types: one shared buffer plus the per-stage pointer table
+  free(params->node_type[0]);
+  free(params->node_type);
+
+  // estimated bits
+  free(dec->est_bit);
+
+  // remaining constants and state
+  free(params->code_stage_size);
+  free(params);
+  free(status->active_node_per_stage);
+  free(status);
+
+  // auxiliary encoder
+  srslte_polar_encoder_free(dec->enc);
+  free(dec->enc);
+
+  free(dec);
+}
+
+/*!
+ * Creates an (8-bit) SSC polar decoder and allocates all its working buffers.
+ *
+ * Every allocation is checked: if any of them fails, everything allocated so far is released
+ * and NULL is returned. The sizes passed to aligned_alloc() are rounded up to a multiple of
+ * SRSLTE_AVX2_B_SIZE, since C11 requires the size to be a multiple of the alignment.
+ *
+ * \param[in] frozen_set The position of the frozen bits in the codeword.
+ * \param[in] code_size_log \f$log_2\f$ of the number of bits in the codeword (at most 15, since
+ * stage sizes are stored as uint16_t).
+ * \param[in] frozen_set_size Number of frozen bits.
+ * \return A pointer to the new decoder (to be released with delete_polar_decoder_ssc_c()) if the
+ * function executes correctly, NULL otherwise.
+ */
+void* create_polar_decoder_ssc_c(uint16_t* frozen_set, const uint8_t code_size_log, const uint16_t frozen_set_size)
+{
+  const size_t n_stages = (size_t)code_size_log + 1; // stages 0, ..., code_size_log
+  const size_t align    = SRSLTE_AVX2_B_SIZE;        // every 32 chars are aligned
+
+  // stage sizes (powers of two up to the code size) must fit in the uint16_t entries of code_stage_size
+  if (code_size_log > 15) {
+    return NULL;
+  }
+
+  // allocate memory to the polar decoder instance; zero-initialized so that the cleanup path
+  // can tell which members have already been allocated
+  struct pSSC_c* pp = calloc(1, sizeof(struct pSSC_c));
+  if (pp == NULL) {
+    return NULL;
+  }
+
+  // set functions
+  pp->f        = srslte_vec_function_f_ccc;
+  pp->g        = srslte_vec_function_g_bccc;
+  pp->xor      = srslte_vec_xor_bbb;
+  pp->hard_bit = srslte_vec_hard_bit_cc;
+
+  // encoder of maximum size
+  if ((pp->enc = malloc(sizeof(srslte_polar_encoder_t))) == NULL) {
+    free(pp);
+    return NULL;
+  }
+  // NOTE(review): the return value of the encoder initialization is not checked, as in the
+  // original code -- confirm its error convention before adding a check.
+  srslte_polar_encoder_init(pp->enc, SRSLTE_POLAR_ENCODER_PIPELINED, code_size_log);
+
+  // algorithm constants/parameters
+  if ((pp->param = calloc(1, sizeof(struct Params))) == NULL) {
+    goto fail;
+  }
+
+  if ((pp->param->code_stage_size = malloc(n_stages * sizeof(uint16_t))) == NULL) {
+    goto fail;
+  }
+
+  pp->param->code_stage_size[0] = 1;
+  for (size_t i = 1; i < n_stages; i++) {
+    pp->param->code_stage_size[i] = 2 * pp->param->code_stage_size[i - 1];
+  }
+
+  pp->param->code_size_log   = code_size_log;
+  pp->param->frozen_set_size = frozen_set_size;
+
+  // state  -- initialized in init_polar_decoder_ssc_c
+  if ((pp->state = calloc(1, sizeof(struct State))) == NULL) {
+    goto fail;
+  }
+  if ((pp->state->active_node_per_stage = malloc(n_stages * sizeof(uint16_t))) == NULL) {
+    goto fail;
+  }
+
+  // allocates memory for estimated bits (one per codeword bit)
+  size_t est_bits_size = pp->param->code_stage_size[code_size_log];
+  est_bits_size        = ((est_bits_size + align - 1) / align) * align;
+  if ((pp->est_bit = aligned_alloc(align, est_bits_size)) == NULL) {
+    goto fail;
+  }
+
+  // allocate memory for LLR pointers (zeroed, so that the cleanup path can free llr0[0] safely).
+  if ((pp->llr0 = calloc(n_stages, sizeof(int8_t*))) == NULL) {
+    goto fail;
+  }
+  if ((pp->llr1 = calloc(n_stages, sizeof(int8_t*))) == NULL) {
+    goto fail;
+  }
+
+  // There are LLR buffers for n = 0 to n = code_size_log. Each with size 2^n. Thus,
+  // the total memory needed is 2^(code_size_log+1)-1.
+  // Only the stages starting at multiples of SRSLTE_AVX2_B_SIZE are aligned.
+  const size_t llr_all_stages = (size_t)1 << n_stages;
+  const size_t llr_alloc_size = ((llr_all_stages * sizeof(int8_t) + align - 1) / align) * align;
+
+  if ((pp->llr0[0] = aligned_alloc(align, llr_alloc_size)) == NULL) {
+    goto fail;
+  }
+
+  // initialize all LLR pointers: stage s starts at offset 2^s, its lower half 2^(s-1) further on
+  pp->llr1[0] = pp->llr0[0] + 1;
+  for (size_t s = 1; s < n_stages; s++) {
+    pp->llr0[s] = pp->llr0[0] + pp->param->code_stage_size[s];
+    pp->llr1[s] = pp->llr0[0] + pp->param->code_stage_size[s] + pp->param->code_stage_size[s - 1];
+  }
+
+  // allocate memory for node type pointers, one per stage.
+  if ((pp->param->node_type = calloc(n_stages, sizeof(uint8_t*))) == NULL) {
+    goto fail;
+  }
+
+  // allocate memory to node_type_ssc. Stage s has 2^(N-s) nodes s=0,...,N.
+  // Thus, same size as LLRs all stages.
+  const size_t node_alloc_size = ((llr_all_stages * sizeof(uint8_t) + align - 1) / align) * align;
+  if ((pp->param->node_type[0] = aligned_alloc(align, node_alloc_size)) == NULL) {
+    goto fail;
+  }
+
+  // initialize all node type pointers. (stage 0 is the first, opposite to LLRs)
+  for (size_t s = 1; s < n_stages; s++) {
+    pp->param->node_type[s] = pp->param->node_type[s - 1] + pp->param->code_stage_size[code_size_log - s + 1];
+  }
+
+  // NOTE(review): the return value is ignored, as in the original code.
+  init_node_type(frozen_set, pp->param);
+
+  return pp;
+
+fail:
+  // release whatever has been allocated so far (members never reached are still NULL)
+  if (pp->param != NULL) {
+    if (pp->param->node_type != NULL) {
+      free(pp->param->node_type[0]);
+      free(pp->param->node_type);
+    }
+    free(pp->param->code_stage_size);
+    free(pp->param);
+  }
+  if (pp->state != NULL) {
+    free(pp->state->active_node_per_stage);
+    free(pp->state);
+  }
+  if (pp->llr0 != NULL) {
+    free(pp->llr0[0]);
+    free(pp->llr0);
+  }
+  free(pp->llr1);
+  free(pp->est_bit);
+  srslte_polar_encoder_free(pp->enc);
+  free(pp->enc);
+  free(pp);
+  return NULL;
+}
+
+/*!
+ * Moves one stage down in the decoding tree, runs the function that corresponds to the type of
+ * the active node of that stage and moves back up. An unknown node type is reported on the
+ * standard output and terminates the program.
+ */
+static void simplified_node(void* p, uint8_t* message)
+{
+  struct pSSC_c* dec = p;
+
+  // descend to the child node
+  dec->state->stage--;
+
+  const uint8_t  level = dec->state->stage;
+  const uint16_t node  = dec->state->active_node_per_stage[level];
+  const uint8_t  type  = dec->param->node_type[level][node];
+
+  if (type == RATE_1) {
+    rate_1_node(dec, message);
+  } else if (type == RATE_0) {
+    rate_0_node(dec);
+  } else if (type == RATE_R) {
+    rate_r_node(dec, message);
+  } else {
+    printf("ERROR: wrong node type %d\n", type);
+    exit(-1);
+  }
+
+  // climb back to the parent node
+  dec->state->stage++;
+}
+
+/*!
+ * Processes a ::RATE_0 node: no bit is written. Either the decoder is flagged as finished (when
+ * the active bit is the last one of the codeword) or the active node of every stage up to the
+ * current one is advanced past this node.
+ */
+static void rate_0_node(void* p)
+{
+  struct pSSC_c*  dec    = p;
+  struct State*   status = dec->state;
+  const uint16_t* sizes  = dec->param->code_stage_size;
+
+  int16_t n_bits = sizes[dec->param->code_size_log];
+  uint8_t level  = status->stage;
+
+  if (status->active_node_per_stage[0] == n_bits - 1) {
+    status->flag_finished = true;
+    return;
+  }
+
+  // this node spans 2^(level - i) nodes of stage i
+  for (uint8_t i = 0; i <= level; i++) {
+    status->active_node_per_stage[i] += sizes[level - i];
+  }
+}
+
+/*!
+ * Processes a ::RATE_1 node: takes a hard decision on the LLRs of the current stage, writes the
+ * corresponding message bits (through the polar encoder unless the node is at stage 0) and
+ * advances the active node of every stage up to the current one.
+ */
+static void rate_1_node(void* p, uint8_t* message)
+{
+  struct pSSC_c*  dec    = p;
+  uint16_t*       active = dec->state->active_node_per_stage;
+  const uint16_t* sizes  = dec->param->code_stage_size;
+
+  const uint8_t  level     = dec->state->stage;
+  const uint16_t first_bit = active[0];
+  const uint16_t n_bits    = sizes[dec->param->code_size_log];
+
+  // estimated (coded) bits of this node
+  uint8_t* hard = dec->est_bit + first_bit;
+  dec->hard_bit(dec->llr0[level], hard, sizes[level]);
+
+  if (level == 0) {
+    // a single bit: the message bit is the coded bit
+    message[first_bit] = hard[0];
+  } else {
+    srslte_polar_encoder_encode(dec->enc, hard, message + first_bit, level);
+  }
+
+  // this node spans 2^(level - i) nodes of stage i
+  for (uint8_t i = 0; i <= level; i++) {
+    active[i] += sizes[level - i];
+  }
+
+  // check if this is the last bit
+  if (active[0] == n_bits) {
+    dec->state->flag_finished = true;
+  }
+}
+
+/*!
+ * Processes a ::RATE_R node: computes the LLRs of the left child (function-f) and decodes it,
+ * computes the LLRs of the right child (function-g, using the bits estimated by the left child)
+ * and decodes it, and finally combines (xor) the estimated bits of both children.
+ * The function returns early if any of the children flags the end of the decoding.
+ */
+static void rate_r_node(void* p, uint8_t* message)
+{
+  struct pSSC_c*  dec   = p;
+  const uint16_t* sizes = dec->param->code_stage_size;
+
+  const uint8_t  level = dec->state->stage;
+  const uint16_t full  = sizes[level];
+  const uint16_t half  = sizes[level - 1];
+
+  int8_t* upper = dec->llr0[level];
+  int8_t* lower = dec->llr1[level];
+  int8_t* child = dec->llr0[level - 1];
+
+  dec->f(upper, lower, child, half);
+
+  // move to the child node to the left (up) of the tree.
+  simplified_node(dec, message);
+  if (dec->state->flag_finished) { // (just in case). However for 5G frozen sets, the code can never end here.
+    return;
+  }
+
+  // the left child has just estimated the half bits that precede the active bit
+  int16_t left_start = dec->state->active_node_per_stage[0] - half;
+  dec->g(dec->est_bit + left_start, upper, lower, child, half);
+
+  // move to the child node to the right (down) of the tree.
+  simplified_node(dec, message);
+  if (dec->state->flag_finished) {
+    return;
+  }
+
+  // both children are done: the bits of this node are the full bits that precede the active bit
+  int16_t  node_start  = dec->state->active_node_per_stage[0] - full;
+  int16_t  right_start = node_start + half;
+  uint8_t* left_bits   = dec->est_bit + node_start;
+  uint8_t* right_bits  = dec->est_bit + right_start;
+
+  dec->xor (left_bits, right_bits, left_bits, half);
+
+  // update this node index, return to the father node
+  dec->state->active_node_per_stage[level] += 1;
+}
--- a/lib/src/phy/fec/polar/polar_decoder_ssc_c.h
+++ b/lib/src/phy/fec/polar/polar_decoder_ssc_c.h
@ -0,0 +1,77 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_ssc_c.h
+ * \brief Declaration of the SSC polar decoder inner functions working with
+ * 8-bit integer-valued LLRs.
+ * \author Jesus Gomez (CTTC) \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef POLAR_DECODER_SSC_C_H
+#define POLAR_DECODER_SSC_C_H
+#include "polar_decoder_ssc_all.h"
+
+/*!
+ * Creates an SSC polar decoder structure of type pSSC_c, and allocates memory for the decoding buffers.
+ *
+ * This function is exactly the same as the one for the floating-point version.
+ * Note, however, that it works with a different decoder structure (different function pointers
+ * f, g, xor and hard_bit).
+ *
+ * \param[in] frozen_set The position of the frozen bits in the codeword.
+ * \param[in] code_size_log \f$log_2\f$ of the number of bits in the codeword.
+ * \param[in] frozen_set_size Number of frozen bits.
+ * \return A pointer to a pSSC_c structure if the function executes correctly, NULL otherwise.
+ */
+void* create_polar_decoder_ssc_c(uint16_t* frozen_set, uint8_t code_size_log, uint16_t frozen_set_size);
+
+/*!
+ * The (8-bit) polar decoder SSC "destructor": it frees all the resources allocated to the decoder.
+ *
+ * \param[in, out] p A pointer to the decoder to dismantle, as returned by create_polar_decoder_ssc_c().
+ */
+void delete_polar_decoder_ssc_c(void* p);
+
+/*!
+ * Initializes an (8-bit) SSC polar decoder before processing a new codeword.
+ *
+ * \param[in, out] p A void pointer to the decoder (a pSSC_c structure).
+ * \param[in] llr LLRs for the new codeword.
+ * \param[out] data_decoded Pointer to the decoded message (set to all zeros by this function).
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int init_polar_decoder_ssc_c(void* p, const int8_t* llr, uint8_t* data_decoded);
+
+/*!
+ * Decodes a data message from an 8-bit resolution codeword with the specified decoder. Note that
+ * the codeword LLRs are not passed to this function: they are copied into the buffers of \a p by
+ * init_polar_decoder_ssc_c(), which must be called first.
+ *
+ * \param[in] p A pointer to the desired decoder.
+ * \param[out] data The decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int polar_decoder_ssc_c(void* p, uint8_t* data);
+
+#endif // POLAR_DECODER_SSC_C_H
--- a/lib/src/phy/fec/polar/polar_decoder_ssc_c_avx2.c
+++ b/lib/src/phy/fec/polar/polar_decoder_ssc_c_avx2.c
@ -0,0 +1,360 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_ssc_c_avx2.c
+ * \brief Definition of the SSC polar decoder inner functions working with
+ * 8-bit integer-valued LLRs and AVX2 instructions.
+ *
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include "polar_decoder_ssc_c_avx2.h"
+#include "../utils_avx2.h"
+#include "polar_decoder_vector_avx2.h"
+#include "srslte/phy/fec/polar/polar_encoder.h"
+
+#ifdef LV_HAVE_AVX2
+
+/*!
+ * \brief Describes the state of an AVX2 SSC polar decoder.
+ */
+struct StateAVX2 {
+  uint8_t  stage;   /*!< \brief Current stage [0 - code_size_log] of the decoding algorithm. */
+  uint16_t bit_pos; /*!< \brief Position of the next bit to be estimated in est_bit buffer. */
+};
+
+/*!
+ * \brief Describes an SSC polar decoder (8-bit, AVX2 version).
+ */
+struct pSSC_c_avx2 {
+  int8_t**                llr0;    /*!< \brief Pointers to the upper half of LLRs values at all stages. */
+  int8_t**                llr1;    /*!< \brief Pointers to the lower half of LLRs values at all stages. */
+  uint8_t*                est_bit; /*!< \brief Pointer to the temporary estimated bits. */
+  struct Params*          param;   /*!< \brief Pointer to a Params structure. */
+  struct StateAVX2*       state;   /*!< \brief Pointer to a StateAVX2. */
+  srslte_polar_encoder_t* enc;     /*!< \brief Pointer to a srslte_polar_encoder_t. */
+  void (*f)(const int8_t* x, const int8_t* y, int8_t* z, const uint16_t len); /*!< \brief Pointer to the function-f. */
+  void (*g)(const uint8_t* b,
+            const int8_t*  x,
+            const int8_t*  y,
+            int8_t*        z,
+            const uint16_t len); /*!< \brief Pointer to the function-g. */
+  void (*xor)(const uint8_t* x,
+              const uint8_t* y,
+              uint8_t*       z,
+              const uint16_t len);                                   /*!< \brief Pointer to the xor function. */
+  void (*hard_bit)(const int8_t* x, uint8_t* z, const uint16_t len); /*!< \brief Pointer to the hard-bit function. */
+};
+
+/*!
+ * Returns the larger of two integers.
+ */
+static int max(int a, int b)
+{
+  if (b >= a) {
+    return b;
+  }
+  return a;
+}
+
+/*!
+ * Switches between the different types of node (::RATE_1, ::RATE_0, ::RATE_R) for the SSC algorithm.
+ * Nodes in the decoding tree at stage \f$ s\f$ get the \f$2^s\f$ LLRs from the parent node and
+ * return the associated \f$2^s\f$ estimated bits.
+ *
+ * All decoded bits below a ::RATE_0 node are 0. The function only advances \a p->state->bit_pos
+ * past the node. It is assumed that the estimated bits are initialized to 0.
+ *
+ * ::RATE_1 nodes at stage \f$ s \f$ return the associated \f$2^s\f$ estimated bits by
+ * making a hard decision on them, and advance \a p->state->bit_pos past the node.
+ *
+ * ::RATE_R nodes at stage \f$ s \f$ return the associated \f$2^s\f$ decoded bits by calling
+ * the child nodes to the left and right of the decoding tree and then polar encoding (xor) their output.
+ * At stage \f$ s \f$, this function runs the decoder's function-f and function-g
+ * with vector size \f$2^{ s - 1}\f$ and updates the \a llr0 memory space for stage \f$(s - 1)\f$.
+ * This function also runs the decoder's xor function with vector size \f$2^{s-1}\f$ and
+ * stores the result over the first half of the estimated bits of the node.
+ *
+ * \param[in, out] p A pointer to the decoder.
+ */
+static void simplified_node(struct pSSC_c_avx2* p);
+
+/*!
+ * Releases every buffer owned by an (8-bit, AVX2) SSC polar decoder and the decoder itself.
+ * Nothing is done if \a p is NULL.
+ *
+ * \param[in, out] p A pointer to the decoder to dismantle.
+ */
+void delete_polar_decoder_ssc_c_avx2(void* p)
+{
+  if (p == NULL) {
+    return;
+  }
+
+  struct pSSC_c_avx2* dec    = p;
+  struct Params*      params = dec->param;
+
+  // LLR storage: one shared buffer plus the two per-stage pointer tables
+  free(dec->llr0[0]);
+  free(dec->llr0);
+  free(dec->llr1);
+
+  // node types: one shared buffer plus the per-stage pointer table
+  free(params->node_type[0]);
+  free(params->node_type);
+
+  // estimated bits
+  free(dec->est_bit);
+
+  // remaining constants and state
+  free(params->code_stage_size);
+  free(params);
+  free(dec->state);
+
+  // auxiliary encoder
+  srslte_polar_encoder_free(dec->enc);
+  free(dec->enc);
+
+  free(dec);
+}
+
+/*!
+ * Creates an (8-bit, AVX2) SSC polar decoder and allocates all its working buffers.
+ *
+ * Every allocation is checked: if any of them fails, everything allocated so far is released
+ * and NULL is returned. The sizes passed to aligned_alloc() are rounded up to a multiple of
+ * SRSLTE_AVX2_B_SIZE, since C11 requires the size to be a multiple of the alignment.
+ *
+ * \param[in] frozen_set The position of the frozen bits in the codeword.
+ * \param[in] code_size_log \f$log_2\f$ of the number of bits in the codeword (at most 15, since
+ * stage sizes are stored as uint16_t).
+ * \param[in] frozen_set_size Number of frozen bits.
+ * \return A pointer to the new decoder (to be released with delete_polar_decoder_ssc_c_avx2()) if
+ * the function executes correctly, NULL otherwise.
+ */
+void* create_polar_decoder_ssc_c_avx2(uint16_t* frozen_set, const uint8_t code_size_log, const uint16_t frozen_set_size)
+{
+  const size_t n_stages = (size_t)code_size_log + 1; // stages 0, ..., code_size_log
+  const size_t align    = SRSLTE_AVX2_B_SIZE;        // every 32 chars are aligned
+
+  // stage sizes (powers of two up to the code size) must fit in the uint16_t entries of code_stage_size
+  if (code_size_log > 15) {
+    return NULL;
+  }
+
+  // allocate memory to the polar decoder instance; zero-initialized so that the cleanup path
+  // can tell which members have already been allocated
+  struct pSSC_c_avx2* pp = calloc(1, sizeof(struct pSSC_c_avx2));
+  if (pp == NULL) {
+    return NULL;
+  }
+
+  // set functions
+  pp->f        = srslte_vec_function_f_ccc_avx2;
+  pp->g        = srslte_vec_function_g_bccc_avx2;
+  pp->xor      = srslte_vec_xor_bbb_avx2;
+  pp->hard_bit = srslte_vec_hard_bit_cc_avx2;
+
+  // encoder of maximum size
+  if ((pp->enc = malloc(sizeof(srslte_polar_encoder_t))) == NULL) {
+    free(pp);
+    return NULL;
+  }
+  // NOTE(review): the return value of the encoder initialization is not checked, as in the
+  // original code -- confirm its error convention before adding a check.
+  srslte_polar_encoder_init(pp->enc, SRSLTE_POLAR_ENCODER_AVX2, code_size_log);
+
+  // algorithm constants/parameters
+  if ((pp->param = calloc(1, sizeof(struct Params))) == NULL) {
+    goto fail;
+  }
+
+  if ((pp->param->code_stage_size = malloc(n_stages * sizeof(uint16_t))) == NULL) {
+    goto fail;
+  }
+
+  pp->param->code_stage_size[0] = 1;
+  for (size_t i = 1; i < n_stages; i++) {
+    pp->param->code_stage_size[i] = 2 * pp->param->code_stage_size[i - 1];
+  }
+
+  pp->param->code_size_log   = code_size_log;
+  pp->param->frozen_set_size = frozen_set_size;
+
+  // state  -- initialized in init_polar_decoder_ssc_c_avx2
+  if ((pp->state = malloc(sizeof(struct StateAVX2))) == NULL) {
+    goto fail;
+  }
+
+  // allocates memory for estimated bits
+  // allocates extra SRSLTE_AVX2_B_SIZE bytes to allow storing the output of 256-bit instructions
+  size_t est_bit_size = (size_t)pp->param->code_stage_size[code_size_log] + align;
+  est_bit_size        = ((est_bit_size + align - 1) / align) * align;
+  if ((pp->est_bit = aligned_alloc(align, est_bit_size)) == NULL) {
+    goto fail;
+  }
+
+  // allocate memory for LLR pointers (zeroed, so that the cleanup path can free llr0[0] safely).
+  if ((pp->llr0 = calloc(n_stages, sizeof(int8_t*))) == NULL) {
+    goto fail;
+  }
+  if ((pp->llr1 = calloc(n_stages, sizeof(int8_t*))) == NULL) {
+    goto fail;
+  }
+
+  // LLR MEMORY NOT ALIGNED FOR LLR_BUFFERS_SIZE < SRSLTE_SIMB_LLR_ALIGNED
+
+  // We do not align the memory at lower stages, as if done, after each function f and function g
+  // operation, the second half of the output vector needs to be moved to the next
+  // aligned position. This extra operation may incur more overhead than the gain of aligned memory.
+
+  // there are 2^(code_size_log + 1) - 1 LLR values summing up all stages.
+  const size_t llr_all_stages = (size_t)1 << n_stages;
+
+  // Reserve at least SRSLTE_AVX2_B_SIZE bytes for each stage, so that there is space for the output
+  // of the 32-bytes mm256 vectorized functions.
+  // llr1 (second half) of lower stages is not aligned.
+  size_t llr_all_stages_avx2 = llr_all_stages;
+  if (code_size_log >= 5) {
+    llr_all_stages_avx2 += align * 5;
+  } else {
+    llr_all_stages_avx2 += n_stages * align;
+  }
+
+  // add extra SRSLTE_AVX2_B_SIZE llrs positions so that hard_bit functions on the last bits have
+  // access to allocated memory
+  llr_all_stages_avx2 += align;
+
+  const size_t llr_alloc_size = ((llr_all_stages_avx2 * sizeof(int8_t) + align - 1) / align) * align;
+  if ((pp->llr0[0] = aligned_alloc(align, llr_alloc_size)) == NULL) {
+    goto fail;
+  }
+
+  // initialize all LLR pointers: each stage gets at least SRSLTE_AVX2_B_SIZE positions
+  pp->llr1[0] = pp->llr0[0] + 1;
+  for (size_t s = 1; s < n_stages; s++) {
+    pp->llr0[s] = pp->llr0[s - 1] + max(SRSLTE_AVX2_B_SIZE, pp->param->code_stage_size[s - 1]);
+    pp->llr1[s] = pp->llr0[s] + pp->param->code_stage_size[s - 1];
+  }
+
+  // allocate memory for node type pointers, one per stage.
+  if ((pp->param->node_type = calloc(n_stages, sizeof(uint8_t*))) == NULL) {
+    goto fail;
+  }
+
+  // allocate memory to node_type_ssc. Stage s has 2^(N-s) nodes s=0,...,N.
+  // Thus, same size as LLRs all stages.
+  const size_t node_alloc_size = ((llr_all_stages * sizeof(uint8_t) + align - 1) / align) * align;
+  if ((pp->param->node_type[0] = aligned_alloc(align, node_alloc_size)) == NULL) {
+    goto fail;
+  }
+
+  // initialize all node type pointers. (stage 0 is the first, opposite to LLRs)
+  for (size_t s = 1; s < n_stages; s++) {
+    pp->param->node_type[s] = pp->param->node_type[s - 1] + pp->param->code_stage_size[code_size_log - s + 1];
+  }
+
+  // NOTE(review): the return value is ignored, as in the original code.
+  init_node_type(frozen_set, pp->param);
+
+  return pp;
+
+fail:
+  // release whatever has been allocated so far (members never reached are still NULL)
+  if (pp->param != NULL) {
+    if (pp->param->node_type != NULL) {
+      free(pp->param->node_type[0]);
+      free(pp->param->node_type);
+    }
+    free(pp->param->code_stage_size);
+    free(pp->param);
+  }
+  free(pp->state);
+  if (pp->llr0 != NULL) {
+    free(pp->llr0[0]);
+    free(pp->llr0);
+  }
+  free(pp->llr1);
+  free(pp->est_bit);
+  srslte_polar_encoder_free(pp->enc);
+  free(pp->enc);
+  free(pp);
+  return NULL;
+}
+
+/*!
+ * Initializes an (8-bit, AVX2) SSC polar decoder before processing a new codeword: clears the
+ * output and the estimated-bit buffers, loads the input LLRs into the last stage and resets the
+ * state of the decoding tree.
+ *
+ * \param[in, out] p A pointer to the decoder (a struct pSSC_c_avx2).
+ * \param[in] input_llr LLRs for the new codeword (one value per codeword bit).
+ * \param[out] data_decoded Pointer to the decoded message (one byte per codeword bit is cleared).
+ * \return An integer: 0 if the function executes correctly, -1 if any of the pointers is NULL.
+ */
+int init_polar_decoder_ssc_c_avx2(void* p, const int8_t* input_llr, uint8_t* data_decoded)
+{
+  struct pSSC_c_avx2* pp = p;
+
+  if (p == NULL || input_llr == NULL || data_decoded == NULL) {
+    return -1;
+  }
+
+  uint8_t code_size_log = pp->param->code_size_log;
+  // stage sizes are stored as uint16_t: keep them unsigned so that large sizes cannot turn negative
+  uint16_t code_size = pp->param->code_stage_size[code_size_log];
+
+  // Initializes the data_decoded_vector to all zeros
+  memset(data_decoded, 0, code_size);
+
+  // Initialize est_bit vector to all zeros (including the extra room for 256-bit stores)
+  size_t est_bit_size = (size_t)code_size + SRSLTE_AVX2_B_SIZE;
+  memset(pp->est_bit, 0, est_bit_size);
+
+  // Initializes LLR buffer for the last stage/level with the input LLRs values
+  if (code_size_log == 0) {
+    // single-bit codeword: there is no lower half, and no smaller stage size to look up
+    pp->llr0[0][0] = input_llr[0];
+  } else {
+    uint16_t code_half_size = pp->param->code_stage_size[code_size_log - 1];
+    memcpy(&pp->llr0[code_size_log][0], &input_llr[0], code_half_size * sizeof(int8_t));
+    memcpy(&pp->llr1[code_size_log][0], &input_llr[code_half_size], code_half_size * sizeof(int8_t));
+  }
+
+  // Initializes the state of the decoding tree
+  pp->state->stage   = code_size_log + 1; // start from the only one node at the last stage + 1.
+  pp->state->bit_pos = 0;
+
+  return 0;
+}
+
+/*!
+ * Runs the SSC decoding algorithm from the root of the decoding tree and then recovers the
+ * message from the estimated coded bits. The decoder must have been prepared with
+ * init_polar_decoder_ssc_c_avx2() for the codeword to decode.
+ *
+ * \param[in, out] p A pointer to the decoder.
+ * \param[out] data_decoded The decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 if \a p is NULL.
+ */
+int polar_decoder_ssc_c_avx2(void* p, uint8_t* data_decoded)
+{
+  struct pSSC_c_avx2* dec = p;
+
+  if (dec == NULL) {
+    return -1;
+  }
+
+  const uint8_t n = dec->param->code_size_log;
+
+  simplified_node(dec);
+
+  // est_bit holds the coded bits: encoding them again yields the message
+  srslte_polar_encoder_encode(dec->enc, dec->est_bit, data_decoded, n);
+
+  // transform {0,-128} into {0, 1}
+  srslte_vec_sign_to_bit_c_avx2(data_decoded, 1U << n);
+
+  return 0;
+}
+
+/*!
+ * Moves one stage down in the decoding tree, processes the active node of that stage according
+ * to its type and moves back up.
+ *
+ * - ::RATE_1: hard decision on the LLRs of the stage, stored at the current bit position.
+ * - ::RATE_0: nothing is written (the estimated bits are assumed to be already 0).
+ * - ::RATE_R: function-f, left child, function-g, right child and xor of both results.
+ *
+ * An unknown node type is reported on the standard output and terminates the program.
+ */
+static void simplified_node(struct pSSC_c_avx2* p)
+{
+
+  struct pSSC_c_avx2* pp = p;
+
+  pp->state->stage--; // to child node.
+
+  uint8_t  stage    = pp->state->stage;
+  uint16_t node_idx = pp->state->bit_pos >> stage; // index of the active node within this stage
+  uint8_t* estbits0 = NULL;
+  uint8_t* estbits1 = NULL;
+
+  uint16_t stage_size = pp->param->code_stage_size[stage];
+  // Leaves (stage 0) have no children: do not look up the size of stage -1, which lies
+  // outside the code_stage_size array.
+  uint16_t stage_half_size = (stage > 0) ? pp->param->code_stage_size[stage - 1] : 0;
+
+  switch (pp->param->node_type[stage][node_idx]) {
+
+    case RATE_1:
+      pp->hard_bit(pp->llr0[stage], pp->est_bit + pp->state->bit_pos, stage_size);
+
+      pp->state->bit_pos = pp->state->bit_pos + stage_size;
+      break;
+
+    case RATE_0:
+      pp->state->bit_pos = pp->state->bit_pos + stage_size;
+      break;
+
+    case RATE_R:
+
+      pp->f(pp->llr0[stage], pp->llr1[stage], pp->llr0[stage - 1], stage_half_size);
+
+      // move to the child node to the left (up) of the tree.
+      simplified_node(pp);
+
+      // the left child has just estimated the stage_half_size bits that precede bit_pos
+      estbits0 = pp->est_bit + pp->state->bit_pos - stage_half_size;
+      pp->g(estbits0, pp->llr0[stage], pp->llr1[stage], pp->llr0[stage - 1], stage_half_size);
+
+      // move to the child node to the right (down) of the tree.
+      simplified_node(pp);
+
+      // both children are done: combine the two halves of the bits of this node
+      estbits0 = pp->est_bit + pp->state->bit_pos - stage_size;
+      estbits1 = pp->est_bit + pp->state->bit_pos - stage_size + stage_half_size;
+      pp->xor (estbits0, estbits1, estbits0, stage_half_size);
+
+      break;
+
+    default:
+      printf("ERROR: wrong node type %d\n", pp->param->node_type[stage][node_idx]);
+      exit(-1);
+      break;
+  }
+
+  pp->state->stage++; // to parent node.
+}
+
+#endif // LV_HAVE_AVX2
--- a/lib/src/phy/fec/polar/polar_decoder_ssc_c_avx2.h
+++ b/lib/src/phy/fec/polar/polar_decoder_ssc_c_avx2.h
@ -0,0 +1,74 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_ssc_c_avx2.h
+ * \brief Declaration of the SSC polar decoder inner functions working with
+ * 8-bit integer-valued LLRs and AVX2 instructions
+ * \author Jesus Gomez (CTTC) \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef POLAR_DECODER_SSC_C_AVX2_H
+#define POLAR_DECODER_SSC_C_AVX2_H
+
+#include "polar_decoder_ssc_all.h"
+
+/*!
+ * Creates an SSC polar decoder structure of type pSSC_c_avx2, and allocates memory for the decoding buffers.
+ *
+ * \param[in] frozen_set The position of the frozen bits in the codeword.
+ * \param[in] frozen_set_size Number of frozen bits.
+ * \param[in] code_size_log \f$log_2\f$ of the number of bits in the codeword.
+ * \return A pointer to a pSSC_c_avx2 structure if the function executes correctly, NULL otherwise.
+ */
+void* create_polar_decoder_ssc_c_avx2(uint16_t* frozen_set, uint8_t code_size_log, uint16_t frozen_set_size);
+
+/*!
+ * The (8-bit, avx2) polar decoder SSC "destructor": it frees all the resources allocated to the decoder.
+ *
+ * \param[in, out] p A pointer to the dismantled decoder.
+ */
+void delete_polar_decoder_ssc_c_avx2(void* p);
+
+/*!
+ * Initializes an (8-bit, avx2) SSC polar decoder before processing a new codeword.
+ *
+ * \param[in, out] p A void pointer used to declare a pSSC_c_avx2 structure.
+ * \param[in] llr LLRs for the new codeword.
+ * \param[out] data_decoded Pointer to the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int init_polar_decoder_ssc_c_avx2(void* p, const int8_t* llr, uint8_t* data_decoded);
+
+/*!
+ * Decodes a data message from a 8 bit resolution codeword with the specified decoder. Note that
+ * a pointer to the codeword LLRs is included in \a p and initialized by init_polar_decoder_ssc_c_avx2().
+ *
+ * \param[in] p A pointer to the desired decoder.
+ * \param[out] data The decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int polar_decoder_ssc_c_avx2(void* p, uint8_t* data);
+
+#endif // POLAR_DECODER_SSC_C_AVX2_H
--- a/lib/src/phy/fec/polar/polar_decoder_ssc_f.c
+++ b/lib/src/phy/fec/polar/polar_decoder_ssc_f.c
@ -0,0 +1,416 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_ssc_f.c
+ * \brief Definition of the SSC polar decoder inner functions working with
+ * float-valued LLRs.
+ *
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include "polar_decoder_ssc_f.h"
+#include "../utils_avx2.h"
+#include "polar_decoder_vector.h"
+#include "srslte/phy/fec/polar/polar_encoder.h"
+#include "srslte/phy/utils/vector.h"
+
+/*!
+ * \brief Describes an SSC polar decoder (float version).
+ */
+struct pSSC_f {
+  float**                 llr0;    /*!< \brief Pointers to the upper half of LLRs values at all stages. */
+  float**                 llr1;    /*!< \brief Pointers to the lower half of LLRs values at all stages. */
+  uint8_t*                est_bit; /*!< \brief Pointer to the temporary estimated bits. */
+  struct Params*          param;   /*!< \brief Pointer to a Params structure. */
+  struct State*           state;   /*!< \brief Pointer to a State. */
+  srslte_polar_encoder_t* enc;     /*!< \brief Pointer to a srslte_polar_encoder_t. */
+  void (*f)(const float* x, const float* y, float* z, const uint16_t len); /*!< \brief Pointer to the function-f. */
+  void (*g)(const uint8_t* b,
+            const float*   x,
+            const float*   y,
+            float*         z,
+            const uint16_t len); /*!< \brief Pointer to the function-g. */
+  void (*xor)(const uint8_t* x,
+              const uint8_t* y,
+              uint8_t*       z,
+              const uint32_t len);                                  /*!< \brief Pointer to the xor function. */
+  void (*hard_bit)(const float* x, uint8_t* z, const uint16_t len); /*!< \brief Pointer to the hard-bit function. */
+};
+
+/*!
+ * Switches between the different types of node (::RATE_1, ::RATE_0, ::RATE_R) for the SSC algorithm.
+ * Nodes in the decoding tree at stage \f$ s\f$ get the \f$2^s\f$ LLRs from the parent node and
+ * return the associated \f$2^s\f$ estimated bits.
+ *
+ */
+static void simplified_node(void* p, uint8_t* message);
+
+/*!
+ * All decoded bits below a ::RATE_0 node are 0. The function updates the \a p->state->active_node_per_stage
+ * pointer to point to the next active node. It is assumed that message bits are initialized to 0.
+ *
+ */
+static void rate_0_node(void* p);
+
+/*!
+ * ::RATE_1 nodes at stage \f$ s \f$ return the associated \f$2^s\f$ estimated bits by
+ * making a hard decision on them.
+ * ::RATE_1 nodes also update message bits vector.
+ *
+ */
+static void rate_1_node(void* p, uint8_t* message);
+
+/*!
+ * ::RATE_R nodes at stage \f$ s \f$ return the associated \f$2^s\f$ decoded bit by calling
+ * the child nodes to the right and left of the decoding tree and then polar encoding (xor) their output.
+ * At stage \f$ s \f$, this function runs function srslte_vec_function_f_fff() and srslte_vec_function_g_bfff()
+ * with vector size \f$2^{ s - 1}\f$ and updates \a llr0 and \a llr1 memory space for stage \f$(s - 1)\f$.
+ * This function also runs srslte_vec_xor_bbb() with vector size \f$2^{s-1}\f$ and
+ * updates \a estbits memory space for stage \f$(s + 1)\f$.
+ *
+ */
+static void rate_r_node(void* p, uint8_t* message);
+
+/*!
+ * Prepares a float SSC polar decoder for a new codeword: clears the output message and the
+ * estimated-bit buffer, loads the input LLRs into the buffers of the last stage and resets the
+ * state of the decoding tree.
+ *
+ * \param[in, out] p            Pointer to the decoder (a pSSC_f structure).
+ * \param[in]      input_llr    LLRs of the new codeword (one per codeword bit).
+ * \param[out]     data_decoded Buffer for the decoded message, cleared here (one byte per codeword bit).
+ * \return 0 on success, -1 if any of the pointers is NULL.
+ */
+int init_polar_decoder_ssc_f(void* p, const float* input_llr, uint8_t* data_decoded)
+{
+  struct pSSC_f* pp = p;
+
+  if (p == NULL || input_llr == NULL || data_decoded == NULL) {
+    return -1;
+  }
+
+  // Sizes are kept unsigned, like the code_stage_size table they come from: a signed 16-bit
+  // variable would turn a 2^15-bit codeword into a negative length for memset.
+  uint8_t  code_size_log = pp->param->code_size_log;
+  uint16_t code_size     = pp->param->code_stage_size[code_size_log];
+  // Stage sizes are powers of two, so halving equals code_stage_size[code_size_log - 1] and
+  // avoids indexing element -1 when code_size_log is 0.
+  uint16_t code_half_size = code_size / 2;
+
+  // Initializes the data_decoded_vector to all zeros
+  memset(data_decoded, 0, code_size);
+
+  // Initialize est_bit vector to all zeros
+  memset(pp->est_bit, 0, code_size);
+
+  // Initializes LLR buffer for the last stage/level with the input LLRs values
+  for (uint16_t i = 0; i < code_half_size; i++) {
+    pp->llr0[code_size_log][i] = input_llr[i];
+    pp->llr1[code_size_log][i] = input_llr[i + code_half_size];
+  }
+
+  // Initializes the state of the decoding tree
+  pp->state->stage = code_size_log + 1; // start from the only one node at the last stage + 1.
+  for (uint16_t i = 0; i < code_size_log + 1; i++) {
+    pp->state->active_node_per_stage[i] = 0;
+  }
+  pp->state->flag_finished = false;
+
+  return 0;
+}
+
+/*!
+ * Entry point of the float SSC decoder: runs the decoding tree from its current state.
+ *
+ * \param[in, out] p            Pointer to the decoder.
+ * \param[out]     data_decoded Buffer receiving the decoded message.
+ * \return 0 if the decoder was run, -1 if \a p is NULL.
+ */
+int polar_decoder_ssc_f(void* p, uint8_t* data_decoded)
+{
+  if (p != NULL) {
+    simplified_node(p, data_decoded);
+    return 0;
+  }
+
+  return -1;
+}
+
+/*!
+ * Releases every resource owned by a float SSC polar decoder, including the decoder itself.
+ * Calling it with a NULL pointer does nothing.
+ *
+ * \param[in, out] p Pointer to the decoder to dismantle.
+ */
+void delete_polar_decoder_ssc_f(void* p)
+{
+  struct pSSC_f* dec = p;
+
+  if (dec == NULL) {
+    return;
+  }
+
+  // LLR storage: the buffer shared by all stages first, then the per-stage pointer tables
+  free(dec->llr0[0]);
+  free(dec->llr0);
+  free(dec->llr1);
+
+  // temporary estimated bits
+  free(dec->est_bit);
+
+  // algorithm parameters
+  free(dec->param->node_type[0]);
+  free(dec->param->node_type);
+  free(dec->param->code_stage_size);
+  free(dec->param);
+
+  // decoding-tree state
+  free(dec->state->active_node_per_stage);
+  free(dec->state);
+
+  // inner encoder: release its internals before the structure itself
+  srslte_polar_encoder_free(dec->enc);
+  free(dec->enc);
+
+  free(dec);
+}
+
+/*!
+ * Creates a float SSC polar decoder: allocates the decoder structure, its inner polar encoder,
+ * the parameter and state structures, the estimated-bit buffer, the LLR buffers of all stages
+ * and the node-type tables, and finally calls init_node_type() with the given frozen set.
+ *
+ * If any allocation fails, everything allocated so far is released and NULL is returned.
+ * A successfully created decoder must be released with delete_polar_decoder_ssc_f().
+ *
+ * \param[in] frozen_set      The position of the frozen bits in the codeword.
+ * \param[in] code_size_log   \f$log_2\f$ of the number of bits in the codeword.
+ * \param[in] frozen_set_size Number of frozen bits.
+ * \return A pointer to the new pSSC_f structure, or NULL if memory could not be allocated.
+ */
+void* create_polar_decoder_ssc_f(uint16_t* frozen_set, const uint8_t code_size_log, const uint16_t frozen_set_size)
+{
+  struct pSSC_f* pp               = NULL; // pointer to the polar decoder instance
+  float*         llr_buffer       = NULL; // one buffer holding the LLRs of all stages
+  uint8_t*       node_type_buffer = NULL; // one buffer holding the node types of all stages
+  uint16_t       est_bits_size    = 0;
+  uint32_t       llr_all_stages   = 0;
+
+  // allocate memory to the polar decoder instance
+  if ((pp = malloc(sizeof(struct pSSC_f))) == NULL) {
+    return NULL;
+  }
+
+  // Nothing is owned yet: NULL members let the single cleanup path below free unconditionally.
+  pp->llr0    = NULL;
+  pp->llr1    = NULL;
+  pp->est_bit = NULL;
+  pp->param   = NULL;
+  pp->state   = NULL;
+  pp->enc     = NULL;
+
+  // set functions
+  pp->f        = srslte_vec_function_f_fff;
+  pp->g        = srslte_vec_function_g_bfff;
+  pp->xor      = srslte_vec_xor_bbb;
+  pp->hard_bit = srslte_vec_hard_bit_fc;
+
+  // encoder of maximum size
+  if ((pp->enc = malloc(sizeof(srslte_polar_encoder_t))) == NULL) {
+    goto clean_exit;
+  }
+  // NOTE(review): the result of the encoder initialization is not checked (as before) - confirm
+  // whether srslte_polar_encoder_init() can fail and report it.
+  srslte_polar_encoder_init(pp->enc, SRSLTE_POLAR_ENCODER_PIPELINED, code_size_log);
+
+  // algorithm constants/parameters
+  if ((pp->param = malloc(sizeof(struct Params))) == NULL) {
+    goto clean_exit;
+  }
+  pp->param->code_stage_size = NULL;
+  pp->param->node_type       = NULL;
+  pp->param->code_size_log   = code_size_log;
+  pp->param->frozen_set_size = frozen_set_size;
+
+  if ((pp->param->code_stage_size = malloc((code_size_log + 1) * sizeof(uint16_t))) == NULL) {
+    goto clean_exit;
+  }
+
+  // code_stage_size[s] = 2^s
+  pp->param->code_stage_size[0] = 1;
+  for (uint8_t i = 1; i < code_size_log + 1; i++) {
+    pp->param->code_stage_size[i] = 2 * pp->param->code_stage_size[i - 1];
+  }
+
+  // state  -- initialized in init_polar_decoder_ssc_f
+  if ((pp->state = malloc(sizeof(struct State))) == NULL) {
+    goto clean_exit;
+  }
+  pp->state->active_node_per_stage = NULL;
+
+  if ((pp->state->active_node_per_stage = malloc((code_size_log + 1) * sizeof(uint16_t))) == NULL) {
+    goto clean_exit;
+  }
+
+  // allocates memory for estimated bits per stage
+  est_bits_size = pp->param->code_stage_size[code_size_log];
+
+  pp->est_bit = aligned_alloc(SRSLTE_AVX2_B_SIZE, est_bits_size); // every 32 chars are aligned
+  if (pp->est_bit == NULL) {
+    goto clean_exit;
+  }
+
+  // allocate memory for LLR pointers.
+  pp->llr0 = malloc((code_size_log + 1) * sizeof(float*));
+  pp->llr1 = malloc((code_size_log + 1) * sizeof(float*));
+  if (pp->llr0 == NULL || pp->llr1 == NULL) {
+    goto clean_exit;
+  }
+
+  // There are LLR buffers for n = 0 to n = code_size_log. Each with size 2^n. Thus,
+  // the total memory needed is 2^(n+1)-1.
+  // Only the stages starting at multiples of SRSLTE_AVX2_B_SIZE are aligned.
+
+  // Let n_simd_llr be the exponent of the SIMD size in number of LLRs.
+  // i.e. in a SIMD instruction we can load 2^(n_simd_llr) LLR values
+  // then the memory for stages s >= n_simd_llr - 1 is aligned.
+  // but only the operations at stages s > n_simd_llr have all the inputs aligned.
+  // A 32-bit count is used so that 2^(code_size_log + 1) is not truncated for 2^15-bit codewords.
+  llr_all_stages = 1U << (code_size_log + 1);
+
+  llr_buffer = aligned_alloc(SRSLTE_AVX2_B_SIZE, llr_all_stages * sizeof(float)); // 32*8=256
+  if (llr_buffer == NULL) {
+    goto clean_exit;
+  }
+
+  // initialize all LLR pointers
+  pp->llr0[0] = llr_buffer;
+  pp->llr1[0] = pp->llr0[0] + 1;
+  for (uint8_t s = 1; s < code_size_log + 1; s++) {
+    pp->llr0[s] = pp->llr0[0] + pp->param->code_stage_size[s];
+    pp->llr1[s] = pp->llr0[0] + pp->param->code_stage_size[s] + pp->param->code_stage_size[s - 1];
+  }
+
+  // allocate memory for node type pointers, one per stage.
+  pp->param->node_type = malloc((code_size_log + 1) * sizeof(uint8_t*));
+  if (pp->param->node_type == NULL) {
+    goto clean_exit;
+  }
+
+  // allocate memory to node_type_ssc. Stage s has  2^(N-s) nodes s=0,...,N.
+  // Thus, same size as LLRs all stages.
+  node_type_buffer = aligned_alloc(SRSLTE_AVX2_B_SIZE, llr_all_stages * sizeof(uint8_t)); // 32*8=256
+  if (node_type_buffer == NULL) {
+    goto clean_exit;
+  }
+
+  // initialize all node type pointers. (stage 0 is the first, opposite to LLRs)
+  pp->param->node_type[0] = node_type_buffer;
+  for (uint8_t s = 1; s < code_size_log + 1; s++) {
+    pp->param->node_type[s] = pp->param->node_type[s - 1] + pp->param->code_stage_size[code_size_log - s + 1];
+  }
+
+  init_node_type(frozen_set, pp->param);
+
+  return pp;
+
+clean_exit:
+  // Release whatever was allocated before the failure (free(NULL) is a no-op).
+  free(node_type_buffer);
+  free(llr_buffer);
+  free(pp->llr1);
+  free(pp->llr0);
+  free(pp->est_bit);
+  if (pp->state != NULL) {
+    free(pp->state->active_node_per_stage);
+    free(pp->state);
+  }
+  if (pp->param != NULL) {
+    free(pp->param->node_type);
+    free(pp->param->code_stage_size);
+    free(pp->param);
+  }
+  if (pp->enc != NULL) {
+    // same teardown as in delete_polar_decoder_ssc_f: encoder internals, then the structure
+    srslte_polar_encoder_free(pp->enc);
+    free(pp->enc);
+  }
+  free(pp);
+  return NULL;
+}
+
+/*
+ * Steps down to the child stage, dispatches the active node of that stage to the handler
+ * matching its type and steps back up to the parent stage.
+ */
+static void simplified_node(void* p, uint8_t* message)
+{
+  struct pSSC_f* dec = p;
+
+  // descend: the node to process belongs to the stage below the current one
+  dec->state->stage--;
+
+  const uint8_t  level = dec->state->stage;
+  const uint16_t node  = dec->state->active_node_per_stage[level];
+  const uint8_t  type  = dec->param->node_type[level][node];
+
+  if (type == RATE_1) {
+    rate_1_node(dec, message);
+  } else if (type == RATE_0) {
+    rate_0_node(dec);
+  } else if (type == RATE_R) {
+    rate_r_node(dec, message);
+  } else {
+    printf("ERROR: wrong node type %d\n", dec->param->node_type[level][node]);
+    exit(-1);
+  }
+
+  // ascend: back to the parent node
+  dec->state->stage++;
+}
+
+/*
+ * Handles a RATE_0 node: no bit is written. Either the decoder is flagged as finished (the
+ * active stage-0 node is the last codeword bit) or the active-node indices of stages
+ * 0..stage are advanced past this node.
+ */
+static void rate_0_node(void* p)
+{
+  struct pSSC_f* dec = p;
+
+  const uint8_t  level      = dec->state->stage;
+  const uint8_t  n          = dec->param->code_size_log;
+  const int16_t  total_bits = dec->param->code_stage_size[n];
+  const uint16_t first_bit  = dec->state->active_node_per_stage[0];
+
+  if (first_bit == total_bits - 1) {
+    dec->state->flag_finished = true;
+    return;
+  }
+
+  // stage i advances by the number of stage-i nodes contained in this node: 2^(level - i)
+  for (uint8_t i = 0; i <= level; i++) {
+    dec->state->active_node_per_stage[i] += dec->param->code_stage_size[level - i];
+  }
+}
+
+/*
+ * Handles a RATE_1 node: takes a hard decision on the node LLRs, stores the resulting bits in
+ * est_bit, derives the corresponding message bits (through the polar encoder when the node
+ * spans more than one bit) and advances the active-node indices of stages 0..stage.
+ */
+static void rate_1_node(void* p, uint8_t* message)
+{
+  struct pSSC_f* dec = p;
+
+  const uint8_t  level     = dec->state->stage;
+  const uint16_t first_bit = dec->state->active_node_per_stage[0];
+  const uint16_t node_bits = dec->param->code_stage_size[level];
+  uint8_t*       hard      = dec->est_bit + first_bit;
+
+  dec->hard_bit(dec->llr0[level], hard, node_bits);
+
+  if (level == 0) {
+    // single bit: the message bit is the hard decision itself
+    message[first_bit] = hard[0];
+  } else {
+    srslte_polar_encoder_encode(dec->enc, hard, message + first_bit, level);
+  }
+
+  // stage i advances by the number of stage-i nodes contained in this node: 2^(level - i)
+  for (uint8_t i = 0; i <= level; i++) {
+    dec->state->active_node_per_stage[i] += dec->param->code_stage_size[level - i];
+  }
+
+  // the whole codeword has been processed once the stage-0 index reaches the code size
+  if (dec->state->active_node_per_stage[0] == dec->param->code_stage_size[dec->param->code_size_log]) {
+    dec->state->flag_finished = true;
+  }
+}
+
+/*
+ * Handles a RATE_R node at the current stage: computes the LLRs of the left child (function f),
+ * processes it, computes the LLRs of the right child (function g, using the bits estimated by
+ * the left child), processes it and finally xors the two halves of estimated bits in place.
+ * Returns early, without the remaining steps, as soon as a child sets state->flag_finished.
+ */
+static void rate_r_node(void* p, uint8_t* message)
+{
+  struct pSSC_f* pp              = p;
+  uint8_t*       estbits0        = NULL;
+  uint8_t*       estbits1        = NULL;
+  uint16_t       bit_pos         = 0;
+  int16_t        offset0         = 0;
+  int16_t        offset1         = 0;
+  uint8_t        stage           = pp->state->stage;
+  uint16_t       stage_size      = pp->param->code_stage_size[stage];
+  uint16_t       stage_half_size = pp->param->code_stage_size[stage - 1];
+
+  // LLRs for the left child are written to the buffer of the stage below
+  pp->f(pp->llr0[stage], pp->llr1[stage], pp->llr0[stage - 1], stage_half_size);
+
+  // move to the child node to the left (up) of the tree.
+  simplified_node(pp, message);
+  if (pp->state->flag_finished == true) { // (just in case). However for 5G frozen sets, the code can never end here.
+    return;
+  }
+
+  // the stage-0 index now points right after the bits estimated by the left child
+  bit_pos  = pp->state->active_node_per_stage[0];
+  offset0  = bit_pos - stage_half_size;
+  estbits0 = pp->est_bit + offset0;
+
+  // LLRs for the right child overwrite the same buffer of the stage below
+  pp->g(estbits0, pp->llr0[stage], pp->llr1[stage], pp->llr0[stage - 1], stage_half_size);
+  // move to the child node to the right (down) of the tree.
+  simplified_node(pp, message);
+  if (pp->state->flag_finished == true) {
+    return;
+  }
+
+  // both children are done: locate the stage_size estimated bits of this node
+  bit_pos = pp->state->active_node_per_stage[0];
+
+  offset0  = bit_pos - stage_size;
+  offset1  = offset0 + stage_half_size;
+  estbits0 = pp->est_bit + offset0;
+  estbits1 = pp->est_bit + offset1;
+
+  // first half <- first half xor second half (the second half is left untouched)
+  pp->xor (estbits0, estbits1, estbits0, stage_half_size);
+
+  // update this node index
+  pp->state->active_node_per_stage[stage] = pp->state->active_node_per_stage[stage] + 1; // return to the father node
+}
--- a/lib/src/phy/fec/polar/polar_decoder_ssc_f.h
+++ b/lib/src/phy/fec/polar/polar_decoder_ssc_f.h
@ -0,0 +1,71 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_ssc_f.h
+ * \brief Declaration of the SSC polar decoder inner functions working with
+ * float-valued LLRs.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef POLAR_DECODER_SSC_F_H
+#define POLAR_DECODER_SSC_F_H
+
+#include "polar_decoder_ssc_all.h"
+
+/*!
+ * Creates an SSC polar decoder structure of type pSSC, and allocates memory for the decoding buffers.
+ * \param[in] frozen_set The position of the frozen bits in the codeword.
+ * \param[in] frozen_set_size Number of frozen bits.
+ * \param[in] code_size_log \f$log_2\f$ of the number of bits in the codeword.
+ * \return A pointer to a pSSC structure if the function executes correctly, NULL otherwise.
+ */
+void* create_polar_decoder_ssc_f(uint16_t* frozen_set, uint8_t code_size_log, uint16_t frozen_set_size);
+
+/*!
+ * The polar decoder SSC "destructor": it frees all the resources allocated to the decoder.
+ * \param[in, out] p A pointer to the dismantled decoder.
+ */
+void delete_polar_decoder_ssc_f(void* p);
+
+/*!
+ * Initializes an SSC polar decoder before processing a new codeword.
+ * \param[in, out] p A void pointer used to declare a pSSC structure.
+ * \param[in] llr LLRs for the new codeword.
+ * \param[out] data_decoded Pointer to the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int init_polar_decoder_ssc_f(void* p, const float* llr, uint8_t* data_decoded);
+
+/*!
+ * Decodes a data message from a codeword with the specified decoder. Note that
+ * a pointer to the codeword LLRs is included in \a p and initialized by init_polar_decoder_ssc_f().
+ * \param[in] p A pointer to the desired decoder.
+ * \param[out] data The decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int polar_decoder_ssc_f(void* p, uint8_t* data);
+
+#endif // POLAR_DECODER_SSC_F_H
--- a/lib/src/phy/fec/polar/polar_decoder_ssc_s.c
+++ b/lib/src/phy/fec/polar/polar_decoder_ssc_s.c
@ -0,0 +1,430 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_ssc_s.c
+ * \brief Definition of the SSC polar decoder inner functions working with
+ * 16-bit integer-valued LLRs.
+ *
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+// IMPORTANT: polar_decoder_SSC_s.c is exactly the polar_decoder_SSC_f.c except for:
+// (1) #include "polar_decoder_ssc_s.h"
+// (2) the naming of the external function, which finish with _s instead of _f
+// (3) the initialization of the function pointers in create_polar_decoder_ssc_s:
+//  pp->f        = srslte_vec_function_f_sss;
+//  pp->g        = srslte_vec_function_g_bsss;
+//  pp->xor      = srslte_vec_xor_bbb;
+//  pp->hard_bit = srslte_vec_hard_bit_sc;
+
+#include "polar_decoder_ssc_s.h"
+#include "../utils_avx2.h"
+#include "polar_decoder_vector.h"
+#include "srslte/phy/fec/polar/polar_encoder.h"
+#include "srslte/phy/utils/vector.h"
+
+/*!
+ * \brief Type indicator for printing LLRs if debugging
+ */
+#define PRIllr "d" // for printing llrs if debugging
+
+/*!
+ * \brief Describes an SSC polar decoder (16-bit version).
+ */
+struct pSSC_s {
+  int16_t**               llr0;    /*!< \brief Pointers to the upper half of LLRs values at all stages. */
+  int16_t**               llr1;    /*!< \brief Pointers to the lower half of LLRs values at all stages. */
+  uint8_t*                est_bit; /*!< \brief Pointer to the temporary estimated bits. */
+  struct Params*          param;   /*!< \brief Pointer to a Params structure. */
+  struct State*           state;   /*!< \brief Pointer to a State. */
+  srslte_polar_encoder_t* enc;     /*!< \brief Pointer to a srslte_polar_encoder_t. */
+  void (*f)(const int16_t* x,
+            const int16_t* y,
+            int16_t*       z,
+            const uint16_t len); /*!< \brief Pointer to the function-f. */
+  void (*g)(const uint8_t* b,
+            const int16_t* x,
+            const int16_t* y,
+            int16_t*       z,
+            const uint16_t len); /*!< \brief Pointer to the function-g. */
+  void (*xor)(const uint8_t* x,
+              const uint8_t* y,
+              uint8_t*       z,
+              const uint32_t len);                                    /*!< \brief Pointer to the xor function. */
+  void (*hard_bit)(const int16_t* x, uint8_t* z, const uint16_t len); /*!< \brief Pointer to the hard-bit function. */
+};
+
+/*!
+ * Switches between the different types of node (::RATE_1, ::RATE_0, ::RATE_R) for the SSC algorithm.
+ * Nodes in the decoding tree at stage \f$ s\f$ get the \f$2^s\f$ LLRs from the parent node and
+ * return the associated \f$2^s\f$ estimated bits.
+ *
+ */
+static void simplified_node(void* p, uint8_t* message);
+
+/*!
+ * All decoded bits below a ::RATE_0 node are 0. The function updates the \a p->state->active_node_per_stage
+ * pointer to point to the next active node. It is assumed that message bits are initialized to 0.
+ *
+ */
+static void rate_0_node(void* p);
+
+/*!
+ * ::RATE_1 nodes at stage \f$ s \f$ return the associated \f$2^s\f$ estimated bits by
+ * making a hard decision on them.
+ * ::RATE_1 nodes also update message bits vector.
+ *
+ */
+static void rate_1_node(void* p, uint8_t* message);
+
+/*!
+ * ::RATE_R nodes at stage \f$ s \f$ return the associated \f$2^s\f$ decoded bit by calling
+ * the child nodes to the right and left of the decoding tree and then polar encoding (xor) their output.
+ * At stage \f$ s \f$, this function runs function srslte_vec_function_f_sss() and srslte_vec_function_g_bsss()
+ * with vector size \f$2^{ s - 1}\f$ and updates \a llr0 and \a llr1 memory space for stage \f$(s - 1)\f$.
+ * This function also runs srslte_vec_xor_bbb() with vector size \f$2^{s-1}\f$ and
+ * updates \a estbits memory space for stage \f$(s + 1)\f$.
+ *
+ */
+static void rate_r_node(void* p, uint8_t* message);
+
+/*!
+ * Prepares a 16-bit SSC polar decoder for a new codeword: clears the output message and the
+ * estimated-bit buffer, loads the input LLRs into the buffers of the last stage and resets the
+ * state of the decoding tree.
+ *
+ * \param[in, out] p            Pointer to the decoder (a pSSC_s structure).
+ * \param[in]      input_llr    LLRs of the new codeword (one per codeword bit).
+ * \param[out]     data_decoded Buffer for the decoded message, cleared here (one byte per codeword bit).
+ * \return 0 on success, -1 if any of the pointers is NULL.
+ */
+int init_polar_decoder_ssc_s(void* p, const int16_t* input_llr, uint8_t* data_decoded)
+{
+  struct pSSC_s* pp = p;
+
+  if (p == NULL || input_llr == NULL || data_decoded == NULL) {
+    return -1;
+  }
+
+  // Sizes are kept unsigned, like the code_stage_size table they come from: a signed 16-bit
+  // variable would turn a 2^15-bit codeword into a negative length for memset.
+  uint8_t  code_size_log = pp->param->code_size_log;
+  uint16_t code_size     = pp->param->code_stage_size[code_size_log];
+  // Stage sizes are powers of two, so halving equals code_stage_size[code_size_log - 1] and
+  // avoids indexing element -1 when code_size_log is 0.
+  uint16_t code_half_size = code_size / 2;
+
+  // Initializes the data_decoded_vector to all zeros
+  memset(data_decoded, 0, code_size);
+
+  // Initialize est_bit vector to all zeros
+  memset(pp->est_bit, 0, code_size);
+
+  // Initializes LLR buffer for the last stage/level with the input LLRs values
+  for (uint16_t i = 0; i < code_half_size; i++) {
+    pp->llr0[code_size_log][i] = input_llr[i];
+    pp->llr1[code_size_log][i] = input_llr[i + code_half_size];
+  }
+
+  // Initializes the state of the decoding tree
+  pp->state->stage = code_size_log + 1; // start from the only one node at the last stage + 1.
+  for (uint16_t i = 0; i < code_size_log + 1; i++) {
+    pp->state->active_node_per_stage[i] = 0;
+  }
+  pp->state->flag_finished = false;
+
+  return 0;
+}
+
+/*!
+ * Entry point of the 16-bit SSC decoder: runs the decoding tree from its current state.
+ *
+ * \param[in, out] p            Pointer to the decoder.
+ * \param[out]     data_decoded Buffer receiving the decoded message.
+ * \return 0 if the decoder was run, -1 if \a p is NULL.
+ */
+int polar_decoder_ssc_s(void* p, uint8_t* data_decoded)
+{
+  if (p != NULL) {
+    simplified_node(p, data_decoded);
+    return 0;
+  }
+
+  return -1;
+}
+
+/*!
+ * Releases every resource owned by a 16-bit SSC polar decoder, including the decoder itself.
+ * Calling it with a NULL pointer does nothing.
+ *
+ * \param[in, out] p Pointer to the decoder to dismantle.
+ */
+void delete_polar_decoder_ssc_s(void* p)
+{
+  struct pSSC_s* dec = p;
+
+  if (dec == NULL) {
+    return;
+  }
+
+  // LLR storage: the buffer shared by all stages first, then the per-stage pointer tables
+  free(dec->llr0[0]);
+  free(dec->llr0);
+  free(dec->llr1);
+
+  // temporary estimated bits
+  free(dec->est_bit);
+
+  // algorithm parameters
+  free(dec->param->node_type[0]);
+  free(dec->param->node_type);
+  free(dec->param->code_stage_size);
+  free(dec->param);
+
+  // decoding-tree state
+  free(dec->state->active_node_per_stage);
+  free(dec->state);
+
+  // inner encoder: release its internals before the structure itself
+  srslte_polar_encoder_free(dec->enc);
+  free(dec->enc);
+
+  free(dec);
+}
+
+/*!
+ * Creates a 16-bit SSC polar decoder: allocates the decoder structure, its inner polar encoder,
+ * the parameter and state structures, the estimated-bit buffer, the LLR buffers of all stages
+ * and the node-type tables, and finally calls init_node_type() with the given frozen set.
+ *
+ * If any allocation fails, everything allocated so far is released and NULL is returned.
+ * A successfully created decoder must be released with delete_polar_decoder_ssc_s().
+ *
+ * \param[in] frozen_set      The position of the frozen bits in the codeword.
+ * \param[in] code_size_log   \f$log_2\f$ of the number of bits in the codeword.
+ * \param[in] frozen_set_size Number of frozen bits.
+ * \return A pointer to the new pSSC_s structure, or NULL if memory could not be allocated.
+ */
+void* create_polar_decoder_ssc_s(uint16_t* frozen_set, const uint8_t code_size_log, const uint16_t frozen_set_size)
+{
+  struct pSSC_s* pp               = NULL; // pointer to the polar decoder instance
+  int16_t*       llr_buffer       = NULL; // one buffer holding the LLRs of all stages
+  uint8_t*       node_type_buffer = NULL; // one buffer holding the node types of all stages
+  uint16_t       est_bits_size    = 0;
+  uint32_t       llr_all_stages   = 0;
+
+  // allocate memory to the polar decoder instance
+  if ((pp = malloc(sizeof(struct pSSC_s))) == NULL) {
+    return NULL;
+  }
+
+  // Nothing is owned yet: NULL members let the single cleanup path below free unconditionally.
+  pp->llr0    = NULL;
+  pp->llr1    = NULL;
+  pp->est_bit = NULL;
+  pp->param   = NULL;
+  pp->state   = NULL;
+  pp->enc     = NULL;
+
+  // set functions
+  pp->f        = srslte_vec_function_f_sss;
+  pp->g        = srslte_vec_function_g_bsss;
+  pp->xor      = srslte_vec_xor_bbb;
+  pp->hard_bit = srslte_vec_hard_bit_sc;
+
+  // encoder of maximum size
+  if ((pp->enc = malloc(sizeof(srslte_polar_encoder_t))) == NULL) {
+    goto clean_exit;
+  }
+  // NOTE(review): the result of the encoder initialization is not checked (as before) - confirm
+  // whether srslte_polar_encoder_init() can fail and report it.
+  srslte_polar_encoder_init(pp->enc, SRSLTE_POLAR_ENCODER_PIPELINED, code_size_log);
+
+  // algorithm constants/parameters
+  if ((pp->param = malloc(sizeof(struct Params))) == NULL) {
+    goto clean_exit;
+  }
+  pp->param->code_stage_size = NULL;
+  pp->param->node_type       = NULL;
+  pp->param->code_size_log   = code_size_log;
+  pp->param->frozen_set_size = frozen_set_size;
+
+  if ((pp->param->code_stage_size = malloc((code_size_log + 1) * sizeof(uint16_t))) == NULL) {
+    goto clean_exit;
+  }
+
+  // code_stage_size[s] = 2^s
+  pp->param->code_stage_size[0] = 1;
+  for (uint8_t i = 1; i < code_size_log + 1; i++) {
+    pp->param->code_stage_size[i] = 2 * pp->param->code_stage_size[i - 1];
+  }
+
+  // state  -- initialized in init_polar_decoder_ssc_s
+  if ((pp->state = malloc(sizeof(struct State))) == NULL) {
+    goto clean_exit;
+  }
+  pp->state->active_node_per_stage = NULL;
+
+  if ((pp->state->active_node_per_stage = malloc((code_size_log + 1) * sizeof(uint16_t))) == NULL) {
+    goto clean_exit;
+  }
+
+  // allocates memory for estimated bits per stage
+  est_bits_size = pp->param->code_stage_size[code_size_log];
+
+  pp->est_bit = aligned_alloc(SRSLTE_AVX2_B_SIZE, est_bits_size); // every 32 chars are aligned
+  if (pp->est_bit == NULL) {
+    goto clean_exit;
+  }
+
+  // allocate memory for LLR pointers.
+  pp->llr0 = malloc((code_size_log + 1) * sizeof(int16_t*));
+  pp->llr1 = malloc((code_size_log + 1) * sizeof(int16_t*));
+  if (pp->llr0 == NULL || pp->llr1 == NULL) {
+    goto clean_exit;
+  }
+
+  // There are LLR buffers for n = 0 to n = code_size_log. Each with size 2^n. Thus,
+  // the total memory needed is 2^(n+1)-1.
+  // Only the stages starting at multiples of SRSLTE_AVX2_B_SIZE are aligned.
+
+  // Let n_simd_llr be the exponent of the SIMD size in number of LLRs.
+  // i.e. in a SIMD instruction we can load 2^(n_simd_llr) LLR values
+  // then the memory for stages s >= n_simd_llr - 1 is aligned.
+  // but only the operations at stages s > n_simd_llr have all the inputs aligned.
+  // A 32-bit count is used so that 2^(code_size_log + 1) is not truncated for 2^15-bit codewords.
+  llr_all_stages = 1U << (code_size_log + 1);
+
+  llr_buffer = aligned_alloc(SRSLTE_AVX2_B_SIZE, llr_all_stages * sizeof(int16_t)); // 32*8=256
+  if (llr_buffer == NULL) {
+    goto clean_exit;
+  }
+
+  // initialize all LLR pointers
+  pp->llr0[0] = llr_buffer;
+  pp->llr1[0] = pp->llr0[0] + 1;
+  for (uint8_t s = 1; s < code_size_log + 1; s++) {
+    pp->llr0[s] = pp->llr0[0] + pp->param->code_stage_size[s];
+    pp->llr1[s] = pp->llr0[0] + pp->param->code_stage_size[s] + pp->param->code_stage_size[s - 1];
+  }
+
+  // allocate memory for node type pointers, one per stage.
+  pp->param->node_type = malloc((code_size_log + 1) * sizeof(uint8_t*));
+  if (pp->param->node_type == NULL) {
+    goto clean_exit;
+  }
+
+  // allocate memory to node_type_ssc. Stage s has  2^(N-s) nodes s=0,...,N.
+  // Thus, same size as LLRs all stages.
+  node_type_buffer = aligned_alloc(SRSLTE_AVX2_B_SIZE, llr_all_stages * sizeof(uint8_t)); // 32*8=256
+  if (node_type_buffer == NULL) {
+    goto clean_exit;
+  }
+
+  // initialize all node type pointers. (stage 0 is the first, opposite to LLRs)
+  pp->param->node_type[0] = node_type_buffer;
+  for (uint8_t s = 1; s < code_size_log + 1; s++) {
+    pp->param->node_type[s] = pp->param->node_type[s - 1] + pp->param->code_stage_size[code_size_log - s + 1];
+  }
+
+  init_node_type(frozen_set, pp->param);
+
+  return pp;
+
+clean_exit:
+  // Release whatever was allocated before the failure (free(NULL) is a no-op).
+  free(node_type_buffer);
+  free(llr_buffer);
+  free(pp->llr1);
+  free(pp->llr0);
+  free(pp->est_bit);
+  if (pp->state != NULL) {
+    free(pp->state->active_node_per_stage);
+    free(pp->state);
+  }
+  if (pp->param != NULL) {
+    free(pp->param->node_type);
+    free(pp->param->code_stage_size);
+    free(pp->param);
+  }
+  if (pp->enc != NULL) {
+    // same teardown as in delete_polar_decoder_ssc_s: encoder internals, then the structure
+    srslte_polar_encoder_free(pp->enc);
+    free(pp->enc);
+  }
+  free(pp);
+  return NULL;
+}
+
+/*
+ * Steps down to the child stage, dispatches the active node of that stage to the handler
+ * matching its type and steps back up to the parent stage.
+ */
+static void simplified_node(void* p, uint8_t* message)
+{
+  struct pSSC_s* dec = p;
+
+  // descend: the node to process belongs to the stage below the current one
+  dec->state->stage--;
+
+  const uint8_t  level = dec->state->stage;
+  const uint16_t node  = dec->state->active_node_per_stage[level];
+  const uint8_t  type  = dec->param->node_type[level][node];
+
+  if (type == RATE_1) {
+    rate_1_node(dec, message);
+  } else if (type == RATE_0) {
+    rate_0_node(dec);
+  } else if (type == RATE_R) {
+    rate_r_node(dec, message);
+  } else {
+    printf("ERROR: wrong node type %d\n", dec->param->node_type[level][node]);
+    exit(-1);
+  }
+
+  // ascend: back to the parent node
+  dec->state->stage++;
+}
+
+/*
+ * Handles a RATE_0 node: no bit is written. Either the decoder is flagged as finished (the
+ * active stage-0 node is the last codeword bit) or the active-node indices of stages
+ * 0..stage are advanced past this node.
+ */
+static void rate_0_node(void* p)
+{
+  struct pSSC_s* dec = p;
+
+  const uint8_t  level      = dec->state->stage;
+  const uint8_t  n          = dec->param->code_size_log;
+  const int16_t  total_bits = dec->param->code_stage_size[n];
+  const uint16_t first_bit  = dec->state->active_node_per_stage[0];
+
+  if (first_bit == total_bits - 1) {
+    dec->state->flag_finished = true;
+    return;
+  }
+
+  // stage i advances by the number of stage-i nodes contained in this node: 2^(level - i)
+  for (uint8_t i = 0; i <= level; i++) {
+    dec->state->active_node_per_stage[i] += dec->param->code_stage_size[level - i];
+  }
+}
+
+/*
+ * Handles a RATE_1 node: takes a hard decision on the node LLRs, stores the resulting bits in
+ * est_bit, derives the corresponding message bits (through the polar encoder when the node
+ * spans more than one bit) and advances the active-node indices of stages 0..stage.
+ */
+static void rate_1_node(void* p, uint8_t* message)
+{
+  struct pSSC_s* dec = p;
+
+  const uint8_t  level     = dec->state->stage;
+  const uint16_t first_bit = dec->state->active_node_per_stage[0];
+  const uint16_t node_bits = dec->param->code_stage_size[level];
+  uint8_t*       hard      = dec->est_bit + first_bit;
+
+  dec->hard_bit(dec->llr0[level], hard, node_bits);
+
+  if (level == 0) {
+    // single bit: the message bit is the hard decision itself
+    message[first_bit] = hard[0];
+  } else {
+    srslte_polar_encoder_encode(dec->enc, hard, message + first_bit, level);
+  }
+
+  // stage i advances by the number of stage-i nodes contained in this node: 2^(level - i)
+  for (uint8_t i = 0; i <= level; i++) {
+    dec->state->active_node_per_stage[i] += dec->param->code_stage_size[level - i];
+  }
+
+  // the whole codeword has been processed once the stage-0 index reaches the code size
+  if (dec->state->active_node_per_stage[0] == dec->param->code_stage_size[dec->param->code_size_log]) {
+    dec->state->flag_finished = true;
+  }
+}
+
+/*
+ * Handles a RATE_R node at the current stage: computes the LLRs of the left child (function f),
+ * processes it, computes the LLRs of the right child (function g, using the bits estimated by
+ * the left child), processes it and finally xors the two halves of estimated bits in place.
+ * Returns early, without the remaining steps, as soon as a child sets state->flag_finished.
+ */
+static void rate_r_node(void* p, uint8_t* message)
+{
+  struct pSSC_s* pp              = p;
+  uint8_t*       estbits0        = NULL;
+  uint8_t*       estbits1        = NULL;
+  uint16_t       bit_pos         = 0;
+  int16_t        offset0         = 0;
+  int16_t        offset1         = 0;
+  uint8_t        stage           = pp->state->stage;
+  uint16_t       stage_size      = pp->param->code_stage_size[stage];
+  uint16_t       stage_half_size = pp->param->code_stage_size[stage - 1];
+
+  // LLRs for the left child are written to the buffer of the stage below
+  pp->f(pp->llr0[stage], pp->llr1[stage], pp->llr0[stage - 1], stage_half_size);
+
+  // move to the child node to the left (up) of the tree.
+  simplified_node(pp, message);
+  if (pp->state->flag_finished == true) { // (just in case). However for 5G frozen sets, the code can never end here.
+    return;
+  }
+
+  // the stage-0 index now points right after the bits estimated by the left child
+  bit_pos  = pp->state->active_node_per_stage[0];
+  offset0  = bit_pos - stage_half_size;
+  estbits0 = pp->est_bit + offset0;
+
+  // LLRs for the right child overwrite the same buffer of the stage below
+  pp->g(estbits0, pp->llr0[stage], pp->llr1[stage], pp->llr0[stage - 1], stage_half_size);
+  // move to the child node to the right (down) of the tree.
+  simplified_node(pp, message);
+  if (pp->state->flag_finished == true) {
+    return;
+  }
+
+  // both children are done: locate the stage_size estimated bits of this node
+  bit_pos = pp->state->active_node_per_stage[0];
+
+  offset0  = bit_pos - stage_size;
+  offset1  = offset0 + stage_half_size;
+  estbits0 = pp->est_bit + offset0;
+  estbits1 = pp->est_bit + offset1;
+
+  // first half <- first half xor second half (the second half is left untouched)
+  pp->xor (estbits0, estbits1, estbits0, stage_half_size);
+
+  // update this node index
+  pp->state->active_node_per_stage[stage] = pp->state->active_node_per_stage[stage] + 1; // return to the father node
+}
--- a/lib/src/phy/fec/polar/polar_decoder_ssc_s.h
+++ b/lib/src/phy/fec/polar/polar_decoder_ssc_s.h
@ -0,0 +1,78 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_ssc_s.h
+ * \brief Definition of the SSC polar decoder inner functions working with
+ * 16-bit integer-valued LLRs.
+ * \author Jesus Gomez (CTTC) \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef POLAR_DECODER_SSC_S_H
+#define POLAR_DECODER_SSC_S_H
+
+#include "polar_decoder_ssc_all.h"
+
+/*!
+ * Creates an SSC polar decoder structure of type pSSC, and allocates memory for the decoding buffers.
+ *
+ * This function is exactly the same as the one for the floating-point version.
+ * Note, however, that it works with a different pSSC structure (different function pointers
+ * pSSC::f, pSSC::g, pSSC::xor and pSSC::hard_bit).
+ *
+ * \param[in] frozen_set The position of the frozen bits in the codeword.
+ * \param[in] frozen_set_size Number of frozen bits.
+ * \param[in] code_size_log \f$log_2\f$ of the number of bits in the codeword.
+ * \return A pointer to a pSSC structure if the function executes correctly, NULL otherwise.
+ */
+void* create_polar_decoder_ssc_s(uint16_t* frozen_set, uint8_t code_size_log, uint16_t frozen_set_size);
+
+/*!
+ * The 16-bit polar decoder SSC "destructor": it frees all the resources allocated to the decoder.
+ *
+ * \param[in, out] p A pointer to the dismantled decoder.
+ */
+void delete_polar_decoder_ssc_s(void* p);
+
+/*!
+ * Initializes a 16-bit SSC polar decoder before processing a new codeword.
+ *
+ * \param[in, out] p A void pointer used to declare a pSSC structure.
+ * \param[in] llr LLRs for the new codeword.
+ * \param[out] data_decoded Pointer to the decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int init_polar_decoder_ssc_s(void* p, const int16_t* llr, uint8_t* data_decoded);
+
+/*!
+ * Decodes a data message from a 16-bit resolution codeword with the specified decoder. Note that
+ * a pointer to the codeword LLRs is included in \a p and initialized by init_polar_decoder_ssc_s().
+ *
+ * \param[in] p A pointer to the desired decoder.
+ * \param[out] data The decoded message.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int polar_decoder_ssc_s(void* p, uint8_t* data);
+
+#endif // POLAR_DECODER_SSC_S_H
--- a/lib/src/phy/fec/polar/polar_decoder_vector.c
+++ b/lib/src/phy/fec/polar/polar_decoder_vector.c
@ -0,0 +1,216 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_vector.c
+ * \brief Definition of the polar decoder vectorizable functions.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include "math.h"
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h> //abs function
+
+/*!
+ * Three-valued sign of a real number: 1 for positive inputs, -1 for negative inputs
+ * and 0 for anything else (zero, as well as NaN, which compares neither above nor below 0).
+ */
+static int sgn(float v)
+{
+  if (v > 0) {
+    return 1;
+  }
+  if (v < 0) {
+    return -1;
+  }
+  return 0;
+}
+
+/*!
+ *  Writes z[i] = 1 if \f$ (x[i] < 0) \f$ and z[i] = 0 otherwise, for i = 0, ..., len - 1.
+ *
+ *  The macro expects three identifiers to be in scope where it is expanded: x (input vector),
+ *  z (output vector of uint8_t) and len (number of elements). It is wrapped in do { } while (0)
+ *  so that "hard_bit;" behaves as a single statement in any context.
+ */
+
+#define hard_bit                                                                                                       \
+  do {                                                                                                                 \
+    for (uint16_t i = 0; i < len; ++i) {                                                                               \
+      z[i] = (x[i] < 0) ? 1 : 0;                                                                                       \
+    }                                                                                                                  \
+  } while (0)
+
+/*! Float version of the f function: z[i] = sgn(x[i]) * sgn(y[i]) * min(|x[i]|, |y[i]|). */
+void srslte_vec_function_f_fff(const float* x, const float* y, float* z, const uint16_t len)
+{
+  for (int i = 0; i < len; i++) {
+    const float mag_x = fabsf(x[i]);
+    const float mag_y = fabsf(y[i]);
+    const float sign  = sgn(x[i]) * sgn(y[i]);
+
+    // keep the smaller magnitude (y's one on ties) and attach the product of the signs
+    z[i] = (mag_x >= mag_y) ? sign * mag_y : sign * mag_x;
+  }
+}
+
+/*!
+ * 16-bit version of the f function: z[i] = sign(x[i]) * sign(y[i]) * min(|x[i]|, |y[i]|).
+ *
+ * Magnitudes are computed with 32-bit arithmetic: |INT16_MIN| = 32768 does not fit in an int16_t and
+ * would otherwise wrap to a negative "magnitude", corrupting the comparison and the output.
+ * The only result that exceeds the int16_t range (both inputs equal to INT16_MIN) is saturated to INT16_MAX.
+ */
+void srslte_vec_function_f_sss(const int16_t* x, const int16_t* y, int16_t* z, const uint16_t len)
+{
+  for (int i = 0; i < len; i++) {
+    const int32_t L0    = x[i];
+    const int32_t L1    = y[i];
+    const int32_t absL0 = (L0 < 0) ? -L0 : L0;
+    const int32_t absL1 = (L1 < 0) ? -L1 : L1;
+
+    int32_t res = (absL0 >= absL1) ? absL1 : absL0;
+    // opposite signs give a negative output; a zero input already forces res == 0
+    if ((L0 < 0) != (L1 < 0)) {
+      res = -res;
+    }
+    if (res > INT16_MAX) {
+      res = INT16_MAX;
+    }
+    z[i] = (int16_t)res;
+  }
+}
+
+/*!
+ * 8-bit version of the f function: z[i] = sign(x[i]) * sign(y[i]) * min(|x[i]|, |y[i]|).
+ *
+ * Magnitudes are computed with 32-bit arithmetic: |INT8_MIN| = 128 does not fit in an int8_t and
+ * would otherwise wrap to a negative "magnitude", corrupting the comparison and the output.
+ * The only result that exceeds the int8_t range (both inputs equal to INT8_MIN) is saturated to INT8_MAX.
+ */
+void srslte_vec_function_f_ccc(const int8_t* x, const int8_t* y, int8_t* z, const uint16_t len)
+{
+  for (int i = 0; i < len; i++) {
+    const int32_t L0    = x[i];
+    const int32_t L1    = y[i];
+    const int32_t absL0 = (L0 < 0) ? -L0 : L0;
+    const int32_t absL1 = (L1 < 0) ? -L1 : L1;
+
+    int32_t res = (absL0 >= absL1) ? absL1 : absL0;
+    // opposite signs give a negative output; a zero input already forces res == 0
+    if ((L0 < 0) != (L1 < 0)) {
+      res = -res;
+    }
+    if (res > INT8_MAX) {
+      res = INT8_MAX;
+    }
+    z[i] = (int8_t)res;
+  }
+}
+
+/*! Hard decision on float LLRs: z[k] = 1 for strictly negative x[k], 0 otherwise. */
+void srslte_vec_hard_bit_fc(const float* x, uint8_t* z, const uint16_t len)
+{
+  for (uint16_t k = 0; k < len; ++k) {
+    z[k] = (x[k] < 0) ? 1 : 0;
+  }
+}
+
+/*! Hard decision on 16-bit LLRs: z[k] = 1 for strictly negative x[k], 0 otherwise. */
+void srslte_vec_hard_bit_sc(const int16_t* x, uint8_t* z, const uint16_t len)
+{
+  for (uint16_t k = 0; k < len; ++k) {
+    z[k] = (x[k] < 0) ? 1 : 0;
+  }
+}
+
+/*! Hard decision on 8-bit LLRs: z[k] = 1 for strictly negative x[k], 0 otherwise. */
+void srslte_vec_hard_bit_cc(const int8_t* x, uint8_t* z, const uint16_t len)
+{
+  for (uint16_t k = 0; k < len; ++k) {
+    z[k] = (x[k] < 0) ? 1 : 0;
+  }
+}
+
+/*! Float version of the g function: z[i] = y[i] + x[i] if b[i] = 0 and z[i] = y[i] - x[i] if b[i] = 1. */
+void srslte_vec_function_g_bfff(const uint8_t* b, const float* x, const float* y, float* z, const uint16_t len)
+{
+  for (int i = 0; i < len; i++) {
+    // bit 0 -> +1, bit 1 -> -1 (warning: the result is narrowed from int to int8_t)
+    const int8_t sign = -2 * b[i] + 1;
+
+    z[i] = y[i] + sign * x[i];
+  }
+}
+
+/*!
+ * 16-bit version of the g function: z[i] = y[i] + x[i] if b[i] = 0 and z[i] = y[i] - x[i] if b[i] = 1,
+ * with the result limited to the symmetric range [-32767, 32767].
+ */
+void srslte_vec_function_g_bsss(const uint8_t* b, const int16_t* x, const int16_t* y, int16_t* z, const uint16_t len)
+{
+  for (int i = 0; i < len; i++) {
+    // bit 0 -> +1, bit 1 -> -1 (warning: the result is narrowed from int to int8_t)
+    const int8_t sign = -2 * b[i] + 1;
+
+    // accumulate in a wider type, then saturate
+    long acc = (long)y[i] + sign * x[i];
+    if (acc > 32767) {
+      acc = 32767;
+    } else if (acc < -32767) {
+      acc = -32767;
+    }
+
+    z[i] = (int16_t)acc;
+  }
+}
+
+/*!
+ * 8-bit version of the g function: z[i] = y[i] + x[i] if b[i] = 0 and z[i] = y[i] - x[i] if b[i] = 1,
+ * with the result limited to the symmetric range [-127, 127].
+ */
+void srslte_vec_function_g_bccc(const uint8_t* b, const int8_t* x, const int8_t* y, int8_t* z, const uint16_t len)
+{
+  for (int i = 0; i < len; i++) {
+    // bit 0 -> +1, bit 1 -> -1 (warning: the result is narrowed from int to int8_t)
+    const int8_t sign = -2 * b[i] + 1;
+
+    // accumulate in a wider type, then saturate
+    long acc = (long)y[i] + sign * x[i];
+    if (acc > 127) {
+      acc = 127;
+    } else if (acc < -127) {
+      acc = -127;
+    }
+
+    z[i] = (int8_t)acc;
+  }
+}
--- a/lib/src/phy/fec/polar/polar_decoder_vector.h
+++ b/lib/src/phy/fec/polar/polar_decoder_vector.h
@ -0,0 +1,119 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_vector.h
+ * \brief Declaration of the polar decoder vectorizable functions.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef POLAR_VECTOR_FUNCTIONS_H
+#define POLAR_VECTOR_FUNCTIONS_H
+#include "srslte/config.h"
+#include <stdint.h>
+
+/*!
+ * Computes \f$ z = sign(x) \times sign(y) \times \min(abs(x), abs(y)) \f$ elementwise (box-plus operator).
+ * \param[in] x A pointer to a vector of floats.
+ * \param[in] y A pointer to a vector of floats.
+ * \param[out] z A pointer to a vector of floats.
+ * \param[in] len Length of vectors x, y and z.
+ */
+SRSLTE_API void srslte_vec_function_f_fff(const float* x, const float* y, float* z, uint16_t len);
+
+/*!
+ * Computes \f$ z = sign(x) \times sign(y) \times \min(abs(x), abs(y)) \f$ elementwise (box-plus operator).
+ * \param[in] x A pointer to a vector of int16_t.
+ * \param[in] y A pointer to a vector of int16_t.
+ * \param[out] z A pointer to a vector of int16_t.
+ * \param[in] len Length of vectors x, y and z.
+ */
+SRSLTE_API void srslte_vec_function_f_sss(const int16_t* x, const int16_t* y, int16_t* z, uint16_t len);
+
+/*!
+ * Computes \f$ z = sign(x) \times sign(y) \times \min(abs(x), abs(y)) \f$ elementwise (box-plus operator).
+ * \param[in] x A pointer to a vector of int8_t.
+ * \param[in] y A pointer to a vector of int8_t.
+ * \param[out] z A pointer to a vector of int8_t.
+ * \param[in] len Length of vectors x, y and z.
+ */
+SRSLTE_API void srslte_vec_function_f_ccc(const int8_t* x, const int8_t* y, int8_t* z, uint16_t len);
+
+/*!
+ * Returns \f$ z = x + y \f$ if \f$ (b = 0) \f$ and \f$ z= -x + y \f$ if \f$ (b = 1)\f$.
+ * \param[in] b A pointer to a vector of uint8_t with 0's and 1's.
+ * \param[in] x A pointer to a vector of floats.
+ * \param[in] y A pointer to a vector of floats.
+ * \param[out] z A pointer to a vector of floats.
+ * \param[in] len Length of vectors b, x, y and z.
+ */
+SRSLTE_API void srslte_vec_function_g_bfff(const uint8_t* b, const float* x, const float* y, float* z, uint16_t len);
+
+/*!
+ * Returns \f$ z = x + y \f$ if \f$ (b = 0) \f$ and \f$ z= -x + y \f$ if \f$ (b = 1)\f$.
+ * \param[in] b A pointer to a vector of uint8_t with 0's and 1's.
+ * \param[in] x A pointer to a vector of int16_t.
+ * \param[in] y A pointer to a vector of int16_t.
+ * \param[out] z A pointer to a vector of int16_t.
+ * \param[in] len Length of vectors b, x, y and z.
+ */
+SRSLTE_API void
+srslte_vec_function_g_bsss(const uint8_t* b, const int16_t* x, const int16_t* y, int16_t* z, uint16_t len);
+
+/*!
+ * Returns \f$ z = x + y \f$ if \f$ (b = 0) \f$ and \f$ z= -x + y \f$ if \f$ (b = 1)\f$.
+ * \param[in] b A pointer to a vector of uint8_t with 0's and 1's.
+ * \param[in] x A pointer to a vector of int8_t.
+ * \param[in] y A pointer to a vector of int8_t.
+ * \param[out] z A pointer to a vector of int8_t.
+ * \param[in] len Length of vectors b, x, y and z.
+ */
+SRSLTE_API void srslte_vec_function_g_bccc(const uint8_t* b, const int8_t* x, const int8_t* y, int8_t* z, uint16_t len);
+
+/*!
+ * Returns 1 if \f$ (x < 0) \f$ and 0 if \f$ (x >= 0) \f$.
+ * \param[in] x A pointer to a vector of floats.
+ * \param[out] z A pointer to a vector of uint8_t with 0's and 1's.
+ * \param[in] len Length of vectors x and z.
+ */
+SRSLTE_API void srslte_vec_hard_bit_fc(const float* x, uint8_t* z, uint16_t len);
+
+/*!
+ * Returns 1 if \f$ (x < 0) \f$ and 0 if \f$ (x >= 0) \f$.
+ * \param[in] x A pointer to a vector of int16_t.
+ * \param[out] z A pointer to a vector of uint8_t with 0's and 1's.
+ * \param[in] len Length of vectors x and z.
+ */
+SRSLTE_API void srslte_vec_hard_bit_sc(const int16_t* x, uint8_t* z, uint16_t len);
+
+/*!
+ * Returns 1 if \f$ (x < 0) \f$ and 0 if \f$ (x >= 0) \f$.
+ * \param[in] x A pointer to a vector of int8_t.
+ * \param[out] z A pointer to a vector of uint8_t with 0's and 1's.
+ * \param[in] len Length of vectors x and z.
+ */
+SRSLTE_API void srslte_vec_hard_bit_cc(const int8_t* x, uint8_t* z, uint16_t len);
+
+#endif // POLAR_VECTOR_FUNCTIONS_H
--- a/lib/src/phy/fec/polar/polar_decoder_vector_avx2.c
+++ b/lib/src/phy/fec/polar/polar_decoder_vector_avx2.c
@ -0,0 +1,136 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_vector_avx2.c
+ * \brief Definition of the polar decoder vectorizable functions using AVX2 instructions.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#include "../utils_avx2.h"
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef LV_HAVE_AVX2
+
+#include <immintrin.h>
+
+/*!
+ * \brief Bit mask to extract the Most Significant Bit (MSB).
+ */
+#define MSB_MASK (-128) // 0b10000000
+
+// General remarks
+// We replace bits by {0, 128} (uint8_t) or {0, -128} (int8_t)
+
+/*!
+ * AVX2 version of the 8-bit f function. Every iteration loads and stores a whole 256-bit register,
+ * advancing SRSLTE_AVX2_B_SIZE positions, also when fewer than that many elements remain before len.
+ */
+void srslte_vec_function_f_ccc_avx2(const int8_t* x, const int8_t* y, int8_t* z, const uint16_t len)
+{
+  for (int pos = 0; pos < len; pos += SRSLTE_AVX2_B_SIZE) {
+    const __m256i llr_x = _mm256_loadu_si256((__m256i*)&x[pos]);
+    const __m256i llr_y = _mm256_loadu_si256((__m256i*)&y[pos]);
+
+    // x negated where y is negative and zeroed where y is zero: its sign is sign(x) * sign(y)
+    const __m256i sign_src = _mm256_sign_epi8(llr_x, llr_y);
+    // min(|x|, |y|)
+    const __m256i magnitude = _mm256_min_epi8(_mm256_abs_epi8(llr_x), _mm256_abs_epi8(llr_y));
+
+    _mm256_storeu_si256((__m256i*)&z[pos], _mm256_sign_epi8(magnitude, sign_src));
+  }
+}
+
+/*!
+ * AVX2 version of the 8-bit g function: x is negated in the positions where the byte of b has its MSB set,
+ * then added to y with saturation; the result is finally limited from below to -127.
+ * Every iteration loads and stores a whole 256-bit register.
+ */
+void srslte_vec_function_g_bccc_avx2(const uint8_t* b, const int8_t* x, const int8_t* y, int8_t* z, const uint16_t len)
+{
+  const __m256i ones       = _mm256_set1_epi8(1);
+  const __m256i lower_clip = _mm256_set1_epi8(-127);
+
+  for (int pos = 0; pos < len; pos += SRSLTE_AVX2_B_SIZE) {
+    const __m256i llr_x = _mm256_loadu_si256((__m256i*)&x[pos]);
+    const __m256i llr_y = _mm256_loadu_si256((__m256i*)&y[pos]);
+    const __m256i bits  = _mm256_loadu_si256((__m256i*)&b[pos]);
+
+    // OR-ing with 1 keeps the selector different from 0: a null selector would zero x in the sign instruction
+    const __m256i selector = _mm256_or_si256(bits, ones);
+    const __m256i signed_x = _mm256_sign_epi8(llr_x, selector);
+    const __m256i sum      = _mm256_adds_epi8(signed_x, llr_y);
+
+    _mm256_storeu_si256((__m256i*)&z[pos], _mm256_max_epi8(lower_clip, sum));
+  }
+}
+
+/*!
+ * Bitwise XOR of two byte vectors, z = x ^ y, one 256-bit register per iteration
+ * (whole registers are loaded and stored also when fewer elements remain before len).
+ */
+void srslte_vec_xor_bbb_avx2(const uint8_t* x, const uint8_t* y, uint8_t* z, uint16_t len)
+{
+  int pos = 0;
+  while (pos < len) {
+    const __m256i lhs = _mm256_loadu_si256((__m256i*)&x[pos]);
+    const __m256i rhs = _mm256_loadu_si256((__m256i*)&y[pos]);
+
+    _mm256_storeu_si256((__m256i*)&z[pos], _mm256_xor_si256(lhs, rhs));
+
+    pos += SRSLTE_AVX2_B_SIZE;
+  }
+}
+
+/*!
+ * AVX2 hard decision on 8-bit LLRs: each output byte keeps only the MSB (sign bit) of the input byte,
+ * i.e., it is 128 for negative inputs and 0 otherwise.
+ * After the vectorized loop, the SRSLTE_AVX2_B_SIZE bytes that follow z + len are set to 0.
+ */
+void srslte_vec_hard_bit_cc_avx2(const int8_t* x, uint8_t* z, const uint16_t len)
+{
+  const __m256i sign_bit_mask = _mm256_set1_epi8(MSB_MASK);
+
+  int pos = 0;
+  while (pos < len) {
+    const __m256i llr = _mm256_loadu_si256((__m256i*)&x[pos]);
+
+    _mm256_storeu_si256((__m256i*)&z[pos], _mm256_and_si256(llr, sign_bit_mask));
+
+    pos += SRSLTE_AVX2_B_SIZE;
+  }
+
+  // restore, by setting to 0, the memory positions between z + len and z + len + SRSLTE_AVX2_B_SIZE
+  memset(z + len, 0, SRSLTE_AVX2_B_SIZE);
+}
+
+/*!
+ * Converts, in place, bits stored as {0, 128} into {0, 1}.
+ * Whole blocks of SRSLTE_AVX2_B_SIZE bytes are converted with AVX2 instructions,
+ * the remaining bytes (if any) are converted one by one.
+ */
+void srslte_vec_sign_to_bit_c_avx2(uint8_t* x, uint16_t len)
+{
+  const __m256i minus_one = _mm256_set1_epi8(-1);
+
+  int i = 0;
+  while (i < len - SRSLTE_AVX2_B_SIZE + 1) {
+    const __m256i packed = _mm256_loadu_si256((__m256i*)&x[i]);
+
+    // -1 is negated where the byte is negative (MSB set) and zeroed where the byte is 0
+    const __m256i as_bits = _mm256_sign_epi8(minus_one, packed);
+
+    _mm256_storeu_si256((__m256i*)&x[i], as_bits);
+
+    i += SRSLTE_AVX2_B_SIZE;
+  }
+
+  // scalar tail for the bytes that do not fill a whole block
+  // (e.g., code_size < 32, which is never the case in 5G)
+  while (i < len) {
+    x[i] = x[i] >> 7U;
+    i++;
+  }
+}
+#endif // LV_HAVE_AVX2
--- a/lib/src/phy/fec/polar/polar_decoder_vector_avx2.h
+++ b/lib/src/phy/fec/polar/polar_decoder_vector_avx2.h
@ -0,0 +1,89 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_decoder_vector_avx2.h
+ * \brief Declaration of the 8-bit AVX2 polar decoder vectorizable functions.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef POLAR_VECTOR_FUNCTIONS_AVX2_H
+#define POLAR_VECTOR_FUNCTIONS_AVX2_H
+#include "../utils_avx2.h"
+#include "srslte/config.h"
+#include <stdint.h>
+
+/*!
+ * Transforms input uint8_t bits represented by {0, 128} to {0, 1} with AVX2 instructions,
+ * the output must have size larger than \ref SRSLTE_AVX2_B_SIZE.
+ * Specifically, the function returns 0 if x=0 and 1 if x<0, otherwise the output is not defined.
+ * \param[in, out] x A pointer to a vector of uint8_t.
+ * \param[in] len Length of vector x.
+ */
+SRSLTE_API void srslte_vec_sign_to_bit_c_avx2(uint8_t* x, uint16_t len);
+
+/*!
+ * Computes \f$ z = sign(x) \times sign(y) \times \min(abs(x), abs(y)) \f$ elementwise
+ * (box-plus operator) with AVX2 instructions,
+ * the output must have size larger than \ref SRSLTE_AVX2_B_SIZE.
+ * \param[in] x A pointer to a vector of int8_t.
+ * \param[in] y A pointer to a vector of int8_t.
+ * \param[out] z A pointer to a vector of int8_t.
+ * \param[in] len Length of vectors x, y and z.
+ */
+SRSLTE_API void srslte_vec_function_f_ccc_avx2(const int8_t* x, const int8_t* y, int8_t* z, uint16_t len);
+
+/*!
+ * Returns \f$ z = x + y \f$ if \f$ (b = 0) \f$ and \f$ z= -x + y \f$ if \f$ (b = 128)\f$ with AVX2 instructions,
+ * the output must have size larger than \ref SRSLTE_AVX2_B_SIZE.
+ * \param[in] b A pointer to a vector of uint8_t with bits represented by {0, 128}.
+ * \param[in] x A pointer to a vector of int8_t.
+ * \param[in] y A pointer to a vector of int8_t.
+ * \param[out] z A pointer to a vector of int8_t.
+ * \param[in] len Length of vectors b, x, y and z.
+ */
+SRSLTE_API void
+srslte_vec_function_g_bccc_avx2(const uint8_t* b, const int8_t* x, const int8_t* y, int8_t* z, uint16_t len);
+
+/*!
+ * Computes \f$ z = x \oplus y \f$ elementwise with AVX2 instructions,
+ * the output must have size larger than \ref SRSLTE_AVX2_B_SIZE.
+ * \param[in] x A pointer to a vector of uint8_t with 0's and 1's.
+ * \param[in] y A pointer to a vector of uint8_t with 0's and 1's.
+ * \param[out] z A pointer to a vector of uint8_t with 0's and 1's.
+ * \param[in] len Length of vectors x, y and z.
+ */
+SRSLTE_API void srslte_vec_xor_bbb_avx2(const uint8_t* x, const uint8_t* y, uint8_t* z, uint16_t len);
+
+/*!
+ * Returns 128 (only the MSB set) if \f$ (x < 0) \f$ and 0 if \f$ (x >= 0) \f$ with AVX2 instructions,
+ * the output must have size larger than \ref SRSLTE_AVX2_B_SIZE.
+ * \param[in] x A pointer to a vector of int8_t.
+ * \param[out] z A pointer to a vector of uint8_t with bits represented by {0, 128}.
+ * \param[in] len Length of vectors x and z.
+ */
+SRSLTE_API void srslte_vec_hard_bit_cc_avx2(const int8_t* x, uint8_t* z, uint16_t len);
+
+#endif // POLAR_VECTOR_FUNCTIONS_AVX2_H
--- a/lib/src/phy/fec/polar/polar_encoder.c
+++ b/lib/src/phy/fec/polar/polar_encoder.c
@ -0,0 +1,130 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_encoder.c
+ * \brief Definition of the polar encoder.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ * 5G uses a polar encoder with maximum sizes \f$2^n\f$ with \f$n = 5,...,10\f$.
+ *
+ */
+#include "srslte/phy/fec/polar/polar_encoder.h"
+#include "polar_encoder_avx2.h"
+#include "polar_encoder_pipelined.h"
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#ifdef LV_HAVE_AVX2
+
+/*!
+ * AVX2 polar encoder: forwards the request to polar_encoder_encode_avx2() and propagates its
+ * return value (0 on success, -1 on error) instead of unconditionally reporting success.
+ */
+static int encode_avx2(void* o, const uint8_t* input, uint8_t* output, const uint8_t code_size_log)
+{
+  srslte_polar_encoder_t* q = o;
+
+  return polar_encoder_encode_avx2(q->ptr, input, output, code_size_log);
+}
+
+/*! Releases the AVX2 encoder instance referenced by the ptr field of the srslte_polar_encoder_t passed in \a o. */
+static void free_avx2(void* o)
+{
+  void* avx2_encoder = ((srslte_polar_encoder_t*)o)->ptr;
+
+  delete_polar_encoder_avx2(avx2_encoder);
+}
+
+/*!
+ * Initializes a polar encoder structure to use the AVX2 polar encoder algorithm.
+ *
+ * If the AVX2 encoder cannot be created, nothing has been allocated and nothing is released:
+ * the handlers are cleared (so that srslte_polar_encoder_free() does not invoke a destructor
+ * on a NULL instance) and -1 is returned.
+ *
+ * \return 0 on success, -1 otherwise.
+ */
+static int init_avx2(srslte_polar_encoder_t* q, const uint8_t code_size_log)
+{
+  q->ptr = create_polar_encoder_avx2(code_size_log);
+  if (q->ptr == NULL) {
+    q->encode = NULL;
+    q->free   = NULL;
+    return -1;
+  }
+
+  q->encode = encode_avx2;
+  q->free   = free_avx2;
+  return 0;
+}
+#endif // LV_HAVE_AVX2
+
+/*! Pipelined polar encoder: forwards the request to polar_encoder_encode_pipelined() and returns 0. */
+static int encode_pipelined(void* o, const uint8_t* input, uint8_t* output, const uint8_t code_size_log)
+{
+  void* pipelined_encoder = ((srslte_polar_encoder_t*)o)->ptr;
+
+  polar_encoder_encode_pipelined(pipelined_encoder, input, output, code_size_log);
+
+  return 0;
+}
+
+/*! Releases the pipelined encoder instance referenced by the ptr field of the srslte_polar_encoder_t passed in \a o. */
+static void free_pipelined(void* o)
+{
+  void* pipelined_encoder = ((srslte_polar_encoder_t*)o)->ptr;
+
+  delete_polar_encoder_pipelined(pipelined_encoder);
+}
+
+/*!
+ * Initializes a polar encoder structure to use the pipelined polar encoder algorithm.
+ *
+ * If the pipelined encoder cannot be created, nothing has been allocated and nothing is released:
+ * the handlers are cleared (so that srslte_polar_encoder_free() does not invoke a destructor
+ * on a NULL instance) and -1 is returned.
+ *
+ * \return 0 on success, -1 otherwise.
+ */
+static int init_pipelined(srslte_polar_encoder_t* q, const uint8_t code_size_log)
+{
+  q->ptr = create_polar_encoder_pipelined(code_size_log);
+  if (q->ptr == NULL) {
+    q->encode = NULL;
+    q->free   = NULL;
+    return -1;
+  }
+
+  q->encode = encode_pipelined;
+  q->free   = free_pipelined;
+  return 0;
+}
+
+/*!
+ * Initializes \a q with the encoder implementation selected by \a type.
+ * Returns the result of the selected initializer, or -1 if the type is unknown
+ * (the AVX2 type is only recognized when LV_HAVE_AVX2 is defined).
+ */
+int srslte_polar_encoder_init(srslte_polar_encoder_t* q, srslte_polar_encoder_type_t type, const uint8_t code_size_log)
+{
+  if (type == SRSLTE_POLAR_ENCODER_PIPELINED) {
+    return init_pipelined(q, code_size_log);
+  }
+#ifdef LV_HAVE_AVX2
+  if (type == SRSLTE_POLAR_ENCODER_AVX2) {
+    return init_avx2(q, code_size_log);
+  }
+#endif // LV_HAVE_AVX2
+
+  return -1;
+}
+
+/*! Invokes the registered destructor (if any) and then zeroes the whole encoder structure. */
+void srslte_polar_encoder_free(srslte_polar_encoder_t* q)
+{
+  if (q->free != NULL) {
+    q->free(q);
+  }
+
+  memset(q, 0, sizeof(*q));
+}
+
+/*! Encodes \a input into \a output through the encode handler registered in \a q and returns its result. */
+int srslte_polar_encoder_encode(srslte_polar_encoder_t* q,
+                                const uint8_t*          input,
+                                uint8_t*                output,
+                                const uint8_t           code_size_log)
+{
+  const int status = q->encode(q, input, output, code_size_log);
+
+  return status;
+}
--- a/lib/src/phy/fec/polar/polar_encoder_avx2.c
+++ b/lib/src/phy/fec/polar/polar_encoder_avx2.c
@ -0,0 +1,200 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_encoder_avx2.c
+ * \brief Definition of the AVX2 polar encoder.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ * 5G uses a polar encoder with maximum sizes \f$2^n\f$ with \f$n = 5,...,10\f$.
+ *
+ */
+
+#include "../utils_avx2.h"
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#ifdef LV_HAVE_AVX2
+
+#include <emmintrin.h>
+#include <immintrin.h>
+#include <tmmintrin.h>
+
+/*!
+ * \brief Describes an AVX2 polar encoder.
+ *
+ * Instances are allocated by create_polar_encoder_avx2() and released by delete_polar_encoder_avx2().
+ */
+struct pAVX2 {
+  uint8_t code_size_log; /*!< \brief The \f$ log_2\f$ of the maximum supported number of bits of the encoder
+                            input/output vector. */
+  uint8_t* tmp;          /*!< \brief Pointer to a temporary buffer (heap-allocated when the encoder is created). */
+};
+
+/*!
+ * Releases an AVX2 polar encoder and its temporary buffer.
+ * A NULL pointer is accepted and ignored, so the function can be safely called on the result
+ * of a failed create_polar_encoder_avx2().
+ */
+void delete_polar_encoder_avx2(void* o)
+{
+  struct pAVX2* q = o;
+
+  if (q == NULL) {
+    return;
+  }
+
+  free(q->tmp); // free(NULL) is a no-op, no need to test the pointer
+  free(q);
+}
+
+/*!
+ * Creates an AVX2 polar encoder able to process up to \f$ 2^{code\_size\_log} \f$ bits.
+ *
+ * The temporary buffer holds the whole codeword, but never less than SRSLTE_AVX2_B_SIZE bytes.
+ *
+ * \param[in] code_size_log The \f$ log_2\f$ of the maximum number of bits of the encoder input/output vector.
+ * \return A pointer to the new encoder, or NULL if code_size_log is not supported or memory cannot be allocated.
+ */
+void* create_polar_encoder_avx2(const uint8_t code_size_log)
+{
+  // polar_encoder_encode_avx2() computes all sizes as 32-bit shifts (1U << code_size_log):
+  // larger exponents cannot be served and would also overflow the size computed below
+  if (code_size_log >= 32) {
+    return NULL;
+  }
+
+  // allocate memory to the polar encoder instance
+  struct pAVX2* q = malloc(sizeof(*q));
+  if (q == NULL) {
+    return NULL;
+  }
+
+  // size_t (instead of uint16_t) so that the size is not truncated for code_size_log >= 16
+  size_t tmp_size = SRSLTE_AVX2_B_SIZE;
+  if (code_size_log > SRSLTE_AVX2_B_SIZE_LOG) {
+    tmp_size = (size_t)1 << code_size_log;
+  }
+
+  q->tmp = malloc(tmp_size * sizeof(uint8_t));
+  if (!q->tmp) {
+    perror("malloc"); // report before free(), which is allowed to modify errno
+    free(q);
+    return NULL;
+  }
+
+  q->code_size_log = code_size_log;
+
+  return q;
+}
+
+/*!
+ * Runs, in parallel, \f$ 2^{5-stage}\f$ polar encoders of size \f$ 2^{stage} \f$ each, for stage = 1 to 5.
+ *
+ * 32 bytes are loaded from \a x and, for a supported stage, 32 bytes are stored to \a z.
+ * The switch enters at the requested stage and deliberately falls through all the lower ones,
+ * each of them XOR-ing the register with a shifted copy of itself.
+ * For any other value of \a stage a message is printed and \a z is not written.
+ *
+ * \param[in]  x     Pointer to the 32 input bytes.
+ * \param[out] z     Pointer to the 32 output bytes.
+ * \param[in]  stage Number of encoding stages to apply (1 to 5).
+ */
+static inline void srslte_vec_polar_encoder_32_avx2(const uint8_t* x, uint8_t* z, uint8_t stage)
+{
+  const __m256i MZERO = _mm256_set1_epi8(0);
+
+  __m256i simd_x = _mm256_loadu_si256((__m256i*)x);
+  __m256i simd_y;
+  switch (stage) {
+    case 5:
+      // in 0x21, the  2 takes zeros, and the 1 takes the second half of simd_x
+      simd_y = _mm256_permute2x128_si256(simd_x, MZERO, 0x21);
+      simd_x = _mm256_xor_si256(simd_x, simd_y);
+      // fallthrough
+    case 4:
+      simd_y = _mm256_srli_si256(simd_x, 8); // move each half 8-bytes= 64
+      simd_x = _mm256_xor_si256(simd_x, simd_y);
+      // fallthrough
+    case 3: // stage 3
+      simd_y = _mm256_srli_epi64(simd_x, 32);
+      simd_x = _mm256_xor_si256(simd_x, simd_y);
+      // fallthrough
+    case 2: // stage 2
+      simd_y = _mm256_srli_epi32(simd_x, 16);
+      simd_x = _mm256_xor_si256(simd_x, simd_y);
+      // fallthrough
+    case 1: // stage 1
+      simd_y = _mm256_srli_epi16(simd_x, 8);
+      simd_x = _mm256_xor_si256(simd_x, simd_y);
+      _mm256_storeu_si256((__m256i*)z, simd_x);
+      break;
+    default:
+      // unsupported stage: only reported, z is left untouched
+      printf("Wrong stage = %d\n", stage);
+  }
+}
+
+/*!
+ * Bitwise XOR of two byte vectors, \f$ z = x \oplus y \f$, computed with AVX2 instructions
+ * on one 256-bit register per iteration.
+ */
+static inline void srslte_vec_xor_bbb_avx2(const uint8_t* x, const uint8_t* y, uint8_t* z, uint16_t len)
+{
+  int pos = 0;
+  while (pos < len) {
+    const __m256i lhs = _mm256_loadu_si256((__m256i*)&x[pos]);
+    const __m256i rhs = _mm256_loadu_si256((__m256i*)&y[pos]);
+
+    _mm256_storeu_si256((__m256i*)&z[pos], _mm256_xor_si256(lhs, rhs));
+
+    pos += SRSLTE_AVX2_B_SIZE;
+  }
+}
+
+/*!
+ * Encodes \a input into \a output with the AVX2 polar encoder \a p.
+ *
+ * The encoder handle and the requested size are validated before any memory is touched:
+ * the handle is not dereferenced when NULL, and nothing is copied into the temporary buffer
+ * when the codeword is larger than the size the encoder was created for.
+ *
+ * Stages above SRSLTE_AVX2_B_SIZE_LOG are computed by XOR-ing half blocks of the temporary buffer,
+ * the remaining ones by srslte_vec_polar_encoder_32_avx2() on blocks of SRSLTE_AVX2_B_SIZE bytes.
+ *
+ * \param[in]  p             A void pointer to the AVX2 polar encoder structure.
+ * \param[in]  input         The encoder input vector.
+ * \param[out] output        The encoder output vector.
+ * \param[in]  code_size_log The \f$ log_2\f$ of the number of bits of the encoder input/output vector.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int polar_encoder_encode_avx2(void* p, const uint8_t* input, uint8_t* output, const uint8_t code_size_log)
+{
+
+  struct pAVX2* q = p;
+
+  uint8_t* x = NULL;
+  uint8_t* y = NULL;
+  uint8_t* z = NULL;
+
+  if (q == NULL) {
+    return -1;
+  }
+
+  // must precede the copy below: the temporary buffer is sized for q->code_size_log
+  if (code_size_log > q->code_size_log) {
+    printf("ERROR: max code size log %d, current code size log %d.\n", q->code_size_log, code_size_log);
+    return -1;
+  }
+
+  uint8_t* tmp = q->tmp;
+
+  // load data
+  uint32_t code_size = 1U << code_size_log;
+
+  memcpy(tmp, input, code_size * sizeof(uint8_t));
+
+  uint32_t code_size_stage      = 0;
+  uint32_t code_half_size_stage = 0;
+  uint32_t num_blocks           = 0;
+  uint32_t s                    = code_size_log;
+  for (; s > SRSLTE_AVX2_B_SIZE_LOG; s--) {
+    code_size_stage      = 1U << s;
+    code_half_size_stage = 1U << (s - 1);
+    num_blocks           = 1U << (code_size_log - s);
+
+    // first half of each block <- first half XOR second half
+    for (uint32_t b = 0; b < num_blocks; b++) {
+      x = &tmp[b * code_size_stage];
+      y = x + code_half_size_stage;
+      z = x;
+      srslte_vec_xor_bbb_avx2(x, y, z, code_half_size_stage);
+    }
+  }
+
+  uint32_t num_simd_size_blocks = 1;
+  if (code_size_log > SRSLTE_AVX2_B_SIZE_LOG) {
+    num_simd_size_blocks = 1U << (code_size_log - SRSLTE_AVX2_B_SIZE_LOG);
+  }
+
+  // remaining s stages, computed in place inside each block of SRSLTE_AVX2_B_SIZE bytes
+  for (uint32_t b = 0; b < num_simd_size_blocks; b++) {
+    x = &tmp[b * SRSLTE_AVX2_B_SIZE];
+    z = x;
+    srslte_vec_polar_encoder_32_avx2(x, z, s);
+  }
+
+  memcpy(output, tmp, code_size * sizeof(uint8_t));
+
+  return 0;
+}
+
+#endif // LV_HAVE_AVX2
--- a/lib/src/phy/fec/polar/polar_encoder_avx2.h
+++ b/lib/src/phy/fec/polar/polar_encoder_avx2.h
@ -0,0 +1,62 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_encoder_avx2.h
+ * \brief Declaration of the AVX2 polar encoder.
+ * \author Jesus Gomez (CTTC) \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef POLAR_ENCODER_AVX2_H
+#define POLAR_ENCODER_AVX2_H
+
+#include <stdint.h>
+
+/*!
+ * The AVX2 polar encoder "destructor": it frees all the resources allocated to the encoder.
+ *
+ * \param[in, out] p A pointer to the dismantled encoder.
+ */
+void delete_polar_encoder_avx2(void* p);
+
+/*!
+ * Encodes the input vector into a codeword with the specified polar encoder.
+ * \param[in] p A void pointer to the AVX2 polar encoder structure (type pAVX2).
+ * \param[in] input The encoder input vector.
+ * \param[out] output The encoder output vector.
+ * \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the encoder input/output vector.
+ * It can be less than or equal to the maximum code_size_log specified in q.code_size_log of the
+ * srslte_polar_encoder_t structure.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int polar_encoder_encode_avx2(void* p, const uint8_t* input, uint8_t* output, uint8_t code_size_log);
+
+/*!
+ * Creates an AVX2 polar encoder structure of type pAVX2, and allocates memory for the encoding buffers.
+ *
+ * \param[in] code_size_log \f$log_2\f$ of the number of bits in the codeword.
+ * \return A pointer to a pAVX2 structure if the function executes correctly, NULL otherwise.
+ */
+void* create_polar_encoder_avx2(uint8_t code_size_log);
+
+#endif // POLAR_ENCODER_AVX2_H
--- a/lib/src/phy/fec/polar/polar_encoder_pipelined.c
+++ b/lib/src/phy/fec/polar/polar_encoder_pipelined.c
@ -0,0 +1,160 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_encoder_pipelined.c
+ * \brief Definition of the pipelined polar encoder.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ * The pipelined polar encoder is described in
+ * Erdal Arikan, "Polar code: A pipelined implementation" presented at "4th International Symposium on Broadband
+ * Communication (ISBC 2010) July 11-14, 2010, Melaka, Malaysia"
+ * 5G uses a polar encoder with maximum sizes \f$2^n\f$ with \f$n = 5,...,10\f$.
+ *
+ */
+
+#include "srslte/phy/fec/polar/polar_encoder.h"
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+
+/*!
+ * \brief Describes a pipelined polar encoder.
+ *
+ * The size fields hold the maximum codeword size fixed when the encoder is created; shorter
+ * codewords can be encoded with the same instance.
+ */
+struct pPIPELINED {
+  uint16_t code_size;       /*!< \brief Maximum number of bits of the encoder input/output vector. */
+  uint8_t  code_size_log;   /*!< \brief The \f$ log_2\f$ of the maximum supported number of bits of the encoder
+                               input/output vector. */
+  uint16_t  code_half_size; /*!< \brief Half of the maximum number of bits of the encoder input/output vector. */
+  uint16_t* i_even;         /*!< \brief Table of the even positions (0, 2, 4, ...) of the encoder input/output vector. */
+  uint16_t* i_odd;          /*!< \brief Table of the odd positions (1, 3, 5, ...) of the encoder input/output vector. */
+  uint8_t*  tmp;            /*!< \brief Pointer to a temporary buffer of code_size bytes. */
+};
+
+/*
+ * Releases a pipelined polar encoder: its two index tables, its scratch buffer and the
+ * descriptor itself. A NULL handle is accepted and ignored.
+ */
+void delete_polar_encoder_pipelined(void* o)
+{
+  struct pPIPELINED* q = o;
+
+  // nothing to release for a NULL handle
+  if (q == NULL) {
+    return;
+  }
+
+  // free(NULL) is a no-op, so the buffers need no individual guards
+  free(q->i_even);
+  free(q->i_odd);
+  free(q->tmp);
+  free(q);
+}
+
+/*
+ * Allocates a pipelined polar encoder for codewords of up to 2^code_size_log bits and fills
+ * the even/odd index tables used by every encoding stage.
+ *
+ * Returns the new encoder, or NULL if code_size_log cannot be represented by the 16-bit size
+ * fields or if an allocation fails.
+ */
+void* create_polar_encoder_pipelined(const uint8_t code_size_log)
+{
+  // code_size and the index tables are 16 bits wide: 1U << 16 and above would be truncated
+  if (code_size_log > 15) {
+    printf("ERROR: unsupported code size log %d (max 15).\n", code_size_log);
+    return NULL;
+  }
+
+  // allocate memory to the polar encoder instance
+  struct pPIPELINED* q = malloc(sizeof(*q));
+  if (q == NULL) {
+    perror("malloc");
+    return NULL;
+  }
+
+  uint16_t code_size      = 1U << code_size_log;
+  uint16_t code_half_size = code_size / 2;
+
+  // start from a known state so that the shared error path can free unconditionally
+  q->i_odd  = NULL;
+  q->i_even = NULL;
+  q->tmp    = NULL;
+
+  q->i_odd = malloc(code_half_size * sizeof(uint16_t));
+  if (!q->i_odd) {
+    goto alloc_failed;
+  }
+
+  q->i_even = malloc(code_half_size * sizeof(uint16_t));
+  if (!q->i_even) {
+    goto alloc_failed;
+  }
+
+  q->tmp = malloc(code_size * sizeof(uint8_t));
+  if (!q->tmp) {
+    goto alloc_failed;
+  }
+
+  // i_even = {0, 2, 4, ...}, i_odd = {1, 3, 5, ...}
+  for (uint16_t i = 0; i < code_half_size; i++) {
+    q->i_even[i] = 2 * i;
+    q->i_odd[i]  = 2 * i + 1;
+  }
+
+  q->code_size      = code_size;
+  q->code_size_log  = code_size_log;
+  q->code_half_size = code_half_size;
+
+  return q;
+
+alloc_failed:
+  // report first: the free() calls below are allowed to modify errno
+  perror("malloc");
+  free(q->tmp);
+  free(q->i_even);
+  free(q->i_odd);
+  free(q);
+  return NULL;
+}
+
+/*
+ * Encodes input into output with the pipelined polar encoder referenced by p.
+ *
+ * All code_size_log stages are identical: the current vector is split into its even and odd
+ * positions (stored in the first and second half of the scratch buffer), and the output is
+ * rebuilt by XOR-ing scratch entries pairwise. The first stage reads from input, the following
+ * ones from the output of the previous stage.
+ *
+ * Returns 0 on success, -1 if p is NULL, if code_size_log is 0 or above 16, or if the requested
+ * codeword is larger than the one the encoder was created for.
+ */
+int polar_encoder_encode_pipelined(void* p, const uint8_t* input, uint8_t* output, const uint8_t code_size_log)
+{
+
+  struct pPIPELINED* q = p;
+
+  if (q == NULL) {
+    return -1;
+  }
+
+  // 0 would make the shift below undefined; above 16 the half size no longer fits in 16 bits
+  // and would wrap around, slipping past the size check
+  if (code_size_log == 0 || code_size_log > 16) {
+    printf("ERROR: unsupported code size log %d.\n", code_size_log);
+    return -1;
+  }
+
+  uint16_t code_half_size = 1U << (code_size_log - 1U);
+  if (code_half_size > q->code_half_size) {
+    printf("ERROR: max code size %d, current code size %d.\n", 2 * q->code_half_size, 2 * code_half_size);
+    return -1;
+  }
+
+  // the first stage reads the caller's input (and thereby initializes output)
+  const uint8_t* stage_in = input;
+
+  for (uint16_t i = 0; i < code_size_log; i++) {
+
+    // even positions go to the first half of tmp, odd positions to the second half
+    for (uint16_t j = 0; j < code_half_size; j++) {
+      q->tmp[j]                  = stage_in[q->i_even[j]];
+      q->tmp[j + code_half_size] = stage_in[q->i_odd[j]];
+    }
+
+    for (uint16_t j = 0; j < code_half_size; j++) {
+      output[q->i_odd[j]]  = q->tmp[q->i_odd[j]];
+      output[q->i_even[j]] = q->tmp[q->i_even[j]] ^ q->tmp[q->i_odd[j]]; // bitXor
+    }
+
+    // remaining stages work on the result of the previous one
+    stage_in = output;
+  }
+
+  return 0;
+}
--- a/lib/src/phy/fec/polar/polar_encoder_pipelined.h
+++ b/lib/src/phy/fec/polar/polar_encoder_pipelined.h
@ -0,0 +1,62 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_encoder_pipelined.h
+ * \brief Declaration of the pipelined polar encoder.
+ * \author Jesus Gomez (CTTC) \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ */
+
+#ifndef POLAR_ENCODER_PIPELINED_H
+#define POLAR_ENCODER_PIPELINED_H
+
+#include <stdint.h>
+
+/*!
+ * The pipelined polar encoder "destructor": it frees all the resources allocated to the encoder.
+ *
+ * \param[in, out] p A pointer to the dismantled encoder.
+ */
+void delete_polar_encoder_pipelined(void* p);
+
+/*!
+ * Encodes the input vector into a codeword with the specified polar encoder.
+ * \param[in] p A void pointer to the pipelined polar encoder structure (type pPIPELINED).
+ * \param[in] input The encoder input vector.
+ * \param[out] output The encoder output vector.
+ * \param[in] code_size_log The \f$\log_2\f$ of the number of bits of the encoder input/output vector.
+ * It can be less than or equal to the maximum code_size_log specified in q.code_size_log of the
+ * srslte_polar_encoder_t structure.
+ * \return An integer: 0 if the function executes correctly, -1 otherwise.
+ */
+int polar_encoder_encode_pipelined(void* p, const uint8_t* input, uint8_t* output, uint8_t code_size_log);
+
+/*!
+ * Creates a pipelined polar encoder structure of type pPIPELINED, and allocates memory for the encoding buffers.
+ *
+ * \param[in] code_size_log \f$\log_2\f$ of the number of bits in the codeword.
+ * \return A pointer to a pPIPELINED structure if the function executes correctly, NULL otherwise.
+ */
+void* create_polar_encoder_pipelined(uint8_t code_size_log);
+
+#endif // POLAR_ENCODER_PIPELINED_H
--- a/lib/src/phy/fec/polar/test/CMakeLists.txt
+++ b/lib/src/phy/fec/polar/test/CMakeLists.txt
@ -0,0 +1,71 @@
+#
+# Project: 5GCoding-SRS
+# Author: Jesus Gomez (CTTC)
+# Copyright: Software Radio Systems Limited
+#
+
+add_library(polar_test_utils polar_sets.c subchannel_allocation.c)
+
+add_executable(polar_chain_test polar_chain_test.c)
+
+target_link_libraries(polar_chain_test srslte_phy polar_test_utils)
+
+# The test binary is written to the directory that also receives the frozen-set files.
+set_target_properties(polar_chain_test
+        PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/tests/polar"
+        )
+
+# Map every frozen-set file in the source tree to its copy under tests/polar/frozensets.
+file(GLOB FROZEN_SETS
+        "frozensets/*.bin"
+        )
+set(OUT_FROZEN_SETS ${FROZEN_SETS})
+list(TRANSFORM OUT_FROZEN_SETS REPLACE "${CMAKE_CURRENT_SOURCE_DIR}/frozensets/" "")
+list(TRANSFORM OUT_FROZEN_SETS PREPEND "${PROJECT_SOURCE_DIR}/tests/polar/frozensets/")
+
+# Use CMake's own copy tool with an explicit destination: it does not depend on a shell "cp"
+# and creates the destination directory when it does not exist yet.
+add_custom_command(
+        OUTPUT ${OUT_FROZEN_SETS}
+        COMMAND ${CMAKE_COMMAND} -E copy_directory frozensets "${PROJECT_SOURCE_DIR}/tests/polar/frozensets"
+        DEPENDS ${FROZEN_SETS}
+        WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+        COMMENT "Copying frozen set files"
+        VERBATIM
+)
+
+add_custom_target(polar_frozen_sets
+        DEPENDS ${OUT_FROZEN_SETS}
+        )
+
+# The frozen sets must be in place before the test binary is considered built.
+add_dependencies(polar_chain_test polar_frozen_sets)
+
+### Test polar libs
+# Registers one CTest case per code configuration listed below.
+# The first argument is forwarded to the test binary as its -s (SNR) option.
+# test_name and test_command are read from the caller's scope.
+function(polar_unit_tests)
+    set(snr ${ARGV0})  # 101 means no noise, 100 scan
+    # Column k of the following lists describes test case k.
+    set(code_logs     5   6  6  6   7   7   8   8   9   9   10)
+    set(rate_sizes   32  64 64 64 128 128 256 256 512 864 1024)
+    set(msg_sizes    31  31 36 63  36  64  36 128 256  56  512)
+    set(parity_sizes  0   0  0  0   0   0   0   0   0   0    0)
+    set(wm_sizes      0   0  0  0   0   0   0   0   0   0    0)
+
+    list(LENGTH code_logs n_cases)
+    math(EXPR last_case "${n_cases} - 1")
+
+    foreach(idx RANGE ${last_case})
+        list(GET code_logs ${idx} c)
+        list(GET rate_sizes ${idx} r)
+        list(GET msg_sizes ${idx} m)
+        list(GET parity_sizes ${idx} pc)
+        list(GET wm_sizes ${idx} w)
+        add_test(
+                NAME ${test_name}-s${snr}-c${c}-r${r}-m${m}-p${pc}-w${w}
+                COMMAND ${test_command} -s${snr} -c${c} -r${r} -m${m} -p${pc} -w${w}
+                WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/polar
+                )
+    endforeach()
+endfunction()
+
+# Unit tests
+# test_name and test_command are picked up by polar_unit_tests() from this scope.
+# The argument is passed to the test binary as -s101, i.e. the noiseless case.
+set(test_name POLAR-UNIT-TEST)
+set(test_command polar_chain_test)
+polar_unit_tests(101)
+
+# WER (performance) tests
+# For these tests, run ctest --verbose
+# Same set of code configurations, run with -s-3 (an SNR of -3 dB).
+set(test_name POLAR-PERF-TEST)
+set(test_command polar_chain_test)
+polar_unit_tests(-3)
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_1024_1024_512_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_1024_1024_512_0_0.bin
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_128_128_36_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_128_128_36_0_0.bin
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_128_128_64_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_128_128_64_0_0.bin
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_256_256_128_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_256_256_128_0_0.bin
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_256_256_36_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_256_256_36_0_0.bin
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_32_32_16_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_32_32_16_0_0.bin
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_32_32_31_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_32_32_31_0_0.bin
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_512_512_256_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_512_512_256_0_0.bin
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_512_512_36_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_512_512_36_0_0.bin
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_512_864_56_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_512_864_56_0_0.bin
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_64_64_31_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_64_64_31_0_0.bin
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_64_64_36_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_64_64_36_0_0.bin
--- a/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_64_64_63_0_0.bin
+++ b/lib/src/phy/fec/polar/test/frozensets/polar_code_sets_64_64_63_0_0.bin
--- a/lib/src/phy/fec/polar/test/polar_chain_test.c
+++ b/lib/src/phy/fec/polar/test/polar_chain_test.c
@ -0,0 +1,803 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_chain_test.c
+ * \brief Throughput and WER tests for the polar encoder/decoder.
+ *
+ * Synopsis: **polar_chain_test [options]**
+ *
+ * Options:
+ *
+ *  - <b>-c \<number\></b> \f$log_2\f$ of the codeword length [Default 8]
+ *
+ *  - <b>-r \<number\></b> Rate matching size [Default 256]
+ *
+ *  - <b>-m \<number\></b> Message size [Default 128]
+ *
+ *  - <b>-p \<number\></b> Parity-set size [Default 0]
+ *
+ *  - <b>-w \<number\></b> nWmPC [Default 0]
+ *
+ *  - <b>-s \<number\></b>  SNR [dB, Default 3.00 dB] -- Use 100 for scan, and 101 for noiseless
+ *
+ *  - <b>-o \<number\></b>  Print output results [Default 0] -- Use 0 for detailed, Use 1 for 1 line, Use 2 for vector
+ * form
+ *
+ * It (1) generates a random set of bits (data); (2) passes the data bits
+ * through the subchannel allocation block where the input vector to the
+ * encoder is generated; (3) encodes the input vector; (4) adds Gaussian channel noise
+ * (optional); (5) decodes the resulting log-likelihood ratios; (6) passes the
+ * decoder output through the subchannel deallocation block where data bits are
+ * extracted; (7) compares the decoded bits with the original data bits and
+ * measures the throughput (in bit / s).
+ *
+ * The message, frozen and parity bit sets corresponding to the input
+ * parameters -c, -r, -m, -p, -w must be available in the subfolder \a
+ * frozensets of the execution directory.
+ * These sets are stored in files with the following name convention:
+ * >  polar_code_sets_<code_size>_<rate_matching_size>_<message_size>_<parity_set_size>_<wmPC>.bin
+ *
+ * See \ref polar for further details.
+ *
+ */
+
+#include "math.h"
+
+#include "srslte/phy/channel/ch_awgn.h"
+#include "srslte/phy/common/timestamp.h"
+#include "srslte/phy/utils/bit.h"
+#include "srslte/phy/utils/debug.h"
+#include "srslte/phy/utils/phy_logger.h"
+#include "srslte/phy/utils/random.h"
+#include "srslte/phy/utils/vector.h" // srslte_convert_dB_to_amplitude
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+// cttc utils lib
+#include "srslte/phy/utils/vector.h"
+
+//  polar libs
+#include "polar_sets.h"
+#include "srslte/phy/fec/polar/polar_decoder.h"
+#include "srslte/phy/fec/polar/polar_encoder.h"
+#include "subchannel_allocation.h"
+
+#define SNR_POINTS 10  /*!< \brief Number of SNR evaluation points.*/
+#define SNR_MIN (-2.0) /*!< \brief Min SNR [dB].*/
+#define SNR_MAX 8.0    /*!< \brief Max SNR [dB].*/
+
+#define BATCH_SIZE 100    /*!< \brief Number of codewords in a batch. */
+#define MAX_N_BATCH 10000 /*!< \brief Max number of simulated batches. */
+#define REQ_ERRORS 100    /*!< \brief Minimum number of errors for a significant simulation. */
+
+// default values
+uint8_t  code_size_log      = 8;   /*!< \brief \f$log_2\f$ of code size. */
+uint16_t message_size       = 128; /*!< \brief Number of message bits (data and CRC). */
+uint16_t rate_matching_size = 256; /*!< \brief Number of bits of the codeword after rate matching. */
+uint8_t  parity_set_size    = 0;   /*!< \brief Number of parity bits. */
+uint8_t  nWmPC              = 0;   /*!< \brief Number of parity bits of minimum weight type. */
+double   snr_db             = 3;   /*!< \brief SNR in dB (101 for no noise, 100 for scan). */
+int      print_output       = 0;   /*!< \brief print output form (0 for detailed, 1 for 1 line, 2 for vector). */
+
+/*!
+ * \brief Prints test help when a wrong parameter is passed as input.
+ *
+ * The defaults shown are the current values of the corresponding global option variables.
+ *
+ * \param[in] prog Program name printed in the synopsis line.
+ */
+void usage(char* prog)
+{
+  // the synopsis lists every option handled by parse_args(), -o included
+  printf("Usage: %s [-cX] [-rX] [-mX] [-pX] [-wX] [-sX] [-oX]\n", prog);
+  printf("\t-c log2 of the codeword length [Default %d]\n", code_size_log);
+  printf("\t-r Rate matching size [Default %d]\n", rate_matching_size);
+  printf("\t-m Message size [Default %d]\n", message_size);
+  printf("\t-p Parity-set size [Default %d]\n", parity_set_size);
+  printf("\t-w nWmPC [Default %d]\n", nWmPC);
+  printf("\t-s SNR [dB, Default %.2f dB] -- Use 100 for scan, and 101 for noiseless\n", snr_db);
+  printf("\t-o Print output results [Default %d] -- Use 0 for detailed, Use 1 for 1 line, Use 2 for vector form\n",
+         print_output);
+}
+
+/*!
+ * \brief Converts a numeric option argument, terminating the program on invalid input.
+ *
+ * The whole string must be a base-10 integer in the range [0, max_value]; otherwise an error
+ * and the usage text are printed and the program exits with status -1.
+ *
+ * \param[in] prog      Program name forwarded to usage().
+ * \param[in] text      Option argument to convert.
+ * \param[in] max_value Largest accepted value.
+ * \return The converted value.
+ */
+static long parse_bounded_arg(char* prog, const char* text, long max_value)
+{
+  char* end   = NULL;
+  long  value = strtol(text, &end, 10);
+
+  // strtol saturates on overflow, so out-of-range input is caught by the bounds check as well
+  if (end == text || *end != '\0' || value < 0 || value > max_value) {
+    printf("ERROR: invalid option value \"%s\" (expected an integer between 0 and %ld).\n", text, max_value);
+    usage(prog);
+    exit(-1);
+  }
+
+  return value;
+}
+
+/*!
+ * \brief Parses the input line.
+ *
+ * Each recognized option overwrites the corresponding global option variable. Integer options
+ * are checked against the range of the variable that stores them; any unknown option or invalid
+ * value prints the usage text and terminates the program.
+ */
+void parse_args(int argc, char** argv)
+{
+  int opt = 0;
+  while ((opt = getopt(argc, argv, "c:r:m:p:w:s:o:")) != -1) {
+    switch (opt) {
+      case 'c':
+        // main() stores 1U << code_size_log in a uint16_t, hence the upper bound of 15
+        code_size_log = (uint8_t)parse_bounded_arg(argv[0], optarg, 15);
+        break;
+      case 'r':
+        rate_matching_size = (uint16_t)parse_bounded_arg(argv[0], optarg, UINT16_MAX);
+        break;
+      case 'm':
+        message_size = (uint16_t)parse_bounded_arg(argv[0], optarg, UINT16_MAX);
+        break;
+      case 'p':
+        parity_set_size = (uint8_t)parse_bounded_arg(argv[0], optarg, UINT8_MAX);
+        break;
+      case 'w':
+        nWmPC = (uint8_t)parse_bounded_arg(argv[0], optarg, UINT8_MAX);
+        break;
+      case 's':
+        snr_db = strtof(optarg, NULL);
+        break;
+      case 'o':
+        print_output = (int)strtol(optarg, NULL, 10);
+        break;
+      default:
+        usage(argv[0]);
+        exit(-1);
+    }
+  }
+}
+
+/*!
+ * \brief Main function.
+ */
+int main(int argc, char** argv)
+{
+  uint8_t* data_tx        = NULL;
+  uint8_t* data_rx        = NULL;
+  uint8_t* data_rx_s      = NULL;
+  uint8_t* data_rx_c      = NULL;
+  uint8_t* data_rx_c_avx2 = NULL;
+
+  uint8_t* input_enc       = NULL; // input encoder
+  uint8_t* output_enc      = NULL; // output encoder
+  uint8_t* output_enc_avx2 = NULL; // output encoder
+
+  float*   llr        = NULL; // input decoder
+  int16_t* llr_s      = NULL; // input decoder
+  int8_t*  llr_c      = NULL; // input decoder
+  int8_t*  llr_c_avx2 = NULL; // input decoder
+
+  uint8_t* output_dec        = NULL; // output decoder
+  uint8_t* output_dec_s      = NULL; // output decoder
+  uint8_t* output_dec_c      = NULL; // output decoder
+  uint8_t* output_dec_c_avx2 = NULL; // output decoder
+
+  double var[SNR_POINTS + 1];
+
+  double snr_db_vec[SNR_POINTS + 1];
+  int    i = 0;
+
+  int j          = 0;
+  int snr_points = 0;
+
+  int errors_symb        = 0;
+  int errors_symb_s      = 0;
+  int errors_symb_c      = 0;
+  int errors_symb_c_avx2 = 0;
+
+  int n_error_words[SNR_POINTS + 1];
+  int n_error_words_s[SNR_POINTS + 1];
+  int n_error_words_c[SNR_POINTS + 1];
+  int n_error_words_c_avx2[SNR_POINTS + 1];
+
+  int last_i_batch[SNR_POINTS + 1];
+
+  struct timeval t[3];
+  double         elapsed_time_dec[SNR_POINTS + 1];
+  double         elapsed_time_dec_s[SNR_POINTS + 1];
+  double         elapsed_time_dec_c[SNR_POINTS + 1];
+  double         elapsed_time_dec_c_avx2[SNR_POINTS + 1];
+
+  double elapsed_time_enc[SNR_POINTS + 1];
+  double elapsed_time_enc_avx2[SNR_POINTS + 1];
+
+  // 16-bit quantizer
+  int16_t inf16       = (1U << 15U) - 1;
+  int8_t  inf8        = (1U << 7U) - 1;
+  float   gain_s      = NAN;
+  float   gain_c      = NAN;
+  float   gain_c_avx2 = NAN;
+
+  srslte_polar_sets_t    sets;
+  srslte_subchn_alloc_t  subch;
+  srslte_polar_encoder_t enc;
+  srslte_polar_decoder_t dec;
+  srslte_polar_decoder_t dec_s; // 16-bit
+  srslte_polar_decoder_t dec_c; // 8-bit
+#ifdef LV_HAVE_AVX2
+  srslte_polar_encoder_t enc_avx2;
+  srslte_polar_decoder_t dec_c_avx2; // 8-bit
+#endif                               // LV_HAVE_AVX2
+
+  parse_args(argc, argv);
+
+  uint16_t code_size = 1U << code_size_log;
+
+  printf("Test POLAR chain:\n");
+  printf("  Final code bits    -> E  = %d\n", rate_matching_size);
+  printf("  Code bits          -> N  = %d\n", code_size);
+  printf("  CRC + Data bits    -> K  = %d\n", message_size);
+  printf("  Parity Check bits  -> PC = %d \n", parity_set_size);
+  printf("  Code rate          -> (K + PC)/N = (%d + %d)/%d = %.2f\n",
+         message_size,
+         parity_set_size,
+         code_size,
+         (double)(message_size + parity_set_size) / code_size);
+
+  // read polar index sets from a file
+  srslte_polar_code_sets_read(&sets, message_size, code_size_log, rate_matching_size, parity_set_size, nWmPC);
+
+  // subchannel allocation
+  srslte_subchannel_allocation_init(&subch, code_size_log, message_size, sets.message_set);
+
+  // initialize encoder pipeline
+  srslte_polar_encoder_init(&enc, SRSLTE_POLAR_ENCODER_PIPELINED, code_size_log);
+
+  // initialize a POLAR decoder (float)
+  srslte_polar_decoder_init(&dec, SRSLTE_POLAR_DECODER_SSC_F, code_size_log, sets.frozen_set, sets.frozen_set_size);
+
+  // initialize a POLAR decoder (16 bit)
+  srslte_polar_decoder_init(&dec_s, SRSLTE_POLAR_DECODER_SSC_S, code_size_log, sets.frozen_set, sets.frozen_set_size);
+
+  // initialize a POLAR decoder (8 bit)
+  srslte_polar_decoder_init(&dec_c, SRSLTE_POLAR_DECODER_SSC_C, code_size_log, sets.frozen_set, sets.frozen_set_size);
+
+#ifdef LV_HAVE_AVX2
+
+  // initialize encoder  avx2
+  srslte_polar_encoder_init(&enc_avx2, SRSLTE_POLAR_ENCODER_AVX2, code_size_log);
+
+  // initialize a POLAR decoder (8 bit, avx2)
+  srslte_polar_decoder_init(
+      &dec_c_avx2, SRSLTE_POLAR_DECODER_SSC_C_AVX2, code_size_log, sets.frozen_set, sets.frozen_set_size);
+#endif // LV_HAVE_AVX2
+
+#ifdef DATA_ALL_ONES
+#else
+  srslte_random_t random_gen = srslte_random_init(0);
+#endif
+
+  data_tx        = srslte_vec_u8_malloc(message_size * BATCH_SIZE);
+  data_rx        = srslte_vec_u8_malloc(message_size * BATCH_SIZE);
+  data_rx_s      = srslte_vec_u8_malloc(message_size * BATCH_SIZE);
+  data_rx_c      = srslte_vec_u8_malloc(message_size * BATCH_SIZE);
+  data_rx_c_avx2 = srslte_vec_u8_malloc(message_size * BATCH_SIZE);
+
+  input_enc       = srslte_vec_u8_malloc(code_size * BATCH_SIZE);
+  output_enc      = srslte_vec_u8_malloc(code_size * BATCH_SIZE);
+  output_enc_avx2 = srslte_vec_u8_malloc(code_size * BATCH_SIZE);
+
+  llr        = srslte_vec_f_malloc(code_size * BATCH_SIZE);
+  llr_s      = srslte_vec_i16_malloc(code_size * BATCH_SIZE);
+  llr_c      = srslte_vec_i8_malloc(code_size * BATCH_SIZE);
+  llr_c_avx2 = srslte_vec_i8_malloc(code_size * BATCH_SIZE);
+
+  output_dec        = srslte_vec_u8_malloc(code_size * BATCH_SIZE);
+  output_dec_s      = srslte_vec_u8_malloc(code_size * BATCH_SIZE);
+  output_dec_c      = srslte_vec_u8_malloc(code_size * BATCH_SIZE);
+  output_dec_c_avx2 = srslte_vec_u8_malloc(code_size * BATCH_SIZE);
+
+  if (!data_tx || !data_rx || !data_rx_s || !data_rx_c || !data_rx_c_avx2 || !input_enc || !output_enc ||
+      !output_enc_avx2 || !llr || !llr_s || !llr_c || !llr_c_avx2 || !output_dec || !output_dec_s || !output_dec_c ||
+      !output_dec_c_avx2) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  // if snr_db = 100 compute a rage from SNR_MIN to SNR_MAX with SNR_POINTS
+  // else use the specified SNR.
+  double snr_inc = NAN;
+
+  snr_inc = (SNR_MAX - SNR_MIN) / SNR_POINTS;
+
+  if (snr_db == 100.0) {
+    snr_points = SNR_POINTS;
+    for (int32_t i = 0; i < snr_points; i++) {
+      snr_db        = SNR_MIN + i * snr_inc;
+      snr_db_vec[i] = snr_db;
+      var[i]        = srslte_convert_dB_to_amplitude(-snr_db);
+    }
+    snr_db_vec[snr_points] = 101; // include the no noise case
+    snr_points++;
+  } else {
+    snr_db_vec[0] = snr_db;
+    var[0]        = srslte_convert_dB_to_amplitude(-snr_db);
+    snr_points    = 1;
+  }
+
+  if (snr_db == 100) { // scan
+    printf("  SNR_MIN = %f, SNR_INC = %f, SNR_MAX = %f, snr_points: %d\n",
+           SNR_MIN,
+           snr_inc,
+           SNR_MIN + snr_inc * snr_points,
+           snr_points);
+  }
+
+  for (int32_t i_snr = 0; i_snr < snr_points; i_snr++) {
+    if (snr_db_vec[i_snr] == 101) {
+      printf("\n  Signal-to-Noise Ratio -> infinite\n");
+    } else {
+      printf("\n  Signal-to-Noise Ratio -> %.1f dB\n", snr_db_vec[i_snr]);
+    }
+
+    elapsed_time_enc[i_snr]        = 0;
+    elapsed_time_enc_avx2[i_snr]   = 0;
+    elapsed_time_dec[i_snr]        = 0;
+    elapsed_time_dec_s[i_snr]      = 0;
+    elapsed_time_dec_c[i_snr]      = 0;
+    elapsed_time_dec_c_avx2[i_snr] = 0;
+
+    n_error_words[i_snr]        = 0;
+    n_error_words_s[i_snr]      = 0;
+    n_error_words_c[i_snr]      = 0;
+    n_error_words_c_avx2[i_snr] = 0;
+
+    int i_batch = 0;
+    printf("\nBatch:\n  ");
+
+    int req_errors  = 0;
+    int max_n_batch = 0;
+    if (snr_db_vec[i_snr] == 101) {
+      req_errors  = 1;
+      max_n_batch = 1;
+    } else {
+      req_errors  = REQ_ERRORS;
+      max_n_batch = MAX_N_BATCH;
+    }
+
+    while ((n_error_words[i_snr] < req_errors) && (i_batch < max_n_batch)) {
+      i_batch++;
+
+      if (!(i_batch % 10)) {
+        printf("%8d", i_batch);
+        if (!(i_batch % 90)) {
+          printf("\n  ");
+        }
+      }
+
+// generate data_tx
+#ifdef DATA_ALL_ONES
+      for (i = 0; i < BATCH_SIZE; i++) {
+        for (j = 0; j < message_size; j++) {
+          data_tx[i * message_size + j] = 1;
+        }
+      }
+
+#else
+      for (i = 0; i < BATCH_SIZE; i++) {
+        for (j = 0; j < message_size; j++) {
+          data_tx[i * message_size + j] = srslte_random_uniform_int_dist(random_gen, 0, 1);
+        }
+      }
+#endif
+
+      // subchannel_allocation block
+      for (i = 0; i < BATCH_SIZE; i++) {
+        srslte_subchannel_allocation(&subch, data_tx + i * message_size, input_enc + i * code_size);
+      }
+
+      // encoding pipeline
+      gettimeofday(&t[1], NULL);
+      for (j = 0; j < BATCH_SIZE; j++) {
+        srslte_polar_encoder_encode(&enc, input_enc + j * code_size, output_enc + j * code_size, code_size_log);
+      }
+      gettimeofday(&t[2], NULL);
+      get_time_interval(t);
+
+      elapsed_time_enc[i_snr] += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+#ifdef LV_HAVE_AVX2
+      // encoding  avx2
+      gettimeofday(&t[1], NULL);
+      for (j = 0; j < BATCH_SIZE; j++) {
+        srslte_polar_encoder_encode(
+            &enc_avx2, input_enc + j * code_size, output_enc_avx2 + j * code_size, code_size_log);
+      }
+      gettimeofday(&t[2], NULL);
+      get_time_interval(t);
+
+      elapsed_time_enc_avx2[i_snr] += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+      // check encoders have the same output.
+
+      // check errors with respect the output of the pipeline encoder
+      for (i = 0; i < BATCH_SIZE; i++) {
+        if (srslte_bit_diff(output_enc + i * code_size, output_enc_avx2 + i * code_size, code_size) != 0) {
+          printf("ERROR: Wrong avx2 encoder output. SNR= %f, Batch: %d\n", snr_db_vec[i_snr], i);
+          exit(-1);
+        }
+      }
+#endif // LV_HAVE_AVX2
+
+      for (j = 0; j < code_size * BATCH_SIZE; j++) {
+        llr[j] = output_enc[j] ? -1 : 1;
+      }
+
+      // add noise
+      if (snr_db_vec[i_snr] != 101) {
+        srslte_ch_awgn_f(llr, llr, var[i_snr], BATCH_SIZE * code_size);
+
+        // Convert symbols into LLRs
+        for (j = 0; j < BATCH_SIZE * code_size; j++) {
+          llr[j] *= 2 / (var[i_snr] * var[i_snr]);
+        }
+      }
+
+      // decoding float point
+      gettimeofday(&t[1], NULL);
+      for (j = 0; j < BATCH_SIZE; j++) {
+        srslte_polar_decoder_decode_f(&dec, llr + j * code_size, output_dec + j * code_size);
+      }
+
+      gettimeofday(&t[2], NULL);
+      get_time_interval(t);
+      elapsed_time_dec[i_snr] += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+      // extract message bits - float decoder
+      for (j = 0; j < BATCH_SIZE; j++) {
+        srslte_subchannel_deallocation(&subch, output_dec + j * code_size, data_rx + j * message_size);
+      }
+
+      // check errors - float decpder
+      for (i = 0; i < BATCH_SIZE; i++) {
+        errors_symb = srslte_bit_diff(data_tx + i * message_size, data_rx + i * message_size, message_size);
+
+        if (errors_symb != 0) {
+          n_error_words[i_snr]++;
+        }
+      }
+
+      // decoding 16-bit
+      // 16-quantization
+      if (snr_db_vec[i_snr] == 101) {
+        srslte_vec_quant_fs(llr, llr_s, 8192, 0, 32767, BATCH_SIZE * code_size);
+      } else {
+        gain_s = inf16 * var[i_snr] / 20 / (1 / var[i_snr] + 2);
+        srslte_vec_quant_fs(llr, llr_s, gain_s, 0, inf16, BATCH_SIZE * code_size);
+      }
+
+      // decoding 16-bit
+      gettimeofday(&t[1], NULL);
+      for (j = 0; j < BATCH_SIZE; j++) {
+        srslte_polar_decoder_decode_s(&dec_s, llr_s + j * code_size, output_dec_s + j * code_size);
+      }
+
+      gettimeofday(&t[2], NULL);
+      get_time_interval(t);
+      elapsed_time_dec_s[i_snr] += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+      // extract message bits  16-bit decoder
+      for (j = 0; j < BATCH_SIZE; j++) {
+        srslte_subchannel_deallocation(&subch, output_dec_s + j * code_size, data_rx_s + j * message_size);
+      }
+
+      // check errors 16-bit decoder
+      for (i = 0; i < BATCH_SIZE; i++) {
+        errors_symb_s = srslte_bit_diff(data_tx + i * message_size, data_rx_s + i * message_size, message_size);
+
+        if (errors_symb_s != 0) {
+          n_error_words_s[i_snr]++;
+        }
+      }
+
+      // 8-bit decoding
+      // 8-bit quantization
+      if (snr_db_vec[i_snr] == 101) {
+        srslte_vec_quant_fc(llr, llr_c, 32, 0, 127, BATCH_SIZE * code_size);
+      } else {
+        gain_c = inf8 * var[i_snr] / 20 / (1 / var[i_snr] + 2);
+        srslte_vec_quant_fc(llr, llr_c, gain_c, 0, inf8, BATCH_SIZE * code_size);
+      }
+
+      gettimeofday(&t[1], NULL);
+      for (j = 0; j < BATCH_SIZE; j++) {
+        srslte_polar_decoder_decode_c(&dec_c, llr_c + j * code_size, output_dec_c + j * code_size);
+      }
+      gettimeofday(&t[2], NULL);
+      get_time_interval(t);
+      elapsed_time_dec_c[i_snr] += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+      // extract message bits
+      for (j = 0; j < BATCH_SIZE; j++) {
+        srslte_subchannel_deallocation(&subch, output_dec_c + j * code_size, data_rx_c + j * message_size);
+      }
+
+      // check errors 8-bits decoder
+      for (i = 0; i < BATCH_SIZE; i++) {
+
+        errors_symb_c = srslte_bit_diff(data_tx + i * message_size, data_rx_c + i * message_size, message_size);
+
+        if (errors_symb_c != 0) {
+          n_error_words_c[i_snr]++;
+        }
+      }
+
+#ifdef LV_HAVE_AVX2
+      // 8-bit avx2 decoding
+      // 8-bit quantization
+      if (snr_db_vec[i_snr] == 101) {
+        srslte_vec_quant_fc(llr, llr_c_avx2, 32, 0, 127, BATCH_SIZE * code_size);
+      } else {
+        gain_c_avx2 = inf8 * var[i_snr] / 20 / (1 / var[i_snr] + 2);
+        srslte_vec_quant_fc(llr, llr_c_avx2, gain_c_avx2, 0, inf8, BATCH_SIZE * code_size);
+      }
+
+      gettimeofday(&t[1], NULL);
+      for (j = 0; j < BATCH_SIZE; j++) {
+        srslte_polar_decoder_decode_c(&dec_c_avx2, llr_c_avx2 + j * code_size, output_dec_c_avx2 + j * code_size);
+      }
+      gettimeofday(&t[2], NULL);
+      get_time_interval(t);
+      elapsed_time_dec_c_avx2[i_snr] += t[0].tv_sec + 1e-6 * t[0].tv_usec;
+
+      // extract message bits
+      for (j = 0; j < BATCH_SIZE; j++) {
+        srslte_subchannel_deallocation(&subch, output_dec_c_avx2 + j * code_size, data_rx_c_avx2 + j * message_size);
+      }
+
+      // check errors 8-bits decoder
+      for (i = 0; i < BATCH_SIZE; i++) {
+
+        errors_symb_c_avx2 =
+            srslte_bit_diff(data_tx + i * message_size, data_rx_c_avx2 + i * message_size, message_size);
+
+        if (errors_symb_c_avx2 != 0) {
+          n_error_words_c_avx2[i_snr]++;
+        }
+      }
+#endif // LV_HAVE_AVX2
+
+      last_i_batch[i_snr] = i_batch;
+    } // end while BATCH
+
+  } // snr_db
+
+  printf("\n");
+  switch (print_output) {
+    case 2:
+
+      printf("SNR=[");
+      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
+        printf("%3.1f ", snr_db_vec[i_snr] - 3);
+      }
+      printf("];\n");
+      printf("WER=[");
+      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
+        printf("%e ", (float)n_error_words[i_snr] / last_i_batch[i_snr] / BATCH_SIZE);
+      }
+      printf("];\n");
+
+      printf("WER_16=[");
+      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
+        printf("%e ", (float)n_error_words_s[i_snr] / last_i_batch[i_snr] / BATCH_SIZE);
+      }
+      printf("];\n");
+
+      printf("WER_8=[");
+      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
+        printf("%e ", (float)n_error_words_c[i_snr] / last_i_batch[i_snr] / BATCH_SIZE);
+      }
+      printf("];\n");
+
+#ifdef LV_HAVE_AVX2
+      printf("WER_8_AVX2=[");
+      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
+        printf("%e ", (float)n_error_words_c_avx2[i_snr] / last_i_batch[i_snr] / BATCH_SIZE);
+      }
+      printf("];\n");
+#endif // LV_HAVE_AVX2
+      break;
+    case 1:
+      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
+        printf("SNR: %3.1f\t enc_pipe_thrpt(Mbps):  %.2f\t  enc_avx2_thrpt(Mbps):  "
+               "%.2f\n",
+               snr_db_vec[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * code_size / (1000000 * elapsed_time_enc[i_snr]),
+               last_i_batch[i_snr] * BATCH_SIZE * code_size / (1000000 * elapsed_time_enc_avx2[i_snr]));
+
+        printf("SNR: %3.1f\t FLOAT WER: %.8f %d/%d \t  dec_thrput(Mbps): %.2f\n",
+               snr_db_vec[i_snr],
+               (double)n_error_words[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
+               n_error_words[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * code_size,
+               last_i_batch[i_snr] * BATCH_SIZE * code_size / (1000000 * elapsed_time_dec[i_snr]));
+        printf("SNR: %3.1f\t INT16 WER: %.8f %d/%d \t dec_thrput(Mbps): %.2f\n",
+               snr_db_vec[i_snr],
+               (double)n_error_words_s[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
+               n_error_words_s[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * code_size,
+               last_i_batch[i_snr] * BATCH_SIZE * code_size / (1000000 * elapsed_time_dec_s[i_snr]));
+        printf("SNR: %3.1f\t INT8  WER: %.8f %d/%d \t dec_thrput(Mbps): %.2f\n",
+               snr_db_vec[i_snr],
+               (double)n_error_words_c[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
+               n_error_words_c[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * code_size,
+               last_i_batch[i_snr] * BATCH_SIZE * code_size / (1000000 * elapsed_time_dec_c[i_snr]));
+#ifdef LV_HAVE_AVX2
+        printf("SNR: %3.1f\t INT8-AVX2  WER: %.8f %d/%d \t dec_thrput(Mbps): %.2f\n",
+               snr_db_vec[i_snr],
+               (double)n_error_words_c_avx2[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
+               n_error_words_c_avx2[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * code_size,
+               last_i_batch[i_snr] * BATCH_SIZE * code_size / (1000000 * elapsed_time_dec_c_avx2[i_snr]));
+#endif // LV_HAVE_AVX2
+        printf("\n");
+      }
+
+      break;
+    default:
+
+      for (int i_snr = 0; i_snr < snr_points; i_snr++) {
+        printf("**** PIPELINE  ENCODER ****\n");
+        printf("Estimated throughput:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+               last_i_batch[i_snr] * BATCH_SIZE / elapsed_time_enc[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * message_size / elapsed_time_enc[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * code_size / elapsed_time_enc[i_snr]);
+
+#ifdef LV_HAVE_AVX2
+        printf("\n**** AVX2 ENCODER ****\n");
+        printf("Estimated throughput:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s "
+               "(encoded)\n",
+               last_i_batch[i_snr] * BATCH_SIZE / elapsed_time_enc_avx2[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * message_size / elapsed_time_enc_avx2[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * code_size / elapsed_time_enc_avx2[i_snr]);
+#endif // LV_HAVE_AVX2
+
+        printf("\n**** FLOATING POINT ****");
+        printf("\nEstimated word error rate:\n  %e (%d errors)\n",
+               (double)n_error_words[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
+               n_error_words[i_snr]);
+
+        printf("Estimated throughput decoder:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+               last_i_batch[i_snr] * BATCH_SIZE / elapsed_time_dec[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * message_size / elapsed_time_dec[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * code_size / elapsed_time_dec[i_snr]);
+
+        printf("\n**** FIXED POINT (16 bits) ****");
+        printf("\nEstimated word error rate:\n  %e (%d errors)\n",
+               (double)n_error_words_s[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
+               n_error_words_s[i_snr]);
+
+        printf("Estimated throughput decoder:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+               last_i_batch[i_snr] * BATCH_SIZE / elapsed_time_dec_s[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * message_size / elapsed_time_dec_s[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * code_size / elapsed_time_dec_s[i_snr]);
+
+        printf("\n**** FIXED POINT (8 bits) ****");
+        printf("\nEstimated word error rate:\n  %e (%d errors)\n",
+               (double)n_error_words_c[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
+               n_error_words_c[i_snr]);
+
+        printf("Estimated throughput decoder:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+               last_i_batch[i_snr] * BATCH_SIZE / elapsed_time_dec_c[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * message_size / elapsed_time_dec_c[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * code_size / elapsed_time_dec_c[i_snr]);
+
+#ifdef LV_HAVE_AVX2
+        printf("\n**** FIXED POINT (8 bits, AVX2) ****");
+        printf("\nEstimated word error rate:\n  %e (%d errors)\n",
+               (double)n_error_words_c_avx2[i_snr] / last_i_batch[i_snr] / BATCH_SIZE,
+               n_error_words_c_avx2[i_snr]);
+
+        printf("Estimated throughput decoder:\n  %e word/s\n  %e bit/s (information)\n  %e bit/s (encoded)\n",
+               last_i_batch[i_snr] * BATCH_SIZE / elapsed_time_dec_c_avx2[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * message_size / elapsed_time_dec_c_avx2[i_snr],
+               last_i_batch[i_snr] * BATCH_SIZE * code_size / elapsed_time_dec_c_avx2[i_snr]);
+#endif // LV_HAVE_AVX2
+
+        printf("\n");
+      }
+      break;
+  }
+
+  free(data_tx);
+  free(data_rx);
+  free(data_rx_s);
+  free(data_rx_c);
+
+  free(input_enc);
+  free(output_enc);
+  free(output_enc_avx2);
+
+  free(llr);
+  free(llr_s);
+  free(llr_c);
+
+  free(output_dec);
+  free(output_dec_s);
+  free(output_dec_c);
+
+#ifdef DATA_ALL_ONES
+#else
+  srslte_random_free(random_gen);
+#endif
+  // free sets
+  srslte_polar_code_sets_free(&sets);
+  srslte_polar_encoder_free(&enc);
+  srslte_polar_decoder_free(&dec);
+  srslte_polar_decoder_free(&dec_s);
+  srslte_polar_decoder_free(&dec_c);
+
+#ifdef LV_HAVE_AVX2
+  srslte_polar_encoder_free(&enc_avx2);
+  srslte_polar_decoder_free(&dec_c_avx2);
+#endif // LV_HAVE_AVX2
+
+  int expected_errors = 0;
+  int i_snr           = 0;
+  if (snr_db_vec[i_snr] == 101) {
+    if (n_error_words[0] > expected_errors) {
+      printf("\n(float) Test failed!\n\n");
+    } else {
+      printf("\n(float) Test completed successfully!\n\n");
+    }
+    printf("\r");
+
+    if (n_error_words_s[0] > expected_errors) {
+      printf("\n(16 bit) Test failed!\n\n");
+    } else {
+      printf("\n(16 bit) Test completed successfully!\n\n");
+    }
+    printf("\r");
+
+    if (n_error_words_c[0] > expected_errors) {
+      printf("\n(8 bit) Test failed!\n\n");
+    } else {
+      printf("\n(8 bit) Test completed successfully!\n\n");
+    }
+    printf("\r");
+
+#ifdef LV_HAVE_AVX2
+    if (n_error_words_c_avx2[0] > expected_errors) {
+      printf("\n(8 bit, avx2) Test failed!\n\n");
+    } else {
+      printf("\n(8 bit, avx2) Test completed successfully!\n\n");
+    }
+#endif // LV_HAVE_AVX2
+    printf("\r");
+
+    exit((n_error_words[0] > expected_errors) || (n_error_words_s[0] > expected_errors) ||
+         (n_error_words_c[0] > expected_errors)
+#ifdef LV_HAVE_AVX2
+         || (n_error_words_c_avx2[0] > expected_errors)
+#endif // LV_HAVE_AVX2
+    );
+
+  } else {
+    for (int i_snr = 0; i_snr < snr_points; i_snr++) {
+      if (n_error_words_s[i_snr] > 10 * n_error_words[i_snr]) {
+        perror("16-bit performance at SNR = %d too low!");
+        exit(-1);
+      }
+      if (n_error_words_c[i_snr] > 10 * n_error_words[i_snr]) {
+        perror("8-bit performance at SNR = %d too low!");
+        exit(-1);
+      }
+#ifdef LV_HAVE_AVX2
+      if (n_error_words_c_avx2[i_snr] > 10 * n_error_words[i_snr]) {
+        perror("8-bit avx2 performance at SNR = %d too low!");
+        exit(-1);
+      }
+#endif // LV_HAVE_AVX2
+    }
+
+    printf("\nTest completed successfully!\n\n");
+    printf("\r");
+  }
+}
--- a/lib/src/phy/fec/polar/test/polar_sets.c
+++ b/lib/src/phy/fec/polar/test/polar_sets.c
@ -0,0 +1,119 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_sets.c
+ * \brief Definition of the auxiliary function that reads polar index sets from a file.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ * The message and parity check sets provided by this function are needed by
+ * the subchannel allocation block.
+ * The frozen bit set provided by this function is used by the polar decoder.
+ *
+ */
+
+#include "polar_sets.h"
+
+#include <inttypes.h>
+#include <srslte/phy/utils/vector.h>
+#include <stdio.h>
+#include <stdlib.h> //exit
+#include <string.h>
+
+/*!
+ * Releases the four index arrays referenced by a polar set descriptor.
+ * A NULL descriptor is accepted and ignored; the descriptor itself is not freed.
+ */
+void srslte_polar_code_sets_free(srslte_polar_sets_t* c)
+{
+  if (c == NULL) {
+    return;
+  }
+
+  // Release order: frozen, info, message, parity.
+  uint16_t* const index_sets[] = {c->frozen_set, c->info_set, c->message_set, c->parity_set};
+  const size_t    nof_sets     = sizeof(index_sets) / sizeof(index_sets[0]);
+
+  for (size_t k = 0; k < nof_sets; k++) {
+    free(index_sets[k]);
+  }
+}
+
+/*!
+ * Allocates the four index sets of a polar code and fills them from the binary file
+ * "frozensets/polar_code_sets_<N>_<E>_<K>_<nPC>_<nWmPC>.bin", where N = 2^code_size_log,
+ * E = rate_matching_size, K = message_size and nPC = parity_set_size.
+ *
+ * The file is read in this order: info set, message set, parity set, frozen set
+ * (each as an array of uint16_t).
+ *
+ * Every failure (allocation, file name not fitting the buffer, missing file, short read)
+ * prints a diagnostic and terminates the process with exit(), so the function only
+ * returns on success.
+ *
+ * \param[out] c                  Descriptor that receives the set sizes and the allocated arrays.
+ * \param[in]  message_size       Number of data + CRC bits.
+ * \param[in]  code_size_log      Base-2 logarithm of the code size.
+ * \param[in]  rate_matching_size Number of bits of the codeword after rate matching.
+ * \param[in]  parity_set_size    Number of parity bits.
+ * \param[in]  nWmPC              Number of parity bits of minimum weight type.
+ * \return 0 on success.
+ */
+int srslte_polar_code_sets_read(srslte_polar_sets_t* c,
+                                const uint16_t       message_size,
+                                const uint8_t        code_size_log,
+                                const uint16_t       rate_matching_size,
+                                const uint8_t        parity_set_size,
+                                const uint8_t        nWmPC)
+{
+  FILE* fptr = NULL;
+  // Worst case: 27-char prefix + three 5-digit fields + two 3-digit fields + 4 separators
+  // + ".bin" + terminator = 57 bytes.
+  char filename[64];
+
+  uint16_t code_size = 1U << code_size_log;
+
+  c->frozen_set_size  = code_size - message_size - parity_set_size;
+  c->parity_set_size  = parity_set_size;
+  c->info_set_size    = message_size + parity_set_size;
+  c->message_set_size = message_size;
+
+  c->frozen_set = srslte_vec_u16_malloc(c->frozen_set_size);
+  if (!c->frozen_set) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  c->info_set = srslte_vec_u16_malloc(c->info_set_size);
+  if (!c->info_set) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  c->message_set = srslte_vec_u16_malloc(c->message_set_size);
+  if (!c->message_set) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  c->parity_set = srslte_vec_u16_malloc(parity_set_size);
+  if (!c->parity_set) {
+    perror("malloc");
+    exit(-1);
+  }
+
+  // Bounded formatting: never write past the end of filename.
+  int name_len = snprintf(filename,
+                          sizeof(filename),
+                          "frozensets/polar_code_sets_%hu_%hu_%hu_%hu_%u.bin",
+                          code_size,
+                          rate_matching_size,
+                          c->message_set_size,
+                          c->parity_set_size,
+                          nWmPC);
+  if (name_len < 0 || (size_t)name_len >= sizeof(filename)) {
+    printf("Error! could not build the polar set file name for the provided code parameters.\n");
+    exit(1);
+  }
+
+  fptr = fopen(filename, "rbe");
+
+  if (fptr == NULL) {
+    printf("Error! file: %s does not exist. Probably, the polar set file is missing in folder "
+           "/frozensets for the provided code parameters.\n",
+           filename);
+    exit(1);
+  }
+
+  // The sets are stored back to back; a short read means the file does not match the
+  // requested parameters, so refuse to continue with partially filled sets.
+  if (fread(c->info_set, sizeof(uint16_t), c->info_set_size, fptr) != c->info_set_size ||
+      fread(c->message_set, sizeof(uint16_t), c->message_set_size, fptr) != c->message_set_size ||
+      fread(c->parity_set, sizeof(uint16_t), c->parity_set_size, fptr) != c->parity_set_size ||
+      fread(c->frozen_set, sizeof(uint16_t), c->frozen_set_size, fptr) != c->frozen_set_size) {
+    printf("Error! file: %s is truncated or does not match the provided code parameters.\n", filename);
+    fclose(fptr);
+    exit(1);
+  }
+
+  fclose(fptr);
+  return 0;
+}
--- a/lib/src/phy/fec/polar/test/polar_sets.h
+++ b/lib/src/phy/fec/polar/test/polar_sets.h
@ -0,0 +1,80 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file polar_sets.h
+ * \brief Declaration of the auxiliary function that reads polar index sets from a file.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ * The message and parity check sets provided by this function are needed by
+ * the subchannel allocation block.
+ * The frozen bit set provided by this function is used by the polar decoder.
+ *
+ */
+
+#ifndef SRSLTE_POLAR_SETS_H
+#define SRSLTE_POLAR_SETS_H
+
+#include "srslte/config.h"
+#include <stdint.h>
+
+/*!
+ * \brief Describes a polar set.
+ *
+ * Holds the sizes of the message, info, parity and frozen index sets together with
+ * pointers to the index arrays themselves. The arrays are allocated and filled by
+ * srslte_polar_code_sets_read() and released by srslte_polar_code_sets_free().
+ */
+typedef struct {
+  uint16_t  message_set_size; /*!< \brief Number of message bits (data and CRC). */
+  uint16_t  info_set_size;    /*!< \brief Number of message bits plus parity bits. */
+  uint16_t  parity_set_size;  /*!< \brief Number of parity check bits. */
+  uint16_t  frozen_set_size;  /*!< \brief Number of frozen bits (code size minus info set size). */
+  uint16_t* message_set; /*!< \brief Pointer to the indices of the encoder input vector containing data and CRC bits. */
+  uint16_t* info_set;    /*!< \brief Pointer to the indices of the encoder input vector containing data, CRC and
+                       parity check bits. */
+  uint16_t* parity_set;  /*!< \brief Pointer to the indices of the encoder input vector containing the parity bits. */
+  uint16_t* frozen_set;  /*!< \brief Pointer to the indices of the encoder input vector containing frozen bits. */
+} srslte_polar_sets_t;
+
+/*!
+ * Initializes the different index sets as needed by the subchannel allocation block and/or by the polar decoder.
+ * The sets are read from a binary file located in folder "frozensets/"; the file name is built from the
+ * code size, \a rate_matching_size, \a message_size, \a parity_set_size and \a nWmPC.
+ * The index arrays are allocated by this function: release them with srslte_polar_code_sets_free().
+ * \param[out] c A pointer to the initialized polar set.
+ * \param[in] message_size Number of data + CRC bits.
+ * \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the decoder input/output vector.
+ * \param[in] rate_matching_size Number of bits of the codeword after rate matching.
+ * \param[in] parity_set_size Number of parity bits.
+ * \param[in] nWmPC Number of parity bits of minimum weight type.
+ * \return An integer: 0 if the function executes correctly. The implementation in polar_sets.c does not
+ * return an error code: on failure it prints a diagnostic and terminates the process with exit().
+ */
+int srslte_polar_code_sets_read(srslte_polar_sets_t* c,
+                                uint16_t             message_size,
+                                uint8_t              code_size_log,
+                                uint16_t             rate_matching_size,
+                                uint8_t              parity_set_size,
+                                uint8_t              nWmPC);
+
+/*!
+ * The polar set "destructor": it frees the frozen, info, message and parity index arrays.
+ * The structure pointed to by \a c is not freed and its pointers are not reset.
+ * \param[in] c A pointer to the dismantled polar set. A NULL pointer is ignored.
+ */
+void srslte_polar_code_sets_free(srslte_polar_sets_t* c);
+
+#endif // SRSLTE_POLAR_SETS_H
--- a/lib/src/phy/fec/polar/test/subchannel_allocation.c
+++ b/lib/src/phy/fec/polar/test/subchannel_allocation.c
@ -0,0 +1,66 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file subchannel_allocation.c
+ * \brief Definition of the auxiliary subchannel allocation block.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ * These functions are not fully functional nor tested to be 3gpp-5G compliant.
+ * Please, use only for testing purposes.
+ *
+ */
+
+#include "subchannel_allocation.h"
+#include <string.h> //memset
+
+/*!
+ * Fills a subchannel allocation descriptor: code size (2^code_size_log), message size
+ * and the pointer to the message index set (the array is referenced, not copied).
+ */
+void srslte_subchannel_allocation_init(srslte_subchn_alloc_t* c,
+                                       const uint8_t          code_size_log,
+                                       const uint16_t         message_set_size,
+                                       uint16_t*              message_set)
+{
+  const uint16_t nof_code_bits = 1U << code_size_log;
+
+  c->message_set  = message_set;
+  c->message_size = message_set_size;
+  c->code_size    = nof_code_bits;
+}
+
+/*!
+ * Builds the encoder input vector: all c->code_size positions are cleared and the
+ * c->message_size message bits are written at the positions listed in c->message_set.
+ */
+void srslte_subchannel_allocation(const srslte_subchn_alloc_t* c, const uint8_t* message, uint8_t* input_encoder)
+{
+  // Every position that is not in the message set stays at zero.
+  memset(input_encoder, 0, c->code_size * sizeof(uint8_t));
+
+  // Scatter the message bits to their subchannels.
+  uint16_t k = 0;
+  while (k < c->message_size) {
+    const uint16_t position = c->message_set[k];
+    input_encoder[position] = message[k];
+    k++;
+  }
+}
+
+/*!
+ * Gathers the c->message_size message bits from the decoder output vector, reading
+ * them at the positions listed in c->message_set.
+ */
+void srslte_subchannel_deallocation(const srslte_subchn_alloc_t* c, const uint8_t* output_decoder, uint8_t* message)
+{
+  uint16_t k = 0;
+  while (k < c->message_size) {
+    const uint16_t position = c->message_set[k];
+    message[k]              = output_decoder[position];
+    k++;
+  }
+}
--- a/lib/src/phy/fec/polar/test/subchannel_allocation.h
+++ b/lib/src/phy/fec/polar/test/subchannel_allocation.h
@ -0,0 +1,86 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+/*!
+ * \file subchannel_allocation.h
+ * \brief Declaration of the auxiliary subchannel allocation block.
+ * \author Jesus Gomez (CTTC)
+ * \date 2020
+ *
+ * \copyright Software Radio Systems Limited
+ *
+ * These functions are not fully functional nor tested to be 3gpp-5G compliant.
+ * Please, use only for testing purposes.
+ *
+ */
+
+#ifndef SRSLTE_SUB_CHANNEL_ALLOC_H
+#define SRSLTE_SUB_CHANNEL_ALLOC_H
+
+#include "srslte/config.h"
+#include "stdint.h"
+
+/*!
+ * \brief Describes a subchannel allocation.
+ *
+ * Filled by srslte_subchannel_allocation_init(), which stores the \a message_set pointer
+ * as given (the index array is not copied).
+ */
+typedef struct SRSLTE_API {
+  uint16_t  code_size;    /*!< \brief Number of bits, \f$N\f$, of the encoder input/output vector. */
+  uint16_t  message_size; /*!< \brief Number of bits, \f$K\f$, of data + CRC. */
+  uint16_t* message_set; /*!< \brief Pointer to the indices of the encoder input vector containing data and CRC bits. */
+} srslte_subchn_alloc_t;
+
+/*!
+ * Initializes a subchannel allocation instance.
+ * The \a message_set pointer is stored in \a c without copying the array, so the array
+ * must remain valid for as long as \a c is used.
+ * NOTE(review): the implementation stores \f$2^{code\_size\_log}\f$ in a 16-bit field, so values of
+ * \a code_size_log of 16 or more presumably do not fit - confirm the intended range.
+ * \param[out] c  A pointer to the  srslte_subchn_alloc_t structure
+ *     containing the parameters needed by the subchannel allocation function.
+ * \param[in] code_size_log The \f$ log_2\f$ of the number of bits of the decoder input/output vector.
+ * \param[in] message_set_size Number of data + CRC bits.
+ * \param[in] message_set Pointer to the indices of the encoder input vector containing
+ * data and CRC bits.
+ */
+void srslte_subchannel_allocation_init(srslte_subchn_alloc_t* c,
+                                       uint8_t                code_size_log,
+                                       uint16_t               message_set_size,
+                                       uint16_t*              message_set);
+
+/*!
+ * Allocates message bits (data + CRC) to the encoder input bit vector at the
+ * positions specified in \a c->message_set and zeros to the remaining
+ * positions. This function is not fully 5G compliant as parity bits positions
+ * are set to 0.
+ * \param[in] c A pointer to the srslte_subchn_alloc_t structure containing
+ *     the parameters needed by the subchannel allocation function.
+ * \param[in] message A pointer to the vector with the message bits (data and CRC);
+ *     \a c->message_size entries are read.
+ * \param[out] input_encoder A pointer to the encoder input bit vector;
+ *     \a c->code_size entries are written.
+ */
+void srslte_subchannel_allocation(const srslte_subchn_alloc_t* c, const uint8_t* message, uint8_t* input_encoder);
+
+/*!
+ * Extracts message bits (data + CRC) from the decoder output vector
+ * according to the positions specified in  \a c->message_set.
+ * \param[in] c A pointer to the srslte_subchn_alloc_t structure containing the
+ *     parameters needed by the subchannel allocation function.
+ * \param[in] output_decoder A pointer to the decoder output bit vector; it is read
+ *     at the indices stored in \a c->message_set.
+ * \param[out] message A pointer to the vector with the message bits (data and CRC);
+ *     \a c->message_size entries are written.
+ */
+void srslte_subchannel_deallocation(const srslte_subchn_alloc_t* c, const uint8_t* output_decoder, uint8_t* message);
+
+#endif // SRSLTE_SUB_CHANNEL_ALLOC_H
--- a/lib/src/phy/scrambling/scrambling.c
+++ b/lib/src/phy/scrambling/scrambling.c
@ -65,7 +65,7 @@ void srslte_scrambling_c_offset(srslte_sequence_t* s, cf_t* data, int offset, in

 static inline void scrambling_b(uint8_t* c, uint8_t* data, int len)
 {
-  srslte_vec_xor_bbb((int8_t*)c, (int8_t*)data, (int8_t*)data, len);
+  srslte_vec_xor_bbb(c, data, data, len);
 }

 void srslte_scrambling_b(srslte_sequence_t* s, uint8_t* data)
--- a/lib/src/phy/utils/convolution.c
+++ b/lib/src/phy/utils/convolution.c
@ -19,12 +19,12 @@
 *
 */

-#include "srslte/srslte.h"
 #include <stdlib.h>
 #include <string.h>

 #include "srslte/phy/dft/dft.h"
 #include "srslte/phy/utils/convolution.h"
+#include "srslte/phy/utils/debug.h"
 #include "srslte/phy/utils/vector.h"

 int srslte_conv_fft_cc_init(srslte_conv_fft_cc_t* q, uint32_t input_len, uint32_t filter_len)
--- a/lib/src/phy/utils/test/vector_test.c
+++ b/lib/src/phy/utils/test/vector_test.c
@ -87,7 +87,7 @@ float squared_error(cf_t a, cf_t b)
 }

 TEST(
-    srslte_vec_xor_bbb, MALLOC(int8_t, x); MALLOC(int8_t, y); MALLOC(int8_t, z);
+    srslte_vec_xor_bbb, MALLOC(uint8_t, x); MALLOC(uint8_t, y); MALLOC(uint8_t, z);

    cf_t gold = 0.0f;
    for (int i = 0; i < block_size; i++) {
--- a/lib/src/phy/utils/vector.c
+++ b/lib/src/phy/utils/vector.c
@ -31,7 +31,7 @@
 #include "srslte/phy/utils/vector.h"
 #include "srslte/phy/utils/vector_simd.h"

-void srslte_vec_xor_bbb(int8_t* x, int8_t* y, int8_t* z, const uint32_t len)
+void srslte_vec_xor_bbb(const uint8_t* x, const uint8_t* y, uint8_t* z, const uint32_t len)
 {
  srslte_vec_xor_bbb_simd(x, y, z, len);
 }
@ -597,6 +597,62 @@ uint32_t srslte_vec_max_abs_ci(const cf_t* x, const uint32_t len)
  return srslte_vec_max_ci_simd(x, len);
 }

+/*!
+ * Quantizes a float vector to 16-bit signed integers: out = round(offset + gain * in),
+ * rounded half up and limited to [-clip, clip].
+ *
+ * - Infinite inputs map directly to +/-INT16_MAX and are not limited by \a clip.
+ * - NaN values quantize to 0.
+ * - The scaled value is saturated to the symmetric int16 range before the integer
+ *   conversion, so out-of-range values (or a \a clip larger than INT16_MAX) saturate
+ *   instead of triggering an undefined float-to-integer conversion or wrapping.
+ *
+ * \param[in]  in     Input samples.
+ * \param[out] out    Quantized samples (\a len entries).
+ * \param[in]  gain   Scaling applied to each input sample.
+ * \param[in]  offset Value added after scaling.
+ * \param[in]  clip   Maximum output magnitude (expected to be non-negative).
+ * \param[in]  len    Number of samples.
+ */
+void srslte_vec_quant_fs(const float*   in,
+                         int16_t*       out,
+                         const float    gain,
+                         const float    offset,
+                         const float    clip,
+                         const uint32_t len)
+{
+  for (uint32_t i = 0; i < len; i++) {
+    long tmp = 0;
+
+    if (isinf(in[i])) {
+      tmp = (in[i] < 0) ? -INT16_MAX : INT16_MAX;
+    } else {
+      float scaled = offset + gain * in[i];
+
+      if (!isnan(scaled)) {
+        // Keep the value inside the int16 window so that the conversion below is always defined.
+        if (scaled > INT16_MAX) {
+          scaled = INT16_MAX;
+        }
+        if (scaled < -INT16_MAX) {
+          scaled = -INT16_MAX;
+        }
+        // Shift to the positive range so that truncation rounds half up, then shift back.
+        tmp = (long)(scaled + INT16_MAX + 0.5) - INT16_MAX;
+      }
+
+      if (tmp < -clip) {
+        tmp = (long)(-clip);
+      }
+      if (tmp > clip) {
+        tmp = (long)clip;
+      }
+    }
+
+    out[i] = (int16_t)tmp;
+  }
+}
+
+/*!
+ * Quantizes a float vector to 8-bit signed integers: out = round(offset + gain * in),
+ * rounded half up and limited to [-clip, clip].
+ *
+ * - Infinite inputs map directly to +/-INT8_MAX and are not limited by \a clip.
+ * - NaN values quantize to 0.
+ * - The scaled value is saturated to the symmetric int8 range before the integer
+ *   conversion, so out-of-range values (or a \a clip larger than INT8_MAX) saturate
+ *   instead of triggering an undefined float-to-integer conversion or wrapping.
+ *
+ * \param[in]  in     Input samples.
+ * \param[out] out    Quantized samples (\a len entries).
+ * \param[in]  gain   Scaling applied to each input sample.
+ * \param[in]  offset Value added after scaling.
+ * \param[in]  clip   Maximum output magnitude (expected to be non-negative).
+ * \param[in]  len    Number of samples.
+ */
+void srslte_vec_quant_fc(const float*   in,
+                         int8_t*        out,
+                         const float    gain,
+                         const float    offset,
+                         const float    clip,
+                         const uint32_t len)
+{
+  for (uint32_t i = 0; i < len; i++) {
+    long tmp = 0;
+
+    if (isinf(in[i])) {
+      tmp = (in[i] < 0) ? -INT8_MAX : INT8_MAX;
+    } else {
+      float scaled = offset + gain * in[i];
+
+      if (!isnan(scaled)) {
+        // Keep the value inside the int8 window so that the conversion below is always defined.
+        if (scaled > INT8_MAX) {
+          scaled = INT8_MAX;
+        }
+        if (scaled < -INT8_MAX) {
+          scaled = -INT8_MAX;
+        }
+        // Shift to the positive range so that truncation rounds half up, then shift back.
+        tmp = (long)(scaled + INT8_MAX + 0.5) - INT8_MAX;
+      }
+
+      if (tmp < -clip) {
+        tmp = (long)(-clip);
+      }
+      if (tmp > clip) {
+        tmp = (long)clip;
+      }
+    }
+
+    out[i] = (int8_t)tmp;
+  }
+}
+
 void srslte_vec_quant_fus(const float*   in,
                          uint16_t*      out,
                          const float    gain,
--- a/lib/src/phy/utils/vector_simd.c
+++ b/lib/src/phy/utils/vector_simd.c
@ -30,27 +30,27 @@
 #include "srslte/phy/utils/simd.h"
 #include "srslte/phy/utils/vector_simd.h"

-void srslte_vec_xor_bbb_simd(const int8_t* x, const int8_t* y, int8_t* z, const int len)
+void srslte_vec_xor_bbb_simd(const uint8_t* x, const uint8_t* y, uint8_t* z, const int len)
 {
  int i = 0;
 #if SRSLTE_SIMD_B_SIZE
  if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) {
    for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
-      simd_b_t a = srslte_simd_b_load(&x[i]);
-      simd_b_t b = srslte_simd_b_load(&y[i]);
+      simd_b_t a = srslte_simd_b_load((int8_t*)&x[i]);
+      simd_b_t b = srslte_simd_b_load((int8_t*)&y[i]);

      simd_b_t r = srslte_simd_b_xor(a, b);

-      srslte_simd_b_store(&z[i], r);
+      srslte_simd_b_store((int8_t*)&z[i], r);
    }
  } else {
    for (; i < len - SRSLTE_SIMD_B_SIZE + 1; i += SRSLTE_SIMD_B_SIZE) {
-      simd_b_t a = srslte_simd_b_loadu(&x[i]);
-      simd_b_t b = srslte_simd_b_loadu(&y[i]);
+      simd_b_t a = srslte_simd_b_loadu((int8_t*)&x[i]);
+      simd_b_t b = srslte_simd_b_loadu((int8_t*)&y[i]);

      simd_b_t r = srslte_simd_b_xor(a, b);

-      srslte_simd_b_storeu(&z[i], r);
+      srslte_simd_b_storeu((int8_t*)&z[i], r);
    }
  }
 #endif /* SRSLTE_SIMD_B_SIZE */