diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7a4c9a499..2e2b07af8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -89,7 +89,7 @@ option(USE_LTE_RATES   "Use standard LTE sampling rates"          OFF)
 option(USE_GLIBC_IPV6  "Use glibc's own ipv6.h"                   ON)
 option(USE_MKL         "Use MKL instead of fftw"                  OFF)
 
-option(ENABLE_TIMEPROF "Enable time profiling"                    OFF)
+option(ENABLE_TIMEPROF "Enable time profiling"                    ON)
 
 option(FORCE_32BIT     "Add flags to force 32 bit compilation"    OFF)
 
diff --git a/lib/include/srslte/common/time_prof.h b/lib/include/srslte/common/time_prof.h
index 05b503657..a69435798 100644
--- a/lib/include/srslte/common/time_prof.h
+++ b/lib/include/srslte/common/time_prof.h
@@ -22,78 +22,102 @@
 #ifndef SRSLTE_TIME_PROF_H
 #define SRSLTE_TIME_PROF_H
 
-#ifdef ENABLE_TIMEPROF
-
 #include "srslte/common/logmap.h"
 #include <chrono>
 
 namespace srslte {
 
+#ifdef ENABLE_TIMEPROF
+
 template <typename Prof>
 class tprof
 {
 public:
   using tpoint = std::chrono::time_point<std::chrono::high_resolution_clock>;
 
+  template <typename... Args>
+  explicit tprof(Args&&... args) : prof(std::forward<Args>(args)...)
+  {
+    srslte::logmap::get("TPROF")->set_level(LOG_LEVEL_INFO);
+  }
+
+  void start() { t1 = std::chrono::high_resolution_clock::now(); }
+
+  std::chrono::nanoseconds stop()
+  {
+    auto t2 = std::chrono::high_resolution_clock::now();
+    auto d  = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1);
+    prof.process(d);
+    return d;
+  }
+
+  tpoint t1;
+  Prof   prof;
+};
+
+template <typename Prof>
+struct mutexed_tprof {
+  using tpoint = std::chrono::time_point<std::chrono::high_resolution_clock>;
   struct measure {
   public:
-    measure(tprof<Prof>* h_) : t1(std::chrono::high_resolution_clock::now()), h(h_) {}
+    measure(mutexed_tprof<Prof>* h_) : t1(std::chrono::high_resolution_clock::now()), h(h_) {}
     ~measure()
     {
       if (deferred) {
         stop();
       }
     }
-    void stop()
+    std::chrono::nanoseconds stop()
     {
-      auto t2 = std::chrono::high_resolution_clock::now();
-      h->process(std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count());
+      auto                        t2 = std::chrono::high_resolution_clock::now();
+      std::lock_guard<std::mutex> lock(h->mutex);
+      std::chrono::nanoseconds    d = std::chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1);
+      h->prof.process(d);
+      return d;
     }
     void defer_stop() { deferred = true; }
 
-    tpoint       t1;
-    tprof<Prof>* h;
-    bool         deferred = false;
+    tpoint               t1;
+    mutexed_tprof<Prof>* h;
+    bool                 deferred = false;
   };
 
   template <typename... Args>
-  explicit tprof(Args&&... args) : prof(std::forward<Args>(args)...)
+  explicit mutexed_tprof(Args&&... args) : prof(std::forward<Args>(args)...)
   {
     srslte::logmap::get("TPROF")->set_level(LOG_LEVEL_INFO);
   }
-
   measure start() { return measure{this}; }
 
-  virtual void process(long duration) { prof.process(duration); }
-
   Prof prof;
-};
 
-template <typename Prof>
-struct mutexed_tprof : public tprof<Prof> {
-  using tprof<Prof>::tprof;
-  void process(long duration) final
-  {
-    std::lock_guard<std::mutex> lock(mutex);
-    tprof<Prof>::prof.process(duration);
-  }
+private:
   std::mutex mutex;
 };
 
 #else
 
-namespace srslte {
-
 template <typename Prof>
 struct tprof {
+  template <typename... Args>
+  explicit tprof(Args&&... args)
+  {
+  }
+
+  void                     start() {}
+  std::chrono::nanoseconds stop() { return {}; }
+};
+
+template <typename Prof>
+struct mutexed_tprof {
   struct measure {
   public:
-    void stop() {}
-    void defer_stop() {}
+    std::chrono::nanoseconds stop() { return {}; }
+    void                     defer_stop() {}
   };
 
   template <typename... Args>
-  explicit tprof(Args&&... args)
+  explicit mutexed_tprof(Args&&... args)
   {
   }
 
@@ -102,29 +126,12 @@ struct tprof {
 private:
   void process(long duration) {}
 };
-template <typename Prof>
-using mutexed_tprof = tprof<Prof>;
 
 #endif
 
 struct avg_time_stats {
   avg_time_stats(const char* name_, size_t print_period_) : name(name_), print_period(print_period_) {}
-
-  void process(long duration)
-  {
-    count++;
-    avg_val = avg_val * (count - 1) / count + static_cast<double>(duration) / count;
-    max_val = std::max(max_val, duration);
-    min_val = std::min(min_val, duration);
-    if (count % print_period == 0) {
-      log_ptr->info("%s: Mean=%0.1fusec, Max=%ldusec, Min=%ldusec, nof_samples=%ld",
-                    name.c_str(),
-                    avg_val / 1e3,
-                    max_val / 1000,
-                    min_val / 1000,
-                    count);
-    }
-  }
+  void process(std::chrono::nanoseconds duration);
 
   srslte::log_ref log_ptr = srslte::logmap::get("TPROF");
   std::string     name;
@@ -133,6 +140,21 @@ struct avg_time_stats {
   long            print_period = 0;
 };
 
+template <typename TUnit>
+class sliding_window_stats
+{
+public:
+  sliding_window_stats(const char* name_, size_t print_period_ = 10, TUnit warn_thres_ = 0);
+  void process(std::chrono::nanoseconds duration);
+
+  srslte::log_ref                       log_ptr = srslte::logmap::get("TPROF");
+  std::string                           name;
+  std::vector<std::chrono::nanoseconds> sliding_window;
+  size_t                                window_idx = 0;
+  TUnit                                 warn_thres = 0;
+};
+using sliding_window_stats_ms = sliding_window_stats<std::chrono::milliseconds>;
+
 } // namespace srslte
 
 #endif // SRSLTE_TIME_PROF_H
diff --git a/lib/src/common/CMakeLists.txt b/lib/src/common/CMakeLists.txt
index 3c54b9beb..1891580d1 100644
--- a/lib/src/common/CMakeLists.txt
+++ b/lib/src/common/CMakeLists.txt
@@ -41,6 +41,7 @@ set(SOURCES arch_select.cc
             thread_pool.cc
             threads.c
             tti_sync_cv.cc
+            time_prof.cc
             version.c
             zuc.cc)
 
diff --git a/lib/src/common/time_prof.cc b/lib/src/common/time_prof.cc
new file mode 100644
index 000000000..a43b91cec
--- /dev/null
+++ b/lib/src/common/time_prof.cc
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2013-2020 Software Radio Systems Limited
+ *
+ * This file is part of srsLTE.
+ *
+ * srsLTE is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * srsLTE is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * A copy of the GNU Affero General Public License can be found in
+ * the LICENSE file in the top-level directory of this distribution
+ * and at http://www.gnu.org/licenses/.
+ *
+ */
+
+#include "srslte/common/time_prof.h"
+#include <algorithm>
+#include <numeric>
+
+using namespace srslte;
+using std::chrono::nanoseconds;
+
+template <typename TUnit>
+const char* get_tunit_str()
+{
+  return "";
+}
+template <>
+const char* get_tunit_str<nanoseconds>()
+{
+  return "nsec";
+}
+template <>
+const char* get_tunit_str<std::chrono::microseconds>()
+{
+  return "usec";
+}
+template <>
+const char* get_tunit_str<std::chrono::milliseconds>()
+{
+  return "msec";
+}
+template <>
+const char* get_tunit_str<std::chrono::seconds>()
+{
+  return "sec";
+}
+template <>
+const char* get_tunit_str<std::chrono::minutes>()
+{
+  return "min";
+}
+
+void avg_time_stats::process(nanoseconds duration)
+{
+  count++;
+  avg_val = avg_val * (count - 1) / count + static_cast<double>(duration.count()) / count;
+  max_val = std::max(max_val, duration.count());
+  min_val = std::min(min_val, duration.count());
+  if (count % print_period == 0) {
+    log_ptr->info("%s: mean,max,min=%0.1f,%ld,%ld usec, nof_samples=%ld",
+                  name.c_str(),
+                  avg_val / 1e3,
+                  max_val / 1000,
+                  min_val / 1000,
+                  count);
+  }
+}
+
+template <typename TUnit>
+sliding_window_stats<TUnit>::sliding_window_stats(const char* name_, size_t print_period_, TUnit warn_thres_) :
+  name(name_),
+  sliding_window(print_period_),
+  warn_thres(warn_thres_)
+{
+}
+
+template <typename TUnit>
+void sliding_window_stats<TUnit>::process(nanoseconds duration)
+{
+  using std::chrono::duration_cast;
+  const char* unit_str = get_tunit_str<TUnit>();
+  TUnit       dur      = std::chrono::duration_cast<TUnit>(duration);
+
+  log_ptr->debug("%s: duration=%ld %s\n", name.c_str(), dur.count(), unit_str);
+  if (warn_thres > TUnit{0} and dur > warn_thres) {
+    log_ptr->warning("%s: detected long duration=%ld %s\n", name.c_str(), dur.count(), unit_str);
+  }
+
+  sliding_window[window_idx++] = duration;
+  if (window_idx == sliding_window.size()) {
+    nanoseconds tsum  = accumulate(sliding_window.begin(), sliding_window.end(), std::chrono::nanoseconds{0});
+    nanoseconds tmax  = *std::max_element(sliding_window.begin(), sliding_window.end());
+    nanoseconds tmin  = *std::min_element(sliding_window.begin(), sliding_window.end());
+    double      tmean = static_cast<double>(duration_cast<TUnit>(tsum).count()) / sliding_window.size();
+
+    log_ptr->info("%s: mean,max,min = %0.2f,%ld,%ld %s\n",
+                  name.c_str(),
+                  tmean,
+                  duration_cast<TUnit>(tmax).count(),
+                  duration_cast<TUnit>(tmin).count(),
+                  unit_str);
+    window_idx = 0;
+  }
+}
+
+template class srslte::sliding_window_stats<std::chrono::microseconds>;
+template class srslte::sliding_window_stats<std::chrono::milliseconds>;
diff --git a/srsue/hdr/stack/ue_stack_lte.h b/srsue/hdr/stack/ue_stack_lte.h
index 1b75588b4..0581a2190 100644
--- a/srsue/hdr/stack/ue_stack_lte.h
+++ b/srsue/hdr/stack/ue_stack_lte.h
@@ -45,6 +45,7 @@
 #include "srslte/common/thread_pool.h"
 #include "srslte/interfaces/ue_interfaces.h"
 
+#include "srslte/common/time_prof.h"
 #include "srsue/hdr/ue_metrics_interface.h"
 #include "ue_stack_base.h"
 
@@ -129,15 +130,14 @@ public:
 private:
   void run_thread() final;
   void run_tti_impl(uint32_t tti);
-  void           calc_tti_stats(const uint32_t duration_us);
-  const uint32_t TTI_STAT_PERIOD           = 1024;
-  const uint32_t TTI_WARN_THRESHOLD_US     = 5000;
-  const uint32_t SYNC_QUEUE_WARN_THRESHOLD = 5;
-  const float    US_PER_MS                 = 1000.0;
   void stop_impl();
 
-  bool                running;
-  srsue::stack_args_t args;
+  const uint32_t                  TTI_STAT_PERIOD = 1024;
+  const std::chrono::milliseconds TTI_WARN_THRESHOLD_MS{5};
+  const uint32_t                  SYNC_QUEUE_WARN_THRESHOLD = 5;
+
+  bool                  running;
+  srsue::stack_args_t   args;
   std::vector<uint32_t> proc_time;
 
   // timers
@@ -172,6 +172,9 @@ private:
   srslte::task_multiqueue pending_tasks;
   int sync_queue_id = -1, ue_queue_id = -1, gw_queue_id = -1, mac_queue_id = -1, background_queue_id = -1;
   srslte::task_thread_pool background_tasks; ///< Thread pool used for long, low-priority tasks
+
+  // TTI stats
+  srslte::tprof<srslte::sliding_window_stats_ms> tti_tprof;
 };
 
 } // namespace srsue
diff --git a/srsue/src/stack/ue_stack_lte.cc b/srsue/src/stack/ue_stack_lte.cc
index c9a65f615..64e333185 100644
--- a/srsue/src/stack/ue_stack_lte.cc
+++ b/srsue/src/stack/ue_stack_lte.cc
@@ -44,7 +44,8 @@ ue_stack_lte::ue_stack_lte() :
   nas(&timers),
   thread("STACK"),
   pending_tasks(512),
-  background_tasks(2)
+  background_tasks(2),
+  tti_tprof("proc_time", TTI_STAT_PERIOD, TTI_WARN_THRESHOLD_MS)
 {
   ue_queue_id         = pending_tasks.add_queue();
   sync_queue_id       = pending_tasks.add_queue();
@@ -53,8 +54,6 @@ ue_stack_lte::ue_stack_lte() :
   background_queue_id = pending_tasks.add_queue();
 
   background_tasks.start();
-
-  proc_time.reserve(TTI_STAT_PERIOD);
 }
 
 ue_stack_lte::~ue_stack_lte()
@@ -306,7 +305,7 @@ void ue_stack_lte::run_tti(uint32_t tti)
 
 void ue_stack_lte::run_tti_impl(uint32_t tti)
 {
-  auto start = std::chrono::steady_clock::now();
+  tti_tprof.start();
 
   // perform tasks in this TTI
   mac.run_tti(tti);
@@ -315,8 +314,7 @@ void ue_stack_lte::run_tti_impl(uint32_t tti)
   timers.step_all();
 
   if (args.have_tti_time_stats) {
-    auto end = std::chrono::steady_clock::now();
-    calc_tti_stats(std::chrono::duration_cast<std::chrono::microseconds>(end - start).count());
+    tti_tprof.stop();
   }
 
   // print warning if PHY pushes new TTI messages faster than we process them
@@ -325,28 +323,6 @@ void ue_stack_lte::run_tti_impl(uint32_t tti)
   }
 }
 
-void ue_stack_lte::calc_tti_stats(const uint32_t duration_us)
-{
-  log.debug("proc_time=%.2fms\n", duration_us / US_PER_MS);
-  if (duration_us > TTI_WARN_THRESHOLD_US) {
-    log.warning("Long TTI proc_time=%.2fms\n", duration_us / US_PER_MS);
-  }
-
-  proc_time.push_back(duration_us);
-  if (proc_time.size() == TTI_STAT_PERIOD) {
-    uint32_t min = 0, max = 0, sum = 0;
-    for (auto& item : proc_time) {
-      max = std::max(max, item);
-      min = std::min(min, item);
-      sum += item;
-    }
-    auto avg = sum / proc_time.size();
-    log.info("proc_time=%.2f,%.2f,%.2f (min,avg,max)\n", min / US_PER_MS, avg / US_PER_MS, max / US_PER_MS);
-
-    proc_time.clear();
-  }
-}
-
 /********************
  * low MAC Interface
  *******************/