diff --git a/CMakeLists.txt b/CMakeLists.txt index 7a4c9a499..2e2b07af8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,7 +89,7 @@ option(USE_LTE_RATES "Use standard LTE sampling rates" OFF) option(USE_GLIBC_IPV6 "Use glibc's own ipv6.h" ON) option(USE_MKL "Use MKL instead of fftw" OFF) -option(ENABLE_TIMEPROF "Enable time profiling" OFF) +option(ENABLE_TIMEPROF "Enable time profiling" ON) option(FORCE_32BIT "Add flags to force 32 bit compilation" OFF) diff --git a/lib/include/srslte/common/time_prof.h b/lib/include/srslte/common/time_prof.h index 05b503657..a69435798 100644 --- a/lib/include/srslte/common/time_prof.h +++ b/lib/include/srslte/common/time_prof.h @@ -22,78 +22,102 @@ #ifndef SRSLTE_TIME_PROF_H #define SRSLTE_TIME_PROF_H -#ifdef ENABLE_TIMEPROF - #include "srslte/common/logmap.h" #include namespace srslte { +#ifdef ENABLE_TIMEPROF + template class tprof { public: using tpoint = std::chrono::time_point; + template + explicit tprof(Args&&... args) : prof(std::forward(args)...) + { + srslte::logmap::get("TPROF")->set_level(LOG_LEVEL_INFO); + } + + void start() { t1 = std::chrono::high_resolution_clock::now(); } + + std::chrono::nanoseconds stop() + { + auto t2 = std::chrono::high_resolution_clock::now(); + auto d = std::chrono::duration_cast(t2 - t1); + prof.process(d); + return d; + } + + tpoint t1; + Prof prof; +}; + +template +struct mutexed_tprof { + using tpoint = std::chrono::time_point; struct measure { public: - measure(tprof* h_) : t1(std::chrono::high_resolution_clock::now()), h(h_) {} + measure(mutexed_tprof* h_) : t1(std::chrono::high_resolution_clock::now()), h(h_) {} ~measure() { if (deferred) { stop(); } } - void stop() + std::chrono::nanoseconds stop() { - auto t2 = std::chrono::high_resolution_clock::now(); - h->process(std::chrono::duration_cast(t2 - t1).count()); + auto t2 = std::chrono::high_resolution_clock::now(); + std::lock_guard lock(h->mutex); + std::chrono::nanoseconds d = std::chrono::duration_cast(t2 - t1); + h->prof.process(d); + return d; } void defer_stop() { deferred = true; } - tpoint t1; - tprof* h; - bool deferred = false; + tpoint t1; + mutexed_tprof* h; + bool deferred = false; }; template - explicit tprof(Args&&... args) : prof(std::forward(args)...) + explicit mutexed_tprof(Args&&... args) : prof(std::forward(args)...) { srslte::logmap::get("TPROF")->set_level(LOG_LEVEL_INFO); } - measure start() { return measure{this}; } - virtual void process(long duration) { prof.process(duration); } - Prof prof; -}; -template -struct mutexed_tprof : public tprof { - using tprof::tprof; - void process(long duration) final - { - std::lock_guard lock(mutex); - tprof::prof.process(duration); - } +private: std::mutex mutex; }; #else -namespace srslte { - template struct tprof { + template + explicit tprof(Args&&... args) + { + } + + void start() {} + std::chrono::nanoseconds stop() { return {}; } +}; + +template +struct mutexed_tprof { struct measure { public: - void stop() {} - void defer_stop() {} + std::chrono::nanoseconds stop() { return {}; } + void defer_stop() {} }; template - explicit tprof(Args&&... args) + explicit mutexed_tprof(Args&&... args) { } @@ -102,29 +126,12 @@ struct tprof { private: void process(long duration) {} }; -template -using mutexed_tprof = tprof; #endif struct avg_time_stats { avg_time_stats(const char* name_, size_t print_period_) : name(name_), print_period(print_period_) {} - - void process(long duration) - { - count++; - avg_val = avg_val * (count - 1) / count + static_cast(duration) / count; - max_val = std::max(max_val, duration); - min_val = std::min(min_val, duration); - if (count % print_period == 0) { - log_ptr->info("%s: Mean=%0.1fusec, Max=%ldusec, Min=%ldusec, nof_samples=%ld", - name.c_str(), - avg_val / 1e3, - max_val / 1000, - min_val / 1000, - count); - } - } + void process(std::chrono::nanoseconds duration); srslte::log_ref log_ptr = srslte::logmap::get("TPROF"); std::string name; @@ -133,6 +140,21 @@ struct avg_time_stats { long print_period = 0; }; +template +class sliding_window_stats +{ +public: + sliding_window_stats(const char* name_, size_t print_period_ = 10, TUnit warn_thres_ = 0); + void process(std::chrono::nanoseconds duration); + + srslte::log_ref log_ptr = srslte::logmap::get("TPROF"); + std::string name; + std::vector sliding_window; + size_t window_idx = 0; + TUnit warn_thres = 0; +}; +using sliding_window_stats_ms = sliding_window_stats; + } // namespace srslte #endif // SRSLTE_TIME_PROF_H diff --git a/lib/src/common/CMakeLists.txt b/lib/src/common/CMakeLists.txt index 3c54b9beb..1891580d1 100644 --- a/lib/src/common/CMakeLists.txt +++ b/lib/src/common/CMakeLists.txt @@ -41,6 +41,7 @@ set(SOURCES arch_select.cc thread_pool.cc threads.c tti_sync_cv.cc + time_prof.cc version.c zuc.cc) diff --git a/lib/src/common/time_prof.cc b/lib/src/common/time_prof.cc new file mode 100644 index 000000000..a43b91cec --- /dev/null +++ b/lib/src/common/time_prof.cc @@ -0,0 +1,114 @@ +/* + * Copyright 2013-2020 Software Radio Systems Limited + * + * This file is part of srsLTE. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +#include "srslte/common/time_prof.h" +#include +#include + +using namespace srslte; +using std::chrono::nanoseconds; + +template +const char* get_tunit_str() +{ + return ""; +} +template <> +const char* get_tunit_str() +{ + return "nsec"; +} +template <> +const char* get_tunit_str() +{ + return "usec"; +} +template <> +const char* get_tunit_str() +{ + return "msec"; +} +template <> +const char* get_tunit_str() +{ + return "sec"; +} +template <> +const char* get_tunit_str() +{ + return "min"; +} + +void avg_time_stats::process(nanoseconds duration) +{ + count++; + avg_val = avg_val * (count - 1) / count + static_cast(duration.count()) / count; + max_val = std::max(max_val, duration.count()); + min_val = std::min(min_val, duration.count()); + if (count % print_period == 0) { + log_ptr->info("%s: mean,max,min=%0.1f,%ld,%ld usec, nof_samples=%ld", + name.c_str(), + avg_val / 1e3, + max_val / 1000, + min_val / 1000, + count); + } +} + +template +sliding_window_stats::sliding_window_stats(const char* name_, size_t print_period_, TUnit warn_thres_) : + name(name_), + sliding_window(print_period_), + warn_thres(warn_thres_) +{ +} + +template +void sliding_window_stats::process(nanoseconds duration) +{ + using std::chrono::duration_cast; + const char* unit_str = get_tunit_str(); + TUnit dur = std::chrono::duration_cast(duration); + + log_ptr->debug("%s: duration=%ld %s\n", name.c_str(), dur.count(), unit_str); + if (warn_thres > TUnit{0} and dur > warn_thres) { + log_ptr->warning("%s: detected long duration=%ld %s\n", name.c_str(), dur.count(), unit_str); + } + + sliding_window[window_idx++] = duration; + if (window_idx == sliding_window.size()) { + nanoseconds tsum = accumulate(sliding_window.begin(), sliding_window.end(), std::chrono::nanoseconds{0}); + nanoseconds tmax = *std::max_element(sliding_window.begin(), sliding_window.end()); + nanoseconds tmin = *std::min_element(sliding_window.begin(), sliding_window.end()); + double tmean = static_cast(duration_cast(tsum).count()) / sliding_window.size(); + + log_ptr->info("%s: mean,max,min = %0.2f,%ld,%ld %s\n", + name.c_str(), + tmean, + duration_cast(tmax).count(), + duration_cast(tmin).count(), + unit_str); + window_idx = 0; + } +} + +template class srslte::sliding_window_stats; +template class srslte::sliding_window_stats; diff --git a/srsue/hdr/stack/ue_stack_lte.h b/srsue/hdr/stack/ue_stack_lte.h index 1b75588b4..0581a2190 100644 --- a/srsue/hdr/stack/ue_stack_lte.h +++ b/srsue/hdr/stack/ue_stack_lte.h @@ -45,6 +45,7 @@ #include "srslte/common/thread_pool.h" #include "srslte/interfaces/ue_interfaces.h" +#include "srslte/common/time_prof.h" #include "srsue/hdr/ue_metrics_interface.h" #include "ue_stack_base.h" @@ -129,15 +130,14 @@ public: private: void run_thread() final; void run_tti_impl(uint32_t tti); - void calc_tti_stats(const uint32_t duration_us); - const uint32_t TTI_STAT_PERIOD = 1024; - const uint32_t TTI_WARN_THRESHOLD_US = 5000; - const uint32_t SYNC_QUEUE_WARN_THRESHOLD = 5; - const float US_PER_MS = 1000.0; void stop_impl(); - bool running; - srsue::stack_args_t args; + const uint32_t TTI_STAT_PERIOD = 1024; + const std::chrono::milliseconds TTI_WARN_THRESHOLD_MS{5}; + const uint32_t SYNC_QUEUE_WARN_THRESHOLD = 5; + + bool running; + srsue::stack_args_t args; std::vector proc_time; // timers @@ -172,6 +172,9 @@ private: srslte::task_multiqueue pending_tasks; int sync_queue_id = -1, ue_queue_id = -1, gw_queue_id = -1, mac_queue_id = -1, background_queue_id = -1; srslte::task_thread_pool background_tasks; ///< Thread pool used for long, low-priority tasks + + // TTI stats + srslte::tprof tti_tprof; }; } // namespace srsue diff --git a/srsue/src/stack/ue_stack_lte.cc b/srsue/src/stack/ue_stack_lte.cc index c9a65f615..64e333185 100644 --- a/srsue/src/stack/ue_stack_lte.cc +++ b/srsue/src/stack/ue_stack_lte.cc @@ -44,7 +44,8 @@ ue_stack_lte::ue_stack_lte() : nas(&timers), thread("STACK"), pending_tasks(512), - background_tasks(2) + background_tasks(2), + tti_tprof("proc_time", TTI_STAT_PERIOD, TTI_WARN_THRESHOLD_MS) { ue_queue_id = pending_tasks.add_queue(); sync_queue_id = pending_tasks.add_queue(); @@ -53,8 +54,6 @@ ue_stack_lte::ue_stack_lte() : background_queue_id = pending_tasks.add_queue(); background_tasks.start(); - - proc_time.reserve(TTI_STAT_PERIOD); } ue_stack_lte::~ue_stack_lte() @@ -306,7 +305,7 @@ void ue_stack_lte::run_tti(uint32_t tti) void ue_stack_lte::run_tti_impl(uint32_t tti) { - auto start = std::chrono::steady_clock::now(); + tti_tprof.start(); // perform tasks in this TTI mac.run_tti(tti); @@ -315,8 +314,7 @@ void ue_stack_lte::run_tti_impl(uint32_t tti) timers.step_all(); if (args.have_tti_time_stats) { - auto end = std::chrono::steady_clock::now(); - calc_tti_stats(std::chrono::duration_cast(end - start).count()); + tti_tprof.stop(); } // print warning if PHY pushes new TTI messages faster than we process them @@ -325,28 +323,6 @@ void ue_stack_lte::run_tti_impl(uint32_t tti) } } -void ue_stack_lte::calc_tti_stats(const uint32_t duration_us) -{ - log.debug("proc_time=%.2fms\n", duration_us / US_PER_MS); - if (duration_us > TTI_WARN_THRESHOLD_US) { - log.warning("Long TTI proc_time=%.2fms\n", duration_us / US_PER_MS); - } - - proc_time.push_back(duration_us); - if (proc_time.size() == TTI_STAT_PERIOD) { - uint32_t min = 0, max = 0, sum = 0; - for (auto& item : proc_time) { - max = std::max(max, item); - min = std::min(min, item); - sum += item; - } - auto avg = sum / proc_time.size(); - log.info("proc_time=%.2f,%.2f,%.2f (min,avg,max)\n", min / US_PER_MS, avg / US_PER_MS, max / US_PER_MS); - - proc_time.clear(); - } -} - /******************** * low MAC Interface *******************/