From c98aa936ad70d86263fa2fda1408374e0d6d1f60 Mon Sep 17 00:00:00 2001 From: "Zed A. Shaw" Date: Sun, 19 Jan 2025 12:58:05 -0500 Subject: [PATCH] The tracy directory now has an experiment in getting Tracy to work. It's _not_ as easy as it is touted to be. --- Makefile | 2 +- amt/thread.hpp | 253 ++++++++++++++++++++++++++++++++++++++++++ constants.hpp | 4 +- main.cpp | 6 +- meson.build | 2 +- raycaster.cpp | 1 - tracy/TracyClient.cpp | 61 ++++++++++ tracy/meson.build | 77 +++++++++++++ tracy/meson.options | 1 + 9 files changed, 400 insertions(+), 7 deletions(-) create mode 100644 amt/thread.hpp create mode 100644 tracy/TracyClient.cpp create mode 100644 tracy/meson.build create mode 100644 tracy/meson.options diff --git a/Makefile b/Makefile index eca5801..d5638ee 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ debug_build: meson compile -j 10 -C builddir tracy_build: - meson setup --wipe builddir --buildtype debugoptimized -Dtracy_enable=true + meson setup --wipe builddir --buildtype debugoptimized -Dtracy_enable=true -Dtracy:on_demand=true meson compile -j 10 -C builddir test: build diff --git a/amt/thread.hpp b/amt/thread.hpp new file mode 100644 index 0000000..841199b --- /dev/null +++ b/amt/thread.hpp @@ -0,0 +1,253 @@ +#ifndef AMT_THREAD_HPP +#define AMT_THREAD_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace amt { + + // NOTE: Could implement lock-free queue. + template + struct Queue { + using base_type = std::deque; + using value_type = typename base_type::value_type; + using pointer = typename base_type::pointer; + using const_pointer = typename base_type::const_pointer; + using reference = typename base_type::reference; + using const_reference = typename base_type::const_reference; + using iterator = typename base_type::iterator; + using const_iterator = typename base_type::const_iterator; + using reverse_iterator = typename base_type::reverse_iterator; + using const_reverse_iterator = typename base_type::const_reverse_iterator; + using difference_type = typename base_type::difference_type; + using size_type = typename base_type::size_type; + + constexpr Queue() noexcept = default; + constexpr Queue(Queue const&) noexcept = delete; + constexpr Queue(Queue &&) noexcept = default; + constexpr Queue& operator=(Queue const&) noexcept = delete; + constexpr Queue& operator=(Queue &&) noexcept = default; + constexpr ~Queue() noexcept = default; + + template + requires std::same_as, value_type> + void push(U&& u) { + std::lock_guard m(m_mutex); + m_data.push_back(std::forward(u)); + } + + template + void emplace(Args&&... args) { + std::lock_guard m(m_mutex); + m_data.emplace_back(std::forward(args)...); + } + + std::optional pop() { + std::lock_guard m(m_mutex); + if (empty_unsafe()) return std::nullopt; + auto el = std::move(m_data.front()); + m_data.pop_front(); + return std::move(el); + } + + auto size() const noexcept -> size_type { + std::lock_guard m(m_mutex); + return m_data.size(); + } + auto empty() const noexcept -> bool { + std::lock_guard m(m_mutex); + return m_data.empty(); + } + constexpr auto size_unsafe() const noexcept -> size_type { return m_data.size(); } + constexpr auto empty_unsafe() const noexcept -> bool { return m_data.empty(); } + + private: + base_type m_data; + mutable std::mutex m_mutex; + }; + + template + struct ThreadPool; + + template + struct Worker { + using parent_t = ThreadPool*; + using work_t = Fn; + using size_type = std::size_t; + constexpr Worker() noexcept = default; + constexpr Worker(Worker const&) noexcept = default; + constexpr Worker(Worker &&) noexcept = default; + constexpr Worker& operator=(Worker const&) noexcept = default; + constexpr Worker& operator=(Worker &&) noexcept = default; + ~Worker() { + stop(); + } + + void start(parent_t pool, size_type id) { + assert((m_running.load(std::memory_order::acquire) == false) && "Thread is already running"); + m_running.store(true); + m_parent.store(pool); + m_id = id; + m_thread = std::thread([this]() { + while (m_running.load(std::memory_order::relaxed)) { + std::unique_lock lk(m_mutex); + m_cv.wait(lk, [this] { + return !m_queue.empty_unsafe() || !m_running.load(std::memory_order::relaxed); + }); + auto item = pop_task(); + if (!item) { + item = try_steal(); + if (!item) continue; + } + + process_work(std::move(*item)); + } + }); + } + + void process_work(work_t&& work) const noexcept { + std::invoke(std::move(work)); + auto ptr = m_parent.load(); + if (ptr) ptr->task_completed(); + } + + void stop() { + if (!m_running.load()) return; + { + std::lock_guard lock(m_mutex); + m_running.store(false); + } + m_cv.notify_all(); + m_thread.join(); + m_parent.store(nullptr); + } + + void add(work_t&& work) { + std::lock_guard lock(m_mutex); + m_queue.push(std::move(work)); + m_cv.notify_one(); + } + + std::optional pop_task() noexcept { + return m_queue.pop(); + } + + + std::optional try_steal() noexcept { + auto ptr = m_parent.load(); + if (ptr) return ptr->try_steal(m_id); + return {}; + } + + constexpr bool empty() const noexcept { return m_queue.empty_unsafe(); } + constexpr size_type size() const noexcept { return m_queue.size_unsafe(); } + constexpr size_type id() const noexcept { return m_id; } + constexpr bool running() const noexcept { return m_running.load(std::memory_order::relaxed); } + + private: + Queue m_queue{}; + std::thread m_thread; + std::atomic m_running{false}; + std::mutex m_mutex{}; + std::condition_variable m_cv{}; + std::atomic m_parent{nullptr}; + size_type m_id; + }; + + template + struct ThreadPool { + using worker_t = Worker; + using work_t = typename worker_t::work_t; + using size_type = std::size_t; + + constexpr ThreadPool(ThreadPool const&) noexcept = delete; + constexpr ThreadPool(ThreadPool &&) noexcept = default; + constexpr ThreadPool& operator=(ThreadPool const&) noexcept = delete; + constexpr ThreadPool& operator=(ThreadPool &&) noexcept = default; + ~ThreadPool() { + stop(); + } + + ThreadPool(size_type n = std::thread::hardware_concurrency()) + : m_workers(std::max(n, size_type{1})) + { + for (auto i = 0ul; i < m_workers.size(); ++i) { + m_workers[i].start(this, i); + } + } + + void stop() { + for (auto& w: m_workers) w.stop(); + } + + void add(Fn&& work) { + m_active_tasks.fetch_add(1, std::memory_order::relaxed); + m_workers[m_last_added].add(std::move(work)); + m_last_added = (m_last_added + 1) % m_workers.size(); + } + + std::optional try_steal(size_type id) { + for (auto& w: m_workers) { + if (w.id() == id) continue; + auto item = w.pop_task(); + if (item) return item; + } + return {}; + } + + void task_completed() { + if (m_active_tasks.fetch_sub(1, std::memory_order::release) == 1) { + m_wait_cv.notify_all(); + } + } + + void wait() { + std::unique_lock lock(m_wait_mutex); + m_wait_cv.wait(lock, [this] { + return m_active_tasks.load(std::memory_order::acquire) == 0; + }); + } + + + private: + std::vector m_workers; + size_type m_last_added{}; + std::mutex m_wait_mutex; + std::condition_variable m_wait_cv; + std::atomic m_active_tasks{0}; + }; + + using thread_pool_t = ThreadPool>; + + // WARNING: Do not capture the stack variable if you're defering wait on pool. + // If you want to capture them, either capture them value or do "pool.wait()" at the end of the scope. + template + requires (std::is_invocable_v) + constexpr auto parallel_for(thread_pool_t& pool, std::size_t start, std::size_t end, Fn&& body) noexcept { + if (start >= end) return; + + auto const size = (end - start); + auto const chunk_size = std::max(size_t{1}, (size + Split - 1) / Split); + auto const num_chunks = (size + chunk_size - 1) / chunk_size; + + for (auto chunk = 0ul; chunk < num_chunks; ++chunk) { + auto const chunk_start = std::min(start + (chunk * chunk_size), end); + auto const chunk_end = std::min(chunk_start + (chunk_size), end); + pool.add([chunk_start, chunk_end, body] { + for (auto i = chunk_start; i < chunk_end; ++i) { + std::invoke(body, i); + } + }); + } + } +} // nsmespace amt + +#endif // AMT_THREAD_HPP diff --git a/constants.hpp b/constants.hpp index 0d20c1a..45fd7db 100644 --- a/constants.hpp +++ b/constants.hpp @@ -6,8 +6,8 @@ constexpr const int RAY_VIEW_X=(1280 - RAY_VIEW_WIDTH); constexpr const int RAY_VIEW_Y=0; constexpr const int SCREEN_HEIGHT=720; constexpr const int SCREEN_WIDTH=1280; -constexpr const bool VSYNC=false; -constexpr const int FRAME_LIMIT=30; +constexpr const bool VSYNC=true; +constexpr const int FRAME_LIMIT=60; #ifdef NDEBUG constexpr const bool DEBUG_BUILD=false; #else diff --git a/main.cpp b/main.cpp index 4692524..7c3e5af 100644 --- a/main.cpp +++ b/main.cpp @@ -26,8 +26,8 @@ void draw_gui(sf::RenderWindow &window, sf::Text &text, Stats &stats) { window.draw(rect); text.setString( - fmt::format("FPS\nmean:{:>8.5}\nsdev: {:>8.5}\nmin: {:>8.5}\nmax: {:>8.5}\ncount:{:<10}\n\nVSync? {}\nDebug? {}\n\nHit R to reset.", - stats.mean(), stats.stddev(), stats.min, stats.max, stats.n, VSYNC, DEBUG_BUILD)); + fmt::format("FPS\nmean:{:>8.5}\nsdev: {:>8.5}\nmin: {:>8.5}\nmax: {:>8.5}\ncount:{:<10}\n\nVSync? {}\nFR Limit: {}\nDebug? {}\n\nHit R to reset.", + stats.mean(), stats.stddev(), stats.min, stats.max, stats.n, VSYNC, FRAME_LIMIT, DEBUG_BUILD)); window.draw(text); } @@ -57,6 +57,7 @@ int main() { Stats stats; + window.setVerticalSyncEnabled(VSYNC); window.setFramerateLimit(FRAME_LIMIT); while(window.isOpen()) { @@ -66,6 +67,7 @@ int main() { auto elapsed = std::chrono::duration(end - start); stats.sample(1/elapsed.count()); + draw_gui(window, text, stats); window.display(); diff --git a/meson.build b/meson.build index d37a42d..56d8485 100644 --- a/meson.build +++ b/meson.build @@ -35,7 +35,7 @@ dependencies = [ ] # use this for common options only for our executables -cpp_args=[ ] +cpp_args=[] executable('runtests', [ 'dbc.cpp', diff --git a/raycaster.cpp b/raycaster.cpp index a8432e3..eac488d 100644 --- a/raycaster.cpp +++ b/raycaster.cpp @@ -38,7 +38,6 @@ Raycaster::Raycaster(sf::RenderWindow& window, Matrix &map, int width, int heigh spriteDistance($textures.NUM_SPRITES), ZBuffer(width) { - $window.setVerticalSyncEnabled(VSYNC); $view_sprite.setPosition({0, 0}); $pixels = make_unique($width * $height); $textures.load_textures(); diff --git a/tracy/TracyClient.cpp b/tracy/TracyClient.cpp new file mode 100644 index 0000000..6224f48 --- /dev/null +++ b/tracy/TracyClient.cpp @@ -0,0 +1,61 @@ +// +// Tracy profiler +// ---------------- +// +// For fast integration, compile and +// link with this source file (and none +// other) in your executable (or in the +// main DLL / shared object on multi-DLL +// projects). +// + +// Define TRACY_ENABLE to enable profiler. + +#include "common/TracySystem.cpp" + +#ifdef TRACY_ENABLE + +#ifdef _MSC_VER +# pragma warning(push, 0) +#endif + +#include "common/tracy_lz4.cpp" +#include "client/TracyProfiler.cpp" +#include "client/TracyCallstack.cpp" +#include "client/TracySysPower.cpp" +#include "client/TracySysTime.cpp" +#include "client/TracySysTrace.cpp" +#include "common/TracySocket.cpp" +#include "client/tracy_rpmalloc.cpp" +#include "client/TracyDxt1.cpp" +#include "client/TracyAlloc.cpp" +#include "client/TracyOverride.cpp" +#include "client/TracyKCore.cpp" + +#if defined(TRACY_HAS_CALLSTACK) +# if TRACY_HAS_CALLSTACK == 2 || TRACY_HAS_CALLSTACK == 3 || TRACY_HAS_CALLSTACK == 4 || TRACY_HAS_CALLSTACK == 6 +# include "libbacktrace/alloc.cpp" +# include "libbacktrace/dwarf.cpp" +# include "libbacktrace/fileline.cpp" +# include "libbacktrace/mmapio.cpp" +# include "libbacktrace/posix.cpp" +# include "libbacktrace/sort.cpp" +# include "libbacktrace/state.cpp" +# if TRACY_HAS_CALLSTACK == 4 +# include "libbacktrace/macho.cpp" +# else +# include "libbacktrace/elf.cpp" +# endif +# include "common/TracyStackFrames.cpp" +# endif +#endif + +#ifdef _MSC_VER +# pragma comment(lib, "ws2_32.lib") +# pragma comment(lib, "dbghelp.lib") +# pragma comment(lib, "advapi32.lib") +# pragma comment(lib, "user32.lib") +# pragma warning(pop) +#endif + +#endif diff --git a/tracy/meson.build b/tracy/meson.build new file mode 100644 index 0000000..3468e2c --- /dev/null +++ b/tracy/meson.build @@ -0,0 +1,77 @@ +project('raycaster', 'cpp', + version: '0.1.0', + default_options: [ + 'cpp_std=c++20', + #'cpp_args=-DTRACY_ENABLE=1 -D_GLIBCXX_DEBUG=1 -D_GLIBCXX_DEBUG_PEDANTIC=1', + 'cpp_args=-DTRACY_ENABLE=1', + ]) + +#exe_defaults = ['warning_level=2', 'werror=true'] +exe_defaults = [] +cc = meson.get_compiler('cpp') + +tracy = dependency('tracy', static: true) +catch2 = dependency('catch2-with-main') +fmt = dependency('fmt', allow_fallback: true) +freetype2 = dependency('freetype2') +json = dependency('nlohmann_json') +opengl32 = cc.find_library('opengl32', required: true) +winmm = cc.find_library('winmm', required: true) +gdi32 = cc.find_library('gdi32', required: true) +ws2_32 = cc.find_library('ws2_32', required: true) +dbghelp = cc.find_library('dbghelp', required: true) + +sfml_audio = dependency('sfml_audio') +sfml_graphics = dependency('sfml_graphics') +sfml_main = dependency('sfml_main') +sfml_network = dependency('sfml_network') +sfml_system = dependency('sfml_system') +sfml_window = dependency('sfml_window') + +if get_option('tracy_enable') and get_option('buildtype') != 'debugoptimized' + warning('Profiling builds should set --buildtype=debugoptimized') +endif + +dependencies = [ + fmt, json, opengl32, freetype2, + winmm, gdi32, sfml_audio, sfml_graphics, + sfml_main, sfml_network, sfml_system, + sfml_window, ws2_32, dbghelp, tracy +] + +# use this for common options only for our executables +cpp_args=[ ] + +executable('runtests', [ + 'dbc.cpp', + 'matrix.cpp', + 'TracyClient.cpp', + 'tests/base.cpp', + ], override_options: exe_defaults, + dependencies: dependencies + [catch2]) + +executable('zedcaster', [ + 'dbc.cpp', + 'matrix.cpp', + 'config.cpp', + 'texture.cpp', + 'raycaster.cpp', + 'TracyClient.cpp', + 'stats.cpp', + 'main.cpp' + ], + cpp_args: cpp_args, + override_options: exe_defaults, + dependencies: dependencies) + +executable('amtcaster', [ + 'dbc.cpp', + 'config.cpp', + 'amt/texture.cpp', + 'TracyClient.cpp', + 'amt/raycaster.cpp', + 'amt/main.cpp' + ], + cpp_args: ['-std=c++23'], + override_options: exe_defaults, + dependencies: dependencies) diff --git a/tracy/meson.options b/tracy/meson.options new file mode 100644 index 0000000..906ac3a --- /dev/null +++ b/tracy/meson.options @@ -0,0 +1 @@ +option('tracy_enable', type: 'boolean', value: false, description: 'Enable profiling')