turn_transport: fix race condition in shutdown function

GitLab: #27
Change-Id: Id3b73839951ef40e99a424148290252a99879a8b
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4ffb09..e71ca35 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -372,6 +372,10 @@
     target_link_libraries(tests_ice PRIVATE dhtnet fmt::fmt PkgConfig::Cppunit)
     add_test(NAME tests_ice COMMAND tests_ice)
 
+    add_executable(tests_turnCache tests/turnCache.cpp)
+    target_link_libraries(tests_turnCache PRIVATE dhtnet fmt::fmt PkgConfig::Cppunit)
+    add_test(NAME tests_turnCache  COMMAND tests_turnCache)
+
     #add_executable(tests_stringutils tests/testString_utils.cpp)
     #target_link_libraries(tests_stringutils PRIVATE dhtnet fmt::fmt PkgConfig::Cppunit)
     #add_test(NAME tests_stringutils COMMAND tests_stringutils)
diff --git a/src/turn/turn_cache.cpp b/src/turn/turn_cache.cpp
index 0cf9267..a390df1 100644
--- a/src/turn/turn_cache.cpp
+++ b/src/turn/turn_cache.cpp
@@ -32,6 +32,8 @@
                      bool enabled)
     : accountId_(accountId)
     , cachePath_(cachePath)
+    , params_(params)
+    , enabled_(enabled)
     , io_context(io_ctx)
     , logger_(logger)
 {
@@ -65,6 +67,7 @@
 std::optional<IpAddr>
 TurnCache::getResolvedTurn(uint16_t family) const
 {
+    std::lock_guard lk(cachedTurnMutex_);
     if (family == AF_INET && cacheTurnV4_) {
         return *cacheTurnV4_;
     } else if (family == AF_INET6 && cacheTurnV6_) {
diff --git a/src/turn/turn_transport.cpp b/src/turn/turn_transport.cpp
index 75525ff..49fdf59 100644
--- a/src/turn/turn_transport.cpp
+++ b/src/turn/turn_transport.cpp
@@ -77,16 +77,25 @@
         ioWorker = std::thread([this] { ioJob(); });
     }
 
-    void shutdown() {
+    void shutdown()
+    {
         std::lock_guard lock(shutdownMtx_);
+        // The ioWorker thread must be stopped before caling pj_turn_sock_destroy,
+        // otherwise there's a potential race condition where pj_turn_sock_destroy
+        // sets the state of the TURN session to PJ_TURN_STATE_DESTROYING, and then
+        // ioWorker tries to execute a callback which expects the session to be in
+        // an earlier state. (This causes a crash if PJSIP was compiled with asserts
+        // enabled, see https://git.jami.net/savoirfairelinux/jami-daemon/-/issues/974)
+        if (ioWorker.joinable()) {
+            stopped_ = true;
+            ioWorker.join();
+        }
         if (relay) {
             pj_turn_sock_destroy(relay);
             relay = nullptr;
         }
         turnLock.reset();
-        if (ioWorker.joinable())
-            ioWorker.join();
-     }
+    }
 
     TurnTransportParams settings;
 
diff --git a/tests/turnCache.cpp b/tests/turnCache.cpp
new file mode 100644
index 0000000..4767c64
--- /dev/null
+++ b/tests/turnCache.cpp
@@ -0,0 +1,155 @@
+/*
+ *  Copyright (C) 2024 Savoir-faire Linux Inc.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 3 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <cppunit/TestAssert.h>
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+
+#include <asio/executor_work_guard.hpp>
+#include <asio/io_context.hpp>
+#include <opendht/log.h>
+
+#include "multiplexed_socket.h"
+#include "test_runner.h"
+#include "turn_cache.h"
+
+
+namespace dhtnet {
+namespace test {
+
+class TurnCacheTest : public CppUnit::TestFixture
+{
+public:
+    TurnCacheTest()
+    {
+        testDir_ = std::filesystem::current_path() / "tmp_tests_turnCache";
+    }
+    ~TurnCacheTest() {}
+    static std::string name() { return "TurnCache"; }
+    void setUp();
+    void tearDown();
+
+    std::shared_ptr<asio::io_context> ioContext;
+    std::shared_ptr<std::thread> ioContextRunner;
+    std::shared_ptr<Logger> logger = dht::log::getStdLogger();
+
+private:
+    std::filesystem::path testDir_;
+
+    void testTurnResolution();
+    void testRefreshMultipleTimes();
+
+    CPPUNIT_TEST_SUITE(TurnCacheTest);
+    CPPUNIT_TEST(testTurnResolution);
+    CPPUNIT_TEST(testRefreshMultipleTimes);
+    CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(TurnCacheTest, TurnCacheTest::name());
+
+void
+TurnCacheTest::setUp()
+{
+    if (!ioContext) {
+        ioContext = std::make_shared<asio::io_context>();
+        ioContextRunner = std::make_shared<std::thread>([&] {
+            auto work = asio::make_work_guard(*ioContext);
+            ioContext->run();
+        });
+    }
+}
+
+
+void
+TurnCacheTest::tearDown()
+{
+    ioContext->stop();
+    if (ioContextRunner && ioContextRunner->joinable()) {
+        ioContextRunner->join();
+    }
+    std::filesystem::remove_all(testDir_);
+}
+
+void
+TurnCacheTest::testTurnResolution()
+{
+    auto cachePath = testDir_ / "cache";
+
+    TurnTransportParams turnParams;
+    turnParams.domain = "turn.jami.net";
+    turnParams.realm = "ring";
+    turnParams.username = "ring";
+    turnParams.password = "ring";
+
+    auto turnCache = std::make_shared<TurnCache>("dummyAccount",
+                                                 cachePath.string(),
+                                                 ioContext,
+                                                 logger,
+                                                 turnParams,
+                                                 true);
+    turnCache->refresh();
+
+    // Wait up to 30 seconds for the resolution of the TURN server
+    int timeout = 30 * 1000;
+    int waitTime = 0;
+    int delay = 25;
+    while (waitTime < timeout) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(delay));
+        waitTime += delay;
+
+        if (turnCache->getResolvedTurn(AF_INET) ||
+            turnCache->getResolvedTurn(AF_INET6)) {
+            logger->debug("Waited {} ms for TURN resolution", waitTime);
+            break;
+        }
+    }
+
+    CPPUNIT_ASSERT(turnCache->getResolvedTurn(AF_INET) ||
+                   turnCache->getResolvedTurn(AF_INET6));
+}
+
+void
+TurnCacheTest::testRefreshMultipleTimes()
+{
+    auto cachePath = testDir_ / "cache";
+    bool enabled = true;
+
+    TurnTransportParams turnParams;
+    turnParams.domain = "turn.jami.net";
+    turnParams.realm = "ring";
+    turnParams.username = "ring";
+    turnParams.password = "ring";
+
+    auto turnCache = std::make_shared<TurnCache>("dummyAccount",
+                                                 cachePath.string(),
+                                                 ioContext,
+                                                 logger,
+                                                 turnParams,
+                                                 enabled);
+    // This test is meant to be a regression test for the following issue:
+    // https://git.jami.net/savoirfairelinux/dhtnet/-/issues/27
+    // Calling refresh twice causes the TurnTransport created by the first call to
+    // be destroyed shortly thereafter, which seems to be enough to reliably
+    // trigger the bug described in the GitLab issue linked above.
+    turnCache->refresh();
+    turnCache->refresh();
+}
+
+} // namespace test
+}
+
+JAMI_TEST_RUNNER(dhtnet::test::TurnCacheTest::name())