turn_transport: fix race condition in shutdown function
GitLab: #27
Change-Id: Id3b73839951ef40e99a424148290252a99879a8b
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d4ffb09..e71ca35 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -372,6 +372,10 @@
target_link_libraries(tests_ice PRIVATE dhtnet fmt::fmt PkgConfig::Cppunit)
add_test(NAME tests_ice COMMAND tests_ice)
+ add_executable(tests_turnCache tests/turnCache.cpp)
+ target_link_libraries(tests_turnCache PRIVATE dhtnet fmt::fmt PkgConfig::Cppunit)
+ add_test(NAME tests_turnCache COMMAND tests_turnCache)
+
#add_executable(tests_stringutils tests/testString_utils.cpp)
#target_link_libraries(tests_stringutils PRIVATE dhtnet fmt::fmt PkgConfig::Cppunit)
#add_test(NAME tests_stringutils COMMAND tests_stringutils)
diff --git a/src/turn/turn_cache.cpp b/src/turn/turn_cache.cpp
index 0cf9267..a390df1 100644
--- a/src/turn/turn_cache.cpp
+++ b/src/turn/turn_cache.cpp
@@ -32,6 +32,8 @@
bool enabled)
: accountId_(accountId)
, cachePath_(cachePath)
+ , params_(params)
+ , enabled_(enabled)
, io_context(io_ctx)
, logger_(logger)
{
@@ -65,6 +67,7 @@
std::optional<IpAddr>
TurnCache::getResolvedTurn(uint16_t family) const
{
+ std::lock_guard lk(cachedTurnMutex_);
if (family == AF_INET && cacheTurnV4_) {
return *cacheTurnV4_;
} else if (family == AF_INET6 && cacheTurnV6_) {
diff --git a/src/turn/turn_transport.cpp b/src/turn/turn_transport.cpp
index 75525ff..49fdf59 100644
--- a/src/turn/turn_transport.cpp
+++ b/src/turn/turn_transport.cpp
@@ -77,16 +77,25 @@
ioWorker = std::thread([this] { ioJob(); });
}
- void shutdown() {
+ void shutdown()
+ {
std::lock_guard lock(shutdownMtx_);
+ // The ioWorker thread must be stopped before caling pj_turn_sock_destroy,
+ // otherwise there's a potential race condition where pj_turn_sock_destroy
+ // sets the state of the TURN session to PJ_TURN_STATE_DESTROYING, and then
+ // ioWorker tries to execute a callback which expects the session to be in
+ // an earlier state. (This causes a crash if PJSIP was compiled with asserts
+ // enabled, see https://git.jami.net/savoirfairelinux/jami-daemon/-/issues/974)
+ if (ioWorker.joinable()) {
+ stopped_ = true;
+ ioWorker.join();
+ }
if (relay) {
pj_turn_sock_destroy(relay);
relay = nullptr;
}
turnLock.reset();
- if (ioWorker.joinable())
- ioWorker.join();
- }
+ }
TurnTransportParams settings;
diff --git a/tests/turnCache.cpp b/tests/turnCache.cpp
new file mode 100644
index 0000000..4767c64
--- /dev/null
+++ b/tests/turnCache.cpp
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2024 Savoir-faire Linux Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <cppunit/TestAssert.h>
+#include <cppunit/TestFixture.h>
+#include <cppunit/extensions/HelperMacros.h>
+
+#include <asio/executor_work_guard.hpp>
+#include <asio/io_context.hpp>
+#include <opendht/log.h>
+
+#include "multiplexed_socket.h"
+#include "test_runner.h"
+#include "turn_cache.h"
+
+
+namespace dhtnet {
+namespace test {
+
+class TurnCacheTest : public CppUnit::TestFixture
+{
+public:
+ TurnCacheTest()
+ {
+ testDir_ = std::filesystem::current_path() / "tmp_tests_turnCache";
+ }
+ ~TurnCacheTest() {}
+ static std::string name() { return "TurnCache"; }
+ void setUp();
+ void tearDown();
+
+ std::shared_ptr<asio::io_context> ioContext;
+ std::shared_ptr<std::thread> ioContextRunner;
+ std::shared_ptr<Logger> logger = dht::log::getStdLogger();
+
+private:
+ std::filesystem::path testDir_;
+
+ void testTurnResolution();
+ void testRefreshMultipleTimes();
+
+ CPPUNIT_TEST_SUITE(TurnCacheTest);
+ CPPUNIT_TEST(testTurnResolution);
+ CPPUNIT_TEST(testRefreshMultipleTimes);
+ CPPUNIT_TEST_SUITE_END();
+};
+
+CPPUNIT_TEST_SUITE_NAMED_REGISTRATION(TurnCacheTest, TurnCacheTest::name());
+
+void
+TurnCacheTest::setUp()
+{
+ if (!ioContext) {
+ ioContext = std::make_shared<asio::io_context>();
+ ioContextRunner = std::make_shared<std::thread>([&] {
+ auto work = asio::make_work_guard(*ioContext);
+ ioContext->run();
+ });
+ }
+}
+
+
+void
+TurnCacheTest::tearDown()
+{
+ ioContext->stop();
+ if (ioContextRunner && ioContextRunner->joinable()) {
+ ioContextRunner->join();
+ }
+ std::filesystem::remove_all(testDir_);
+}
+
+void
+TurnCacheTest::testTurnResolution()
+{
+ auto cachePath = testDir_ / "cache";
+
+ TurnTransportParams turnParams;
+ turnParams.domain = "turn.jami.net";
+ turnParams.realm = "ring";
+ turnParams.username = "ring";
+ turnParams.password = "ring";
+
+ auto turnCache = std::make_shared<TurnCache>("dummyAccount",
+ cachePath.string(),
+ ioContext,
+ logger,
+ turnParams,
+ true);
+ turnCache->refresh();
+
+ // Wait up to 30 seconds for the resolution of the TURN server
+ int timeout = 30 * 1000;
+ int waitTime = 0;
+ int delay = 25;
+ while (waitTime < timeout) {
+ std::this_thread::sleep_for(std::chrono::milliseconds(delay));
+ waitTime += delay;
+
+ if (turnCache->getResolvedTurn(AF_INET) ||
+ turnCache->getResolvedTurn(AF_INET6)) {
+ logger->debug("Waited {} ms for TURN resolution", waitTime);
+ break;
+ }
+ }
+
+ CPPUNIT_ASSERT(turnCache->getResolvedTurn(AF_INET) ||
+ turnCache->getResolvedTurn(AF_INET6));
+}
+
+void
+TurnCacheTest::testRefreshMultipleTimes()
+{
+ auto cachePath = testDir_ / "cache";
+ bool enabled = true;
+
+ TurnTransportParams turnParams;
+ turnParams.domain = "turn.jami.net";
+ turnParams.realm = "ring";
+ turnParams.username = "ring";
+ turnParams.password = "ring";
+
+ auto turnCache = std::make_shared<TurnCache>("dummyAccount",
+ cachePath.string(),
+ ioContext,
+ logger,
+ turnParams,
+ enabled);
+ // This test is meant to be a regression test for the following issue:
+ // https://git.jami.net/savoirfairelinux/dhtnet/-/issues/27
+ // Calling refresh twice causes the TurnTransport created by the first call to
+ // be destroyed shortly thereafter, which seems to be enough to reliably
+ // trigger the bug described in the GitLab issue linked above.
+ turnCache->refresh();
+ turnCache->refresh();
+}
+
+} // namespace test
+}
+
+JAMI_TEST_RUNNER(dhtnet::test::TurnCacheTest::name())