From 28e0c1005b7b1f773a4ba71da5307cf0bf53f555 Mon Sep 17 00:00:00 2001 From: jhugard Date: Thu, 9 Apr 2026 11:52:18 -0700 Subject: [PATCH 1/2] Add repro for HTTP cleanup callback race Add a unit-test-only NetworkState hook that reports whether cleanup can still cancel a request while the client's HTTP completion callback is running. Add TestHttpCallCompletionCallbackIsNotCleanupCancelable in GlobalTests to capture the current behavior with a mocked HTTP call. The test asserts that the request should no longer be cleanup-cancelable once the client callback is executing, which reproduces the lifetime bug that will be addressed in a follow-up fix commit. --- Source/Global/NetworkState.cpp | 15 +++++++++++ Source/Global/NetworkState.h | 4 +++ Tests/UnitTests/Tests/GlobalTests.cpp | 37 +++++++++++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/Source/Global/NetworkState.cpp b/Source/Global/NetworkState.cpp index beea2e797..4ad72a761 100644 --- a/Source/Global/NetworkState.cpp +++ b/Source/Global/NetworkState.cpp @@ -101,6 +101,21 @@ HRESULT NetworkState::HttpCallPerformAsync(HCCallHandle call, XAsyncBlock* async return S_OK; } +#ifdef HC_UNITTEST_API +bool NetworkState::CanCleanupCancelHttpRequest(XAsyncBlock* async) noexcept +{ + std::unique_lock lock{ m_mutex }; + for (auto activeRequest : m_activeHttpRequests) + { + if (activeRequest == async) + { + return true; + } + } + return false; +} +#endif + HRESULT CALLBACK NetworkState::HttpCallPerformAsyncProvider(XAsyncOp op, const XAsyncProviderData* data) { HttpPerformContext* performContext{ static_cast(data->context) }; diff --git a/Source/Global/NetworkState.h b/Source/Global/NetworkState.h index 66b1abfae..b7b65a125 100644 --- a/Source/Global/NetworkState.h +++ b/Source/Global/NetworkState.h @@ -48,6 +48,10 @@ class NetworkState XAsyncBlock* async ) noexcept; + #ifdef HC_UNITTEST_API + bool CanCleanupCancelHttpRequest(XAsyncBlock* async) noexcept; + #endif + #ifndef HC_NOWEBSOCKETS public: // WebSocket IWebSocketProvider& WebSocketProvider() noexcept; diff --git a/Tests/UnitTests/Tests/GlobalTests.cpp b/Tests/UnitTests/Tests/GlobalTests.cpp index 3ec9b36a0..81d668fcc 100644 --- a/Tests/UnitTests/Tests/GlobalTests.cpp +++ b/Tests/UnitTests/Tests/GlobalTests.cpp @@ -159,6 +159,43 @@ DEFINE_TEST_CLASS(GlobalTests) VERIFY_ARE_EQUAL((DWORD)WAIT_OBJECT_0, WaitForSingleObject(cleanupCompleteEvent, INFINITE)); CloseHandle(cleanupCompleteEvent); } + + DEFINE_TEST_CASE(TestHttpCallCompletionCallbackIsNotCleanupCancelable) + { + VERIFY_SUCCEEDED(HCInitialize(nullptr)); + PumpedTaskQueue pumpedQueue; + + constexpr char mockUrl[]{ "www.bing.com" }; + + HCMockCallHandle mock{ nullptr }; + VERIFY_SUCCEEDED(HCMockCallCreate(&mock)); + VERIFY_SUCCEEDED(HCMockResponseSetStatusCode(mock, 200)); + VERIFY_SUCCEEDED(HCMockAddMock(mock, "GET", mockUrl, nullptr, 0)); + + HCCallHandle call{ nullptr }; + VERIFY_SUCCEEDED(HCHttpCallCreate(&call)); + VERIFY_SUCCEEDED(HCHttpCallRequestSetUrl(call, "GET", mockUrl)); + + bool callbackSawCleanupCancelableRequest{ false }; + HANDLE httpCallCompleteEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr); + + XAsyncThunk httpPerformThunk{ [&](XAsyncBlock* async) + { + auto httpSingleton = get_http_singleton(); + VERIFY_IS_NOT_NULL(httpSingleton.get()); + callbackSawCleanupCancelableRequest = httpSingleton->m_networkState->CanCleanupCancelHttpRequest(async); + SetEvent(httpCallCompleteEvent); + }, pumpedQueue.queue }; + + VERIFY_SUCCEEDED(HCHttpCallPerformAsync(call, &httpPerformThunk.asyncBlock)); + VERIFY_ARE_EQUAL((DWORD)WAIT_OBJECT_0, WaitForSingleObject(httpCallCompleteEvent, INFINITE)); + + VERIFY_IS_FALSE(callbackSawCleanupCancelableRequest); + + CloseHandle(httpCallCompleteEvent); + VERIFY_SUCCEEDED(HCHttpCallCloseHandle(call)); + HCCleanup(); + } }; NAMESPACE_XBOX_HTTP_CLIENT_TEST_END From 2e5386e8c27fae99f74b3dff48c9ebdb8e0eb949 Mon Sep 17 00:00:00 2001 From: jhugard Date: Thu, 9 Apr 2026 12:23:06 -0700 Subject: [PATCH 2/2] Protect HTTP cleanup callback handoff Track active HTTP performs by HttpPerformContext and add an explicit handoff state for the client-owned XAsyncBlock during cleanup and completion. The cleanup path now snapshots requests under the NetworkState mutex, marks cancel-in-flight requests, and issues XAsyncCancel outside the lock. The completion path waits until cleanup finishes cancel propagation before publishing callback ownership and completing the client async block. This preserves the repro commit as the first commit on the branch and records the fix as the second commit. --- Source/Global/NetworkState.cpp | 94 +++++++++++++++++++++++++++++++--- Source/Global/NetworkState.h | 8 +-- 2 files changed, 90 insertions(+), 12 deletions(-) diff --git a/Source/Global/NetworkState.cpp b/Source/Global/NetworkState.cpp index 4ad72a761..a02742c1f 100644 --- a/Source/Global/NetworkState.cpp +++ b/Source/Global/NetworkState.cpp @@ -68,6 +68,17 @@ Result> NetworkState::HttpCallCreate() noexcept return call; } +// Coordinates handoff of the client-owned XAsyncBlock between cleanup and completion. +// This prevents CleanupAsyncProvider from canceling a request while HttpCallPerformComplete is +// handing that same XAsyncBlock to XAsyncComplete, after which the client callback may delete it. +// Once CallbackStarted is published, cleanup must never touch clientAsyncBlock again. +enum class HttpPerformClientBlockState : uint8_t +{ + CleanupMayCancel, + CleanupCancelInFlight, + CallbackStarted +}; + struct NetworkState::HttpPerformContext { HttpPerformContext(NetworkState& _state, HCCallHandle _callHandle, XAsyncBlock* _clientAsyncBlock) : @@ -89,6 +100,7 @@ struct NetworkState::HttpPerformContext NetworkState& state; HCCallHandle const callHandle; XAsyncBlock* const clientAsyncBlock; + std::atomic clientBlockState{ HttpPerformClientBlockState::CleanupMayCancel }; XAsyncBlock internalAsyncBlock; }; @@ -105,9 +117,9 @@ HRESULT NetworkState::HttpCallPerformAsync(HCCallHandle call, XAsyncBlock* async bool NetworkState::CanCleanupCancelHttpRequest(XAsyncBlock* async) noexcept { std::unique_lock lock{ m_mutex }; - for (auto activeRequest : m_activeHttpRequests) + for (auto context : m_activeHttpRequests) { - if (activeRequest == async) + if (context->clientAsyncBlock == async && context->clientBlockState.load(std::memory_order_acquire) != HttpPerformClientBlockState::CallbackStarted) { return true; } @@ -131,7 +143,7 @@ HRESULT CALLBACK NetworkState::HttpCallPerformAsyncProvider(XAsyncOp op, const X RETURN_IF_FAILED(XTaskQueueCreateComposite(workPort, workPort, &performContext->internalAsyncBlock.queue)); std::unique_lock lock{ state.m_mutex }; - state.m_activeHttpRequests.insert(performContext->clientAsyncBlock); + state.m_activeHttpRequests.insert(performContext); lock.unlock(); return performContext->callHandle->PerformAsync(&performContext->internalAsyncBlock); @@ -144,7 +156,7 @@ HRESULT CALLBACK NetworkState::HttpCallPerformAsyncProvider(XAsyncOp op, const X case XAsyncOp::Cleanup: { std::unique_lock lock{ state.m_mutex }; - state.m_activeHttpRequests.erase(performContext->clientAsyncBlock); + state.m_activeHttpRequests.erase(performContext); bool scheduleCleanup = state.ScheduleCleanup(); lock.unlock(); @@ -173,6 +185,47 @@ HRESULT CALLBACK NetworkState::HttpCallPerformAsyncProvider(XAsyncOp op, const X void CALLBACK NetworkState::HttpCallPerformComplete(XAsyncBlock* async) { HttpPerformContext* performContext{ static_cast(async->context) }; + + // Cleanup snapshots requests under m_mutex and then issues XAsyncCancel outside the lock. + // A snapshotted request publishes CleanupCancelInFlight before that lock is released, so + // the completion path waits here until cancel propagation finishes or until it wins the race + // and publishes CallbackStarted itself. + bool clientCallbackMayRun{ false }; + while (!clientCallbackMayRun) + { + switch (performContext->clientBlockState.load(std::memory_order_acquire)) + { + case HttpPerformClientBlockState::CallbackStarted: + { + clientCallbackMayRun = true; + break; + } + + case HttpPerformClientBlockState::CleanupMayCancel: + { + auto expectedState = HttpPerformClientBlockState::CleanupMayCancel; + if (performContext->clientBlockState.compare_exchange_weak( + expectedState, + HttpPerformClientBlockState::CallbackStarted, + std::memory_order_acq_rel, + std::memory_order_acquire)) + { + clientCallbackMayRun = true; + } + break; + } + + case HttpPerformClientBlockState::CleanupCancelInFlight: + { + // Expected transient state while CleanupAsyncProvider is synchronously issuing + // XAsyncCancel for this snapshotted request outside m_mutex. That path restores + // CleanupMayCancel before it leaves, at which point this loop can retry the handoff. + std::this_thread::yield(); + break; + } + } + } + XAsyncComplete(performContext->clientAsyncBlock, XAsyncGetStatus(async, false), 0); } @@ -391,7 +444,7 @@ HRESULT CALLBACK NetworkState::CleanupAsyncProvider(XAsyncOp op, const XAsyncPro { case XAsyncOp::Begin: { - xbox::httpclient::Vector activeRequests; + xbox::httpclient::Vector activeRequestsToCancel; #ifndef HC_NOWEBSOCKETS xbox::httpclient::Vector connectedWebSockets; #endif @@ -404,15 +457,40 @@ HRESULT CALLBACK NetworkState::CleanupAsyncProvider(XAsyncOp op, const XAsyncPro #ifndef HC_NOWEBSOCKETS HC_TRACE_VERBOSE(HTTPCLIENT, "NetworkState::CleanupAsyncProvider::Begin: HTTP active=%llu, WebSocket Connecting=%llu, WebSocket Connected=%llu", state->m_activeHttpRequests.size(), state->m_connectingWebSockets.size(), state->m_connectedWebSockets.size()); #endif - activeRequests.assign(state->m_activeHttpRequests.begin(), state->m_activeHttpRequests.end()); + // No new HTTP performs can enter m_activeHttpRequests after cleanup begins because + // http_singleton::singleton_access(cleanup) detaches the singleton before + // NetworkState::CleanupAsync runs. Snapshot requests here, then cancel them after + // releasing m_mutex. This prevents a race between holding the global cleanup mutex + // across XAsyncCancel and allowing completion to advance a request that cleanup has + // already decided to cancel. + for (auto activeRequest : state->m_activeHttpRequests) + { + auto expectedState = HttpPerformClientBlockState::CleanupMayCancel; + if (activeRequest->clientBlockState.compare_exchange_strong( + expectedState, + HttpPerformClientBlockState::CleanupCancelInFlight, + std::memory_order_acq_rel, + std::memory_order_acquire)) + { + activeRequestsToCancel.push_back(activeRequest); + } + } #ifndef HC_NOWEBSOCKETS connectedWebSockets.assign(state->m_connectedWebSockets.begin(), state->m_connectedWebSockets.end()); #endif } - for (auto& activeRequest : activeRequests) + // The snapshot remains valid outside m_mutex because a request in CleanupCancelInFlight + // cannot publish CallbackStarted, and the active-set entry is only removed during the + // client async cleanup that follows that callback. + for (auto activeRequest : activeRequestsToCancel) { - XAsyncCancel(activeRequest); + XAsyncCancel(activeRequest->clientAsyncBlock); + + // XAsyncCancel synchronously propagated the cancel request to the provider. If the + // request is still alive after that, the completion path may resume and enter the + // client callback. + activeRequest->clientBlockState.store(HttpPerformClientBlockState::CleanupMayCancel, std::memory_order_release); } #ifndef HC_NOWEBSOCKETS for (auto& context : connectedWebSockets) diff --git a/Source/Global/NetworkState.h b/Source/Global/NetworkState.h index b7b65a125..2e93dc8af 100644 --- a/Source/Global/NetworkState.h +++ b/Source/Global/NetworkState.h @@ -48,9 +48,9 @@ class NetworkState XAsyncBlock* async ) noexcept; - #ifdef HC_UNITTEST_API - bool CanCleanupCancelHttpRequest(XAsyncBlock* async) noexcept; - #endif +#ifdef HC_UNITTEST_API + bool CanCleanupCancelHttpRequest(XAsyncBlock* async) noexcept; +#endif #ifndef HC_NOWEBSOCKETS public: // WebSocket @@ -86,7 +86,7 @@ class NetworkState struct HttpPerformContext; - Set m_activeHttpRequests; + Set m_activeHttpRequests; XAsyncBlock* m_cleanupAsyncBlock{ nullptr }; // non-owning #ifndef HC_NOWEBSOCKETS