Reconstruction note (review): this patch was recovered from an HTML-mangled copy in which
every "<...>" template-argument list was stripped and line breaks were collapsed. Template
arguments below were re-derived from the patch's own semantics (the active-request set/vector
changed from XAsyncBlock* to HttpPerformContext*; the atomic holds HttpPerformClientBlockState).
Unverified reconstructions to confirm against base revision beea2e79 before applying:
  - the element type of connectedWebSockets (assumed std::shared_ptr<WebSocketPerformContext>);
  - hunk-header trailer text (informational only; ignored by git apply/patch);
  - declarations kept WITHOUT template arguments (std::unique_lock, XAsyncThunk) are valid via
    C++17 CTAD / non-template types — restore explicit arguments if the base revision had them.

diff --git a/Source/Global/NetworkState.cpp b/Source/Global/NetworkState.cpp
index beea2e79..a02742c1 100644
--- a/Source/Global/NetworkState.cpp
+++ b/Source/Global/NetworkState.cpp
@@ -68,6 +68,17 @@ Result<HCCallHandle> NetworkState::HttpCallCreate() noexcept
     return call;
 }
 
+// Coordinates handoff of the client-owned XAsyncBlock between cleanup and completion.
+// This prevents CleanupAsyncProvider from canceling a request while HttpCallPerformComplete is
+// handing that same XAsyncBlock to XAsyncComplete, after which the client callback may delete it.
+// Once CallbackStarted is published, cleanup must never touch clientAsyncBlock again.
+enum class HttpPerformClientBlockState : uint8_t
+{
+    CleanupMayCancel,
+    CleanupCancelInFlight,
+    CallbackStarted
+};
+
 struct NetworkState::HttpPerformContext
 {
     HttpPerformContext(NetworkState& _state, HCCallHandle _callHandle, XAsyncBlock* _clientAsyncBlock) :
@@ -89,6 +100,7 @@ struct NetworkState::HttpPerformContext
     NetworkState& state;
     HCCallHandle const callHandle;
     XAsyncBlock* const clientAsyncBlock;
+    std::atomic<HttpPerformClientBlockState> clientBlockState{ HttpPerformClientBlockState::CleanupMayCancel };
     XAsyncBlock internalAsyncBlock;
 };
 
@@ -101,6 +113,21 @@ HRESULT NetworkState::HttpCallPerformAsync(HCCallHandle call, XAsyncBlock* async)
     return S_OK;
 }
 
+#ifdef HC_UNITTEST_API
+bool NetworkState::CanCleanupCancelHttpRequest(XAsyncBlock* async) noexcept
+{
+    std::unique_lock lock{ m_mutex };
+    for (auto context : m_activeHttpRequests)
+    {
+        if (context->clientAsyncBlock == async && context->clientBlockState.load(std::memory_order_acquire) != HttpPerformClientBlockState::CallbackStarted)
+        {
+            return true;
+        }
+    }
+    return false;
+}
+#endif
+
 HRESULT CALLBACK NetworkState::HttpCallPerformAsyncProvider(XAsyncOp op, const XAsyncProviderData* data)
 {
     HttpPerformContext* performContext{ static_cast<HttpPerformContext*>(data->context) };
@@ -116,7 +143,7 @@ HRESULT CALLBACK NetworkState::HttpCallPerformAsyncProvider(XAsyncOp op, const XAsyncProviderData* data)
         RETURN_IF_FAILED(XTaskQueueCreateComposite(workPort, workPort, &performContext->internalAsyncBlock.queue));
 
         std::unique_lock lock{ state.m_mutex };
-        state.m_activeHttpRequests.insert(performContext->clientAsyncBlock);
+        state.m_activeHttpRequests.insert(performContext);
         lock.unlock();
 
         return performContext->callHandle->PerformAsync(&performContext->internalAsyncBlock);
@@ -129,7 +156,7 @@ HRESULT CALLBACK NetworkState::HttpCallPerformAsyncProvider(XAsyncOp op, const XAsyncProviderData* data)
     case XAsyncOp::Cleanup:
     {
         std::unique_lock lock{ state.m_mutex };
-        state.m_activeHttpRequests.erase(performContext->clientAsyncBlock);
+        state.m_activeHttpRequests.erase(performContext);
         bool scheduleCleanup = state.ScheduleCleanup();
         lock.unlock();
 
@@ -158,6 +185,47 @@ HRESULT CALLBACK NetworkState::HttpCallPerformAsyncProvider(XAsyncOp op, const XAsyncProviderData* data)
 void CALLBACK NetworkState::HttpCallPerformComplete(XAsyncBlock* async)
 {
     HttpPerformContext* performContext{ static_cast<HttpPerformContext*>(async->context) };
+
+    // Cleanup snapshots requests under m_mutex and then issues XAsyncCancel outside the lock.
+    // A snapshotted request publishes CleanupCancelInFlight before that lock is released, so
+    // the completion path waits here until cancel propagation finishes or until it wins the race
+    // and publishes CallbackStarted itself.
+    bool clientCallbackMayRun{ false };
+    while (!clientCallbackMayRun)
+    {
+        switch (performContext->clientBlockState.load(std::memory_order_acquire))
+        {
+        case HttpPerformClientBlockState::CallbackStarted:
+        {
+            clientCallbackMayRun = true;
+            break;
+        }
+
+        case HttpPerformClientBlockState::CleanupMayCancel:
+        {
+            auto expectedState = HttpPerformClientBlockState::CleanupMayCancel;
+            if (performContext->clientBlockState.compare_exchange_weak(
+                expectedState,
+                HttpPerformClientBlockState::CallbackStarted,
+                std::memory_order_acq_rel,
+                std::memory_order_acquire))
+            {
+                clientCallbackMayRun = true;
+            }
+            break;
+        }
+
+        case HttpPerformClientBlockState::CleanupCancelInFlight:
+        {
+            // Expected transient state while CleanupAsyncProvider is synchronously issuing
+            // XAsyncCancel for this snapshotted request outside m_mutex. That path restores
+            // CleanupMayCancel before it leaves, at which point this loop can retry the handoff.
+            std::this_thread::yield();
+            break;
+        }
+        }
+    }
+
     XAsyncComplete(performContext->clientAsyncBlock, XAsyncGetStatus(async, false), 0);
 }
 
@@ -376,7 +444,7 @@ HRESULT CALLBACK NetworkState::CleanupAsyncProvider(XAsyncOp op, const XAsyncProviderData* data)
     {
     case XAsyncOp::Begin:
     {
-        xbox::httpclient::Vector<XAsyncBlock*> activeRequests;
+        xbox::httpclient::Vector<HttpPerformContext*> activeRequestsToCancel;
 #ifndef HC_NOWEBSOCKETS
         xbox::httpclient::Vector<std::shared_ptr<WebSocketPerformContext>> connectedWebSockets;
 #endif
@@ -389,15 +457,40 @@ HRESULT CALLBACK NetworkState::CleanupAsyncProvider(XAsyncOp op, const XAsyncProviderData* data)
 #ifndef HC_NOWEBSOCKETS
             HC_TRACE_VERBOSE(HTTPCLIENT, "NetworkState::CleanupAsyncProvider::Begin: HTTP active=%llu, WebSocket Connecting=%llu, WebSocket Connected=%llu", state->m_activeHttpRequests.size(), state->m_connectingWebSockets.size(), state->m_connectedWebSockets.size());
 #endif
-            activeRequests.assign(state->m_activeHttpRequests.begin(), state->m_activeHttpRequests.end());
+            // No new HTTP performs can enter m_activeHttpRequests after cleanup begins because
+            // http_singleton::singleton_access(cleanup) detaches the singleton before
+            // NetworkState::CleanupAsync runs. Snapshot requests here, then cancel them after
+            // releasing m_mutex. This prevents a race between holding the global cleanup mutex
+            // across XAsyncCancel and allowing completion to advance a request that cleanup has
+            // already decided to cancel.
+            for (auto activeRequest : state->m_activeHttpRequests)
+            {
+                auto expectedState = HttpPerformClientBlockState::CleanupMayCancel;
+                if (activeRequest->clientBlockState.compare_exchange_strong(
+                    expectedState,
+                    HttpPerformClientBlockState::CleanupCancelInFlight,
+                    std::memory_order_acq_rel,
+                    std::memory_order_acquire))
+                {
+                    activeRequestsToCancel.push_back(activeRequest);
+                }
+            }
 #ifndef HC_NOWEBSOCKETS
             connectedWebSockets.assign(state->m_connectedWebSockets.begin(), state->m_connectedWebSockets.end());
 #endif
         }
 
-        for (auto& activeRequest : activeRequests)
+        // The snapshot remains valid outside m_mutex because a request in CleanupCancelInFlight
+        // cannot publish CallbackStarted, and the active-set entry is only removed during the
+        // client async cleanup that follows that callback.
+        for (auto activeRequest : activeRequestsToCancel)
         {
-            XAsyncCancel(activeRequest);
+            XAsyncCancel(activeRequest->clientAsyncBlock);
+
+            // XAsyncCancel synchronously propagated the cancel request to the provider. If the
+            // request is still alive after that, the completion path may resume and enter the
+            // client callback.
+            activeRequest->clientBlockState.store(HttpPerformClientBlockState::CleanupMayCancel, std::memory_order_release);
         }
 #ifndef HC_NOWEBSOCKETS
         for (auto& context : connectedWebSockets)
diff --git a/Source/Global/NetworkState.h b/Source/Global/NetworkState.h
index 66b1abfa..2e93dc8a 100644
--- a/Source/Global/NetworkState.h
+++ b/Source/Global/NetworkState.h
@@ -48,6 +48,10 @@ class NetworkState
         XAsyncBlock* async
     ) noexcept;
 
+#ifdef HC_UNITTEST_API
+    bool CanCleanupCancelHttpRequest(XAsyncBlock* async) noexcept;
+#endif
+
 #ifndef HC_NOWEBSOCKETS
 public: // WebSocket
     IWebSocketProvider& WebSocketProvider() noexcept;
@@ -82,7 +86,7 @@ class NetworkState
 
     struct HttpPerformContext;
 
-    Set<XAsyncBlock*> m_activeHttpRequests;
+    Set<HttpPerformContext*> m_activeHttpRequests;
     XAsyncBlock* m_cleanupAsyncBlock{ nullptr }; // non-owning
 
 #ifndef HC_NOWEBSOCKETS
diff --git a/Tests/UnitTests/Tests/GlobalTests.cpp b/Tests/UnitTests/Tests/GlobalTests.cpp
index 3ec9b36a..81d668fc 100644
--- a/Tests/UnitTests/Tests/GlobalTests.cpp
+++ b/Tests/UnitTests/Tests/GlobalTests.cpp
@@ -159,6 +159,43 @@ DEFINE_TEST_CLASS(GlobalTests)
         VERIFY_ARE_EQUAL((DWORD)WAIT_OBJECT_0, WaitForSingleObject(cleanupCompleteEvent, INFINITE));
         CloseHandle(cleanupCompleteEvent);
     }
+
+    DEFINE_TEST_CASE(TestHttpCallCompletionCallbackIsNotCleanupCancelable)
+    {
+        VERIFY_SUCCEEDED(HCInitialize(nullptr));
+        PumpedTaskQueue pumpedQueue;
+
+        constexpr char mockUrl[]{ "www.bing.com" };
+
+        HCMockCallHandle mock{ nullptr };
+        VERIFY_SUCCEEDED(HCMockCallCreate(&mock));
+        VERIFY_SUCCEEDED(HCMockResponseSetStatusCode(mock, 200));
+        VERIFY_SUCCEEDED(HCMockAddMock(mock, "GET", mockUrl, nullptr, 0));
+
+        HCCallHandle call{ nullptr };
+        VERIFY_SUCCEEDED(HCHttpCallCreate(&call));
+        VERIFY_SUCCEEDED(HCHttpCallRequestSetUrl(call, "GET", mockUrl));
+
+        bool callbackSawCleanupCancelableRequest{ false };
+        HANDLE httpCallCompleteEvent = CreateEvent(nullptr, FALSE, FALSE, nullptr);
+
+        XAsyncThunk httpPerformThunk{ [&](XAsyncBlock* async)
+        {
+            auto httpSingleton = get_http_singleton();
+            VERIFY_IS_NOT_NULL(httpSingleton.get());
+            callbackSawCleanupCancelableRequest = httpSingleton->m_networkState->CanCleanupCancelHttpRequest(async);
+            SetEvent(httpCallCompleteEvent);
+        }, pumpedQueue.queue };
+
+        VERIFY_SUCCEEDED(HCHttpCallPerformAsync(call, &httpPerformThunk.asyncBlock));
+        VERIFY_ARE_EQUAL((DWORD)WAIT_OBJECT_0, WaitForSingleObject(httpCallCompleteEvent, INFINITE));
+
+        VERIFY_IS_FALSE(callbackSawCleanupCancelableRequest);
+
+        CloseHandle(httpCallCompleteEvent);
+        VERIFY_SUCCEEDED(HCHttpCallCloseHandle(call));
+        HCCleanup();
+    }
 };
 
 NAMESPACE_XBOX_HTTP_CLIENT_TEST_END