Skip to content

Commit 2e5386e

Browse files
committed
Protect HTTP cleanup callback handoff
Track active HTTP performs by HttpPerformContext and add an explicit handoff state for the client-owned XAsyncBlock during cleanup and completion. The cleanup path now snapshots requests under the NetworkState mutex, marks cancel-in-flight requests, and issues XAsyncCancel outside the lock. The completion path waits until cleanup finishes cancel propagation before publishing callback ownership and completing the client async block. This preserves the repro commit as the first commit on the branch and records the fix as the second commit.
1 parent 28e0c10 commit 2e5386e

File tree

2 files changed

+90
-12
lines changed

2 files changed

+90
-12
lines changed

Source/Global/NetworkState.cpp

Lines changed: 86 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,17 @@ Result<UniquePtr<HC_CALL>> NetworkState::HttpCallCreate() noexcept
6868
return call;
6969
}
7070

71+
// Coordinates handoff of the client-owned XAsyncBlock between cleanup and completion.
72+
// This prevents CleanupAsyncProvider from canceling a request while HttpCallPerformComplete is
73+
// handing that same XAsyncBlock to XAsyncComplete, after which the client callback may delete it.
74+
// Once CallbackStarted is published, cleanup must never touch clientAsyncBlock again.
75+
enum class HttpPerformClientBlockState : uint8_t
76+
{
77+
CleanupMayCancel,
78+
CleanupCancelInFlight,
79+
CallbackStarted
80+
};
81+
7182
struct NetworkState::HttpPerformContext
7283
{
7384
HttpPerformContext(NetworkState& _state, HCCallHandle _callHandle, XAsyncBlock* _clientAsyncBlock) :
@@ -89,6 +100,7 @@ struct NetworkState::HttpPerformContext
89100
NetworkState& state;
90101
HCCallHandle const callHandle;
91102
XAsyncBlock* const clientAsyncBlock;
103+
std::atomic<HttpPerformClientBlockState> clientBlockState{ HttpPerformClientBlockState::CleanupMayCancel };
92104
XAsyncBlock internalAsyncBlock;
93105
};
94106

@@ -105,9 +117,9 @@ HRESULT NetworkState::HttpCallPerformAsync(HCCallHandle call, XAsyncBlock* async
105117
bool NetworkState::CanCleanupCancelHttpRequest(XAsyncBlock* async) noexcept
106118
{
107119
std::unique_lock<std::mutex> lock{ m_mutex };
108-
for (auto activeRequest : m_activeHttpRequests)
120+
for (auto context : m_activeHttpRequests)
109121
{
110-
if (activeRequest == async)
122+
if (context->clientAsyncBlock == async && context->clientBlockState.load(std::memory_order_acquire) != HttpPerformClientBlockState::CallbackStarted)
111123
{
112124
return true;
113125
}
@@ -131,7 +143,7 @@ HRESULT CALLBACK NetworkState::HttpCallPerformAsyncProvider(XAsyncOp op, const X
131143
RETURN_IF_FAILED(XTaskQueueCreateComposite(workPort, workPort, &performContext->internalAsyncBlock.queue));
132144

133145
std::unique_lock<std::mutex> lock{ state.m_mutex };
134-
state.m_activeHttpRequests.insert(performContext->clientAsyncBlock);
146+
state.m_activeHttpRequests.insert(performContext);
135147
lock.unlock();
136148

137149
return performContext->callHandle->PerformAsync(&performContext->internalAsyncBlock);
@@ -144,7 +156,7 @@ HRESULT CALLBACK NetworkState::HttpCallPerformAsyncProvider(XAsyncOp op, const X
144156
case XAsyncOp::Cleanup:
145157
{
146158
std::unique_lock<std::mutex> lock{ state.m_mutex };
147-
state.m_activeHttpRequests.erase(performContext->clientAsyncBlock);
159+
state.m_activeHttpRequests.erase(performContext);
148160
bool scheduleCleanup = state.ScheduleCleanup();
149161
lock.unlock();
150162

@@ -173,6 +185,47 @@ HRESULT CALLBACK NetworkState::HttpCallPerformAsyncProvider(XAsyncOp op, const X
173185
void CALLBACK NetworkState::HttpCallPerformComplete(XAsyncBlock* async)
174186
{
175187
HttpPerformContext* performContext{ static_cast<HttpPerformContext*>(async->context) };
188+
189+
// Cleanup snapshots requests under m_mutex and then issues XAsyncCancel outside the lock.
190+
// A snapshotted request publishes CleanupCancelInFlight before that lock is released, so
191+
// the completion path waits here until cancel propagation finishes or until it wins the race
192+
// and publishes CallbackStarted itself.
193+
bool clientCallbackMayRun{ false };
194+
while (!clientCallbackMayRun)
195+
{
196+
switch (performContext->clientBlockState.load(std::memory_order_acquire))
197+
{
198+
case HttpPerformClientBlockState::CallbackStarted:
199+
{
200+
clientCallbackMayRun = true;
201+
break;
202+
}
203+
204+
case HttpPerformClientBlockState::CleanupMayCancel:
205+
{
206+
auto expectedState = HttpPerformClientBlockState::CleanupMayCancel;
207+
if (performContext->clientBlockState.compare_exchange_weak(
208+
expectedState,
209+
HttpPerformClientBlockState::CallbackStarted,
210+
std::memory_order_acq_rel,
211+
std::memory_order_acquire))
212+
{
213+
clientCallbackMayRun = true;
214+
}
215+
break;
216+
}
217+
218+
case HttpPerformClientBlockState::CleanupCancelInFlight:
219+
{
220+
// Expected transient state while CleanupAsyncProvider is synchronously issuing
221+
// XAsyncCancel for this snapshotted request outside m_mutex. That path restores
222+
// CleanupMayCancel before it leaves, at which point this loop can retry the handoff.
223+
std::this_thread::yield();
224+
break;
225+
}
226+
}
227+
}
228+
176229
XAsyncComplete(performContext->clientAsyncBlock, XAsyncGetStatus(async, false), 0);
177230
}
178231

@@ -391,7 +444,7 @@ HRESULT CALLBACK NetworkState::CleanupAsyncProvider(XAsyncOp op, const XAsyncPro
391444
{
392445
case XAsyncOp::Begin:
393446
{
394-
xbox::httpclient::Vector<XAsyncBlock*> activeRequests;
447+
xbox::httpclient::Vector<HttpPerformContext*> activeRequestsToCancel;
395448
#ifndef HC_NOWEBSOCKETS
396449
xbox::httpclient::Vector<ActiveWebSocketContext*> connectedWebSockets;
397450
#endif
@@ -404,15 +457,40 @@ HRESULT CALLBACK NetworkState::CleanupAsyncProvider(XAsyncOp op, const XAsyncPro
404457
#ifndef HC_NOWEBSOCKETS
405458
HC_TRACE_VERBOSE(HTTPCLIENT, "NetworkState::CleanupAsyncProvider::Begin: HTTP active=%llu, WebSocket Connecting=%llu, WebSocket Connected=%llu", state->m_activeHttpRequests.size(), state->m_connectingWebSockets.size(), state->m_connectedWebSockets.size());
406459
#endif
407-
activeRequests.assign(state->m_activeHttpRequests.begin(), state->m_activeHttpRequests.end());
460+
// No new HTTP performs can enter m_activeHttpRequests after cleanup begins because
461+
// http_singleton::singleton_access(cleanup) detaches the singleton before
462+
// NetworkState::CleanupAsync runs. Snapshot requests here, then cancel them after
463+
// releasing m_mutex. This prevents a race between holding the global cleanup mutex
464+
// across XAsyncCancel and allowing completion to advance a request that cleanup has
465+
// already decided to cancel.
466+
for (auto activeRequest : state->m_activeHttpRequests)
467+
{
468+
auto expectedState = HttpPerformClientBlockState::CleanupMayCancel;
469+
if (activeRequest->clientBlockState.compare_exchange_strong(
470+
expectedState,
471+
HttpPerformClientBlockState::CleanupCancelInFlight,
472+
std::memory_order_acq_rel,
473+
std::memory_order_acquire))
474+
{
475+
activeRequestsToCancel.push_back(activeRequest);
476+
}
477+
}
408478
#ifndef HC_NOWEBSOCKETS
409479
connectedWebSockets.assign(state->m_connectedWebSockets.begin(), state->m_connectedWebSockets.end());
410480
#endif
411481
}
412482

413-
for (auto& activeRequest : activeRequests)
483+
// The snapshot remains valid outside m_mutex because a request in CleanupCancelInFlight
484+
// cannot publish CallbackStarted, and the active-set entry is only removed during the
485+
// client async cleanup that follows that callback.
486+
for (auto activeRequest : activeRequestsToCancel)
414487
{
415-
XAsyncCancel(activeRequest);
488+
XAsyncCancel(activeRequest->clientAsyncBlock);
489+
490+
// XAsyncCancel synchronously propagated the cancel request to the provider. If the
491+
// request is still alive after that, the completion path may resume and enter the
492+
// client callback.
493+
activeRequest->clientBlockState.store(HttpPerformClientBlockState::CleanupMayCancel, std::memory_order_release);
416494
}
417495
#ifndef HC_NOWEBSOCKETS
418496
for (auto& context : connectedWebSockets)

Source/Global/NetworkState.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ class NetworkState
4848
XAsyncBlock* async
4949
) noexcept;
5050

51-
#ifdef HC_UNITTEST_API
52-
bool CanCleanupCancelHttpRequest(XAsyncBlock* async) noexcept;
53-
#endif
51+
#ifdef HC_UNITTEST_API
52+
bool CanCleanupCancelHttpRequest(XAsyncBlock* async) noexcept;
53+
#endif
5454

5555
#ifndef HC_NOWEBSOCKETS
5656
public: // WebSocket
@@ -86,7 +86,7 @@ class NetworkState
8686

8787
struct HttpPerformContext;
8888

89-
Set<XAsyncBlock*> m_activeHttpRequests;
89+
Set<HttpPerformContext*> m_activeHttpRequests;
9090
XAsyncBlock* m_cleanupAsyncBlock{ nullptr }; // non-owning
9191

9292
#ifndef HC_NOWEBSOCKETS

0 commit comments

Comments
 (0)