From a1904d6f1f5dcf8690f7ca5b0a76bef8a21c6971 Mon Sep 17 00:00:00 2001 From: Zhou Lin Date: Tue, 7 Apr 2026 07:33:27 +0000 Subject: [PATCH 01/26] remove passing disablereassign in Split and Merge jobs --- .../inc/Core/SPANN/ExtraDynamicSearcher.h | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index acbfebaee..577035343 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -64,18 +64,17 @@ namespace SPTAG::SPANN { private: ExtraDynamicSearcher* m_extraIndex; SizeType m_headID; - bool m_disableReassign; std::function m_callback; public: - MergeAsyncJob(ExtraDynamicSearcher* extraIndex, SizeType headID, bool disableReassign, std::function p_callback) - : m_extraIndex(extraIndex), m_headID(headID), m_disableReassign(disableReassign), m_callback(std::move(p_callback)) {} + MergeAsyncJob(ExtraDynamicSearcher* extraIndex, SizeType headID, std::function p_callback) + : m_extraIndex(extraIndex), m_headID(headID), m_callback(std::move(p_callback)) {} ~MergeAsyncJob() {} inline void exec(IAbortOperation* p_abort) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Cannot support job.exec(abort)!\n"); } inline void exec(void* p_workSpace, IAbortOperation* p_abort) override { - ErrorCode ret = m_extraIndex->MergePostings((ExtraWorkSpace*)p_workSpace, m_headID, !m_disableReassign); + ErrorCode ret = m_extraIndex->MergePostings((ExtraWorkSpace*)p_workSpace, m_headID); if (ret != ErrorCode::Success) m_extraIndex->m_asyncStatus = ret; if (m_callback != nullptr) { @@ -89,18 +88,17 @@ namespace SPTAG::SPANN { private: ExtraDynamicSearcher* m_extraIndex; SizeType m_headID; - bool m_disableReassign; std::function m_callback; public: - SplitAsyncJob(ExtraDynamicSearcher* extraIndex, SizeType headID, bool disableReassign, std::function p_callback) - : m_extraIndex(extraIndex), m_headID(headID), m_disableReassign(disableReassign), m_callback(std::move(p_callback)) {} + SplitAsyncJob(ExtraDynamicSearcher* extraIndex, SizeType headID, std::function p_callback) + : m_extraIndex(extraIndex), m_headID(headID), m_callback(std::move(p_callback)) {} ~SplitAsyncJob() {} inline void exec(IAbortOperation* p_abort) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Cannot support job.exec(abort)!\n"); } inline void exec(void* p_workSpace, IAbortOperation* p_abort) override { - ErrorCode ret = m_extraIndex->Split((ExtraWorkSpace*)p_workSpace, m_headID, !m_disableReassign); + ErrorCode ret = m_extraIndex->Split((ExtraWorkSpace*)p_workSpace, m_headID); if (ret != ErrorCode::Success) m_extraIndex->m_asyncStatus = ret; if (m_callback != nullptr) { @@ -425,7 +423,7 @@ namespace SPTAG::SPANN { return ErrorCode::Success; } - ErrorCode Split(ExtraWorkSpace* p_exWorkSpace, const SizeType headID, bool reassign = false, bool requirelock = true) + ErrorCode Split(ExtraWorkSpace* p_exWorkSpace, const SizeType headID, bool requirelock = true) { auto splitBegin = std::chrono::high_resolution_clock::now(); std::vector newHeadsID; @@ -742,7 +740,7 @@ namespace SPTAG::SPANN { } m_stat.m_splitNum++; - if (reassign) { + if (!m_opt->m_disableReassign) { auto reassignScanBegin = std::chrono::high_resolution_clock::now(); CollectReAssign(p_exWorkSpace, headID, headVec, newPostingLists, newHeadsID, newHeadsVec, theSameHead); @@ -758,7 +756,7 @@ namespace SPTAG::SPANN { return ErrorCode::Success; } - ErrorCode MergePostings(ExtraWorkSpace *p_exWorkSpace, SizeType headID, bool reassign = false) + ErrorCode MergePostings(ExtraWorkSpace *p_exWorkSpace, SizeType headID) { std::unique_lock lock(m_rwLocks[headID]); @@ -850,7 +848,7 @@ namespace SPTAG::SPANN { // SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"Locked: %d, to be lock: %d\n", headID, queryResult->VID); if (m_rwLocks.hash_func(queryResult->VID) != m_rwLocks.hash_func(headID)) { if (!anotherLock.try_lock()) { - auto* curJob = new MergeAsyncJob(this, headID, reassign, nullptr); + auto* curJob = new MergeAsyncJob(this, headID, nullptr); m_splitThreadPool->add(curJob); return ErrorCode::Success; } @@ -929,7 +927,7 @@ namespace SPTAG::SPANN { // SPTAGLIB_LOG(Helper::LogLevel::LL_Info,"Release: %d, Release: %d\n", headID, queryResult->VID); lock.unlock(); - if (reassign) + if (!m_opt->m_disableReassign) { postingP = reinterpret_cast(deletedPostingList->data()); for (int j = 0; j < deletedLength; j++) { @@ -994,7 +992,7 @@ namespace SPTAG::SPANN { } } - auto* curJob = new SplitAsyncJob(this, headID, m_opt->m_disableReassign, p_callback); + auto* curJob = new SplitAsyncJob(this, headID, p_callback); m_splitThreadPool->add(curJob); // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Add to thread pool\n"); } @@ -1011,7 +1009,7 @@ namespace SPTAG::SPANN { } } - auto* curJob = new MergeAsyncJob(this, headID, m_opt->m_disableReassign, p_callback); + auto* curJob = new MergeAsyncJob(this, headID, p_callback); m_splitThreadPool->add(curJob); } @@ -1247,7 +1245,7 @@ namespace SPTAG::SPANN { } if (postingSize + appendNum > (m_postingSizeLimit + m_bufferSizeLimit)) { SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "After appending, the number of vectors in %lld exceeds the postingsize + buffersize (%d + %d)! Do split now...\n", (std::int64_t)headID, m_postingSizeLimit, m_bufferSizeLimit); - ret = Split(p_exWorkSpace, headID, !m_opt->m_disableReassign, false); + ret = Split(p_exWorkSpace, headID, false); if (ret != ErrorCode::Success) SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split %lld failed!\n", (std::int64_t)headID); lock.unlock(); @@ -1275,7 +1273,7 @@ namespace SPTAG::SPANN { // GetDBStats(); // } if (!reassignThreshold) SplitAsync(headID, postingSize); - else Split(p_exWorkSpace, headID, !m_opt->m_disableReassign); + else Split(p_exWorkSpace, headID); } auto appendEnd = std::chrono::high_resolution_clock::now(); double elapsedMSeconds = std::chrono::duration_cast(appendEnd - appendBegin).count(); From be09800298c7f4b806317b101642b527d0dce5b7 Mon Sep 17 00:00:00 2001 From: Zhou Lin Date: Tue, 14 Apr 2026 07:43:37 +0000 Subject: [PATCH 02/26] add delete posting if addhead fail --- AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 577035343..7e09e0cbe 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -567,10 +567,16 @@ namespace SPTAG::SPANN { return ErrorCode::Success; } + std::vector ks(2, 0); + if (m_headIndex->ComputeDistance(args.centers, headVec->c_str()) < m_headIndex->ComputeDistance(args.centers + args._D, headVec->c_str())) { + ks[0] = 1; + } else { + ks[1] = 1; + } SizeType newHeadVID = -1; int first = 0; newPostingLists.resize(2); - for (int k = 0; k < 2; k++) { + for (int k : ks) { if (args.counts[k] == 0) continue; newPostingLists[k].resize(args.counts[k] * m_vectorInfoSize); @@ -706,7 +712,13 @@ namespace SPTAG::SPANN { m_stat.m_putCost += elapsedMSeconds; auto updateHeadBegin = std::chrono::high_resolution_clock::now(); - m_headIndex->AddHeadIndex(args.centers + k * args._D, newHeadVID, version, m_opt->m_dim, m_layer + 1, p_exWorkSpace); + if ((ret = m_headIndex->AddHeadIndex(args.centers + k * args._D, newHeadVID, version, m_opt->m_dim, m_layer + 1, p_exWorkSpace)) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to update head index %lld\n", (std::int64_t)(newHeadVID)); + if (db->Delete(DBKey(newHeadVID)) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to delete gc posting %lld\n", (std::int64_t)(newHeadVID)); + } + return ret; + } auto updateHeadEnd = std::chrono::high_resolution_clock::now(); elapsedMSeconds = std::chrono::duration_cast(updateHeadEnd - updateHeadBegin).count(); m_stat.m_updateHeadCost += elapsedMSeconds; From 291f3f0fd40797e56ddb719a3a14022a1c5ee744 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 15 Apr 2026 08:32:20 +0000 Subject: [PATCH 03/26] fix split bug --- .../inc/Core/BKT/ParameterDefinitionList.h | 1 + AnnService/inc/Core/Common/BKTree.h | 190 ++++++++++++++++++ .../inc/Core/SPANN/ExtraDynamicSearcher.h | 89 ++++++-- AnnService/inc/Core/SPANN/Options.h | 3 +- .../inc/Core/SPANN/ParameterDefinitionList.h | 1 + AnnService/src/Core/BKT/BKTIndex.cpp | 6 +- AnnService/src/Core/SPANN/SPANNIndex.cpp | 22 +- Test/src/SPFreshTest.cpp | 5 +- 8 files changed, 289 insertions(+), 28 deletions(-) diff --git a/AnnService/inc/Core/BKT/ParameterDefinitionList.h b/AnnService/inc/Core/BKT/ParameterDefinitionList.h index 72b608fa9..44ecdebfe 100644 --- a/AnnService/inc/Core/BKT/ParameterDefinitionList.h +++ b/AnnService/inc/Core/BKT/ParameterDefinitionList.h @@ -15,6 +15,7 @@ DefineBKTParameter(m_pTrees.m_iBKTKmeansK, int, 32L, "BKTKmeansK") DefineBKTParameter(m_pTrees.m_iBKTLeafSize, int, 8L, "BKTLeafSize") DefineBKTParameter(m_pTrees.m_iSamples, int, 1000L, "Samples") DefineBKTParameter(m_pTrees.m_fBalanceFactor, float, 100.0F, "BKTLambdaFactor") +DefineBKTParameter(m_pTrees.m_parallelBuild, bool, false, "ParallelBKTBuild") DefineBKTParameter(m_pGraph.m_iTPTNumber, int, 32L, "TPTNumber") DefineBKTParameter(m_pGraph.m_iTPTLeafSize, int, 2000L, "TPTLeafSize") diff --git a/AnnService/inc/Core/Common/BKTree.h b/AnnService/inc/Core/Common/BKTree.h index e59962bd3..011c01a20 100644 --- a/AnnService/inc/Core/Common/BKTree.h +++ b/AnnService/inc/Core/Common/BKTree.h @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "inc/Core/VectorIndex.h" #include "CommonUtils.h" @@ -655,6 +657,193 @@ break; } } + // Parallel BKTree Build - processes sibling nodes in parallel + template + void BuildTreesParallel(const Dataset& data, DistCalcMethod distMethod, int numOfThreads, + std::vector* indices = nullptr, std::vector* reverseIndices = nullptr, + bool dynamicK = false, IAbortOperation* abort = nullptr) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Using PARALLEL BKTree build with %d threads.\n", numOfThreads); + + // Helper struct for collecting parallel results + struct ParallelNodeResult { + SizeType parentIndex; + SizeType first, last; + std::vector childCenters; + std::vector childCounts; + bool isLeaf; + bool singleCluster; + SizeType singleClusterCenter; + }; + + struct BKTStackItem { + SizeType index, first, last; + bool debug; + BKTStackItem(SizeType index_ = -1, SizeType first_ = 0, SizeType last_ = 0, bool debug_ = false) + : index(index_), first(first_), last(last_), debug(debug_) {} + }; + + std::vector localindices; + if (indices == nullptr) { + localindices.resize(data.R()); + for (SizeType i = 0; i < (SizeType)localindices.size(); i++) localindices[i] = i; + } + else { + localindices.assign(indices->begin(), indices->end()); + } + + // Create a shared KmeansArgs for DynamicFactorSelect (uses all threads) + KmeansArgs sharedArgs(m_iBKTKmeansK, data.C(), (SizeType)localindices.size(), numOfThreads, distMethod, m_pQuantizer); + + if (m_fBalanceFactor < 0) { + m_fBalanceFactor = DynamicFactorSelect(data, localindices, 0, (SizeType)localindices.size(), sharedArgs, m_iSamples); + } + + std::mt19937 rg; + m_pSampleCenterMap.clear(); + + for (char treeIdx = 0; treeIdx < m_iTreeNumber; treeIdx++) + { + std::shuffle(localindices.begin(), localindices.end(), rg); + + m_pTreeStart.push_back((SizeType)m_pTreeRoots.size()); + m_pTreeRoots.emplace_back((SizeType)localindices.size()); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Start to build BKTree %d (parallel)\n", treeIdx + 1); + + // Level-order processing + std::vector currentLevel, nextLevel; + currentLevel.push_back(BKTStackItem(m_pTreeStart[treeIdx], 0, (SizeType)localindices.size(), true)); + + int level = 0; + while (!currentLevel.empty()) { + if (abort && abort->ShouldAbort()) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "Abort!!!\n"); + return; + } + + size_t levelSize = currentLevel.size(); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Processing level %d with %zu nodes...\n", level, levelSize); + + std::vector results(levelSize); + + // Parallel phase: Run k-means for all nodes in this level + std::atomic_int nextidx(0); + auto func = [&]() { + while (true) { + int idx = nextidx.fetch_add(1); + if (idx < (int)levelSize) { + BKTStackItem& item = currentLevel[idx]; + ParallelNodeResult& result = results[idx]; + result.parentIndex = item.index; + result.first = item.first; + result.last = item.last; + result.isLeaf = false; + result.singleCluster = false; + + if (item.last - item.first <= m_iBKTLeafSize) { + // Leaf node + result.isLeaf = true; + for (SizeType j = item.first; j < item.last; j++) { + SizeType cid = (reverseIndices == nullptr) ? localindices[j] : reverseIndices->at(localindices[j]); + result.childCenters.push_back(cid); + } + } else { + // K-means clustering - dynamically allocate threads per node + // When few nodes at this level, give more threads to each k-means; + // when many nodes, use 1 thread per k-means (parallelism at node level). + // IMPORTANT: Must use full dataset size because KmeansAssign uses absolute indices + // (args.label[i] where i ranges from first to last, not 0 to rangeSize) + int threadsPerNode = std::max(1, numOfThreads / (int)levelSize); + KmeansArgs localArgs(m_iBKTKmeansK, data.C(), (SizeType)localindices.size(), threadsPerNode, distMethod, m_pQuantizer); + + int dk = m_iBKTKmeansK; + if (dynamicK) { + dk = std::min((item.last - item.first) / m_iBKTLeafSize + 1, m_iBKTKmeansK); + dk = std::max(dk, 2); + localArgs._DK = dk; + } + + int numClusters = KmeansClustering(data, localindices, item.first, item.last, localArgs, + m_iSamples, m_fBalanceFactor, false, abort); + + if (numClusters <= 1) { + result.singleCluster = true; + SizeType end = min(item.last + 1, (SizeType)localindices.size()); + std::sort(localindices.begin() + item.first, localindices.begin() + end); + result.singleClusterCenter = (reverseIndices == nullptr) ? localindices[item.first] : reverseIndices->at(localindices[item.first]); + for (SizeType j = item.first + 1; j < end; j++) { + SizeType cid = (reverseIndices == nullptr) ? localindices[j] : reverseIndices->at(localindices[j]); + result.childCenters.push_back(cid); + } + } else { + SizeType pos = item.first; + for (int k = 0; k < m_iBKTKmeansK; k++) { + if (localArgs.counts[k] == 0) continue; + SizeType cid = (reverseIndices == nullptr) ? localindices[pos + localArgs.counts[k] - 1] : reverseIndices->at(localindices[pos + localArgs.counts[k] - 1]); + result.childCenters.push_back(cid); + result.childCounts.push_back(localArgs.counts[k]); + pos += localArgs.counts[k]; + } + } + } + } else { + return; + } + } + }; + + std::vector mythreads; + // When nodes are few, each k-means uses multiple threads internally, + // so limit outer parallelism to avoid thread over-subscription. + int outerThreads = std::min(numOfThreads, (int)levelSize); + mythreads.reserve(outerThreads); + for (int tid = 0; tid < outerThreads; tid++) + { + mythreads.emplace_back(func); + } + for (auto& thread : mythreads) { thread.join(); } + + // Sequential phase: Build tree structure and prepare next level + nextLevel.clear(); + for (size_t idx = 0; idx < levelSize; idx++) { + ParallelNodeResult& result = results[idx]; + m_pTreeRoots[result.parentIndex].childStart = (SizeType)m_pTreeRoots.size(); + + if (result.isLeaf) { + for (SizeType cid : result.childCenters) { + m_pTreeRoots.emplace_back(cid); + } + } else if (result.singleCluster) { + m_pTreeRoots[result.parentIndex].centerid = result.singleClusterCenter; + m_pTreeRoots[result.parentIndex].childStart = -m_pTreeRoots[result.parentIndex].childStart; + for (SizeType cid : result.childCenters) { + m_pTreeRoots.emplace_back(cid); + m_pSampleCenterMap[cid] = result.singleClusterCenter; + } + m_pSampleCenterMap[-1 - result.singleClusterCenter] = result.parentIndex; + } else { + SizeType pos = result.first; + for (size_t c = 0; c < result.childCenters.size(); c++) { + SizeType nodeIdx = (SizeType)m_pTreeRoots.size(); + m_pTreeRoots.emplace_back(result.childCenters[c]); + if (result.childCounts[c] > 1) { + nextLevel.push_back(BKTStackItem(nodeIdx, pos, pos + result.childCounts[c] - 1, false)); + } + pos += result.childCounts[c]; + } + } + m_pTreeRoots[result.parentIndex].childEnd = (SizeType)m_pTreeRoots.size(); + } + + currentLevel.swap(nextLevel); + level++; + } + + m_pTreeRoots.emplace_back(-1); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "%d BKTree built (parallel), %zu %zu\n", treeIdx + 1, m_pTreeRoots.size() - m_pTreeStart[treeIdx], localindices.size()); + } + } + inline std::uint64_t BufferSize() const { return sizeof(int) + sizeof(SizeType) * m_iTreeNumber + @@ -863,6 +1052,7 @@ break; int m_iTreeNumber, m_iBKTKmeansK, m_iBKTLeafSize, m_iSamples, m_bfs; float m_fBalanceFactor; std::shared_ptr m_pQuantizer; + bool m_parallelBuild = false; }; } } diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 7e09e0cbe..4d25bff9a 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -496,7 +496,10 @@ namespace SPTAG::SPANN { if (VID == headID) hasHead = true; localIndices.push_back(j); } - + if (headj < 0) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split fail: cannot find head in posting! headID:%lld\n", (std::int64_t)headID); + return ErrorCode::Fail; + } // double gcEndTime = sw.getElapsedMs(); // m_splitGcCost += gcEndTime; @@ -578,7 +581,7 @@ namespace SPTAG::SPANN { newPostingLists.resize(2); for (int k : ks) { if (args.counts[k] == 0) continue; - + first = (k == 0) ? 0 : args.counts[0]; newPostingLists[k].resize(args.counts[k] * m_vectorInfoSize); char* ptr = (char*)(newPostingLists[k].c_str()); for (int j = 0; j < args.counts[k]; j++, ptr += m_vectorInfoSize) @@ -615,10 +618,10 @@ namespace SPTAG::SPANN { int retry = 0; while (!anotherLock.try_lock() && retry < 3) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, - "Split: new head VID %lld is being locked. Wait for lock and do " - "merging after getting lock...\n", - (std::int64_t)(newHeadVID)); + //SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, + // "Split: new head VID %lld is being locked. Wait for lock and do " + // "merging after getting lock...\n", + // (std::int64_t)(newHeadVID)); retry++; std::this_thread::sleep_for(std::chrono::milliseconds(3)); } @@ -632,7 +635,7 @@ namespace SPTAG::SPANN { } if (m_headIndex->ContainSample(newHeadVID, m_layer + 1)) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Split: new head VID %lld already exists in head index. Do merging...\n", (std::int64_t)(newHeadVID)); + //SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Split: new head VID %lld already exists in head index. Do merging...\n", (std::int64_t)(newHeadVID)); std::string mergedPostingList; std::set vectorIdSet; @@ -645,14 +648,24 @@ namespace SPTAG::SPANN { return ret; } - auto *postingP = reinterpret_cast(newPostingLists[k].data()); - size_t postVectorNum = newPostingLists[k].size() / m_vectorInfoSize; + auto *postingO = reinterpret_cast(newPostingLists[k].data()); + size_t postVectorNumO = newPostingLists[k].size() / m_vectorInfoSize; int currentLength = 0; - for (int j = 0; j < postVectorNum; j++, postingP += m_vectorInfoSize) + bool hasHeadO = false; + for (int j = 0; j < postVectorNumO; j++, postingO += m_vectorInfoSize) { - SizeType VID = *((SizeType *)(postingP)); - vectorIdSet.insert(VID); - mergedPostingList += newPostingLists[k].substr(j * m_vectorInfoSize, m_vectorInfoSize); + SizeType VID = *((SizeType *)(postingO)); + if (vectorIdSet.insert(VID).second) { + mergedPostingList += newPostingLists[k].substr(j * m_vectorInfoSize, m_vectorInfoSize); + currentLength++; + if (VID == newHeadVID) hasHeadO = true; + } + } + + if (!hasHeadO) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "Split: after merging head VID %lld, the head vector is missing in posting list. Add head vector back to posting list.\n", (std::int64_t)(newHeadVID)); + vectorIdSet.insert(newHeadVID); + mergedPostingList = postingList.substr(args.clusterIdx[k] * m_vectorInfoSize, m_vectorInfoSize) + mergedPostingList; currentLength++; } @@ -668,6 +681,8 @@ namespace SPTAG::SPANN { if (vectorIdSet.find(VID) != vectorIdSet.end()) continue; + + vectorIdSet.insert(VID); mergedPostingList += currentPostingList.substr(j * m_vectorInfoSize, m_vectorInfoSize); currentLength++; } @@ -725,7 +740,6 @@ namespace SPTAG::SPANN { } } //SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Head id: %d split into : %d, length: %d\n", headID, newHeadVID, args.counts[k]); - first += args.counts[k]; } if (!theSameHead) { m_headIndex->DeleteIndex(headID, m_layer + 1); @@ -817,6 +831,11 @@ namespace SPTAG::SPANN { currentLength++; } + if (headVec == nullptr) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "MergePostings fail: cannot find head vector in posting! headID:%lld\n", (std::int64_t)headID); + return ErrorCode::Fail; + } + if (currentLength > m_mergeThreshold) { if (vectorIdSet.find(headID) == vectorIdSet.end() && headVec != nullptr) { @@ -890,6 +909,10 @@ namespace SPTAG::SPANN { } nextLength++; } + if (resultVec == nullptr) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "MergePostings fail: cannot find another head vector in posting! headID:%lld\n", (std::int64_t)(queryResult->VID)); + return ErrorCode::Fail; + } if (currentLength + dedupLength >= m_postingSizeLimit) continue; if (currentLength >= nextLength) @@ -1268,6 +1291,16 @@ namespace SPTAG::SPANN { if ((ret = db->Merge( DBKey(headID), appendPosting, MaxTimeout, &(p_exWorkSpace->m_diskRequests), postingSize)) != ErrorCode::Success) { + if (ret == ErrorCode::Posting_OverFlow) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "Merge failed:Posting overflow when appending to %lld! Do split and then retry...\n", (std::int64_t)headID); + ret = Split(p_exWorkSpace, headID, false); + if (ret != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split %lld failed!\n", (std::int64_t)headID); + return ret; + } + lock.unlock(); + goto checkDeleted; + } SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge failed for %lld! Posting Size:%d, limit: %d\n", (std::int64_t)headID, postingSize, m_postingSizeLimit); GetDBStats(); return ret; @@ -1803,6 +1836,7 @@ namespace SPTAG::SPANN { } Helper::Concurrent::ConcurrentSet zeroReplicaSet; + Helper::Concurrent::ConcurrentSet postingsForSplit; std::atomic_int64_t originalSize(0), relaxSize(0); { std::vector mythreads; @@ -1820,9 +1854,10 @@ namespace SPTAG::SPANN { { if (postingListSize[i] <= m_postingSizeLimit) originalSize += postingListSize[i]; - else + else { originalSize += m_postingSizeLimit; - + postingsForSplit.insert(i); + } if (postingListSize[i] <= relaxLimit) { relaxSize += postingListSize[i]; @@ -1894,7 +1929,7 @@ namespace SPTAG::SPANN { p_headGlobaltoLocal[*(p_headToLocal[i])] = i; } } - if (ErrorCode::Success != WriteDownAllPostingToDB(selections, fullVectors, postingListSize, p_headToLocal, p_localToGlobal)) return false; + if (ErrorCode::Success != WriteDownAllPostingToDB(p_headIndex, selections, fullVectors, postingListSize, p_headToLocal, p_localToGlobal)) return false; if (m_opt->m_update && !m_opt->m_allowZeroReplica && zeroReplicaSet.size() > 0) { @@ -1905,6 +1940,10 @@ namespace SPTAG::SPANN { ExtraWorkSpace workSpace; InitWorkSpace(&workSpace); + for (SizeType sp : postingsForSplit) { + SplitAsync(*(p_headToLocal[sp]), postingListSize[sp].load()); + } + for (SizeType it : zeroReplicaSet) { std::shared_ptr vectorSet(new BasicVectorSet(ByteArray((std::uint8_t*)fullVectors->GetVector(it), m_vectorDataSize, false), @@ -1935,7 +1974,7 @@ namespace SPTAG::SPANN { return true; } - ErrorCode WriteDownAllPostingToDB(Selection& p_postingSelections, std::shared_ptr p_fullVectors, std::vector& postingSizes, COMMON::Dataset& p_headToGlobal, COMMON::Dataset& p_localToGlobal) { + ErrorCode WriteDownAllPostingToDB(std::shared_ptr& p_headIndex, Selection& p_postingSelections, std::shared_ptr p_fullVectors, std::vector& postingSizes, COMMON::Dataset& p_headToGlobal, COMMON::Dataset& p_localToGlobal) { std::vector threads; std::atomic vectorsSent(0); @@ -1952,6 +1991,8 @@ namespace SPTAG::SPANN { std::string postinglist(m_vectorInfoSize * postingSizes[index].load(), '\0'); char* ptr = (char*)postinglist.c_str(); std::size_t selectIdx = p_postingSelections.lower_bound(index); + SizeType postingID = *(p_headToGlobal[index]); + bool hasHead = false; for (int j = 0; j < postingSizes[index].load(); ++j) { if (p_postingSelections[selectIdx].node != index) { @@ -1961,14 +2002,22 @@ namespace SPTAG::SPANN { } SizeType localID = p_postingSelections[selectIdx++].tonode; SizeType fullID = (p_localToGlobal.R() > 0) ? *(p_localToGlobal[localID]) : localID; + if (fullID == postingID) hasHead = true; // if (id == 0) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "ID: %d\n", fullID); uint8_t version = m_versionMap.GetVersion(fullID); // First Vector ID, then version, then Vector Serialize(ptr, fullID, version, p_fullVectors->GetVector(localID)); ptr += m_vectorInfoSize; } - ErrorCode tmp; - SizeType postingID = *(p_headToGlobal[index]); + if (!hasHead) { + if (postingSizes[index].load() < m_postingSizeLimit + m_bufferSizeLimit) { + postinglist.append(m_vectorInfoSize, '\0'); + postingSizes[index]++; + } + Serialize(postinglist.data() + m_vectorInfoSize * (postingSizes[index].load() - 1), postingID, m_versionMap.GetVersion(postingID), p_headIndex->GetSample(index)); + } + + ErrorCode tmp; if ((tmp = db->Put(DBKey(postingID), postinglist, MaxTimeout, &(workSpace.m_diskRequests))) != ErrorCode::Success) { diff --git a/AnnService/inc/Core/SPANN/Options.h b/AnnService/inc/Core/SPANN/Options.h index 711221a37..152ec4f74 100644 --- a/AnnService/inc/Core/SPANN/Options.h +++ b/AnnService/inc/Core/SPANN/Options.h @@ -70,7 +70,8 @@ namespace SPTAG { bool m_recursiveCheckSmallCluster; bool m_printSizeCount; std::string m_selectType; - + bool m_parallelBKTBuild; + // Section 3: for build head bool m_buildHead; diff --git a/AnnService/inc/Core/SPANN/ParameterDefinitionList.h b/AnnService/inc/Core/SPANN/ParameterDefinitionList.h index 9f7568004..18f81a769 100644 --- a/AnnService/inc/Core/SPANN/ParameterDefinitionList.h +++ b/AnnService/inc/Core/SPANN/ParameterDefinitionList.h @@ -62,6 +62,7 @@ DefineSelectHeadParameter(m_headVectorCount, SizeType, 0, "Count") DefineSelectHeadParameter(m_recursiveCheckSmallCluster, bool, true, "RecursiveCheckSmallCluster") DefineSelectHeadParameter(m_printSizeCount, bool, true, "PrintSizeCount") DefineSelectHeadParameter(m_selectType, std::string, "BKT", "SelectHeadType") +DefineSelectHeadParameter(m_parallelBKTBuild, bool, false, "ParallelBKTBuild") #endif #ifdef DefineBuildHeadParameter diff --git a/AnnService/src/Core/BKT/BKTIndex.cpp b/AnnService/src/Core/BKT/BKTIndex.cpp index eca9e31b5..768752fa8 100644 --- a/AnnService/src/Core/BKT/BKTIndex.cpp +++ b/AnnService/src/Core/BKT/BKTIndex.cpp @@ -847,7 +847,11 @@ ErrorCode Index::BuildIndex(const void *p_data, SizeType p_vectorNum, Dimensi m_threadPool.init(); auto t1 = std::chrono::high_resolution_clock::now(); - m_pTrees.BuildTrees(m_pSamples, m_iDistCalcMethod, m_iNumberOfThreads); + if (m_pTrees.m_parallelBuild) { + m_pTrees.BuildTreesParallel(m_pSamples, m_iDistCalcMethod, m_iNumberOfThreads); + } else { + m_pTrees.BuildTrees(m_pSamples, m_iDistCalcMethod, m_iNumberOfThreads); + } auto t2 = std::chrono::high_resolution_clock::now(); SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Build Tree time (s): %lld\n", std::chrono::duration_cast(t2 - t1).count()); diff --git a/AnnService/src/Core/SPANN/SPANNIndex.cpp b/AnnService/src/Core/SPANN/SPANNIndex.cpp index 61f6df564..c9dc2e7e4 100644 --- a/AnnService/src/Core/SPANN/SPANNIndex.cpp +++ b/AnnService/src/Core/SPANN/SPANNIndex.cpp @@ -905,16 +905,22 @@ bool Index::SelectHeadInternal(std::shared_ptr &p_re bkt->m_iSamples = m_options.m_iSamples; bkt->m_iTreeNumber = m_options.m_iTreeNumber; bkt->m_fBalanceFactor = m_options.m_fBalanceFactor; + bkt->m_parallelBuild = m_options.m_parallelBKTBuild; bkt->m_pQuantizer = m_pQuantizer; SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Start invoking BuildTrees.\n"); SPTAGLIB_LOG( Helper::LogLevel::LL_Info, - "BKTKmeansK: %d, BKTLeafSize: %d, Samples: %d, BKTLambdaFactor:%f TreeNumber: %d, ThreadNum: %d.\n", + "BKTKmeansK: %d, BKTLeafSize: %d, Samples: %d, BKTLambdaFactor:%f TreeNumber: %d, ThreadNum: %d, ParallelBuild: %s.\n", bkt->m_iBKTKmeansK, bkt->m_iBKTLeafSize, bkt->m_iSamples, bkt->m_fBalanceFactor, bkt->m_iTreeNumber, - m_options.m_iSelectHeadNumberOfThreads); + m_options.m_iSelectHeadNumberOfThreads, m_options.m_parallelBKTBuild ? "True" : "False"); - bkt->BuildTrees(data, m_options.m_distCalcMethod, m_options.m_iSelectHeadNumberOfThreads, - nullptr, nullptr, true); + if (bkt->m_parallelBuild) { + bkt->BuildTreesParallel(data, m_options.m_distCalcMethod, m_options.m_iSelectHeadNumberOfThreads, + nullptr, nullptr, true); + } else { + bkt->BuildTrees(data, m_options.m_distCalcMethod, m_options.m_iSelectHeadNumberOfThreads, + nullptr, nullptr, true); + } auto t2 = std::chrono::high_resolution_clock::now(); double elapsedSeconds = std::chrono::duration_cast(t2 - t1).count(); SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "End invoking BuildTrees.\n"); @@ -1021,7 +1027,12 @@ template ErrorCode Index::BuildIndexInternalLayer(std::shared_pt (m_options.m_indexDirectory + FolderSep + m_options.m_headIDFile).c_str()); } else { - localToGlobalID.Load(ptr, m_topIndex->m_iDataBlockSize, m_topIndex->m_iDataCapacity); + localToGlobalID.Load(ptr, this->m_iDataBlockSize, this->m_iDataCapacity); + if (localToGlobalID.R() != p_reader->GetVectorSet()->Count()) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "HeadIDFile count doesn't match head vector file count!\n"); + localToGlobalID.SetR(0); + } } } @@ -1056,6 +1067,7 @@ template ErrorCode Index::BuildIndexInternalLayer(std::shared_pt m_topIndex = SPTAG::VectorIndex::CreateInstance(m_options.m_indexAlgoType, valueType); m_topIndex->SetParameter("DistCalcMethod", SPTAG::Helper::Convert::ConvertToString(m_options.m_distCalcMethod)); + m_topIndex->SetParameter("ParallelBKTBuild", m_options.m_parallelBKTBuild ? "true" : "false"); m_topIndex->SetQuantizer(m_pQuantizer); for (const auto &iter : m_topParameters) { diff --git a/Test/src/SPFreshTest.cpp b/Test/src/SPFreshTest.cpp index 65498c545..f022064d8 100644 --- a/Test/src/SPFreshTest.cpp +++ b/Test/src/SPFreshTest.cpp @@ -228,6 +228,7 @@ std::shared_ptr BuildLargeIndex(const std::string &outDirectory, st auto vecIndex = VectorIndex::CreateInstance(IndexAlgoType::SPANN, GetEnumValueType()); int maxthreads = std::thread::hardware_concurrency(); int postingLimit = 4 * sizeof(T); + remove((outDirectory + FolderSep + "ssdmapping_0_postings").c_str()); std::string configuration = R"( [Base] DistCalcMethod=)" + distMethod + R"( @@ -248,6 +249,7 @@ std::shared_ptr BuildLargeIndex(const std::string &outDirectory, st SplitFactor=0 SplitThreshold=0 Ratio=0.2 + ParallelBKTBuild=true [BuildHead] isExecute=true @@ -292,6 +294,7 @@ std::shared_ptr BuildLargeIndex(const std::string &outDirectory, st DeletePercentageForRefine=0.4 AsyncAppendQueueSize=0 AllowZeroReplica=false + ShareDB=true Layers=)" + std::to_string(layers) + R"( )"; @@ -696,7 +699,7 @@ void RunBenchmark(const std::string &vectorPath, const std::string &queryPath, c // Build initial index BOOST_TEST_MESSAGE("\n=== Building Index ==="); if (rebuild || !direxists(indexPath.c_str())) { - std::filesystem::remove_all(indexPath); + //std::filesystem::remove_all(indexPath); auto buildstart = std::chrono::high_resolution_clock::now(); if (enableQuantization) From 5cedf608d44db3aa9a57b27f6ed67c4db89e8fc9 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Wed, 15 Apr 2026 12:32:59 +0000 Subject: [PATCH 04/26] fix split gc --- .../inc/Core/SPANN/ExtraDynamicSearcher.h | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 4d25bff9a..26eb03070 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -376,6 +376,9 @@ namespace SPTAG::SPANN { } vectorCount++; } + if (vecStr == nullptr) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "RefineIndex failed to find head vector in posting %lld\n", (std::int64_t)globalID); + } if (!hasHead && vecStr != nullptr) { Serialize((char*)postingP + vectorCount * m_vectorInfoSize, globalID, m_versionMap.GetVersion(globalID), vecStr->data()); @@ -455,7 +458,7 @@ namespace SPTAG::SPANN { elapsedMSeconds = std::chrono::duration_cast(splitGetEnd - splitGetBegin).count(); m_stat.m_getCost += elapsedMSeconds; // reinterpret postingList to vectors and IDs - auto* postingP = reinterpret_cast(postingList.data()); + uint8_t* postingP = reinterpret_cast(postingList.data()); SizeType postVectorNum = (SizeType)(postingList.size() / m_vectorInfoSize); //SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "DEBUG: db get Posting %d successfully with length %d real length:%d vectorNum:%d\n", headID, (int)(postingList.size()), m_postingSizes.GetSize(headID), postVectorNum); @@ -505,14 +508,15 @@ namespace SPTAG::SPANN { if (localIndices.size() < m_postingSizeLimit) { - if (!hasHead && headj >= 0) { - localIndices.push_back(headj); - } - char* ptr = (char*)(postingList.c_str()); + char* ptr = (char*)(postingList.data()); for (int j = 0; j < localIndices.size(); j++, ptr += m_vectorInfoSize) { if (j == localIndices[j]) continue; - memcpy(ptr, postingList.c_str() + localIndices[j] * m_vectorInfoSize, m_vectorInfoSize); + memcpy(ptr, postingList.data() + localIndices[j] * m_vectorInfoSize, m_vectorInfoSize); + } + if (!hasHead) { + Serialize(ptr, headID, m_versionMap.GetVersion(headID), headVec->data()); + localIndices.push_back(headj); } postingList.resize(localIndices.size() * m_vectorInfoSize); if ((ret=db->Put(DBKey(headID), postingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { @@ -544,20 +548,18 @@ namespace SPTAG::SPANN { // int numClusters = ClusteringSPFresh(smallSample, localIndices, 0, localIndices.size(), args, 10, false, m_opt->m_virtualHead); if (numClusters <= 1) { - int cut = 1; - if (m_opt->m_oneClusterCutMax) cut = m_postingSizeLimit; + int cut = (m_opt->m_oneClusterCutMax)? m_postingSizeLimit: 1; + std::string newpostingList(cut * m_vectorInfoSize, '\0'); - char* ptr = (char*)(newpostingList.c_str()); - float totaldist = 0.0f; - bool hasHead = false; + char* ptr = (char*)(newpostingList.data()); + hasHead = false; for (int j = 0; j < cut; j++, ptr += m_vectorInfoSize) { - totaldist += m_headIndex->ComputeDistance(ptr + m_metaDataSize, args.centers); memcpy(ptr, postingList.c_str() + localIndices[j] * m_vectorInfoSize, m_vectorInfoSize); if (*((SizeType*)(ptr)) == headID) hasHead = true; } if (!hasHead) memcpy(newpostingList.data(), postingList.c_str() + headj * m_vectorInfoSize, m_vectorInfoSize); - SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Cluserting Failed (The same vector), Cluster total dist:%f Only Keep %d vectors.\n", totaldist, cut); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Cluserting Failed (The same vector), Only Keep %d vectors.\n", cut); if ((ret=db->Put(DBKey(headID), newpostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split fail to override posting cut to limit for posting %lld\n", (std::int64_t)(headID)); @@ -689,11 +691,13 @@ namespace SPTAG::SPANN { if (currentLength > (m_postingSizeLimit + m_bufferSizeLimit)) { + /* SPTAGLIB_LOG( Helper::LogLevel::LL_Warning, "Split: merged posting list length %d exceeds hard limit %d after merging head " "VID %lld. Cut to limit and put back to db.\n", currentLength, m_postingSizeLimit + m_bufferSizeLimit, (std::int64_t)(newHeadVID)); + */ mergedPostingList.resize((m_postingSizeLimit + m_bufferSizeLimit) * m_vectorInfoSize); currentLength = m_postingSizeLimit + m_bufferSizeLimit; } @@ -1279,7 +1283,7 @@ namespace SPTAG::SPANN { } } if (postingSize + appendNum > (m_postingSizeLimit + m_bufferSizeLimit)) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "After appending, the number of vectors in %lld exceeds the postingsize + buffersize (%d + %d)! Do split now...\n", (std::int64_t)headID, m_postingSizeLimit, m_bufferSizeLimit); + //SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "After appending, the number of vectors in %lld exceeds the postingsize + buffersize (%d + %d)! Do split now...\n", (std::int64_t)headID, m_postingSizeLimit, m_bufferSizeLimit); ret = Split(p_exWorkSpace, headID, false); if (ret != ErrorCode::Success) SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split %lld failed!\n", (std::int64_t)headID); From 983b10ebc62f9623d51da336beb9baa48ee7d36e Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Thu, 16 Apr 2026 08:54:14 +0000 Subject: [PATCH 05/26] fix separate DB per layer and remove aggressive split --- .../inc/Core/SPANN/ExtraDynamicSearcher.h | 3 --- AnnService/src/Core/SPANN/SPANNIndex.cpp | 19 +++++++++---------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 26eb03070..1b238a01a 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -1944,9 +1944,6 @@ namespace SPTAG::SPANN { ExtraWorkSpace workSpace; InitWorkSpace(&workSpace); - for (SizeType sp : postingsForSplit) { - SplitAsync(*(p_headToLocal[sp]), postingListSize[sp].load()); - } for (SizeType it : zeroReplicaSet) { diff --git a/AnnService/src/Core/SPANN/SPANNIndex.cpp b/AnnService/src/Core/SPANN/SPANNIndex.cpp index c9dc2e7e4..bd3ae519b 100644 --- a/AnnService/src/Core/SPANN/SPANNIndex.cpp +++ b/AnnService/src/Core/SPANN/SPANNIndex.cpp @@ -119,8 +119,7 @@ template ErrorCode Index::LoadIndexDataFromMemory(const std::vec SizeType globalID = *(m_topLocalToGlobalID[i]); m_topGlobalToLocalID[globalID] = i; } - - PrepareDB(m_db); + if (m_options.m_shareDB) PrepareDB(m_db); m_extraSearchers.resize(m_options.m_layers); for (int i = m_options.m_layers - 1; i >= 0; i--) { if (m_options.m_storage == Storage::STATIC) @@ -188,7 +187,7 @@ ErrorCode Index::LoadIndexData(const std::vector= 0; i--) { if (m_options.m_storage == Storage::STATIC) @@ -1028,9 +1027,10 @@ template ErrorCode Index::BuildIndexInternalLayer(std::shared_pt } else { localToGlobalID.Load(ptr, this->m_iDataBlockSize, this->m_iDataCapacity); - if (localToGlobalID.R() != p_reader->GetVectorSet()->Count()) + SizeType vectorCount = p_reader->GetVectorSet()->Count(); + if (localToGlobalID.R() != vectorCount) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "HeadIDFile count doesn't match head vector file count!\n"); + SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "HeadIDFile count %lld doesn't match head vector file count %lld!\n", (int64_t)localToGlobalID.R(), (int64_t)vectorCount); localToGlobalID.SetR(0); } } @@ -1207,7 +1207,7 @@ template ErrorCode Index::BuildIndexInternal(std::shared_ptr ErrorCode Index::BuildIndexInternal(std::shared_ptrGetParameter("VectorFilePath"); std::string vectorPath = m_options.m_indexDirectory + FolderSep + m_options.m_headVectorFile; - if (rename(vectorPath.c_str(), (vectorPath + "_tmp").c_str()) != 0) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed to rename vector file to %s\n", (vectorPath + "_tmp").c_str()); + if (rename(vectorInPath.c_str(), (vectorPath + "_tmp").c_str()) != 0) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed to rename vector file %s to %s\n", vectorInPath.c_str(), (vectorPath + "_tmp").c_str()); } vectorReader = Helper::VectorSetReader::CreateInstance(vectorOptions); if (ErrorCode::Success != vectorReader->LoadFile(vectorPath + "_tmp")) @@ -1659,8 +1660,6 @@ template ErrorCode Index::DeleteIndex(const void *p_vectors, Siz template void Index::PrepareDB(std::shared_ptr& db, int layer) { - if (!m_options.m_shareDB) return; - if(m_options.m_storage == Storage::FILEIO) { SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPANNIndex:UseFileIO\n"); db.reset(new FileIO(m_options, layer)); From ce480d503d0328ca95a761cd898fa8e398d5fe85 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Thu, 16 Apr 2026 11:14:43 +0000 Subject: [PATCH 06/26] remove Error due to another lock is locked --- AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 1b238a01a..3c1f57942 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -632,7 +632,12 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split: new head VID %lld is being locked after 3 retries. Skip merging and return split failed...\n", (std::int64_t)(newHeadVID)); - return ErrorCode::Fail; + { + std::unique_lock tmplock(m_splitListLock); + m_splitList.unsafe_erase(headID); + } + SplitAsync(headID, postingList.size() / m_vectorInfoSize); + return ErrorCode::Success; } } From 9a3bf79683f011922e71ee9c0bfbc7094872b830 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Thu, 16 Apr 2026 12:40:47 +0000 Subject: [PATCH 07/26] all the layers share the same workspace id pool --- .../inc/Core/SPANN/ExtraDynamicSearcher.h | 47 ++++------------- .../inc/Core/SPANN/ExtraStaticSearcher.h | 32 ------------ AnnService/inc/Core/SPANN/IExtraSearcher.h | 2 - AnnService/inc/Core/SPANN/Index.h | 27 ++++++++++ AnnService/src/Core/SPANN/SPANNIndex.cpp | 50 +++++++++++++++---- 5 files changed, 76 insertions(+), 82 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 3c1f57942..c9b04f28e 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -146,7 +146,7 @@ namespace SPTAG::SPANN { m_threads.emplace_back([this, extraIndex] { Job *j; ExtraWorkSpace workSpace; - extraIndex->InitWorkSpace(&workSpace); + extraIndex->GetHeadIndex()->InitWorkSpace(&workSpace); while (get(j)) { try @@ -165,9 +165,6 @@ namespace SPTAG::SPANN { }; private: - std::shared_ptr> m_freeWorkSpaceIds; - std::atomic m_workspaceCount = 0; - std::mutex m_asyncAppendLock; Helper::Concurrent::ConcurrentPriorityQueue m_asyncAppendQueue; @@ -213,13 +210,6 @@ namespace SPTAG::SPANN { m_hardLatencyLimit = std::chrono::microseconds((int)(p_opt.m_latencyLimit) * 1000); m_mergeThreshold = p_opt.m_mergeThreshold; - int maxIOThreads = max(p_opt.m_ioThreads, (2 * max(p_opt.m_searchThreadNum, p_opt.m_iSSDNumberOfThreads) + - p_opt.m_insertThreadNum + p_opt.m_reassignThreadNum + p_opt.m_appendThreadNum)); - m_freeWorkSpaceIds.reset(new Helper::Concurrent::ConcurrentQueue()); - for (int i = 0; i < maxIOThreads; i++) { - m_freeWorkSpaceIds->push(i); - } - m_workspaceCount = maxIOThreads; SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Posting size limit: %d, search limit: %f, merge threshold: %d\n", m_postingSizeLimit, p_opt.m_latencyLimit, m_mergeThreshold); } @@ -266,7 +256,9 @@ namespace SPTAG::SPANN { } return ret; } - + + SPANN::Index* GetHeadIndex() const { return m_headIndex; } + bool CheckIsNeedReassign(std::vector>& newHeadsVec, ValueType* data, std::shared_ptr splitHeadVec, float_t headToSplitHeadDist, float_t currentHeadDist, bool isInSplitHead) { float_t splitHeadDist = m_headIndex->ComputeDistance(data, splitHeadVec->data()); @@ -327,7 +319,7 @@ namespace SPTAG::SPANN { ErrorCode ret; SizeType index = 0; ExtraWorkSpace workSpace; - InitWorkSpace(&workSpace); + m_headIndex->InitWorkSpace(&workSpace); while (true) { index = nextPostingID.fetch_add(1); @@ -1198,27 +1190,6 @@ namespace SPTAG::SPANN { return true; } - void InitWorkSpace(ExtraWorkSpace* p_exWorkSpace, bool clear = false) override - { - if (clear) { - p_exWorkSpace->Clear(m_opt->m_searchInternalResultNum, (max(m_opt->m_postingPageLimit, m_opt->m_searchPostingPageLimit) + m_opt->m_bufferLength) << PageSizeEx, true, m_opt->m_enableDataCompression); - } - else { - p_exWorkSpace->Initialize(m_opt->m_maxCheck, m_opt->m_hashExp, max(m_opt->m_searchInternalResultNum, m_opt->m_reassignK), (max(m_opt->m_postingPageLimit, m_opt->m_searchPostingPageLimit) + m_opt->m_bufferLength) << PageSizeEx, true, m_opt->m_enableDataCompression); - int wid = 0; - if (m_freeWorkSpaceIds == nullptr || !m_freeWorkSpaceIds->try_pop(wid)) - { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "FreeWorkSpaceIds is not initalized or the workspace number is not enough! Please increase iothread number.\n"); - p_exWorkSpace->m_diskRequests[0].m_status = -1; - return; - } - p_exWorkSpace->m_diskRequests[0].m_status = wid; - p_exWorkSpace->m_callback = [m_freeWorkSpaceIds = m_freeWorkSpaceIds, wid] () { - if (m_freeWorkSpaceIds) m_freeWorkSpaceIds->push(wid); - }; - } - } - ErrorCode AsyncAppend(ExtraWorkSpace* p_exWorkSpace, SizeType headID, int appendNum, std::string& appendPosting, int reassignThreshold = 0) { if (m_asyncAppendQueue.size() >= m_opt->m_asyncAppendQueueSize) { @@ -1423,7 +1394,7 @@ namespace SPTAG::SPANN { ErrorCode ret = ErrorCode::Success; auto func = [&]() { ExtraWorkSpace workSpace; - InitWorkSpace(&workSpace); + m_headIndex->InitWorkSpace(&workSpace); size_t index = 0; while (true) { @@ -1512,7 +1483,7 @@ namespace SPTAG::SPANN { return true; } ExtraWorkSpace workSpace; - InitWorkSpace(&workSpace); + m_headIndex->InitWorkSpace(&workSpace); do { countAssignment++; if (countAssignment % 10000 == 0) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Process %d logs\n", countAssignment); @@ -1948,7 +1919,7 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "SPFresh: finish initialization, zeroReplicaCount:%d\n", (int)(zeroReplicaSet.size())); ExtraWorkSpace workSpace; - InitWorkSpace(&workSpace); + m_headIndex->InitWorkSpace(&workSpace); for (SizeType it : zeroReplicaSet) { @@ -1988,7 +1959,7 @@ namespace SPTAG::SPANN { auto func = [&]() { ExtraWorkSpace workSpace; - InitWorkSpace(&workSpace); + m_headIndex->InitWorkSpace(&workSpace); SizeType index = 0; while (true) { diff --git a/AnnService/inc/Core/SPANN/ExtraStaticSearcher.h b/AnnService/inc/Core/SPANN/ExtraStaticSearcher.h index 34eaf0018..905f93f16 100644 --- a/AnnService/inc/Core/SPANN/ExtraStaticSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraStaticSearcher.h @@ -175,29 +175,6 @@ namespace SPTAG return m_available; } - void InitWorkSpace(ExtraWorkSpace* p_exWorkSpace, bool clear = false) override - { - if (clear) { - p_exWorkSpace->Clear(m_opt->m_searchInternalResultNum, max(m_opt->m_postingPageLimit, m_opt->m_searchPostingPageLimit + 1) << PageSizeEx, false, m_opt->m_enableDataCompression); - } - else { - p_exWorkSpace->Initialize(m_opt->m_maxCheck, m_opt->m_hashExp, m_opt->m_searchInternalResultNum, max(m_opt->m_postingPageLimit, m_opt->m_searchPostingPageLimit + 1) << PageSizeEx, false, m_opt->m_enableDataCompression); - int wid = 0; - if (m_freeWorkSpaceIds == nullptr || !m_freeWorkSpaceIds->try_pop(wid)) - { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "FreeWorkSpaceIds is not initalized or the workspace number is not enough! Please increase iothread number.\n"); - wid = m_workspaceCount.fetch_add(1); - } - for (auto & req : p_exWorkSpace->m_diskRequests) - { - req.m_status = wid; - } - p_exWorkSpace->m_callback = [m_freeWorkSpaceIds = m_freeWorkSpaceIds, wid] () { - if (m_freeWorkSpaceIds) m_freeWorkSpaceIds->push(wid); - }; - } - } - virtual bool LoadIndex(Options& p_opt) override { m_extraFullGraphFile = p_opt.m_indexDirectory + FolderSep + p_opt.m_ssdIndex; std::string curFile = m_extraFullGraphFile + "_" + std::to_string(m_layer); @@ -253,12 +230,6 @@ namespace SPTAG Helper::AIOTimeout.tv_nsec = p_opt.m_iotimeout * 1000; #endif - m_freeWorkSpaceIds.reset(new Helper::Concurrent::ConcurrentQueue()); - int maxIOThreads = max(p_opt.m_searchThreadNum, p_opt.m_iSSDNumberOfThreads); - for (int i = 0; i < maxIOThreads; i++) { - m_freeWorkSpaceIds->push(i); - } - m_workspaceCount = maxIOThreads; m_available = true; return true; } @@ -1660,9 +1631,6 @@ namespace SPTAG private: bool m_available = false; - std::shared_ptr> m_freeWorkSpaceIds; - std::atomic m_workspaceCount = 0; - std::string m_extraFullGraphFile; std::vector m_listInfos; diff --git a/AnnService/inc/Core/SPANN/IExtraSearcher.h b/AnnService/inc/Core/SPANN/IExtraSearcher.h index 35563e8c2..7e5e9527e 100644 --- a/AnnService/inc/Core/SPANN/IExtraSearcher.h +++ b/AnnService/inc/Core/SPANN/IExtraSearcher.h @@ -305,8 +305,6 @@ namespace SPTAG { virtual bool BuildIndex(std::shared_ptr& p_reader, std::shared_ptr p_index, Options& p_opt, COMMON::Dataset& p_headtoLocal, Helper::Concurrent::ConcurrentMap& p_headGlobaltoLocal, COMMON::Dataset& p_localToGlobal, SizeType upperBound = -1) = 0; - - virtual void InitWorkSpace(ExtraWorkSpace* p_exWorkSpace, bool clear = false) = 0; virtual ErrorCode RefineIndex() { diff --git a/AnnService/inc/Core/SPANN/Index.h b/AnnService/inc/Core/SPANN/Index.h index 363ea1e1e..4afe7a051 100644 --- a/AnnService/inc/Core/SPANN/Index.h +++ b/AnnService/inc/Core/SPANN/Index.h @@ -72,6 +72,9 @@ namespace SPTAG std::shared_timed_mutex m_dataDeleteLock; std::shared_timed_mutex m_checkPointLock; + std::shared_ptr> m_freeWorkSpaceIds; + std::atomic m_workspaceCount = 0; + public: Index() { @@ -83,6 +86,30 @@ namespace SPTAG ~Index() {} + void InitWorkSpace(ExtraWorkSpace* p_exWorkSpace, bool clear = false) const + { + if (clear) { + p_exWorkSpace->Clear(m_options.m_searchInternalResultNum, (max(m_options.m_postingPageLimit, m_options.m_searchPostingPageLimit) + m_options.m_bufferLength) << PageSizeEx, true, m_options.m_enableDataCompression); + } + else { + p_exWorkSpace->Initialize(m_options.m_maxCheck, m_options.m_hashExp, max(m_options.m_searchInternalResultNum, m_options.m_reassignK), (max(m_options.m_postingPageLimit, m_options.m_searchPostingPageLimit) + m_options.m_bufferLength) << PageSizeEx, true, m_options.m_enableDataCompression); + int wid = 0; + if (m_freeWorkSpaceIds == nullptr || !m_freeWorkSpaceIds->try_pop(wid)) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "FreeWorkSpaceIds is not initalized or the workspace number is not enough! Please increase iothread number.\n"); + p_exWorkSpace->m_diskRequests[0].m_status = -1; + return; + } + for (auto & req : p_exWorkSpace->m_diskRequests) + { + req.m_status = wid; + } + p_exWorkSpace->m_callback = [m_freeWorkSpaceIds = m_freeWorkSpaceIds, wid] () { + if (m_freeWorkSpaceIds) m_freeWorkSpaceIds->push(wid); + }; + } + } + inline std::shared_ptr GetMemoryIndex() { return m_topIndex; } inline std::shared_ptr GetDiskIndex(int layer = 0) { if (layer < m_extraSearchers.size()) return m_extraSearchers[layer]; else return nullptr; } inline Options* GetOptions() { return &m_options; } diff --git a/AnnService/src/Core/SPANN/SPANNIndex.cpp b/AnnService/src/Core/SPANN/SPANNIndex.cpp index bd3ae519b..8081aac7c 100644 --- a/AnnService/src/Core/SPANN/SPANNIndex.cpp +++ b/AnnService/src/Core/SPANN/SPANNIndex.cpp @@ -134,6 +134,16 @@ template ErrorCode Index::LoadIndexDataFromMemory(const std::vec if (!m_extraSearchers[i]->LoadIndex(m_options)) return ErrorCode::Fail; } + + if (m_freeWorkSpaceIds == nullptr) { + m_freeWorkSpaceIds.reset(new Helper::Concurrent::ConcurrentQueue()); + int maxIOThreads = (m_options.m_storage == Storage::STATIC) ? max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) : max(m_options.m_ioThreads, (2 * max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) + + m_options.m_layers * (m_options.m_insertThreadNum + m_options.m_reassignThreadNum + m_options.m_appendThreadNum) + 8)); + for (int i = 0; i < maxIOThreads; i++) { + m_freeWorkSpaceIds->push(i); + } + m_workspaceCount = maxIOThreads; + } return ErrorCode::Success; } @@ -209,6 +219,16 @@ ErrorCode Index::LoadIndexData(const std::vectorLoadIndex(m_options))) return ErrorCode::Fail; } + + if (m_freeWorkSpaceIds == nullptr) { + m_freeWorkSpaceIds.reset(new Helper::Concurrent::ConcurrentQueue()); + int maxIOThreads = (m_options.m_storage == Storage::STATIC) ? max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) : max(m_options.m_ioThreads, (2 * max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) + + m_options.m_layers * (m_options.m_insertThreadNum + m_options.m_reassignThreadNum + m_options.m_appendThreadNum) + 8)); + for (int i = 0; i < maxIOThreads; i++) { + m_freeWorkSpaceIds->push(i); + } + m_workspaceCount = maxIOThreads; + } return ErrorCode::Success; } @@ -401,11 +421,11 @@ std::shared_ptr Index::GetIterator(const void *p_target, bool if (!extraWorkspace) { extraWorkspace.reset(new ExtraWorkSpace()); - m_extraSearchers.back()->InitWorkSpace(extraWorkspace.get(), false); + InitWorkSpace(extraWorkspace.get(), false); } else { - m_extraSearchers.back()->InitWorkSpace(extraWorkspace.get(), true); + InitWorkSpace(extraWorkspace.get(), true); } extraWorkspace->m_filterFunc = p_filterFunc; extraWorkspace->m_relaxedMono = false; @@ -507,11 +527,11 @@ ErrorCode Index::SearchDiskIndex(QueryResult &p_query, SearchStats *p_stats, if (!workSpace) { workSpace.reset(new ExtraWorkSpace()); - m_extraSearchers.back()->InitWorkSpace(workSpace.get(), false); + InitWorkSpace(workSpace.get(), false); } else { - m_extraSearchers.back()->InitWorkSpace(workSpace.get(), true); + InitWorkSpace(workSpace.get(), true); } p_exWorkSpace = workSpace.get(); } @@ -661,11 +681,11 @@ ErrorCode Index::DebugSearchDiskIndex(QueryResult &p_query, int p_subInternal if (!workSpace) { workSpace.reset(new ExtraWorkSpace()); - m_extraSearchers.back()->InitWorkSpace(workSpace.get(), false); + InitWorkSpace(workSpace.get(), false); } else { - m_extraSearchers.back()->InitWorkSpace(workSpace.get(), true); + InitWorkSpace(workSpace.get(), true); } workSpace->m_deduper.clear(); @@ -1207,6 +1227,16 @@ template ErrorCode Index::BuildIndexInternal(std::shared_ptr()); + int maxIOThreads = (m_options.m_storage == Storage::STATIC) ? max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) : max(m_options.m_ioThreads, (2 * max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) + + m_options.m_layers * (m_options.m_insertThreadNum + m_options.m_reassignThreadNum + m_options.m_appendThreadNum) + 8)); + for (int i = 0; i < maxIOThreads; i++) { + m_freeWorkSpaceIds->push(i); + } + m_workspaceCount = maxIOThreads; + } + if (m_db == nullptr && m_options.m_shareDB) PrepareDB(m_db); auto ret = BuildIndexInternalLayer(vectorReader); @@ -1530,11 +1560,11 @@ ErrorCode Index::AddIndex(const void *p_data, SizeType p_vectorNum, Dimension if (!workSpace) { workSpace.reset(new ExtraWorkSpace()); - m_extraSearchers.back()->InitWorkSpace(workSpace.get(), false); + InitWorkSpace(workSpace.get(), false); } else { - m_extraSearchers.back()->InitWorkSpace(workSpace.get(), true); + InitWorkSpace(workSpace.get(), true); } workSpace->m_deduper.clear(); workSpace->m_postingIDs.clear(); @@ -1568,11 +1598,11 @@ ErrorCode Index::Check() if (!workSpace) { workSpace.reset(new ExtraWorkSpace()); - m_extraSearchers[layer]->InitWorkSpace(workSpace.get(), false); + InitWorkSpace(workSpace.get(), false); } else { - m_extraSearchers[layer]->InitWorkSpace(workSpace.get(), true); + InitWorkSpace(workSpace.get(), true); } size_t i = 0; while (true) From 8f0e07584547dde497f401dc7da59734b7d612cf Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Fri, 17 Apr 2026 02:53:48 +0000 Subject: [PATCH 08/26] fix iocp overlap in each layer issue --- .../src/Core/SPANN/ExtraFileController.cpp | 2 +- AnnService/src/Core/SPANN/SPANNIndex.cpp | 42 ++++++++++--------- Test/src/SPFreshTest.cpp | 12 +++--- 3 files changed, 31 insertions(+), 25 deletions(-) diff --git a/AnnService/src/Core/SPANN/ExtraFileController.cpp b/AnnService/src/Core/SPANN/ExtraFileController.cpp index b5db83822..24c839455 100644 --- a/AnnService/src/Core/SPANN/ExtraFileController.cpp +++ b/AnnService/src/Core/SPANN/ExtraFileController.cpp @@ -25,7 +25,7 @@ bool FileIO::BlockController::Initialize(SPANN::Options &p_opt, int p_layer) #ifndef _MSC_VER O_RDWR | O_DIRECT, numblocks, 2, 2, max(p_opt.m_ioThreads, (2 * max(p_opt.m_searchThreadNum, p_opt.m_iSSDNumberOfThreads) + - p_opt.m_insertThreadNum + p_opt.m_reassignThreadNum + p_opt.m_appendThreadNum)), + (p_opt.m_layers + 1) * (p_opt.m_insertThreadNum + p_opt.m_reassignThreadNum + p_opt.m_appendThreadNum))), ((std::uint64_t)p_opt.m_startFileSize) << 30 #else GENERIC_READ | GENERIC_WRITE, numblocks, 2, 2, diff --git a/AnnService/src/Core/SPANN/SPANNIndex.cpp b/AnnService/src/Core/SPANN/SPANNIndex.cpp index 8081aac7c..78f34f4b7 100644 --- a/AnnService/src/Core/SPANN/SPANNIndex.cpp +++ b/AnnService/src/Core/SPANN/SPANNIndex.cpp @@ -119,6 +119,17 @@ template ErrorCode Index::LoadIndexDataFromMemory(const std::vec SizeType globalID = *(m_topLocalToGlobalID[i]); m_topGlobalToLocalID[globalID] = i; } + + if (m_freeWorkSpaceIds == nullptr) { + m_freeWorkSpaceIds.reset(new Helper::Concurrent::ConcurrentQueue()); + int maxIOThreads = (m_options.m_storage == Storage::STATIC) ? max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) : max(m_options.m_ioThreads, (2 * max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) + + (m_options.m_layers + 1) * (m_options.m_insertThreadNum + m_options.m_reassignThreadNum + m_options.m_appendThreadNum))); + for (int i = 0; i < maxIOThreads; i++) { + m_freeWorkSpaceIds->push(i); + } + m_workspaceCount = maxIOThreads; + } + if (m_options.m_shareDB) PrepareDB(m_db); m_extraSearchers.resize(m_options.m_layers); for (int i = m_options.m_layers - 1; i >= 0; i--) { @@ -135,15 +146,6 @@ template ErrorCode Index::LoadIndexDataFromMemory(const std::vec return ErrorCode::Fail; } - if (m_freeWorkSpaceIds == nullptr) { - m_freeWorkSpaceIds.reset(new Helper::Concurrent::ConcurrentQueue()); - int maxIOThreads = (m_options.m_storage == Storage::STATIC) ? max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) : max(m_options.m_ioThreads, (2 * max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) + - m_options.m_layers * (m_options.m_insertThreadNum + m_options.m_reassignThreadNum + m_options.m_appendThreadNum) + 8)); - for (int i = 0; i < maxIOThreads; i++) { - m_freeWorkSpaceIds->push(i); - } - m_workspaceCount = maxIOThreads; - } return ErrorCode::Success; } @@ -197,6 +199,17 @@ ErrorCode Index::LoadIndexData(const std::vector()); + int maxIOThreads = (m_options.m_storage == Storage::STATIC) ? max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) : max(m_options.m_ioThreads, (2 * max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) + + (m_options.m_layers + 1) * (m_options.m_insertThreadNum + m_options.m_reassignThreadNum + m_options.m_appendThreadNum))); + for (int i = 0; i < maxIOThreads; i++) { + m_freeWorkSpaceIds->push(i); + } + m_workspaceCount = maxIOThreads; + } + if (m_options.m_shareDB) PrepareDB(m_db); m_extraSearchers.resize(m_options.m_layers); for (int i = m_options.m_layers - 1; i >= 0; i--) { @@ -220,15 +233,6 @@ ErrorCode Index::LoadIndexData(const std::vector()); - int maxIOThreads = (m_options.m_storage == Storage::STATIC) ? max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) : max(m_options.m_ioThreads, (2 * max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) + - m_options.m_layers * (m_options.m_insertThreadNum + m_options.m_reassignThreadNum + m_options.m_appendThreadNum) + 8)); - for (int i = 0; i < maxIOThreads; i++) { - m_freeWorkSpaceIds->push(i); - } - m_workspaceCount = maxIOThreads; - } return ErrorCode::Success; } @@ -1230,7 +1234,7 @@ template ErrorCode Index::BuildIndexInternal(std::shared_ptr()); int maxIOThreads = (m_options.m_storage == Storage::STATIC) ? max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) : max(m_options.m_ioThreads, (2 * max(m_options.m_searchThreadNum, m_options.m_iSSDNumberOfThreads) + - m_options.m_layers * (m_options.m_insertThreadNum + m_options.m_reassignThreadNum + m_options.m_appendThreadNum) + 8)); + (m_options.m_layers + 1) * (m_options.m_insertThreadNum + m_options.m_reassignThreadNum + m_options.m_appendThreadNum))); for (int i = 0; i < maxIOThreads; i++) { m_freeWorkSpaceIds->push(i); } diff --git a/Test/src/SPFreshTest.cpp b/Test/src/SPFreshTest.cpp index f022064d8..b317dba49 100644 --- a/Test/src/SPFreshTest.cpp +++ b/Test/src/SPFreshTest.cpp @@ -627,7 +627,7 @@ ErrorCode QuantizeVectors(const std::shared_ptr& quantizer, template void RunBenchmark(const std::string &vectorPath, const std::string &queryPath, const std::string &truthPath, DistCalcMethod distMethod, const std::string &indexPath, int dimension, int baseVectorCount, - int insertVectorCount, int deleteVectorCount, int batches, int topK, int numSearchThreads, int numInsertThreads, int numQueries, + int insertVectorCount, int deleteVectorCount, int batches, int topK, int numSearchThreads, int numInsertThreads, int numSearchDuringInsertThreads, int numQueries, const std::string &outputFile = "output.json", const bool rebuild = true, const int resume = -1, const std::string &quantizerFilePath = std::string(""), int quantizedDim = 0, int layers = 1) { @@ -852,7 +852,7 @@ void RunBenchmark(const std::string &vectorPath, const std::string &queryPath, c std::shared_ptr addmetaset = TestUtils::TestDataGenerator::LoadMetadataSet(paddmeta, paddmetaidx, insertStart, insertBatchSize); start = std::chrono::high_resolution_clock::now(); InsertVectors(static_cast *>(cloneIndex.get()), numInsertThreads, insertBatchSize, - addset, addmetaset, numSearchThreads, queryset, numQueries, SearchK, &jsonFile, 0); + addset, addmetaset, numSearchDuringInsertThreads, queryset, numQueries, SearchK, &jsonFile, 0); end = std::chrono::high_resolution_clock::now(); } seconds = @@ -1978,6 +1978,7 @@ BOOST_AUTO_TEST_CASE(BenchmarkFromConfig) int topK = iniReader.GetParameter("Benchmark", "TopK", 10); int numSearchThreads = iniReader.GetParameter("Benchmark", "NumSearchThreads", 8); int numInsertThreads = iniReader.GetParameter("Benchmark", "NumInsertThreads", 8); + int numSearchDuringInsertThreads = iniReader.GetParameter("Benchmark", "NumSearchDuringInsertThreads", 1); int numQueries = iniReader.GetParameter("Benchmark", "NumQueries", 1000); int layers = iniReader.GetParameter("Benchmark", "Layers", 1); DistCalcMethod distMethod = iniReader.GetParameter("Benchmark", "DistMethod", DistCalcMethod::L2); @@ -1994,6 +1995,7 @@ BOOST_AUTO_TEST_CASE(BenchmarkFromConfig) BOOST_TEST_MESSAGE("Top-K: " << topK); BOOST_TEST_MESSAGE("SearchThreads: " << numSearchThreads); BOOST_TEST_MESSAGE("InsertThreads: " << numInsertThreads); + BOOST_TEST_MESSAGE("SearchDuringInsertThreads: " << numSearchDuringInsertThreads); BOOST_TEST_MESSAGE("Queries: " << numQueries); BOOST_TEST_MESSAGE("Layers: " << layers); BOOST_TEST_MESSAGE("DistMethod: " << Helper::Convert::ConvertToString(distMethod)); @@ -2012,19 +2014,19 @@ BOOST_AUTO_TEST_CASE(BenchmarkFromConfig) if (valueType == VectorValueType::Float) { RunBenchmark(vectorPath, queryPath, truthPath, distMethod, indexPath, dimension, baseVectorCount, - insertVectorCount, deleteVectorCount, batchNum, topK, numSearchThreads, numInsertThreads, numQueries, outputFile, + insertVectorCount, deleteVectorCount, batchNum, topK, numSearchThreads, numInsertThreads, numSearchDuringInsertThreads, numQueries, outputFile, rebuild, resume, quantizerFilePath, quantizedDim, layers); } else if (valueType == VectorValueType::Int8) { RunBenchmark(vectorPath, queryPath, truthPath, distMethod, indexPath, dimension, baseVectorCount, - insertVectorCount, deleteVectorCount, batchNum, topK, numSearchThreads, numInsertThreads, numQueries, + insertVectorCount, deleteVectorCount, batchNum, topK, numSearchThreads, numInsertThreads, numSearchDuringInsertThreads, numQueries, outputFile, rebuild, resume, quantizerFilePath, quantizedDim, layers); } else if (valueType == VectorValueType::UInt8) { RunBenchmark(vectorPath, queryPath, truthPath, distMethod, indexPath, dimension, baseVectorCount, - insertVectorCount, deleteVectorCount, batchNum, topK, numSearchThreads, numInsertThreads, numQueries, + insertVectorCount, deleteVectorCount, batchNum, topK, numSearchThreads, numInsertThreads, numSearchDuringInsertThreads, numQueries, outputFile, rebuild, resume, quantizerFilePath, quantizedDim, layers); } From 5e47d5e5bd11fbd4484a2376fd9c1e7351f8b726 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Wed, 22 Apr 2026 07:49:50 +0000 Subject: [PATCH 09/26] fix split center order --- AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index c9b04f28e..7aac89f9b 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -573,6 +573,8 @@ namespace SPTAG::SPANN { SizeType newHeadVID = -1; int first = 0; newPostingLists.resize(2); + newHeadsID.resize(2); + newHeadsVec.resize(2); for (int k : ks) { if (args.counts[k] == 0) continue; first = (k == 0) ? 0 : args.counts[0]; @@ -584,8 +586,8 @@ namespace SPTAG::SPANN { //Serialize(ptr, localIndicesInsert[localIndices[first + j]], localIndicesInsertVersion[localIndices[first + j]], smallSample[localIndices[first + j]]); } if (!theSameHead && m_headIndex->ComputeDistance(args.centers + k * args._D, headVec->c_str()) < Epsilon) { - newHeadsID.push_back(headID); - newHeadsVec.push_back(headVec); + newHeadsID[k] = headID; + newHeadsVec[k] = headVec; newHeadVID = headID; theSameHead = true; if (!hasHead && headj != -1) newPostingLists[k] += postingList.substr(headj * m_vectorInfoSize, m_vectorInfoSize); @@ -603,14 +605,14 @@ namespace SPTAG::SPANN { newHeadVID = *((SizeType*)(postingP + args.clusterIdx[k] * m_vectorInfoSize)); uint8_t version = *((uint8_t*)(postingP + args.clusterIdx[k] * m_vectorInfoSize + sizeof(SizeType))); - newHeadsID.push_back(newHeadVID); - newHeadsVec.push_back(std::make_shared((char *)(args.centers + k * args._D), m_vectorDataSize)); + newHeadsID[k] = newHeadVID; + newHeadsVec[k] = std::make_shared((char *)(args.centers + k * args._D), m_vectorDataSize); std::unique_lock anotherLock(m_rwLocks[newHeadVID], std::defer_lock); if (m_rwLocks.hash_func(newHeadVID) != m_rwLocks.hash_func(headID)) { int retry = 0; - while (!anotherLock.try_lock() && retry < 3) + while (!anotherLock.try_lock() && retry < 10) { //SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, // "Split: new head VID %lld is being locked. Wait for lock and do " @@ -622,7 +624,7 @@ namespace SPTAG::SPANN { if (!anotherLock.owns_lock()) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, - "Split: new head VID %lld is being locked after 3 retries. Skip merging and return split failed...\n", + "Split: new head VID %lld is being locked after 10 retries. Skip merging and return split failed...\n", (std::int64_t)(newHeadVID)); { std::unique_lock tmplock(m_splitListLock); From c83d2d5993fe6f9961d6e8e9043b44415c995218 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Fri, 24 Apr 2026 08:08:37 +0000 Subject: [PATCH 10/26] fix headvector version --- .../inc/Core/SPANN/ExtraDynamicSearcher.h | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 7aac89f9b..16fa39000 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -259,9 +259,9 @@ namespace SPTAG::SPANN { SPANN::Index* GetHeadIndex() const { return m_headIndex; } - bool CheckIsNeedReassign(std::vector>& newHeadsVec, ValueType* data, std::shared_ptr splitHeadVec, float_t headToSplitHeadDist, float_t currentHeadDist, bool isInSplitHead) + bool CheckIsNeedReassign(std::vector>& newHeadsVec, ValueType* data, ValueType* splitHeadVec, float_t headToSplitHeadDist, float_t currentHeadDist, bool isInSplitHead) { - float_t splitHeadDist = m_headIndex->ComputeDistance(data, splitHeadVec->data()); + float_t splitHeadDist = m_headIndex->ComputeDistance(data, splitHeadVec); if (isInSplitHead) { if (splitHeadDist >= currentHeadDist) return false; @@ -459,7 +459,6 @@ namespace SPTAG::SPANN { std::vector localIndices; localIndices.reserve(postVectorNum); uint8_t* vectorId = postingP; - SizeType headj = -1; bool hasHead = false; for (SizeType j = 0; j < postVectorNum; j++, vectorId += m_vectorInfoSize) { @@ -481,19 +480,19 @@ namespace SPTAG::SPANN { } } - if (VID == headID) { - headj = j; - headVec = std::make_shared((char*)vectorId + m_metaDataSize, m_vectorDataSize); - } + if (VID == headID) headVec = std::make_shared((char*)vectorId, m_vectorInfoSize); + //if (VID >= m_versionMap.Count()) SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "DEBUG: vector ID:%d total size:%d\n", VID, m_versionMap.Count()); if (m_versionMap.Deleted(VID) || m_versionMap.GetVersion(VID) != version) continue; if (VID == headID) hasHead = true; localIndices.push_back(j); } - if (headj < 0) { + if (headVec == nullptr) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split fail: cannot find head in posting! headID:%lld\n", (std::int64_t)headID); return ErrorCode::Fail; + } else { + *((uint8_t*)(headVec->data() + sizeof(SizeType))) = m_versionMap.GetVersion(headID); } // double gcEndTime = sw.getElapsedMs(); // m_splitGcCost += gcEndTime; @@ -507,8 +506,8 @@ namespace SPTAG::SPANN { memcpy(ptr, postingList.data() + localIndices[j] * m_vectorInfoSize, m_vectorInfoSize); } if (!hasHead) { - Serialize(ptr, headID, m_versionMap.GetVersion(headID), headVec->data()); - localIndices.push_back(headj); + memcpy(ptr, headVec->data(), m_vectorInfoSize); + localIndices.push_back(0); // just to make sure head is included in posting, the index won't be used } postingList.resize(localIndices.size() * m_vectorInfoSize); if ((ret=db->Put(DBKey(headID), postingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { @@ -550,7 +549,7 @@ namespace SPTAG::SPANN { memcpy(ptr, postingList.c_str() + localIndices[j] * m_vectorInfoSize, m_vectorInfoSize); if (*((SizeType*)(ptr)) == headID) hasHead = true; } - if (!hasHead) memcpy(newpostingList.data(), postingList.c_str() + headj * m_vectorInfoSize, m_vectorInfoSize); + if (!hasHead) memcpy(newpostingList.data(), headVec->data(), m_vectorInfoSize); SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Cluserting Failed (The same vector), Only Keep %d vectors.\n", cut); if ((ret=db->Put(DBKey(headID), newpostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { @@ -565,7 +564,7 @@ namespace SPTAG::SPANN { } std::vector ks(2, 0); - if (m_headIndex->ComputeDistance(args.centers, headVec->c_str()) < m_headIndex->ComputeDistance(args.centers + args._D, headVec->c_str())) { + if (m_headIndex->ComputeDistance(args.centers, headVec->c_str() + m_metaDataSize) < m_headIndex->ComputeDistance(args.centers + args._D, headVec->c_str() + m_metaDataSize)) { ks[0] = 1; } else { ks[1] = 1; @@ -583,14 +582,14 @@ namespace SPTAG::SPANN { for (int j = 0; j < args.counts[k]; j++, ptr += m_vectorInfoSize) { memcpy(ptr, postingList.c_str() + localIndices[first + j] * m_vectorInfoSize, m_vectorInfoSize); - //Serialize(ptr, localIndicesInsert[localIndices[first + j]], localIndicesInsertVersion[localIndices[first + j]], smallSample[localIndices[first + j]]); } - if (!theSameHead && m_headIndex->ComputeDistance(args.centers + k * args._D, headVec->c_str()) < Epsilon) { + if (!theSameHead && m_headIndex->ComputeDistance(args.centers + k * args._D, headVec->c_str() + m_metaDataSize) < Epsilon) { newHeadsID[k] = headID; - newHeadsVec[k] = headVec; + newHeadsVec[k] = headVec->substr(m_metaDataSize, m_vectorDataSize); newHeadVID = headID; theSameHead = true; - if (!hasHead && headj != -1) newPostingLists[k] += postingList.substr(headj * m_vectorInfoSize, m_vectorInfoSize); + if (!hasHead) newPostingLists[k] += *headVec; + auto splitPutBegin = std::chrono::high_resolution_clock::now(); if ((ret=db->Put(DBKey(newHeadVID), newPostingLists[k], MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to override posting %lld\n", (std::int64_t)(newHeadVID)); @@ -837,6 +836,8 @@ namespace SPTAG::SPANN { if (headVec == nullptr) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "MergePostings fail: cannot find head vector in posting! headID:%lld\n", (std::int64_t)headID); return ErrorCode::Fail; + } else { + *((uint8_t*)(headVec->data() + sizeof(SizeType))) = m_versionMap.GetVersion(headID); } if (currentLength > m_mergeThreshold) @@ -1061,11 +1062,11 @@ namespace SPTAG::SPANN { std::vector &postingLists, std::vector &newHeadsID, std::vector> &newHeadsVec, bool theSameHead) { - auto headVector = reinterpret_cast(headVec->data()); + auto headVector = reinterpret_cast(headVec->data() + m_metaDataSize); std::vector newHeadsDist; std::set reAssignVectorsTopK; - newHeadsDist.push_back(m_headIndex->ComputeDistance(headVec->data(), newHeadsVec[0]->data())); - newHeadsDist.push_back(m_headIndex->ComputeDistance(headVec->data(), newHeadsVec[1]->data())); + newHeadsDist.push_back(m_headIndex->ComputeDistance(headVector, newHeadsVec[0]->data())); + newHeadsDist.push_back(m_headIndex->ComputeDistance(headVector, newHeadsVec[1]->data())); for (int i = 0; i < postingLists.size(); i++) { auto& postingList = postingLists[i]; size_t postVectorNum = postingList.size() / m_vectorInfoSize; @@ -1079,7 +1080,7 @@ namespace SPTAG::SPANN { if (reAssignVectorsTopK.find(vid) == reAssignVectorsTopK.end() && !m_versionMap.Deleted(vid) && m_versionMap.GetVersion(vid) == version) { m_stat.m_reAssignScanNum++; float dist = m_headIndex->ComputeDistance(newHeadsVec[i]->data(), vector); - if (CheckIsNeedReassign(newHeadsVec, vector, headVec, newHeadsDist[i], dist, true)) { + if (CheckIsNeedReassign(newHeadsVec, vector, headVector, newHeadsDist[i], dist, true)) { ReassignAsync(std::make_shared((char*)vectorId, m_vectorInfoSize), newHeadsID[i]); reAssignVectorsTopK.insert(vid); } @@ -1141,7 +1142,7 @@ namespace SPTAG::SPANN { if (reAssignVectorsTopK.find(vid) == reAssignVectorsTopK.end() && !m_versionMap.Deleted(vid) && m_versionMap.GetVersion(vid) == version) { m_stat.m_reAssignScanNum++; float dist = m_headIndex->ComputeDistance(HeadPrevTopKVec[i]->data(), vector); - if (CheckIsNeedReassign(newHeadsVec, vector, headVec, newHeadsDist[i], dist, false)) { + if (CheckIsNeedReassign(newHeadsVec, vector, headVector, newHeadsDist[i], dist, false)) { ReassignAsync(std::make_shared((char*)vectorId, m_vectorInfoSize), HeadPrevTopK[i]); reAssignVectorsTopK.insert(vid); } From 0d5d43efde363229958fbfebab7a796026eccf01 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Fri, 24 Apr 2026 08:08:58 +0000 Subject: [PATCH 11/26] remove ChecksumCheck --- Test/src/SPFreshTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Test/src/SPFreshTest.cpp b/Test/src/SPFreshTest.cpp index b317dba49..62aa7f258 100644 --- a/Test/src/SPFreshTest.cpp +++ b/Test/src/SPFreshTest.cpp @@ -187,7 +187,7 @@ std::shared_ptr BuildIndex(const std::string &outDirectory, std::sh StartFileSizeGB=1 OneClusterCutMax=true ConsistencyCheck=true - ChecksumCheck=true + ChecksumCheck=false ChecksumInRead=false AsyncMergeInSearch=false DeletePercentageForRefine=0.4 From 339dabf1d15ea99dff5c5fd1a05e6126024f3742 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Fri, 24 Apr 2026 08:27:38 +0000 Subject: [PATCH 12/26] fix compiling issues --- AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 16fa39000..3fb7f8818 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -259,7 +259,7 @@ namespace SPTAG::SPANN { SPANN::Index* GetHeadIndex() const { return m_headIndex; } - bool CheckIsNeedReassign(std::vector>& newHeadsVec, ValueType* data, ValueType* splitHeadVec, float_t headToSplitHeadDist, float_t currentHeadDist, bool isInSplitHead) + bool CheckIsNeedReassign(std::vector>& newHeadsVec, const ValueType* data, const ValueType* splitHeadVec, float_t headToSplitHeadDist, float_t currentHeadDist, bool isInSplitHead) { float_t splitHeadDist = m_headIndex->ComputeDistance(data, splitHeadVec); @@ -585,7 +585,7 @@ namespace SPTAG::SPANN { } if (!theSameHead && m_headIndex->ComputeDistance(args.centers + k * args._D, headVec->c_str() + m_metaDataSize) < Epsilon) { newHeadsID[k] = headID; - newHeadsVec[k] = headVec->substr(m_metaDataSize, m_vectorDataSize); + newHeadsVec[k] = std::make_shared(headVec->c_str() + m_metaDataSize, m_vectorDataSize); newHeadVID = headID; theSameHead = true; if (!hasHead) newPostingLists[k] += *headVec; From 563a31426008e811845ffedff1ebff94dcb03d20 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Thu, 30 Apr 2026 02:36:47 +0000 Subject: [PATCH 13/26] align two code bases --- .../inc/Core/SPANN/ExtraDynamicSearcher.h | 400 +++--------------- .../inc/Core/SPANN/ExtraTiKVController.h | 169 +++++++- AnnService/src/Core/SPANN/SPANNIndex.cpp | 6 +- Test/src/VersionMapTest.cpp | 2 +- 4 files changed, 223 insertions(+), 354 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 7f3b9dab9..c27e437b9 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -51,73 +51,6 @@ extern "C" bool RocksDbIOUringEnable() { return true; } namespace SPTAG::SPANN { - // Simple sharded LRU cache for posting vector counts. - // Thread-safe: each shard has its own mutex. - class PostingCountCache { - public: - PostingCountCache(size_t capacity = 100000, int shards = 16) - : m_shards(shards), m_capacity(std::max(capacity / shards, (size_t)1)) { - m_data.resize(shards); - m_mutexes = std::make_unique(shards); - } - - // Returns (count, true) on hit, (0, false) on miss. - std::pair Get(SizeType headID) { - int s = Shard(headID); - std::lock_guard lock(m_mutexes[s]); - auto& shard = m_data[s]; - auto it = shard.map.find(headID); - if (it == shard.map.end()) return {0, false}; - // Move to front (most recently used) - shard.order.splice(shard.order.begin(), shard.order, it->second); - return {it->second->second, true}; - } - - void Put(SizeType headID, int count) { - int s = Shard(headID); - std::lock_guard lock(m_mutexes[s]); - auto& shard = m_data[s]; - auto it = shard.map.find(headID); - if (it != shard.map.end()) { - it->second->second = count; - shard.order.splice(shard.order.begin(), shard.order, it->second); - return; - } - // Evict if full - if (shard.map.size() >= m_capacity) { - auto& back = shard.order.back(); - shard.map.erase(back.first); - shard.order.pop_back(); - } - shard.order.emplace_front(headID, count); - shard.map[headID] = shard.order.begin(); - } - - void Remove(SizeType headID) { - int s = Shard(headID); - std::lock_guard lock(m_mutexes[s]); - auto& shard = m_data[s]; - auto it = shard.map.find(headID); - if (it != shard.map.end()) { - shard.order.erase(it->second); - shard.map.erase(it); - } - } - - private: - int Shard(SizeType headID) const { return static_cast(headID) % m_shards; } - - struct ShardData { - std::list> order; // front = MRU - std::unordered_map>::iterator> map; - }; - - int m_shards; - size_t m_capacity; // per shard - std::vector m_data; - std::unique_ptr m_mutexes; - }; - template class ExtraDynamicSearcher : public IExtraSearcher { @@ -281,10 +214,6 @@ namespace SPTAG::SPANN { size_t m_lastProgressLogMerge = std::numeric_limits::max(); size_t m_lastProgressLogReassign = std::numeric_limits::max(); - // Posting count cache for multi-chunk mode. - // Tracks approximate vector count per posting to decide when to split. - std::unique_ptr m_postingCountCache; - bool ShouldLogProgress(size_t totalJobs, bool force = false) { auto now = std::chrono::steady_clock::now(); std::lock_guard lock(m_progressLogMutex); @@ -364,12 +293,6 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Posting size limit: %d, search limit: %f, merge threshold: %d\n", m_postingSizeLimit, p_opt.m_latencyLimit, m_mergeThreshold); SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "[CONFIG] layer=%d DistributedVersionMap=%s UseMultiChunkPosting=%s PostingPageLimit=%d\n", layer, p_opt.m_distributedVersionMap ? "true" : "false", p_opt.m_useMultiChunkPosting ? "true" : "false", p_opt.m_postingPageLimit); - - // Initialize posting count cache for multi-chunk mode - if (p_opt.m_useMultiChunkPosting && p_opt.m_storage == Storage::TIKVIO) { - m_postingCountCache = std::make_unique(100000, 16); - SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "PostingCountCache initialized (capacity=100000, shards=16) for layer %d\n", layer); - } } ~ExtraDynamicSearcher() {} @@ -498,7 +421,7 @@ namespace SPTAG::SPANN { // ForceCompaction std::string postingList; - if ((ret = GetPostingFromDB(globalID, &postingList, MaxTimeout, &(workSpace.m_diskRequests))) != + if ((ret = db->Get(DBKey(globalID), &postingList, MaxTimeout, &(workSpace.m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, @@ -544,7 +467,7 @@ namespace SPTAG::SPANN { if (vectorCount <= m_mergeThreshold) mergelist.insert(globalID); postingList.resize(vectorCount * m_vectorInfoSize); - if ((ret = PutPostingToDB(globalID, postingList, MaxTimeout, + if ((ret = db->Put(DBKey(globalID), postingList, MaxTimeout, &(workSpace.m_diskRequests))) != ErrorCode::Success) { @@ -613,7 +536,7 @@ namespace SPTAG::SPANN { std::string postingList; auto splitGetBegin = std::chrono::high_resolution_clock::now(); { - if ((ret=GetPostingFromDB(headID, &postingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != + if ((ret=db->Get(DBKey(headID), &postingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, @@ -686,7 +609,7 @@ namespace SPTAG::SPANN { localIndices.push_back(0); // just to make sure head is included in posting, the index won't be used } postingList.resize(localIndices.size() * m_vectorInfoSize); - if ((ret=PutPostingToDB(headID, postingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { + if ((ret=db->Put(DBKey(headID), postingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split Fail to write back posting %lld\n", (std::int64_t)(headID)); return ret; } @@ -727,7 +650,7 @@ namespace SPTAG::SPANN { if (!hasHead) memcpy(newpostingList.data(), headVec->data(), m_vectorInfoSize); SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Cluserting Failed (The same vector), Only Keep %d vectors.\n", cut); - if ((ret=PutPostingToDB(headID, newpostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { + if ((ret=db->Put(DBKey(headID), newpostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split fail to override posting cut to limit for posting %lld\n", (std::int64_t)(headID)); return ret; } @@ -763,7 +686,7 @@ namespace SPTAG::SPANN { if (!hasHead) newPostingLists[k] += *headVec; auto splitPutBegin = std::chrono::high_resolution_clock::now(); - if ((ret=PutPostingToDB(newHeadVID, newPostingLists[k], MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { + if ((ret=db->Put(DBKey(newHeadVID), newPostingLists[k], MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to override posting %lld\n", (std::int64_t)(newHeadVID)); return ret; } @@ -797,7 +720,12 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split: new head VID %lld is being locked after %d retries. Skip merging and return split failed...\n", (std::int64_t)(newHeadVID), retry); - return ErrorCode::Fail; + { + std::unique_lock tmplock(m_splitListLock); + m_splitList.unsafe_erase(headID); + } + SplitAsync(headID, postingList.size() / m_vectorInfoSize); + return ErrorCode::Success; } } @@ -808,7 +736,7 @@ namespace SPTAG::SPANN { std::set vectorIdSet; std::string currentPostingList; { - if ((ret = GetPostingFromDB(newHeadVID, ¤tPostingList, MaxTimeout, + if ((ret = db->Get(DBKey(newHeadVID), ¤tPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to get posting %lld\n", @@ -866,7 +794,7 @@ namespace SPTAG::SPANN { } auto splitPutBegin = std::chrono::high_resolution_clock::now(); - if ((ret = PutPostingToDB(newHeadVID, mergedPostingList, MaxTimeout, + if ((ret = db->Put(DBKey(newHeadVID), mergedPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to put posting %lld\n", @@ -885,7 +813,7 @@ namespace SPTAG::SPANN { } } else { auto splitPutBegin = std::chrono::high_resolution_clock::now(); - if ((ret=PutPostingToDB(newHeadVID, newPostingLists[k], MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { + if ((ret=db->Put(DBKey(newHeadVID), newPostingLists[k], MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to add new posting %lld\n", (std::int64_t)(newHeadVID)); return ret; } @@ -904,15 +832,13 @@ namespace SPTAG::SPANN { auto updateHeadEnd = std::chrono::high_resolution_clock::now(); elapsedMSeconds = std::chrono::duration_cast(updateHeadEnd - updateHeadBegin).count(); m_stat.m_updateHeadCost += elapsedMSeconds; - - if (m_opt->m_excludehead) m_versionMap->IncVersion(newHeadVID, &version, version); } } //SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Head id: %d split into : %d, length: %d\n", headID, newHeadVID, args.counts[k]); } if (!theSameHead) { m_headIndex->DeleteIndex(headID, m_layer + 1); - if ((ret=DeletePostingFromDB(headID)) != ErrorCode::Success) + if ((ret=db->Delete(DBKey(headID))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to delete old posting in Split\n"); return ret; @@ -974,7 +900,7 @@ namespace SPTAG::SPANN { std::string currentPostingList; ErrorCode ret; { - if ((ret = GetPostingFromDB(headID, ¤tPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != + if ((ret = db->Get(DBKey(headID), ¤tPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG( @@ -1016,7 +942,7 @@ namespace SPTAG::SPANN { if (vectorIdSet.find(headID) == vectorIdSet.end() && headVec != nullptr) { mergedPostingList += *headVec; } - if ((ret=PutPostingToDB(headID, mergedPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { + if ((ret=db->Put(DBKey(headID), mergedPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge Fail to write back posting %lld\n", (std::int64_t)headID); return ret; } @@ -1043,7 +969,6 @@ namespace SPTAG::SPANN { int dedupLength = 0; SizeType nextHeadID = -1; - SizeType deletedHeadID = -1; std::shared_ptr nextHeadVec; std::shared_ptr deletedHeadVec; std::string * deletedPostingList = nullptr; @@ -1068,7 +993,7 @@ namespace SPTAG::SPANN { } } if (!m_headIndex->ContainSample(queryResult->VID, m_layer + 1)) continue; - if ((ret=GetPostingFromDB(queryResult->VID, &nextPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { + if ((ret=db->Get(DBKey(queryResult->VID), &nextPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to get to be merged posting: %lld, get size:%d\n", (std::int64_t)(queryResult->VID), (int)(nextPostingList.size())); @@ -1103,18 +1028,17 @@ namespace SPTAG::SPANN { if (vectorIdSet.find(headID) == vectorIdSet.end() && nextVectorIdSet.find(headID) == nextVectorIdSet.end() && headVec != nullptr) { mergedPostingList += *headVec; } - if ((ret=PutPostingToDB(headID, mergedPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { + if ((ret=db->Put(DBKey(headID), mergedPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "MergePostings fail to override old posting %lld after merge\n", (std::int64_t)headID); return ret; } m_headIndex->DeleteIndex(queryResult->VID, m_layer + 1); - if ((ret=DeletePostingFromDB(queryResult->VID)) != ErrorCode::Success) + if ((ret=db->Delete(DBKey(queryResult->VID))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to delete old posting %lld in Merge\n", (std::int64_t)(queryResult->VID)); return ret; } nextHeadID = headID; - deletedHeadID = queryResult->VID; nextHeadVec = headVec; deletedHeadVec = resultVec; deletedPostingList = &nextPostingList; @@ -1124,18 +1048,17 @@ namespace SPTAG::SPANN { if (vectorIdSet.find(queryResult->VID) == vectorIdSet.end() && nextVectorIdSet.find(queryResult->VID) == nextVectorIdSet.end() && resultVec != nullptr) { mergedPostingList += *resultVec; } - if ((ret=PutPostingToDB(queryResult->VID, mergedPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { + if ((ret=db->Put(DBKey(queryResult->VID), mergedPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "MergePostings fail to override posting %lld after merge\n", (std::int64_t)(queryResult->VID)); return ret; } m_headIndex->DeleteIndex(headID, m_layer + 1); - if ((ret = DeletePostingFromDB(headID)) != ErrorCode::Success) + if ((ret = db->Delete(DBKey(headID))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to delete old posting %lld in Merge\n", (std::int64_t)(headID)); return ret; } nextHeadID = queryResult->VID; - deletedHeadID = headID; nextHeadVec = resultVec; deletedHeadVec = headVec; deletedPostingList = ¤tPostingList; @@ -1161,21 +1084,6 @@ namespace SPTAG::SPANN { if (current_dist > origin_dist) ReassignAsync(std::make_shared((char*)vectorId, m_vectorInfoSize), nextHeadID); } - - if (!m_versionMap->Deleted(deletedHeadID)) - { - std::shared_ptr vectorinfo = - std::make_shared(m_vectorInfoSize, ' '); - // deletedHeadVec is the full m_vectorInfoSize record - // ([VID][version][vector]) read from the posting in - // MergePostings (see line ~990). Serialize expects a - // pointer to the raw m_vectorDataSize-byte vector, so - // skip the m_metaDataSize prefix to avoid shifting the - // vector bytes by 5 and corrupting the reassigned data. - Serialize(vectorinfo->data(), deletedHeadID, m_versionMap->GetVersion(deletedHeadID), - deletedHeadVec->data() + m_metaDataSize); - ReassignAsync(vectorinfo, -1); - } } { @@ -1196,7 +1104,7 @@ namespace SPTAG::SPANN { if (vectorIdSet.find(headID) == vectorIdSet.end() && headVec != nullptr) { mergedPostingList += *headVec; } - if ((ret=PutPostingToDB(headID, mergedPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { + if ((ret=db->Put(DBKey(headID), mergedPostingList, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge Fail to write back posting %lld\n", (std::int64_t)headID); return ret; } @@ -1361,22 +1269,13 @@ namespace SPTAG::SPANN { } auto reassignScanIOBegin = std::chrono::high_resolution_clock::now(); ErrorCode ret; - bool reassignReadOk = true; - if (IsMultiChunk()) { - auto* tikvDB = this->GetTiKVDB(); - auto dbKeys = DBKeys(HeadPrevTopK); - if ((ret = tikvDB->MultiScanPostings(*dbKeys, p_exWorkSpace->m_pageBuffers, m_hardLatencyLimit)) != ErrorCode::Success) - { - SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "ReAssign skipped: couldn't scan nearby postings (non-fatal)\n"); - reassignReadOk = false; - } - } else { + { auto keys = DBKeys(HeadPrevTopK); if ((ret = db->MultiGet(*keys, p_exWorkSpace->m_pageBuffers, m_hardLatencyLimit, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "ReAssign skipped: couldn't read nearby postings (non-fatal)\n"); - reassignReadOk = false; + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "ReAssign can't get all the near postings\n"); + return ret; } } @@ -1384,16 +1283,6 @@ namespace SPTAG::SPANN { auto elapsedMSeconds = std::chrono::duration_cast(reassignScanIOEnd - reassignScanIOBegin).count(); m_stat.m_reassignScanIOCost += elapsedMSeconds; - if (reassignReadOk) { - // IMPORTANT: snapshot each posting buffer into a local std::string - // BEFORE iterating. tryBatchReassign() below calls - // RNGSelection -> SearchHeadIndex -> SearchDiskIndex -> - // searcher->SearchIndex(p_exWorkSpace, ...) which performs its own - // MultiGet/MultiScanPostings into p_exWorkSpace->m_pageBuffers, - // overwriting (or reallocating) the very buffers we are scanning. - // Without this snapshot, the raw `postingP` pointer dangles or is - // mutated mid-loop, leading to records being interpreted as garbage - // (visible as invalid VIDs at the tail of single-chunk postings). std::vector nearbyPostings(HeadPrevTopK.size()); for (int i = 0; i < HeadPrevTopK.size(); i++) { @@ -1430,7 +1319,6 @@ namespace SPTAG::SPANN { } } } - } // reassignReadOk } // Batch Append: one Append call per target head instead of one ReassignAsync per vector @@ -1522,7 +1410,6 @@ namespace SPTAG::SPANN { } double appendIOSeconds = 0; int postingSize = 0; - bool splitPending = false; { //std::shared_lock lock(m_rwLocks[headID]); //ROCKSDB // [DIAG] measure lock wait time (suspect A: lock contention) @@ -1530,6 +1417,9 @@ namespace SPTAG::SPANN { std::unique_lock lock(m_rwLocks[headID]); //SPDK auto _lockAcq = std::chrono::high_resolution_clock::now(); uint64_t _lockWaitUs = std::chrono::duration_cast(_lockAcq - _lockBegin).count(); + IndexStats::HistAdd(m_stat.m_appendLockWaitUs, _lockWaitUs); + m_stat.m_appendLockWaitTotalUs.fetch_add(_lockWaitUs, std::memory_order_relaxed); + ErrorCode ret; if (!m_headIndex->ContainSample(headID, m_layer + 1)) { lock.unlock(); @@ -1540,87 +1430,49 @@ namespace SPTAG::SPANN { auto it = m_splitList.find(headID); if (it != m_splitList.end()) { postingSize = it->second; - splitPending = true; } } - // For multi-chunk mode, also check the posting count cache/TiKV - // since m_splitList only has entries for postings pending split. - if (IsMultiChunk() && postingSize == 0) { - postingSize = GetCachedPostingCount(headID); - } - if (!splitPending && postingSize + appendNum > (m_postingSizeLimit + m_bufferSizeLimit)) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Debug, "After appending, the number of vectors in %lld exceeds the postingsize + buffersize (%d + %d)! Do split now...\n", (std::int64_t)headID, m_postingSizeLimit, m_bufferSizeLimit); - if (reassignThreshold == 0) { - // From CollectReAssign batch: schedule async split but proceed - // with the append below (don't retry — async split hasn't - // finished so retrying would spin-loop). - SplitAsync(headID, postingSize + appendNum); - } else { + + if (postingSize + appendNum > (m_postingSizeLimit + m_bufferSizeLimit)) { + //SPTAGLIB_LOG(Helper::LogLevel::LL_Debug, "After appending, the number of vectors in %lld exceeds the postingsize + buffersize (%d + %d)! Do split now...\n", (std::int64_t)headID, m_postingSizeLimit, m_bufferSizeLimit); + ret = Split(p_exWorkSpace, headID, false); + if (ret != ErrorCode::Success) + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split %lld failed!\n", (std::int64_t)headID); + lock.unlock(); + goto checkDeleted; + } + + auto appendIOBegin = std::chrono::high_resolution_clock::now(); + if ((ret = db->Merge( + DBKey(headID), appendPosting, MaxTimeout, &(p_exWorkSpace->m_diskRequests), postingSize)) != ErrorCode::Success) + { + if (ret == ErrorCode::Posting_OverFlow) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "Merge failed:Posting overflow when appending to %lld! Do split and then retry...\n", (std::int64_t)headID); ret = Split(p_exWorkSpace, headID, false); - if (ret != ErrorCode::Success) + if (ret != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split %lld failed!\n", (std::int64_t)headID); + return ret; + } lock.unlock(); goto checkDeleted; } - } - - auto appendIOBegin = std::chrono::high_resolution_clock::now(); - if (IsMultiChunk()) { - { static std::atomic _logOnce{0}; if (_logOnce.fetch_add(1) == 0) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "[PATH] Append using MULTI-CHUNK AppendChunk path\n"); } - // Multi-chunk path: write chunk + update count in one BatchPut RPC. - ret = AppendChunkAndUpdateCount(headID, appendPosting, appendNum, - postingSize, MaxTimeout, - &(p_exWorkSpace->m_diskRequests)); - if (ret != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "MultiChunkAppend failed for %lld!\n", (std::int64_t)headID); - return ret; - } - postingSize = (postingSize + appendNum) * m_vectorInfoSize; - } else { - { static std::atomic _logOnce{0}; if (_logOnce.fetch_add(1) == 0) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "[PATH] Append using SINGLE-KEY Get+Put path (no multi-chunk)\n"); } - std::string fullPosting; - // [DIAG] measure Get latency (suspect B/C: RMW read amplification + grpc) - auto _getBegin = std::chrono::high_resolution_clock::now(); - auto getRet = db->Get(DBKey(headID), &fullPosting, MaxTimeout, &(p_exWorkSpace->m_diskRequests)); - auto _getEnd = std::chrono::high_resolution_clock::now(); - uint64_t _getUs = std::chrono::duration_cast(_getEnd - _getBegin).count(); - if (getRet != ErrorCode::Success) fullPosting.clear(); - // Diagnostic: detect stale/misaligned bytes in TiKV (e.g. residue - // from a previous run with different m_vectorInfoSize, or a prior - // multi-chunk layout sharing the same key prefix). - if (getRet == ErrorCode::Success && - (fullPosting.size() % m_vectorInfoSize) != 0) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, - "Append: stale-aligned posting in TiKV headID=%lld size=%zu mod=%zu (m_vectorInfoSize=%d)\n", - (std::int64_t)headID, fullPosting.size(), - fullPosting.size() % (size_t)m_vectorInfoSize, - m_vectorInfoSize); - } - fullPosting.append(appendPosting); - postingSize = static_cast(fullPosting.size()); - // [DIAG] measure Put latency + posting size - auto _putBegin = std::chrono::high_resolution_clock::now(); - if ((ret = db->Put(DBKey(headID), fullPosting, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge failed for %lld! Posting Size:%d, limit: %d\n", (std::int64_t)headID, postingSize, m_postingSizeLimit); - GetDBStats(); - return ret; - } - auto _putEnd = std::chrono::high_resolution_clock::now(); - uint64_t _putUs = std::chrono::duration_cast(_putEnd - _putBegin).count(); - // [DIAG] record into stat histograms - IndexStats::HistAdd(m_stat.m_appendLockWaitUs, _lockWaitUs); - IndexStats::HistAdd(m_stat.m_appendGetUs, _getUs); - IndexStats::HistAdd(m_stat.m_appendPutUs, _putUs); - IndexStats::HistAdd(m_stat.m_appendPostingBytes, (uint64_t)fullPosting.size()); - m_stat.m_appendLockWaitTotalUs.fetch_add(_lockWaitUs, std::memory_order_relaxed); - m_stat.m_appendGetTotalUs.fetch_add(_getUs, std::memory_order_relaxed); - m_stat.m_appendPutTotalUs.fetch_add(_putUs, std::memory_order_relaxed); - m_stat.m_appendPostingBytesTotal.fetch_add((uint64_t)fullPosting.size(), std::memory_order_relaxed); - m_stat.m_appendRmwSampleCount.fetch_add(1, std::memory_order_relaxed); + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge failed for %lld! Posting Size:%d, limit: %d\n", (std::int64_t)headID, postingSize, m_postingSizeLimit); + GetDBStats(); + return ret; } auto appendIOEnd = std::chrono::high_resolution_clock::now(); appendIOSeconds = std::chrono::duration_cast(appendIOEnd - appendIOBegin).count(); + if (postingSize % m_vectorInfoSize != 0) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, + "Append: stale-aligned posting in TiKV headID=%lld size=%zu mod=%zu (m_vectorInfoSize=%d)\n", + (std::int64_t)headID, postingSize, + postingSize % m_vectorInfoSize, + m_vectorInfoSize); + } + IndexStats::HistAdd(m_stat.m_appendPostingBytes, (uint64_t)postingSize); + m_stat.m_appendPostingBytesTotal.fetch_add((uint64_t)postingSize, std::memory_order_relaxed); + m_stat.m_appendRmwSampleCount.fetch_add(1, std::memory_order_relaxed); postingSize /= m_vectorInfoSize; } if (postingSize > (m_postingSizeLimit + reassignThreshold)) { @@ -1712,10 +1564,7 @@ namespace SPTAG::SPANN { m_versionMap->Load(versionmapPath, m_opt->m_datasetRowsInBlock, m_opt->m_datasetCapacity); SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Recovery: Current vector num: %d.\n", m_versionMap->Count()); } - else if (m_opt->m_storage == Storage::ROCKSDBIO) { - m_versionMap->Load(versionmapPath, m_opt->m_datasetRowsInBlock, m_opt->m_datasetCapacity); - SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Current vector num: %d.\n", m_versionMap->Count()); - } else if (m_opt->m_storage == Storage::TIKVIO) { + else if (m_opt->m_storage == Storage::ROCKSDBIO || m_opt->m_storage == Storage::TIKVIO) { m_versionMap->Load(versionmapPath, m_opt->m_datasetRowsInBlock, m_opt->m_datasetCapacity); SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Current vector num: %d.\n", m_versionMap->Count()); } else if (m_opt->m_storage == Storage::SPDKIO || m_opt->m_storage == Storage::FILEIO) { @@ -1881,21 +1730,7 @@ namespace SPTAG::SPANN { else remainLimit = m_hardLatencyLimit; auto readStart = std::chrono::high_resolution_clock::now(); - if (m_opt->m_useMultiChunkPosting && m_opt->m_storage == Storage::TIKVIO) { - { static std::atomic _logOnce{0}; if (_logOnce.fetch_add(1) == 0) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "[PATH] SearchIndex using MULTI-CHUNK scan path\n"); } - // Multi-chunk: scan all chunks per posting and concatenate - auto* tikvDB = dynamic_cast(db.get()); - if (!tikvDB) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "[SearchIndex] db is not TiKVIO for multi-chunk!\n"); - return ErrorCode::DiskIOFail; - } - auto dbKeys = DBKeys(p_exWorkSpace->m_postingIDs); - if (tikvDB->MultiScanPostings(*dbKeys, p_exWorkSpace->m_pageBuffers, remainLimit) != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "[SearchIndex] multi-chunk scan postings fail!\n"); - return ErrorCode::DiskIOFail; - } - } else { - { static std::atomic _logOnce{0}; if (_logOnce.fetch_add(1) == 0) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "[PATH] SearchIndex using SINGLE-KEY Get path (no multi-chunk)\n"); } + { auto keys = DBKeys(p_exWorkSpace->m_postingIDs); if (db->MultiGet(*keys, p_exWorkSpace->m_pageBuffers, remainLimit, &(p_exWorkSpace->m_diskRequests)) != ErrorCode::Success) { @@ -2628,7 +2463,7 @@ namespace SPTAG::SPANN { } ErrorCode tmp; - if ((tmp = PutPostingToDB(postingID, postinglist, MaxTimeout, &(workSpace.m_diskRequests))) != + if ((tmp = db->Put(DBKey(postingID), postinglist, MaxTimeout, &(workSpace.m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "[WriteDB] Put %lld fail!\n", (std::int64_t)index); @@ -2801,14 +2636,14 @@ namespace SPTAG::SPANN { ErrorCode GetWritePosting(ExtraWorkSpace* p_exWorkSpace, SizeType pid, std::string& posting, bool write = false) override { ErrorCode ret; if (write) { - if ((ret = PutPostingToDB(pid, posting, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) + if ((ret = db->Put(DBKey(pid), posting, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "[GetWritePosting] Put fail!\n"); return ret; } // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "PostingSize: %d\n", m_postingSizes.GetSize(pid)); } else { - if ((ret = GetPostingFromDB(pid, &posting, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) + if ((ret = db->Get(DBKey(pid), &posting, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "[GetWritePosting] Get fail!\n"); return ret; @@ -2869,107 +2704,6 @@ namespace SPTAG::SPANN { return keys; } - // Multi-chunk aware helpers: abstract single-key vs chunked access. - // When UseMultiChunkPosting is on and storage is TiKV, use Scan/PutBase/DeletePosting. - // Otherwise, fall back to the standard KeyValueIO Get/Put/Delete. - - inline bool IsMultiChunk() const { - return m_opt->m_useMultiChunkPosting && m_opt->m_storage == Storage::TIKVIO; - } - - inline TiKVIO* GetTiKVDB() const { - return dynamic_cast(db.get()); - } - - // Read a full posting from DB (Scan for multi-chunk, Get for single-key). - ErrorCode GetPostingFromDB(SizeType headID, std::string* posting, - const std::chrono::microseconds& timeout, - std::vector* reqs) { - if (IsMultiChunk()) { - return this->GetTiKVDB()->ScanPosting(DBKey(headID), posting, timeout); - } - return db->Get(DBKey(headID), posting, timeout, reqs); - } - - // Write a full posting to DB (DeletePosting+PutBaseChunk for multi-chunk, Put for single-key). - // This is a compacting write: replaces all chunks with a single base chunk. - // Also updates the posting count key and local cache. - ErrorCode PutPostingToDB(SizeType headID, const std::string& posting, - const std::chrono::microseconds& timeout, - std::vector* reqs) { - if (IsMultiChunk()) { - auto* tikv = this->GetTiKVDB(); - auto delRet = tikv->DeletePosting(DBKey(headID)); - if (delRet != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "PutPostingToDB: DeletePosting failed for headID %d\n", headID); - return delRet; - } - auto ret = tikv->PutBaseChunk(DBKey(headID), posting, timeout, reqs); - if (ret != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "PutPostingToDB: PutBaseChunk failed for headID %d\n", headID); - return ret; - } - int count = static_cast(posting.size() / m_vectorInfoSize); - auto countRet = tikv->SetPostingCount(DBKey(headID), count, timeout); - if (countRet != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "PutPostingToDB: SetPostingCount failed for headID %d (data written OK)\n", headID); - } - if (m_postingCountCache) m_postingCountCache->Put(DBKey(headID), count); - return ErrorCode::Success; - } - return db->Put(DBKey(headID), posting, timeout, reqs); - } - - // Delete a posting from DB (DeletePosting for multi-chunk, Delete for single-key). - // Also deletes the posting count key and invalidates local cache. - ErrorCode DeletePostingFromDB(SizeType headID) { - if (IsMultiChunk()) { - auto* tikv = this->GetTiKVDB(); - auto countRet = tikv->DeletePostingCount(DBKey(headID)); - if (countRet != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "DeletePostingFromDB: DeletePostingCount failed for headID %d\n", headID); - } - if (m_postingCountCache) m_postingCountCache->Remove(DBKey(headID)); - return tikv->DeletePosting(DBKey(headID)); - } - return db->Delete(DBKey(headID)); - } - - // Get the posting vector count, using local cache with TiKV fallback. - // Returns 0 if unknown (cache miss + TiKV error/miss). - int GetCachedPostingCount(SizeType headID) { - if (!m_postingCountCache) return 0; - SizeType dbKey = DBKey(headID); - auto [count, hit] = m_postingCountCache->Get(dbKey); - if (hit) return count; - // Cache miss: fetch from TiKV - auto* tikv = this->GetTiKVDB(); - if (!tikv) return 0; - count = tikv->GetPostingCount(dbKey, std::chrono::microseconds(5000000)); - if (count < 0) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "GetCachedPostingCount: TiKV error for headID %d, returning 0\n", headID); - return 0; - } - m_postingCountCache->Put(dbKey, count); - return count; - } - - // Update posting count after appending vectors. - // Writes to TiKV via BatchPut (chunk + count in one RPC) and updates local cache. - ErrorCode AppendChunkAndUpdateCount(SizeType headID, const std::string& appendPosting, - int appendNum, int oldCount, - const std::chrono::microseconds& timeout, - std::vector* reqs) { - auto* tikv = this->GetTiKVDB(); - if (!tikv) return ErrorCode::Fail; - int newCount = oldCount + appendNum; - auto ret = tikv->PutChunkAndCount(DBKey(headID), appendPosting, newCount, timeout, reqs); - if (ret == ErrorCode::Success && m_postingCountCache) { - m_postingCountCache->Put(DBKey(headID), newCount); - } - return ret; - } - private: int m_metaDataSize = 0; diff --git a/AnnService/inc/Core/SPANN/ExtraTiKVController.h b/AnnService/inc/Core/SPANN/ExtraTiKVController.h index 43b9e167e..a6d533068 100644 --- a/AnnService/inc/Core/SPANN/ExtraTiKVController.h +++ b/AnnService/inc/Core/SPANN/ExtraTiKVController.h @@ -29,6 +29,73 @@ namespace SPTAG::SPANN { + // Simple sharded LRU cache for posting vector counts. + // Thread-safe: each shard has its own mutex. + class PostingCountCache { + public: + PostingCountCache(size_t capacity = 100000, int shards = 16) + : m_shards(shards), m_capacity(std::max(capacity / shards, (size_t)1)) { + m_data.resize(shards); + m_mutexes = std::make_unique(shards); + } + + // Returns (count, true) on hit, (0, false) on miss. + std::pair Get(SizeType headID) { + int s = Shard(headID); + std::lock_guard lock(m_mutexes[s]); + auto& shard = m_data[s]; + auto it = shard.map.find(headID); + if (it == shard.map.end()) return {0, false}; + // Move to front (most recently used) + shard.order.splice(shard.order.begin(), shard.order, it->second); + return {it->second->second, true}; + } + + void Put(SizeType headID, int count) { + int s = Shard(headID); + std::lock_guard lock(m_mutexes[s]); + auto& shard = m_data[s]; + auto it = shard.map.find(headID); + if (it != shard.map.end()) { + it->second->second = count; + shard.order.splice(shard.order.begin(), shard.order, it->second); + return; + } + // Evict if full + if (shard.map.size() >= m_capacity) { + auto& back = shard.order.back(); + shard.map.erase(back.first); + shard.order.pop_back(); + } + shard.order.emplace_front(headID, count); + shard.map[headID] = shard.order.begin(); + } + + void Remove(SizeType headID) { + int s = Shard(headID); + std::lock_guard lock(m_mutexes[s]); + auto& shard = m_data[s]; + auto it = shard.map.find(headID); + if (it != shard.map.end()) { + shard.order.erase(it->second); + shard.map.erase(it); + } + } + + private: + int Shard(SizeType headID) const { return static_cast(headID) % m_shards; } + + struct ShardData { + std::list> order; // front = MRU + std::unordered_map>::iterator> map; + }; + + int m_shards; + size_t m_capacity; // per shard + std::vector m_data; + std::unique_ptr m_mutexes; + }; + /// TiKVIO implements the KeyValueIO interface by communicating with a TiKV /// cluster via its RawKV gRPC API. /// @@ -45,8 +112,8 @@ namespace SPTAG::SPANN class TiKVIO : public Helper::KeyValueIO { public: - TiKVIO(const std::string& pdAddresses, const std::string& keyPrefix) - : m_keyPrefix(keyPrefix) + TiKVIO(const std::string& pdAddresses, const std::string& keyPrefix, bool useMultiChunkPosting) + : m_keyPrefix(keyPrefix), m_useMultiChunkPosting(useMultiChunkPosting) { // Parse comma-separated PD addresses and try to connect. std::istringstream ss(pdAddresses); @@ -126,6 +193,11 @@ namespace SPTAG::SPANN return; } + // Initialize posting count cache for multi-chunk mode + if (m_useMultiChunkPosting) { + m_postingCountCache = std::make_unique(100000, 16); + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "PostingCountCache initialized (capacity=100000, shards=16)\n"); + } m_available = true; SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "TiKVIO: Initialized with key prefix '%s'\n", m_keyPrefix.c_str()); } @@ -201,6 +273,9 @@ namespace SPTAG::SPANN const std::chrono::microseconds& timeout, std::vector* reqs) override { + if (m_useMultiChunkPosting) { + return ScanPosting(key, value, timeout); + } std::string k(reinterpret_cast(&key), sizeof(SizeType)); return Get(k, value, timeout, reqs); } @@ -256,7 +331,27 @@ namespace SPTAG::SPANN ErrorCode Put(const SizeType key, const std::string& value, const std::chrono::microseconds& timeout, std::vector* reqs) override - { + { + if (m_useMultiChunkPosting) { + auto delRet = DeletePosting(key); + if (delRet != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "PutPostingToDB: DeletePosting failed for key %d\n", key); + return delRet; + } + auto ret = PutBaseChunk(key, value, timeout, reqs); + if (ret != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "PutPostingToDB: PutBaseChunk failed for key %d\n", key); + return ret; + } + int count = static_cast(value.size()); + auto countRet = SetPostingCount(key, count, timeout); + if (countRet != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "PutPostingToDB: SetPostingCount failed for key %d (data written OK)\n", key); + } + if (m_postingCountCache) m_postingCountCache->Put(key, count); + return ErrorCode::Success; + } + std::string k(reinterpret_cast(&key), sizeof(SizeType)); return Put(k, value, timeout, reqs); } @@ -264,6 +359,15 @@ namespace SPTAG::SPANN // ---- Delete operations ---- ErrorCode Delete(SizeType key) override { + if (m_useMultiChunkPosting) { + auto countRet = DeletePostingCount(key); + if (countRet != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "DeletePostingFromDB: DeletePostingCount failed for headID %d\n", key); + } + if (m_postingCountCache) m_postingCountCache->Remove(key); + return DeletePosting(key); + } + std::string k(reinterpret_cast(&key), sizeof(SizeType)); std::string prefixedKey = MakePrefixedKey(k); @@ -366,18 +470,40 @@ namespace SPTAG::SPANN return ErrorCode::Fail; } - std::string existingValue; - auto ret = Get(key, &existingValue, timeout, reqs); - if (ret != ErrorCode::Success) { - // Key doesn't exist yet, just put the new value. - size = static_cast(value.size()); - return Put(key, value, timeout, reqs); - } + if (m_useMultiChunkPosting) { + auto [count, hit] = m_postingCountCache->Get(key); + if (!hit) { + count = GetPostingCount(key, std::chrono::microseconds(5000000)); + if (count < 0) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "GetCachedPostingCount: TiKV error for headID %d, returning 0\n", key); + return ErrorCode::Posting_SizeError; + } + m_postingCountCache->Put(key, count); + } + { static std::atomic _logOnce{0}; if (_logOnce.fetch_add(1) == 0) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "[PATH] Append using MULTI-CHUNK AppendChunk path\n"); } - // Append the new value to existing - existingValue.append(value); - size = static_cast(existingValue.size()); - return Put(key, existingValue, timeout, reqs); + int newCount = count + value.size(); + auto ret =PutChunkAndCount(key, value, newCount, timeout, reqs); + if (ret != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "MultiChunkAppend failed for %lld!\n", (std::int64_t)key); + return ret; + } + if (m_postingCountCache) m_postingCountCache->Put(key, newCount); + size = newCount; + } else { + { static std::atomic _logOnce{0}; if (_logOnce.fetch_add(1) == 0) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "[PATH] Append using SINGLE-KEY Get+Put path (no multi-chunk)\n"); } + std::string fullPosting; + auto ret = Get(key, &fullPosting, MaxTimeout, reqs); + if (ret != ErrorCode::Success) fullPosting.clear(); + + fullPosting.append(value); + size = static_cast(fullPosting.size()); + if ((ret = Put(key, fullPosting, MaxTimeout, reqs)) != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge failed for %lld! Posting Size:%d\n", (std::int64_t)key, size); + return ret; + } + } + return ErrorCode::Success; } // ---- MultiGet operations ---- @@ -389,7 +515,11 @@ namespace SPTAG::SPANN std::vector>& values, const std::chrono::microseconds& timeout, std::vector* reqs) override - { + { + if (m_useMultiChunkPosting) { + return MultiScanPostings(keys, values, timeout); + } + if (keys.empty()) return ErrorCode::Success; // Build prefixed keys and initialize all values as empty @@ -422,7 +552,7 @@ namespace SPTAG::SPANN std::mutex resultMutex; for (auto& [gkey, rg] : regionGroups) { - futures.push_back(std::async(std::launch::async, [&, &gkey, &rg]() { + futures.push_back(std::async(std::launch::async, [&]() { auto& group = rg.keys; auto* stub = GetOrCreateStub(gkey.leaderAddr); if (!stub) return; @@ -652,7 +782,7 @@ namespace SPTAG::SPANN for (auto& [gkey, rg] : regionGroups) { futures.push_back(std::async(std::launch::async, - [&, &gkey, &rg]() -> std::vector { + [&]() -> std::vector { auto& group = rg.keys; auto* stub = GetOrCreateStub(gkey.leaderAddr); if (!stub) return {}; @@ -1366,6 +1496,11 @@ namespace SPTAG::SPANN std::vector> m_scanResults; size_t m_scanIndex = 0; + // Posting count cache for multi-chunk mode. + // Tracks approximate vector count per posting to decide when to split. + bool m_useMultiChunkPosting = false; + std::unique_ptr m_postingCountCache; + // ---- Helper: build a prefixed key ---- std::string MakePrefixedKey(const std::string& key) const { std::string result; diff --git a/AnnService/src/Core/SPANN/SPANNIndex.cpp b/AnnService/src/Core/SPANN/SPANNIndex.cpp index f8864c692..4a0aabf4a 100644 --- a/AnnService/src/Core/SPANN/SPANNIndex.cpp +++ b/AnnService/src/Core/SPANN/SPANNIndex.cpp @@ -1917,9 +1917,9 @@ template void Index::PrepareDB(std::shared_ptr MakeTiKVVersionMap(const std::string& tes auto now = std::chrono::steady_clock::now().time_since_epoch().count(); std::string prefix = "vmtest_" + testName + "_" + std::to_string(now) + "_"; - auto db = std::make_shared(std::string(pdAddr), prefix); + auto db = std::make_shared(std::string(pdAddr), prefix, false); auto vm = std::make_unique(); vm->SetDB(db); vm->SetLayer(0); From d461907543945c4f0e1bbfac2dfe1c32a2667a9d Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Wed, 6 May 2026 07:20:05 +0000 Subject: [PATCH 14/26] add version check --- AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index c27e437b9..db01cd472 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -771,6 +771,10 @@ namespace SPTAG::SPANN { for (int j = 0; j < newPostVectorNum; j++, postingK += m_vectorInfoSize) { SizeType VID = *((SizeType *)(postingK)); + uint8_t version = *(postingK + sizeof(SizeType)); + + if (m_versionMap->Deleted(VID) || m_versionMap->GetVersion(VID) != version) + continue; if (vectorIdSet.find(VID) != vectorIdSet.end()) continue; @@ -780,7 +784,7 @@ namespace SPTAG::SPANN { currentLength++; } - if (currentLength > (m_postingSizeLimit + m_bufferSizeLimit)) + if (currentLength > (m_postingSizeLimit + m_bufferSizeLimit) && m_opt->m_storage == Storage::FILEIO) { /* SPTAGLIB_LOG( From 04274aef26e7abea1515ee6e6e2dbe1c677fd685 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Fri, 8 May 2026 07:09:21 +0000 Subject: [PATCH 15/26] add lock.unlock and clean SPFreshTest --- .../inc/Core/SPANN/ExtraDynamicSearcher.h | 36 ++++++- Test/src/SPFreshTest.cpp | 102 ++++-------------- .../2026-05-08-merged_spfresh/benchmark.ini | 36 +++++++ 3 files changed, 90 insertions(+), 84 deletions(-) create mode 100644 evaluation/2026-05-08-merged_spfresh/benchmark.ini diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index db01cd472..f8cd84450 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -477,6 +477,7 @@ namespace SPTAG::SPANN { finalcode = ret; return; } + CheckCentroid(globalID, postingList, "RefineIndex"); } else { @@ -506,6 +507,25 @@ namespace SPTAG::SPANN { return ErrorCode::Success; } + void CheckCentroid(SizeType pid, std::string& posting, std::string where) + { + SizeType postVectorNum = posting.size() / m_vectorInfoSize; + uint8_t* vectorId = reinterpret_cast(posting.data()); + bool hasHead = false; + for (int j = 0; j < postVectorNum; j++, vectorId += m_vectorInfoSize) + { + SizeType VID = *((SizeType*)(vectorId)); + if (VID == pid) { + hasHead = true; + break; + } + } + if (!hasHead) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "CheckCentroid cannot find head in posting! pid:%d, where:%s\n", pid, where.c_str()); + exit(-1); + } + } + ErrorCode Split(ExtraWorkSpace* p_exWorkSpace, const SizeType headID, bool requirelock = true) { auto splitBegin = std::chrono::high_resolution_clock::now(); @@ -613,6 +633,7 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split Fail to write back posting %lld\n", (std::int64_t)(headID)); return ret; } + CheckCentroid(headID, postingList, "Split-GC"); m_stat.m_garbageNum++; auto GCEnd = std::chrono::high_resolution_clock::now(); elapsedMSeconds = std::chrono::duration_cast(GCEnd - splitBegin).count(); @@ -654,6 +675,7 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split fail to override posting cut to limit for posting %lld\n", (std::int64_t)(headID)); return ret; } + CheckCentroid(headID, newpostingList, "Split-one-cluster"); { std::unique_lock tmplock(m_splitListLock); m_splitList.unsafe_erase(headID); @@ -690,6 +712,7 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to override posting %lld\n", (std::int64_t)(newHeadVID)); return ret; } + CheckCentroid(newHeadVID, newPostingLists[k], "Split-SameHead"); auto splitPutEnd = std::chrono::high_resolution_clock::now(); elapsedMSeconds = std::chrono::duration_cast(splitPutEnd - splitPutBegin).count(); m_stat.m_putCost += elapsedMSeconds; @@ -805,6 +828,7 @@ namespace SPTAG::SPANN { (std::int64_t)(newHeadVID)); return ret; } + CheckCentroid(newHeadVID, mergedPostingList, "Split-MergePosting"); auto splitPutEnd = std::chrono::high_resolution_clock::now(); elapsedMSeconds = std::chrono::duration_cast(splitPutEnd - splitPutBegin) @@ -821,6 +845,7 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Fail to add new posting %lld\n", (std::int64_t)(newHeadVID)); return ret; } + CheckCentroid(newHeadVID, newPostingLists[k], "Split-NewPosting"); auto splitPutEnd = std::chrono::high_resolution_clock::now(); elapsedMSeconds = std::chrono::duration_cast(splitPutEnd - splitPutBegin).count(); m_stat.m_putCost += elapsedMSeconds; @@ -837,6 +862,7 @@ namespace SPTAG::SPANN { elapsedMSeconds = std::chrono::duration_cast(updateHeadEnd - updateHeadBegin).count(); m_stat.m_updateHeadCost += elapsedMSeconds; } + if (m_rwLocks.hash_func(newHeadVID) != m_rwLocks.hash_func(headID)) anotherLock.unlock(); } //SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Head id: %d split into : %d, length: %d\n", headID, newHeadVID, args.counts[k]); } @@ -862,6 +888,7 @@ namespace SPTAG::SPANN { } } } + lock.unlock(); } m_stat.m_splitNum++; @@ -950,6 +977,7 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge Fail to write back posting %lld\n", (std::int64_t)headID); return ret; } + CheckCentroid(headID, mergedPostingList, "MergePostings-ignore"); { std::unique_lock lock(m_mergeListLock); m_mergeList.unsafe_erase(headID); @@ -1036,6 +1064,7 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "MergePostings fail to override old posting %lld after merge\n", (std::int64_t)headID); return ret; } + CheckCentroid(headID, mergedPostingList, "MergePostings-currentLength >= nextLength"); m_headIndex->DeleteIndex(queryResult->VID, m_layer + 1); if ((ret=db->Delete(DBKey(queryResult->VID))) != ErrorCode::Success) { @@ -1056,6 +1085,7 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "MergePostings fail to override posting %lld after merge\n", (std::int64_t)(queryResult->VID)); return ret; } + CheckCentroid(queryResult->VID, mergedPostingList, "MergePostings-currentLength < nextLength"); m_headIndex->DeleteIndex(headID, m_layer + 1); if ((ret = db->Delete(DBKey(headID))) != ErrorCode::Success) { @@ -1112,6 +1142,7 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge Fail to write back posting %lld\n", (std::int64_t)headID); return ret; } + CheckCentroid(headID, mergedPostingList, "MergePostings-GC"); { std::unique_lock lock(m_mergeListLock); m_mergeList.unsafe_erase(headID); @@ -1478,6 +1509,7 @@ namespace SPTAG::SPANN { m_stat.m_appendPostingBytesTotal.fetch_add((uint64_t)postingSize, std::memory_order_relaxed); m_stat.m_appendRmwSampleCount.fetch_add(1, std::memory_order_relaxed); postingSize /= m_vectorInfoSize; + lock.unlock(); } if (postingSize > (m_postingSizeLimit + reassignThreshold)) { // SizeType VID = *(int*)(&appendPosting[0]); @@ -2474,6 +2506,7 @@ namespace SPTAG::SPANN { ret = tmp; return; } + CheckCentroid(postingID, postinglist, "WriteDownAllPostingToDB"); } else { @@ -2644,7 +2677,8 @@ namespace SPTAG::SPANN { { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "[GetWritePosting] Put fail!\n"); return ret; - } + } + CheckCentroid(pid, posting, "GetWritePosting"); // SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "PostingSize: %d\n", m_postingSizes.GetSize(pid)); } else { if ((ret = db->Get(DBKey(pid), &posting, MaxTimeout, &(p_exWorkSpace->m_diskRequests))) != ErrorCode::Success) diff --git a/Test/src/SPFreshTest.cpp b/Test/src/SPFreshTest.cpp index 1cdd44afb..bc4789a04 100644 --- a/Test/src/SPFreshTest.cpp +++ b/Test/src/SPFreshTest.cpp @@ -246,11 +246,9 @@ std::shared_ptr BuildIndex(const std::string &outDirectory, std::sh template std::shared_ptr BuildLargeIndex(const std::string &outDirectory, std::string &pvecset, - std::string& pmetaset, std::string& pmetaidx, const std::string &distMethod = "L2", + std::string& pmetaset, std::string& pmetaidx, Helper::IniReader& iniReader, const std::string &distMethod = "L2", int searchthread = 2, int insertthread = 2, int layers = 1, - std::shared_ptr quantizer = nullptr, std::string quantizerFilePath = "quantizer.bin", - const std::map& ssdOverrides = {}, - bool ssdOnly = false) + std::shared_ptr quantizer = nullptr, std::string quantizerFilePath = "quantizer.bin") { auto vecIndex = VectorIndex::CreateInstance(IndexAlgoType::SPANN, GetEnumValueType()); int maxthreads = std::thread::hardware_concurrency(); @@ -341,29 +339,15 @@ std::shared_ptr BuildLargeIndex(const std::string &outDirectory, st } } - // Apply overrides (e.g., Storage, TiKV settings, SelectHead/BuildHead params) - for (const auto &[key, val] : ssdOverrides) + for (const auto &sec : sections) { - // Keys prefixed with "SectionName." are routed to the corresponding section - auto dotPos = key.find('.'); - if (dotPos != std::string::npos) { - std::string section = key.substr(0, dotPos); - std::string param = key.substr(dotPos + 1); - vecIndex->SetParameter(param.c_str(), val.c_str(), section.c_str()); - } else { - vecIndex->SetParameter(key.c_str(), val.c_str(), "BuildSSDIndex"); + auto params = iniReader.GetParameters(sec.c_str()); + for (const auto &[key, val] : params) + { + vecIndex->SetParameter(key.c_str(), val.c_str(), sec.c_str()); } } - // SSD-only mode: skip SelectHead and BuildHead, resume from specified layer - if (ssdOnly) - { - // Allow explicit ResumeLayer from config/overrides; otherwise default to layer 0 - // (rebuild SSD for all layers, reusing existing head indexes) - int resumeLayer = 0; - vecIndex->SetParameter("ResumeLayer", std::to_string(resumeLayer).c_str(), "BuildSSDIndex"); - } - if (quantizer) { vecIndex->SetParameter("QuantizerFilePath", quantizerFilePath.c_str(), "Base"); @@ -677,11 +661,9 @@ ErrorCode QuantizeVectors(const std::shared_ptr& quantizer, template void RunBenchmark(const std::string &vectorPath, const std::string &queryPath, const std::string &truthPath, DistCalcMethod distMethod, const std::string &indexPath, int dimension, int baseVectorCount, - int insertVectorCount, int deleteVectorCount, int batches, int topK, int numSearchThreads, int numInsertThreads, int numSearchDuringInsertThreads, int numQueries, + int insertVectorCount, int deleteVectorCount, int batches, int topK, int numSearchThreads, int numInsertThreads, int numSearchDuringInsertThreads, int numQueries, Helper::IniReader& iniReader, const std::string &outputFile = "output.json", const bool rebuild = true, const int resume = -1, - const std::string &quantizerFilePath = std::string(""), int quantizedDim = 0, int layers = 1, - const std::map& ssdOverrides = {}, - bool rebuildSsdOnly = false) + const std::string &quantizerFilePath = std::string(""), int quantizedDim = 0, int layers = 1) { int oldM = M, oldK = K, oldN = N, oldQueries = queries; N = baseVectorCount; @@ -750,18 +732,7 @@ void RunBenchmark(const std::string &vectorPath, const std::string &queryPath, c // Build initial index BOOST_TEST_MESSAGE("\n=== Building Index ==="); - if (rebuild || rebuildSsdOnly || !direxists(indexPath.c_str())) { - if (!rebuildSsdOnly) { - // Allow empty or non-existent directories; block only if index files already exist - if (direxists(indexPath.c_str()) && fileexists((indexPath + FolderSep + "indexloader.ini").c_str())) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, - "Index directory '%s' already exists with index files. Refusing to delete. " - "Remove it manually or use RebuildSSDOnly=true to resume.\n", - indexPath.c_str()); - BOOST_FAIL("Index directory already exists: " + indexPath); - return; - } - } + if (rebuild || !direxists(indexPath.c_str())) { auto buildstart = std::chrono::high_resolution_clock::now(); if (enableQuantization) @@ -786,13 +757,13 @@ void RunBenchmark(const std::string &vectorPath, const std::string &queryPath, c quantizedBase->Save(pquanvecset); } - index = BuildLargeIndex(indexPath, pquanvecset, pmeta, pmetaidx, dist, numSearchThreads, numInsertThreads, layers, quantizer, "quantizer.bin", ssdOverrides, rebuildSsdOnly); + index = BuildLargeIndex(indexPath, pquanvecset, pmeta, pmetaidx, iniReader, dist, numSearchThreads, numInsertThreads, layers, quantizer, "quantizer.bin"); BOOST_REQUIRE(index != nullptr); index->SetQuantizerADC(true); } else { - index = BuildLargeIndex(indexPath, pvecset, pmeta, pmetaidx, dist, numSearchThreads, numInsertThreads, layers, nullptr, "quantizer.bin", ssdOverrides, rebuildSsdOnly); + index = BuildLargeIndex(indexPath, pvecset, pmeta, pmetaidx, iniReader, dist, numSearchThreads, numInsertThreads, layers); BOOST_REQUIRE(index != nullptr); } @@ -2099,44 +2070,9 @@ BOOST_AUTO_TEST_CASE(BenchmarkFromConfig) int numQueries = iniReader.GetParameter("Benchmark", "NumQueries", 1000); int layers = iniReader.GetParameter("Benchmark", "Layers", 1); DistCalcMethod distMethod = iniReader.GetParameter("Benchmark", "DistMethod", DistCalcMethod::L2); - bool rebuild = iniReader.GetParameter("Benchmark", "Rebuild", true); - bool rebuildSsdOnly = iniReader.GetParameter("Benchmark", "RebuildSSDOnly", false); + bool rebuild = (iniReader.GetParameter("Benchmark", "Rebuild", true) || iniReader.GetParameter("Benchmark", "RebuildSSDOnly", false)); int resume = iniReader.GetParameter("Benchmark", "Resume", -1); - // Read storage backend overrides for BuildSSDIndex - std::map ssdOverrides; - std::string storage = iniReader.GetParameter("Benchmark", "Storage", std::string("")); - if (!storage.empty()) { - ssdOverrides["Storage"] = storage; - } - std::string tikvPDAddresses = iniReader.GetParameter("Benchmark", "TiKVPDAddresses", std::string("")); - if (!tikvPDAddresses.empty()) { - ssdOverrides["TiKVPDAddresses"] = tikvPDAddresses; - } - std::string tikvKeyPrefix = iniReader.GetParameter("Benchmark", "TiKVKeyPrefix", std::string("")); - if (!tikvKeyPrefix.empty()) { - ssdOverrides["TiKVKeyPrefix"] = tikvKeyPrefix; - } - if (appendThreadNum > 0) { - ssdOverrides["AppendThreadNum"] = std::to_string(appendThreadNum); - } - - // Pass through any [BuildSSDIndex] section params from the ini as overrides - auto buildSSDParams = iniReader.GetParameters("BuildSSDIndex"); - for (const auto &[key, val] : buildSSDParams) { - ssdOverrides[key] = val; - } - - // Pass through [SelectHead] and [BuildHead] params as overrides too - auto selectHeadParams = iniReader.GetParameters("SelectHead"); - for (const auto &[key, val] : selectHeadParams) { - ssdOverrides["SelectHead." + key] = val; - } - auto buildHeadParams = iniReader.GetParameters("BuildHead"); - for (const auto &[key, val] : buildHeadParams) { - ssdOverrides["BuildHead." + key] = val; - } - BOOST_TEST_MESSAGE("=== Benchmark Configuration ==="); BOOST_TEST_MESSAGE("Vector Path: " << vectorPath); BOOST_TEST_MESSAGE("Query Path: " << queryPath); @@ -2166,20 +2102,20 @@ BOOST_AUTO_TEST_CASE(BenchmarkFromConfig) if (valueType == VectorValueType::Float) { RunBenchmark(vectorPath, queryPath, truthPath, distMethod, indexPath, dimension, baseVectorCount, - insertVectorCount, deleteVectorCount, batchNum, topK, numSearchThreads, numInsertThreads, numSearchDuringInsertThreads, numQueries, outputFile, - rebuild, resume, quantizerFilePath, quantizedDim, layers, ssdOverrides, rebuildSsdOnly); + insertVectorCount, deleteVectorCount, batchNum, topK, numSearchThreads, numInsertThreads, numSearchDuringInsertThreads, numQueries, iniReader, + outputFile, rebuild, resume, quantizerFilePath, quantizedDim, layers); } else if (valueType == VectorValueType::Int8) { RunBenchmark(vectorPath, queryPath, truthPath, distMethod, indexPath, dimension, baseVectorCount, - insertVectorCount, deleteVectorCount, batchNum, topK, numSearchThreads, numInsertThreads, numSearchDuringInsertThreads, numQueries, - outputFile, rebuild, resume, quantizerFilePath, quantizedDim, layers, ssdOverrides, rebuildSsdOnly); + insertVectorCount, deleteVectorCount, batchNum, topK, numSearchThreads, numInsertThreads, numSearchDuringInsertThreads, numQueries, iniReader, + outputFile, rebuild, resume, quantizerFilePath, quantizedDim, layers); } else if (valueType == VectorValueType::UInt8) { RunBenchmark(vectorPath, queryPath, truthPath, distMethod, indexPath, dimension, baseVectorCount, - insertVectorCount, deleteVectorCount, batchNum, topK, numSearchThreads, numInsertThreads, numSearchDuringInsertThreads, numQueries, - outputFile, rebuild, resume, quantizerFilePath, quantizedDim, layers, ssdOverrides, rebuildSsdOnly); + insertVectorCount, deleteVectorCount, batchNum, topK, numSearchThreads, numInsertThreads, numSearchDuringInsertThreads, numQueries, iniReader, + outputFile, rebuild, resume, quantizerFilePath, quantizedDim, layers); } //std::filesystem::remove_all(indexPath); diff --git a/evaluation/2026-05-08-merged_spfresh/benchmark.ini b/evaluation/2026-05-08-merged_spfresh/benchmark.ini new file mode 100644 index 000000000..43db313d4 --- /dev/null +++ b/evaluation/2026-05-08-merged_spfresh/benchmark.ini @@ -0,0 +1,36 @@ +[Benchmark] +VectorPath=sift1b/base.1B.u8bin +QueryPath=sift1b/query.public.10K.u8bin +TruthPath=truth_1m_l2_batchget +IndexPath=/mnt/nvme/qi/index_1m_final/spann_index +ValueType=UInt8 +Dimension=128 +BaseVectorCount=1000000 +InsertVectorCount=10000000 +DeleteVectorCount=0 +BatchNum=10 +TopK=5 +NumSearchThreads=4 +NumInsertThreads=16 +NumSearchDuringInsertThreads=1 +NumQueries=1000 +DistMethod=L2 +Rebuild=true +Resume=-1 +Layers=2 + +[SelectHead] +ParallelBKTBuild=true + +[BuildSSDIndex] +LatencyLimit=100 +MaxCheck=8192 +SearchInternalResultNum=64 +UseMultiChunkPosting=false +ReassignK=64 +AsyncMergeInSearch=true +VersionCacheMaxChunks=100000 +Storage=TIKVIO +TiKVPDAddresses=127.0.0.1:23791,127.0.0.1:23792,127.0.0.1:23793 +TiKVKeyPrefix=qi_1m_l2 +AppendThreadNum=48 From ca25288665f6d540abfa47d1efbe1902c823625f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 8 May 2026 12:09:03 +0000 Subject: [PATCH 16/26] fix deadlock issue --- .../inc/Core/SPANN/ExtraDynamicSearcher.h | 35 ++++++++++--------- benchmark.ini | 19 ---------- 2 files changed, 18 insertions(+), 36 deletions(-) delete mode 100644 benchmark.ini diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 8a8cd8b3d..9b40f8caa 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -894,14 +894,14 @@ namespace SPTAG::SPANN { } } } - lock.unlock(); + if (requirelock) lock.unlock(); } m_stat.m_splitNum++; if (!m_opt->m_disableReassign) { auto reassignScanBegin = std::chrono::high_resolution_clock::now(); - CollectReAssign(p_exWorkSpace, headID, headVec, newPostingLists, newHeadsID, newHeadsVec, theSameHead); + CollectReAssign(p_exWorkSpace, headID, headVec, newPostingLists, newHeadsID, newHeadsVec, theSameHead, requirelock); auto reassignScanEnd = std::chrono::high_resolution_clock::now(); elapsedMSeconds = std::chrono::duration_cast(reassignScanEnd - reassignScanBegin).count(); @@ -919,13 +919,13 @@ namespace SPTAG::SPANN { std::unique_lock lock(m_rwLocks[headID]); if (!m_headIndex->ContainSample(headID, m_layer + 1)) { - std::unique_lock lock(m_mergeListLock); + std::unique_lock tmplock(m_mergeListLock); m_mergeList.unsafe_erase(headID); return ErrorCode::Success; } { - std::shared_lock lock(m_mergeListLock); + std::shared_lock tmplock(m_mergeListLock); if (m_mergeList.find(headID) == m_mergeList.end()) { return ErrorCode::Success; } @@ -985,7 +985,7 @@ namespace SPTAG::SPANN { } CheckCentroid(headID, mergedPostingList, "MergePostings-ignore"); { - std::unique_lock lock(m_mergeListLock); + std::unique_lock tmplock(m_mergeListLock); m_mergeList.unsafe_erase(headID); } return ErrorCode::Success; @@ -1130,7 +1130,7 @@ namespace SPTAG::SPANN { { { - std::unique_lock lock(m_mergeListLock); + std::unique_lock tmplock(m_mergeListLock); m_mergeList.unsafe_erase(headID); m_mergeList.unsafe_erase(queryResult->VID); } @@ -1152,7 +1152,7 @@ namespace SPTAG::SPANN { } CheckCentroid(headID, mergedPostingList, "MergePostings-GC"); { - std::unique_lock lock(m_mergeListLock); + std::unique_lock tmplock(m_mergeListLock); m_mergeList.unsafe_erase(headID); } return ErrorCode::Success; @@ -1188,7 +1188,7 @@ namespace SPTAG::SPANN { inline void MergeAsync(SizeType headID, std::function p_callback = nullptr) { { - std::shared_lock lock(m_mergeListLock); + std::shared_lock tmplock(m_mergeListLock); auto res = m_mergeList.insert(headID); if (!res.second) { @@ -1213,7 +1213,7 @@ namespace SPTAG::SPANN { ErrorCode CollectReAssign(ExtraWorkSpace *p_exWorkSpace, SizeType headID, std::shared_ptr headVec, std::vector &postingLists, std::vector &newHeadsID, std::vector> &newHeadsVec, - bool theSameHead) + bool theSameHead, bool requirelock) { auto headVector = reinterpret_cast(headVec->data() + m_metaDataSize); @@ -1370,7 +1370,7 @@ namespace SPTAG::SPANN { // Split -> CollectReAssign -> Append -> Split -> CollectReAssign -> ... for (auto& kv : batchReassign) { int count = static_cast(kv.second.size() / m_vectorInfoSize); - ErrorCode ret = Append(p_exWorkSpace, kv.first, count, kv.second, 0); + ErrorCode ret = Append(p_exWorkSpace, kv.first, count, kv.second, 0, requirelock || m_rwLocks.hash_func(kv.first) != m_rwLocks.hash_func(headID)); if (ret != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "BatchReassign Append failed for head %d, count %d\n", kv.first, count); } @@ -1424,7 +1424,7 @@ namespace SPTAG::SPANN { } - ErrorCode Append(ExtraWorkSpace* p_exWorkSpace, SizeType headID, int appendNum, std::string& appendPosting, int reassignThreshold = 0) + ErrorCode Append(ExtraWorkSpace* p_exWorkSpace, SizeType headID, int appendNum, std::string& appendPosting, int reassignThreshold = 0, bool requirelock = true) { auto appendBegin = std::chrono::high_resolution_clock::now(); if (appendPosting.empty()) { @@ -1458,7 +1458,8 @@ namespace SPTAG::SPANN { //std::shared_lock lock(m_rwLocks[headID]); //ROCKSDB // [DIAG] measure lock wait time (suspect A: lock contention) auto _lockBegin = std::chrono::high_resolution_clock::now(); - std::unique_lock lock(m_rwLocks[headID]); //SPDK + std::unique_lock lock(m_rwLocks[headID], std::defer_lock); //SPDK + if (requirelock) lock.lock(); auto _lockAcq = std::chrono::high_resolution_clock::now(); uint64_t _lockWaitUs = std::chrono::duration_cast(_lockAcq - _lockBegin).count(); IndexStats::HistAdd(m_stat.m_appendLockWaitUs, _lockWaitUs); @@ -1466,11 +1467,11 @@ namespace SPTAG::SPANN { ErrorCode ret; if (!m_headIndex->ContainSample(headID, m_layer + 1)) { - lock.unlock(); + if (requirelock) lock.unlock(); goto checkDeleted; } { - std::shared_lock lock(m_splitListLock); + std::shared_lock tmplock(m_splitListLock); auto it = m_splitList.find(headID); if (it != m_splitList.end()) { postingSize = it->second; @@ -1482,7 +1483,7 @@ namespace SPTAG::SPANN { ret = Split(p_exWorkSpace, headID, false); if (ret != ErrorCode::Success) SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split %lld failed!\n", (std::int64_t)headID); - lock.unlock(); + if (requirelock) lock.unlock(); goto checkDeleted; } @@ -1497,7 +1498,7 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split %lld failed!\n", (std::int64_t)headID); return ret; } - lock.unlock(); + if (requirelock) lock.unlock(); goto checkDeleted; } SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge failed for %lld! Posting Size:%d, limit: %d\n", (std::int64_t)headID, postingSize, m_postingSizeLimit); @@ -1518,7 +1519,7 @@ namespace SPTAG::SPANN { m_stat.m_appendPostingBytesTotal.fetch_add((uint64_t)postingSize, std::memory_order_relaxed); m_stat.m_appendRmwSampleCount.fetch_add(1, std::memory_order_relaxed); postingSize /= m_vectorInfoSize; - lock.unlock(); + if (requirelock) lock.unlock(); } if (postingSize > (m_postingSizeLimit + reassignThreshold)) { // SizeType VID = *(int*)(&appendPosting[0]); diff --git a/benchmark.ini b/benchmark.ini deleted file mode 100644 index e2b400767..000000000 --- a/benchmark.ini +++ /dev/null @@ -1,19 +0,0 @@ -[Benchmark] -VectorPath=sift1b/base.100M.u8bin -QueryPath=sift1b/query.public.10K.u8bin -TruthPath=none -IndexPath=proidx/spann_index -ValueType=UInt8 -Dimension=128 -BaseVectorCount=10000 -InsertVectorCount=10000 -DeleteVectorCount=0 -BatchNum=10 -TopK=5 -NumThreads=8 -NumQueries=100 -DistMethod=L2 -Rebuild=true -Resume=-1 -QuantizerFilePath=quantizer.bin -QuantizedDim=64 From 5869ea5e1dc47b966ef6d3e27f26c381bb07d88f Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Sat, 9 May 2026 09:43:44 +0000 Subject: [PATCH 17/26] make append to be async job in split --- .../inc/Core/SPANN/ExtraDynamicSearcher.h | 74 ++++++++++++++----- AnnService/inc/Helper/ThreadPool.h | 17 ++++- 2 files changed, 70 insertions(+), 21 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 9b40f8caa..7b041e2de 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -112,6 +112,33 @@ namespace SPTAG::SPANN { } }; + class AppendAsyncJob : public Helper::ThreadPool::Job + { + private: + ExtraDynamicSearcher* m_extraIndex; + SizeType m_headID; + std::shared_ptr m_vectorInfo; + std::function m_callback; + public: + AppendAsyncJob(ExtraDynamicSearcher* extraIndex, SizeType headID, std::shared_ptr vectorInfo, std::function p_callback) + : m_extraIndex(extraIndex), m_headID(headID), m_vectorInfo(std::move(vectorInfo)), m_callback(std::move(p_callback)) {} + + ~AppendAsyncJob() {} + inline void exec(IAbortOperation* p_abort) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Cannot support job.exec(abort)!\n"); + } + inline void exec(void* p_workSpace, IAbortOperation* p_abort) override { + ErrorCode ret = m_extraIndex->Append((ExtraWorkSpace*)p_workSpace, m_headID, (int)(m_vectorInfo->size() / m_extraIndex->m_vectorInfoSize), *m_vectorInfo); + if (ret != ErrorCode::Success) + m_extraIndex->m_asyncStatus = ret; + m_extraIndex->m_appendJobsInFlight--; + m_extraIndex->m_totalAppendCompleted++; + if (m_callback != nullptr) { + m_callback(); + } + } + }; + class ReassignAsyncJob : public Helper::ThreadPool::Job { private: @@ -205,6 +232,9 @@ namespace SPTAG::SPANN { std::atomic_size_t m_totalMergeSubmitted{ 0 }; std::atomic_size_t m_totalMergeCompleted{ 0 }; + std::atomic_size_t m_appendJobsInFlight{ 0 }; + std::atomic_size_t m_totalAppendSubmitted{ 0 }; + std::atomic_size_t m_totalAppendCompleted{ 0 }; std::atomic_size_t m_totalAppendCount{ 0 }; std::atomic_size_t m_reassignJobsInFlight{ 0 }; @@ -218,6 +248,7 @@ namespace SPTAG::SPANN { size_t m_lastProgressLogQueueSize = std::numeric_limits::max(); size_t m_lastProgressLogSplit = std::numeric_limits::max(); size_t m_lastProgressLogMerge = std::numeric_limits::max(); + size_t m_lastProgressLogAppend = std::numeric_limits::max(); size_t m_lastProgressLogReassign = std::numeric_limits::max(); bool ShouldLogProgress(size_t totalJobs, bool force = false) { @@ -226,10 +257,12 @@ namespace SPTAG::SPANN { size_t splitJobs = m_splitJobsInFlight.load(); size_t mergeJobs = m_mergeJobsInFlight.load(); + size_t appendJobs = m_appendJobsInFlight.load(); size_t reassignJobs = m_reassignJobsInFlight.load(); bool queueChanged = (totalJobs != m_lastProgressLogQueueSize) || (splitJobs != m_lastProgressLogSplit) || (mergeJobs != m_lastProgressLogMerge) || + (appendJobs != m_lastProgressLogAppend) || (reassignJobs != m_lastProgressLogReassign); if (force) { @@ -237,6 +270,7 @@ namespace SPTAG::SPANN { m_lastProgressLogQueueSize = totalJobs; m_lastProgressLogSplit = splitJobs; m_lastProgressLogMerge = mergeJobs; + m_lastProgressLogAppend = appendJobs; m_lastProgressLogReassign = reassignJobs; return true; } @@ -251,6 +285,7 @@ namespace SPTAG::SPANN { m_lastProgressLogQueueSize = totalJobs; m_lastProgressLogSplit = splitJobs; m_lastProgressLogMerge = mergeJobs; + m_lastProgressLogAppend = appendJobs; m_lastProgressLogReassign = reassignJobs; } return shouldLog; @@ -894,14 +929,13 @@ namespace SPTAG::SPANN { } } } - if (requirelock) lock.unlock(); } m_stat.m_splitNum++; if (!m_opt->m_disableReassign) { auto reassignScanBegin = std::chrono::high_resolution_clock::now(); - CollectReAssign(p_exWorkSpace, headID, headVec, newPostingLists, newHeadsID, newHeadsVec, theSameHead, requirelock); + CollectReAssign(p_exWorkSpace, headID, headVec, newPostingLists, newHeadsID, newHeadsVec, theSameHead); auto reassignScanEnd = std::chrono::high_resolution_clock::now(); elapsedMSeconds = std::chrono::duration_cast(reassignScanEnd - reassignScanBegin).count(); @@ -1203,6 +1237,18 @@ namespace SPTAG::SPANN { m_splitThreadPool->add(curJob); } + inline void AppendAsync(SizeType headID, std::shared_ptr postingList, bool urgent = false,std::function p_callback = nullptr) + { + auto* curJob = new AppendAsyncJob(this, headID, std::move(postingList), p_callback); + m_appendJobsInFlight++; + m_totalAppendSubmitted++; + if (urgent) { + m_splitThreadPool->addfront(curJob); + } else { + m_splitThreadPool->add(curJob); + } + } + inline void ReassignAsync(std::shared_ptr vectorInfo, SizeType headPrev, std::function p_callback = nullptr) { auto* curJob = new ReassignAsyncJob(this, std::move(vectorInfo), headPrev, p_callback); @@ -1213,7 +1259,7 @@ namespace SPTAG::SPANN { ErrorCode CollectReAssign(ExtraWorkSpace *p_exWorkSpace, SizeType headID, std::shared_ptr headVec, std::vector &postingLists, std::vector &newHeadsID, std::vector> &newHeadsVec, - bool theSameHead, bool requirelock) + bool theSameHead) { auto headVector = reinterpret_cast(headVec->data() + m_metaDataSize); @@ -1369,11 +1415,7 @@ namespace SPTAG::SPANN { // SplitAsync (async) rather than synchronous Split, avoiding recursive deadlock: // Split -> CollectReAssign -> Append -> Split -> CollectReAssign -> ... for (auto& kv : batchReassign) { - int count = static_cast(kv.second.size() / m_vectorInfoSize); - ErrorCode ret = Append(p_exWorkSpace, kv.first, count, kv.second, 0, requirelock || m_rwLocks.hash_func(kv.first) != m_rwLocks.hash_func(headID)); - if (ret != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "BatchReassign Append failed for head %d, count %d\n", kv.first, count); - } + AppendAsync(kv.first, std::make_shared(kv.second), true); } if (batchReassignCount > 0) { m_totalReassignSubmitted += batchReassignCount; @@ -1424,7 +1466,7 @@ namespace SPTAG::SPANN { } - ErrorCode Append(ExtraWorkSpace* p_exWorkSpace, SizeType headID, int appendNum, std::string& appendPosting, int reassignThreshold = 0, bool requirelock = true) + ErrorCode Append(ExtraWorkSpace* p_exWorkSpace, SizeType headID, int appendNum, std::string& appendPosting, int reassignThreshold = 0) { auto appendBegin = std::chrono::high_resolution_clock::now(); if (appendPosting.empty()) { @@ -1458,8 +1500,7 @@ namespace SPTAG::SPANN { //std::shared_lock lock(m_rwLocks[headID]); //ROCKSDB // [DIAG] measure lock wait time (suspect A: lock contention) auto _lockBegin = std::chrono::high_resolution_clock::now(); - std::unique_lock lock(m_rwLocks[headID], std::defer_lock); //SPDK - if (requirelock) lock.lock(); + std::unique_lock lock(m_rwLocks[headID]); //SPDK auto _lockAcq = std::chrono::high_resolution_clock::now(); uint64_t _lockWaitUs = std::chrono::duration_cast(_lockAcq - _lockBegin).count(); IndexStats::HistAdd(m_stat.m_appendLockWaitUs, _lockWaitUs); @@ -1467,7 +1508,7 @@ namespace SPTAG::SPANN { ErrorCode ret; if (!m_headIndex->ContainSample(headID, m_layer + 1)) { - if (requirelock) lock.unlock(); + lock.unlock(); goto checkDeleted; } { @@ -1483,7 +1524,7 @@ namespace SPTAG::SPANN { ret = Split(p_exWorkSpace, headID, false); if (ret != ErrorCode::Success) SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split %lld failed!\n", (std::int64_t)headID); - if (requirelock) lock.unlock(); + lock.unlock(); goto checkDeleted; } @@ -1498,7 +1539,7 @@ namespace SPTAG::SPANN { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Split %lld failed!\n", (std::int64_t)headID); return ret; } - if (requirelock) lock.unlock(); + lock.unlock(); goto checkDeleted; } SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge failed for %lld! Posting Size:%d, limit: %d\n", (std::int64_t)headID, postingSize, m_postingSizeLimit); @@ -1519,7 +1560,6 @@ namespace SPTAG::SPANN { m_stat.m_appendPostingBytesTotal.fetch_add((uint64_t)postingSize, std::memory_order_relaxed); m_stat.m_appendRmwSampleCount.fetch_add(1, std::memory_order_relaxed); postingSize /= m_vectorInfoSize; - if (requirelock) lock.unlock(); } if (postingSize > (m_postingSizeLimit + reassignThreshold)) { // SizeType VID = *(int*)(&appendPosting[0]); @@ -2613,7 +2653,7 @@ namespace SPTAG::SPANN { double avgSplitMs = completed > 0 ? (m_totalSplitTimeUs.load() / 1000.0 / completed) : 0; double maxSplitMs = m_maxSplitTimeUs.load() / 1000.0; SPTAGLIB_LOG(Helper::LogLevel::LL_Info, - "layer %d pending queue:%zu split:%zu merge:%zu reassign:%zu running:%u | " + "layer %d pending queue:%zu split:%zu merge:%zu append:%zu reassign:%zu running:%u | " "total_submitted split:%zu merge:%zu reassign:%zu append:%zu | " "total_completed split:%zu merge:%zu reassign:%zu | " "split_latency avg:%.1fms max:%.1fms\n", @@ -2692,7 +2732,7 @@ namespace SPTAG::SPANN { size_t totalJobs = m_splitThreadPool ? m_splitThreadPool->jobsize() : 0; // if (!ShouldLogProgress(totalJobs)) return; SPTAGLIB_LOG(Helper::LogLevel::LL_Info, - "layer %d pending queue:%zu split:%zu merge:%zu reassign:%zu running:%u | " + "layer %d pending queue:%zu split:%zu merge:%zu append:%zu reassign:%zu running:%u | " "total_submitted split:%zu merge:%zu reassign:%zu append:%zu | " "total_completed split:%zu merge:%zu reassign:%zu | " "split_latency avg:%.1fms max:%.1fms\n", diff --git a/AnnService/inc/Helper/ThreadPool.h b/AnnService/inc/Helper/ThreadPool.h index 6aee44b30..01c82e2a7 100644 --- a/AnnService/inc/Helper/ThreadPool.h +++ b/AnnService/inc/Helper/ThreadPool.h @@ -5,7 +5,7 @@ #define _SPTAG_HELPER_THREADPOOL_H_ #include -#include +#include #include #include #include @@ -78,7 +78,16 @@ namespace SPTAG { { std::lock_guard lock(m_lock); - m_jobs.push(j); + m_jobs.push_back(j); + } + m_cond.notify_one(); + } + + void addfront(Job* j) + { + { + std::lock_guard lock(m_lock); + m_jobs.push_front(j); } m_cond.notify_one(); } @@ -90,7 +99,7 @@ namespace SPTAG if (!m_abort.ShouldAbort()) { j = m_jobs.front(); currentJobs++; - m_jobs.pop(); + m_jobs.pop_front(); return true; } return false; @@ -113,7 +122,7 @@ namespace SPTAG protected: std::atomic_uint32_t currentJobs{ 0 }; - std::queue m_jobs; + std::deque m_jobs; Abort m_abort; std::mutex m_lock; std::condition_variable m_cond; From b1e49d0bdbaec7b23fe9cdb3eb9ca0d133cf4fa7 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Tue, 12 May 2026 05:50:15 +0000 Subject: [PATCH 18/26] do splitAsync in Tikv --- AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h | 13 +++++++++---- Test/src/main.cpp | 7 +++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 7b041e2de..6db32439f 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -312,6 +312,7 @@ namespace SPTAG::SPANN { } // Initialize version map: TiKV-backed or local +#ifdef TIKV if (p_opt.m_storage == Storage::TIKVIO && p_opt.m_distributedVersionMap) { auto tikvMap = std::make_unique(); tikvMap->SetDB(db); @@ -322,7 +323,9 @@ namespace SPTAG::SPANN { m_versionMap = std::move(tikvMap); SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Using distributed TiKV VersionMap (layer=%d, chunkSize=%d, cacheTTL=%d, cacheMax=%d)\n", layer, p_opt.m_versionChunkSize, p_opt.m_versionCacheTTLMs, p_opt.m_versionCacheMaxChunks); - } else { + } else +#endif + { m_versionMap = std::make_unique(); SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Using local in-memory VersionMap (layer=%d)\n", layer); } @@ -1519,7 +1522,7 @@ namespace SPTAG::SPANN { } } - if (postingSize + appendNum > (m_postingSizeLimit + m_bufferSizeLimit)) { + if (postingSize + appendNum > (m_postingSizeLimit + m_bufferSizeLimit) && m_opt->m_storage == Storage::FILEIO) { //SPTAGLIB_LOG(Helper::LogLevel::LL_Debug, "After appending, the number of vectors in %lld exceeds the postingsize + buffersize (%d + %d)! Do split now...\n", (std::int64_t)headID, m_postingSizeLimit, m_bufferSizeLimit); ret = Split(p_exWorkSpace, headID, false); if (ret != ErrorCode::Success) @@ -1804,10 +1807,11 @@ namespace SPTAG::SPANN { SearchStats* p_stats, std::set* truth, std::map>* found) override { // Use coprocessor search if enabled and storage is TiKV +#ifdef TIKV if (m_opt->m_useCoprocessorSearch && m_opt->m_storage == Storage::TIKVIO) { return SearchIndexWithCoprocessor(p_exWorkSpace, p_queryResults, p_stats, truth, found); } - +#endif if (p_stats) p_stats->m_exSetUpLatency = 0; COMMON::QueryResultSet& queryResults = *((COMMON::QueryResultSet*) & p_queryResults); @@ -1929,6 +1933,7 @@ namespace SPTAG::SPANN { return ErrorCode::Success; } +#ifdef TIKV // Coprocessor-based search: push distance computation into TiKV. // Instead of fetching raw posting data, sends the query vector and // posting keys to TiKV, which reads postings locally, computes L2 @@ -2027,7 +2032,7 @@ namespace SPTAG::SPANN { queryResults.SetScanned(listElements); return ErrorCode::Success; } - +#endif virtual ErrorCode SearchIndexWithoutParsing(ExtraWorkSpace* p_exWorkSpace) { int retry = 0; diff --git a/Test/src/main.cpp b/Test/src/main.cpp index ab8d1342c..c1a5cde60 100644 --- a/Test/src/main.cpp +++ b/Test/src/main.cpp @@ -7,7 +7,9 @@ #include #include +#ifdef TIKV #include +#endif using namespace boost::unit_test; @@ -36,8 +38,9 @@ struct GlobalFixture // adds GraphCycles bookkeeping under a global spinlock on every Lock(); // observed to consume ~12% CPU under high worker-thread parallelism in // gRPC client paths (perf-recorded 2026-05-06). - absl::SetMutexDeadlockDetectionMode(absl::OnDeadlockCycle::kIgnore); - +#ifdef TIKV + absl::SetMutexDeadlockDetectionMode(absl::OnDeadlockCycle::kIgnore); +#endif SPTAGVisitor visitor; traverse_test_tree(framework::master_test_suite(), visitor, false); } From 9f7b34ad58489d8c752ca0bdcac58e418b34de24 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Thu, 14 May 2026 07:24:22 +0000 Subject: [PATCH 19/26] fix log --- AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 6db32439f..eb80ea91f 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -2663,7 +2663,7 @@ namespace SPTAG::SPANN { "total_completed split:%zu merge:%zu reassign:%zu | " "split_latency avg:%.1fms max:%.1fms\n", m_layer, totalJobs, m_splitJobsInFlight.load(), - m_mergeJobsInFlight.load(), m_reassignJobsInFlight.load(), runningJobs, + m_mergeJobsInFlight.load(), m_appendJobsInFlight.load(), m_reassignJobsInFlight.load(), runningJobs, m_totalSplitSubmitted.load(), m_totalMergeSubmitted.load(), m_totalReassignSubmitted.load(), m_totalAppendCount.load(), m_totalSplitCompleted.load(), m_totalMergeCompleted.load(), m_totalReassignCompleted.load(), avgSplitMs, maxSplitMs); From d2ba6f2205f36b02c5f9a0814dedff585e203f44 Mon Sep 17 00:00:00 2001 From: zqxjjj <471902072@qq.com> Date: Mon, 18 May 2026 06:49:58 +0000 Subject: [PATCH 20/26] Fix TiKV batch append lock ordering --- .../inc/Core/SPANN/ExtraDynamicSearcher.h | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h index 1ca68cda4..fe3d306a1 100644 --- a/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h +++ b/AnnService/inc/Core/SPANN/ExtraDynamicSearcher.h @@ -1671,27 +1671,39 @@ namespace SPTAG::SPANN { std::vector keys; std::vector values; - for (auto& kv : headAppends) + std::vector sortedHeadIDs; + sortedHeadIDs.reserve(headAppends.size()); + for (const auto& kv : headAppends) sortedHeadIDs.push_back(kv.first); + std::sort(sortedHeadIDs.begin(), sortedHeadIDs.end()); + + std::vector> heldLocks; + heldLocks.reserve(sortedHeadIDs.size()); + + for (SizeType headID : sortedHeadIDs) { - m_rwLocks[kv.first].lock(); + auto appendIt = headAppends.find(headID); + if (appendIt == headAppends.end()) continue; + + std::unique_lock headLock(m_rwLocks[headID]); - if (!m_headIndex->ContainSample(kv.first, m_layer + 1)) { - m_rwLocks[kv.first].unlock(); - for (std::uint8_t* ptr = (std::uint8_t*)(kv.second.data()); - ptr < (std::uint8_t*)(kv.second.data() + kv.second.size()); + if (!m_headIndex->ContainSample(headID, m_layer + 1)) { + headLock.unlock(); + for (std::uint8_t* ptr = (std::uint8_t*)(appendIt->second.data()); + ptr < (std::uint8_t*)(appendIt->second.data() + appendIt->second.size()); ptr += m_vectorInfoSize) { SizeType VID = *(SizeType*)(ptr); uint8_t version = *(uint8_t*)(ptr + sizeof(SizeType)); if (m_versionMap->GetVersion(VID) == version) { m_stat.m_headMiss++; - ReassignAsync(std::make_shared((char*)ptr, m_vectorInfoSize), kv.first, true); + ReassignAsync(std::make_shared((char*)ptr, m_vectorInfoSize), headID, true); } } continue; } - keys.push_back(kv.first); - values.push_back(kv.second); + keys.push_back(headID); + values.push_back(appendIt->second); + heldLocks.emplace_back(std::move(headLock)); } if (keys.empty()) return ErrorCode::Success; @@ -1704,17 +1716,14 @@ namespace SPTAG::SPANN { *dbkeys, values, MaxTimeout, &(p_exWorkSpace->m_diskRequests), postingSizes)) != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "MultiMerge failed!\n"); - for (int i = 0; i < keys.size(); i++) { - m_rwLocks[keys[i]].unlock(); - } GetDBStats(); return ret; } auto appendIOEnd = std::chrono::high_resolution_clock::now(); auto appendIOSeconds = std::chrono::duration_cast(appendIOEnd - appendIOBegin).count(); - for (int i = 0; i < keys.size(); i++) { - m_rwLocks[keys[i]].unlock(); + for (size_t i = 0; i < keys.size(); i++) { + heldLocks[i].unlock(); int postingSize = postingSizes[i]; if (postingSize % m_vectorInfoSize != 0) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, @@ -2979,7 +2988,7 @@ namespace SPTAG::SPANN { "total_completed split:%zu merge:%zu reassign:%zu | " "split_latency avg:%.1fms max:%.1fms\n", m_layer, totalJobs, - m_splitJobsInFlight.load(), m_mergeJobsInFlight.load(), m_reassignJobsInFlight.load(), + m_splitJobsInFlight.load(), m_mergeJobsInFlight.load(), m_appendJobsInFlight.load(), m_reassignJobsInFlight.load(), m_splitThreadPool ? static_cast(m_splitThreadPool->runningJobs()) : 0, m_totalSplitSubmitted.load(), m_totalMergeSubmitted.load(), m_totalReassignSubmitted.load(), m_totalAppendCount.load(), m_totalSplitCompleted.load(), m_totalMergeCompleted.load(), m_totalReassignCompleted.load(), From 214457d03f5030516035ab20c5ca51c0189353f7 Mon Sep 17 00:00:00 2001 From: zqxjjj <471902072@qq.com> Date: Mon, 18 May 2026 07:50:00 +0000 Subject: [PATCH 21/26] Guard TiKV multi-chunk count fetch failures --- AnnService/inc/Core/SPANN/ExtraTiKVController.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/AnnService/inc/Core/SPANN/ExtraTiKVController.h b/AnnService/inc/Core/SPANN/ExtraTiKVController.h index 5bbb6642a..2d15eb6a5 100644 --- a/AnnService/inc/Core/SPANN/ExtraTiKVController.h +++ b/AnnService/inc/Core/SPANN/ExtraTiKVController.h @@ -1206,8 +1206,22 @@ namespace SPTAG::SPANN sizes.resize(keys.size()); if (m_useMultiChunkPosting) { std::vector fetchedCounts; - AsyncGetPostingCounts(keys, &fetchedCounts, + ErrorCode countRet = AsyncGetPostingCounts(keys, &fetchedCounts, std::chrono::microseconds(5000000)); + if (countRet != ErrorCode::Success || fetchedCounts.size() != keys.size()) { + if (fetchedCounts.size() != keys.size()) fetchedCounts.assign(keys.size(), -1); + for (size_t i = 0; i < keys.size(); i++) { + if (fetchedCounts[i] >= 0) continue; + fetchedCounts[i] = GetPostingCount(keys[i], MaxTimeout); + } + } + for (size_t i = 0; i < keys.size(); i++) { + if (fetchedCounts[i] < 0) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, + "TiKVIO::MultiMerge failed to fetch posting count headID=%d\n", keys[i]); + return ErrorCode::Fail; + } + } auto batch = std::make_shared(); batch->Add(static_cast(keys.size())); From e971283cb303e35767100bf797d2401d8ab99bd4 Mon Sep 17 00:00:00 2001 From: zqxjjj <471902072@qq.com> Date: Mon, 18 May 2026 08:06:09 +0000 Subject: [PATCH 22/26] Fail multi-chunk base writes on count update errors --- .../inc/Core/SPANN/ExtraTiKVController.h | 94 ++++++++----------- 1 file changed, 41 insertions(+), 53 deletions(-) diff --git a/AnnService/inc/Core/SPANN/ExtraTiKVController.h b/AnnService/inc/Core/SPANN/ExtraTiKVController.h index 2d15eb6a5..dc9424a1f 100644 --- a/AnnService/inc/Core/SPANN/ExtraTiKVController.h +++ b/AnnService/inc/Core/SPANN/ExtraTiKVController.h @@ -653,16 +653,12 @@ namespace SPTAG::SPANN SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "PutPostingToDB: DeletePosting failed for key %d\n", key); return delRet; } - auto ret = PutBaseChunk(key, value, timeout, reqs); + int count = static_cast(value.size()); + auto ret = PutBaseChunkAndCount(key, value, count, timeout, reqs); if (ret != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "PutPostingToDB: PutBaseChunk failed for key %d\n", key); + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "PutPostingToDB: PutBaseChunkAndCount failed for key %d\n", key); return ret; } - int count = static_cast(value.size()); - auto countRet = SetPostingCount(key, count, timeout); - if (countRet != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, "PutPostingToDB: SetPostingCount failed for key %d (data written OK)\n", key); - } if (m_postingCountCache) m_postingCountCache->Put(key, count); return ErrorCode::Success; } @@ -2258,8 +2254,10 @@ namespace SPTAG::SPANN // Same as PutChunkAndCount but writes the BASE chunk (no timestamp suffix). // Used by PutPostingToDB compaction path: replaces (overwrites) the base - // chunk and updates the count in a single RawBatchPut RPC. Saves one - // round trip vs separate PutBaseChunk + SetPostingCount. + // chunk and updates the count in a single RawBatchPut RPC. Do not fall + // back to separate writes here: count is required metadata for + // multi-chunk postings, so partial base/count updates must surface as + // failures instead of silently corrupting future append counts. ErrorCode PutBaseChunkAndCount(SizeType headID, const std::string& chunkValue, int newCount, @@ -2271,56 +2269,46 @@ namespace SPTAG::SPANN { auto stub = GetStubForKey(chunkKey); - if (stub) { - kvrpcpb::RawBatchPutRequest request; - SetContext(request.mutable_context(), chunkKey); + if (!stub) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, + "TiKVIO::PutBaseChunkAndCount missing TiKV stub headID=%d\n", headID); + return ErrorCode::Fail; + } - auto* p1 = request.add_pairs(); - p1->set_key(chunkKey); - p1->set_value(chunkValue); + kvrpcpb::RawBatchPutRequest request; + SetContext(request.mutable_context(), chunkKey); - auto* p2 = request.add_pairs(); - p2->set_key(countKey); - p2->set_value(countValue); + auto* p1 = request.add_pairs(); + p1->set_key(chunkKey); + p1->set_value(chunkValue); - kvrpcpb::RawBatchPutResponse response; - grpc::ClientContext ctx; - SetDeadline(ctx, timeout); + auto* p2 = request.add_pairs(); + p2->set_key(countKey); + p2->set_value(countValue); - auto status = stub->RawBatchPut(&ctx, request, &response); - if (status.ok() && !response.has_region_error() && response.error().empty()) { - return ErrorCode::Success; - } - if (!status.ok()) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, - "TiKVIO::PutBaseChunkAndCount BatchPut gRPC error headID=%d: %s, falling back\n", - headID, status.error_message().c_str()); - } else if (response.has_region_error()) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Info, - "TiKVIO::PutBaseChunkAndCount BatchPut region_error headID=%d, falling back\n", headID); - } else { - SPTAGLIB_LOG(Helper::LogLevel::LL_Error, - "TiKVIO::PutBaseChunkAndCount error: %s\n", response.error().c_str()); - } - InvalidateRegionCache(chunkKey); - InvalidateRegionCache(countKey); - } - } + kvrpcpb::RawBatchPutResponse response; + grpc::ClientContext ctx; + SetDeadline(ctx, timeout); - // Fallback: write chunk and count separately. - auto ret1 = RawPutWithRetry(chunkKey, chunkValue, timeout); - if (ret1 != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, - "TiKVIO::PutBaseChunkAndCount fallback: PutBaseChunk failed headID=%d\n", headID); - return ret1; - } - auto ret2 = RawPutWithRetry(countKey, countValue, timeout); - if (ret2 != ErrorCode::Success) { - SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, - "TiKVIO::PutBaseChunkAndCount fallback: PutCount failed headID=%d\n", headID); - return ret2; + auto status = stub->RawBatchPut(&ctx, request, &response); + if (status.ok() && !response.has_region_error() && response.error().empty()) { + return ErrorCode::Success; + } + if (!status.ok()) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Warning, + "TiKVIO::PutBaseChunkAndCount BatchPut gRPC error headID=%d: %s\n", + headID, status.error_message().c_str()); + } else if (response.has_region_error()) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Info, + "TiKVIO::PutBaseChunkAndCount BatchPut region_error headID=%d\n", headID); + } else { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, + "TiKVIO::PutBaseChunkAndCount error: %s\n", response.error().c_str()); + } + InvalidateRegionCache(chunkKey); + InvalidateRegionCache(countKey); } - return ErrorCode::Success; + return ErrorCode::Fail; } // Multi-posting scan: read multiple postings in parallel. From 04428e530b0e28db2c38a380e1d5bedc32fefb07 Mon Sep 17 00:00:00 2001 From: Qianxi Zhang Date: Mon, 18 May 2026 16:14:24 +0800 Subject: [PATCH 23/26] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- AnnService/inc/Core/SPANN/ExtraTiKVController.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/AnnService/inc/Core/SPANN/ExtraTiKVController.h b/AnnService/inc/Core/SPANN/ExtraTiKVController.h index dc9424a1f..6dd9f2bda 100644 --- a/AnnService/inc/Core/SPANN/ExtraTiKVController.h +++ b/AnnService/inc/Core/SPANN/ExtraTiKVController.h @@ -805,7 +805,12 @@ namespace SPTAG::SPANN { static std::atomic _logOnce{0}; if (_logOnce.fetch_add(1) == 0) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "[PATH] Append using SINGLE-KEY Get+Put path (no multi-chunk)\n"); } std::string fullPosting; auto ret = Get(key, &fullPosting, MaxTimeout, reqs); - if (ret != ErrorCode::Success) fullPosting.clear(); + if (ret == ErrorCode::KeyNotFound) { + fullPosting.clear(); + } else if (ret != ErrorCode::Success) { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge failed to read existing posting for %lld before append.\n", (std::int64_t)key); + return ret; + } fullPosting.append(value); size = static_cast(fullPosting.size()); From ff9113536c37e58dad98607699b19e9e52182de1 Mon Sep 17 00:00:00 2001 From: Qianxi Zhang Date: Mon, 18 May 2026 16:15:48 +0800 Subject: [PATCH 24/26] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- AnnService/inc/Helper/KeyValueIO.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/AnnService/inc/Helper/KeyValueIO.h b/AnnService/inc/Helper/KeyValueIO.h index 210baf717..a7c3c25b8 100644 --- a/AnnService/inc/Helper/KeyValueIO.h +++ b/AnnService/inc/Helper/KeyValueIO.h @@ -40,6 +40,9 @@ namespace SPTAG virtual ErrorCode MultiMerge(const std::vector& keys, const std::vector& values, const std::chrono::microseconds& timeout, std::vector* reqs, std::vector& sizes) { + if (keys.size() != values.size()) { + return ErrorCode::Undefined; + } sizes.resize(keys.size()); for (size_t i = 0; i < keys.size(); i++) { auto err = Merge(keys[i], values[i], timeout, reqs, sizes[i]); From 36f1052b84a96e1da1fe2f4ef4692bc4257c0f31 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Mon, 18 May 2026 12:14:09 +0000 Subject: [PATCH 25/26] add distributed clustering and build --- AnnService/inc/Core/SPANN/Options.h | 1 + .../inc/Core/SPANN/ParameterDefinitionList.h | 1 + AnnService/src/BalancedDataPartition/main.cpp | 132 +++++++++++++----- AnnService/src/Core/SPANN/SPANNIndex.cpp | 5 +- 4 files changed, 104 insertions(+), 35 deletions(-) diff --git a/AnnService/inc/Core/SPANN/Options.h b/AnnService/inc/Core/SPANN/Options.h index 5c9c61a6f..2c9c8865e 100644 --- a/AnnService/inc/Core/SPANN/Options.h +++ b/AnnService/inc/Core/SPANN/Options.h @@ -34,6 +34,7 @@ namespace SPTAG { std::string m_truthPath; TruthFileType m_truthType; bool m_generateTruth; + std::string m_globalIDPath; std::string m_indexDirectory; std::string m_headIDFile; std::string m_headVectorFile; diff --git a/AnnService/inc/Core/SPANN/ParameterDefinitionList.h b/AnnService/inc/Core/SPANN/ParameterDefinitionList.h index b96895e82..50823168d 100644 --- a/AnnService/inc/Core/SPANN/ParameterDefinitionList.h +++ b/AnnService/inc/Core/SPANN/ParameterDefinitionList.h @@ -23,6 +23,7 @@ DefineBasicParameter(m_warmupDelimiter, std::string, std::string("|"), "WarmupDe DefineBasicParameter(m_truthPath, std::string, std::string(""), "TruthPath") DefineBasicParameter(m_truthType, SPTAG::TruthFileType, SPTAG::TruthFileType::Undefined, "TruthType") DefineBasicParameter(m_generateTruth, bool, false, "GenerateTruth") // Mutable +DefineBasicParameter(m_globalIDPath, std::string, std::string(""), "GlobalIDPath") DefineBasicParameter(m_indexDirectory, std::string, std::string("SPANN"), "IndexDirectory") DefineBasicParameter(m_headIDFile, std::string, std::string("SPTAGHeadVectorIDs.bin"), "HeadVectorIDs") DefineBasicParameter(m_deleteIDFile, std::string, std::string("DeletedIDs.bin"), "DeletedIDs") diff --git a/AnnService/src/BalancedDataPartition/main.cpp b/AnnService/src/BalancedDataPartition/main.cpp index 886b10790..7bddc0ac8 100644 --- a/AnnService/src/BalancedDataPartition/main.cpp +++ b/AnnService/src/BalancedDataPartition/main.cpp @@ -21,6 +21,11 @@ using namespace SPTAG; } typedef short LabelType; +#ifndef LARGEVID +#define MPIVIDTYPE MPI_INT +#else +#define MPIVIDTYPE MPI_LONG_LONG +#endif class PartitionOptions : public Helper::ReaderOptions { @@ -36,6 +41,7 @@ class PartitionOptions : public Helper::ReaderOptions AddOptionalOption(m_distMethod, "-m", "--dist", "Distance method (L2 or Cosine)."); AddOptionalOption(m_outdir, "-o", "--outdir", "Output directory."); AddOptionalOption(m_weightfile, "-w", "--weight", "vector weight file."); + AddOptionalOption(m_gidfile, "-gid", "--gid", "global id file."); AddOptionalOption(m_wlambda, "-lw", "--wlambda", "lambda for balanced weight level."); AddOptionalOption(m_seed, "-e", "--seed", "Random seed."); AddOptionalOption(m_initIter, "-x", "--init", "Number of iterations for initialization."); @@ -82,6 +88,7 @@ class PartitionOptions : public Helper::ReaderOptions std::string m_outfile = "vectors.bin"; std::string m_outmetafile = "meta.bin"; std::string m_outmetaindexfile = "metaindex.bin"; + std::string m_gidfile = "-"; std::string m_weightfile = "-"; std::string m_stage = "Clustering"; std::string m_status = "."; @@ -495,6 +502,19 @@ template void Process(MPI_Datatype type) win.read((char *)weights.data(), sizeof(float) * rows); win.close(); } + + std::shared_ptr> globalids = nullptr; + if (options.m_gidfile.compare("-") != 0) + { + options.m_gidfile = Helper::StrUtils::ReplaceAll(options.m_gidfile, "*", std::to_string(rank)); + globalids = std::make_shared>(); + if (ErrorCode::Success != globalids->Load(options.m_gidfile, 1024 * 1024, vectors->Count() + 1)) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Rank %d failed to read global ID file %s.\n", rank, + options.m_gidfile.c_str()); + exit(1); + } + } COMMON::Dataset data(vectors->Count(), vectors->Dimension(), 1024 * 1024, vectors->Count() + 1, (T *)vectors->GetData()); COMMON::KmeansArgs args(options.m_clusterNum, vectors->Dimension(), vectors->Count(), options.m_threadNum, @@ -664,7 +684,8 @@ template void Process(MPI_Datatype type) std::string metafile = options.m_outdir + "/" + options.m_outmetafile + "." + std::to_string(i); std::string metaindexfile = options.m_outdir + "/" + options.m_outmetaindexfile + "." + std::to_string(i); - std::shared_ptr out = f_createIO(), metaout = f_createIO(), metaindexout = f_createIO(); + std::string gidfile = options.m_outdir + "/" + options.m_gidfile + "." + std::to_string(i); + std::shared_ptr out = f_createIO(), metaout = f_createIO(), metaindexout = f_createIO(), gidout = f_createIO(); if (out == nullptr || !out->Initialize(vecfile.c_str(), std::ios::binary | std::ios::out)) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Cannot open %s to write.\n", vecfile.c_str()); @@ -681,12 +702,18 @@ template void Process(MPI_Datatype type) SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Cannot open %s to write.\n", metaindexfile.c_str()); exit(1); } - - CHECKIO(out, WriteBinary, sizeof(int), (char *)(&args.counts[i])); - CHECKIO(out, WriteBinary, sizeof(int), (char *)(&args._D)); + if (globalids != nullptr && (gidout == nullptr || !gidout->Initialize(gidfile.c_str(), std::ios::binary | std::ios::out))) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Cannot open %s to write.\n", gidfile.c_str()); + exit(1); + } + CHECKIO(out, WriteBinary, sizeof(SizeType), (char *)(&args.counts[i])); + CHECKIO(out, WriteBinary, sizeof(DimensionType), (char *)(&args._D)); if (metas != nullptr) - CHECKIO(metaindexout, WriteBinary, sizeof(int), (char *)(&args.counts[i])); - + CHECKIO(metaindexout, WriteBinary, sizeof(SizeType), (char *)(&args.counts[i])); + if (globalids != nullptr) { + CHECKIO(gidout, WriteBinary, sizeof(SizeType), (char *)(&args.counts[i])); + } std::uint64_t offset = 0; T *recvbuf = args.newTCenters; int recvmetabuflen = 200; @@ -696,9 +723,9 @@ template void Process(MPI_Datatype type) uint64_t offset_before = offset; if (j != rank) { - int recv = 0; - MPI_Recv(&recv, 1, MPI_INT, j, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - for (int k = 0; k < recv; k++) + SizeType recv = 0; + MPI_Recv(&recv, 1, MPIVIDTYPE, j, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + for (SizeType k = 0; k < recv; k++) { MPI_Recv(recvbuf, args._D, type, j, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE); CHECKIO(out, WriteBinary, sizeof(T) * args._D, (char *)recvbuf); @@ -719,6 +746,12 @@ template void Process(MPI_Datatype type) CHECKIO(metaindexout, WriteBinary, sizeof(std::uint64_t), (char *)(&offset)); offset += len; } + if (globalids != nullptr) + { + SizeType gid; + MPI_Recv(&gid, 1, MPIVIDTYPE, j, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + CHECKIO(gidout, WriteBinary, sizeof(SizeType), (char *)(&gid)); + } } SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "rank %d <- rank %d: %d vectors, %llu bytes meta\n", rank, j, recv, (offset - offset_before)); @@ -726,9 +759,9 @@ template void Process(MPI_Datatype type) else { size_t total_rec = 0; - for (int k = 0; k < data.R(); k++) + for (SizeType k = 0; k < data.R(); k++) { - for (int kk = 0; kk < label.C(); kk++) + for (DimensionType kk = 0; kk < label.C(); kk++) { if (label[k][kk] == (LabelType)i) { @@ -740,6 +773,10 @@ template void Process(MPI_Datatype type) CHECKIO(metaindexout, WriteBinary, sizeof(std::uint64_t), (char *)(&offset)); offset += meta.Length(); } + if (globalids != nullptr) { + SizeType gid = *((*globalids)[localindices[k]]); + CHECKIO(gidout, WriteBinary, sizeof(SizeType), (char *)(&gid)); + } total_rec++; } } @@ -754,16 +791,17 @@ template void Process(MPI_Datatype type) out->ShutDown(); metaout->ShutDown(); metaindexout->ShutDown(); + if (globalids != nullptr) gidout->ShutDown(); } else { int dest = i % size; - MPI_Send(&args.newCounts[i], 1, MPI_INT, dest, 0, MPI_COMM_WORLD); + MPI_Send(&args.newCounts[i], 1, MPIVIDTYPE, dest, 0, MPI_COMM_WORLD); size_t total_len = 0; size_t total_rec = 0; - for (int j = 0; j < data.R(); j++) + for (SizeType j = 0; j < data.R(); j++) { - for (int kk = 0; kk < label.C(); kk++) + for (DimensionType kk = 0; kk < label.C(); kk++) { if (label[j][kk] == (LabelType)i) { @@ -776,6 +814,10 @@ template void Process(MPI_Datatype type) MPI_Send(meta.Data(), len, MPI_CHAR, dest, 3, MPI_COMM_WORLD); total_len += len; } + if (globalids != nullptr) { + SizeType gid = *((*globalids)[localindices[j]]); + MPI_Send(&gid, 1, MPIVIDTYPE, dest, 4, MPI_COMM_WORLD); + } total_rec++; } } @@ -825,12 +867,12 @@ ErrorCode SyncSaveCenter(COMMON::KmeansArgs &args, int rank, int iteration, u CHECKIO(out, WriteBinary, sizeof(float) * args._K * args._D, (const char *)args.newCenters); if (assign) { - CHECKIO(out, WriteBinary, sizeof(int) * args._K, (const char *)args.counts); + CHECKIO(out, WriteBinary, sizeof(SizeType) * args._K, (const char *)args.counts); CHECKIO(out, WriteBinary, sizeof(float) * args._K, (const char *)args.weightedCounts); } else { - CHECKIO(out, WriteBinary, sizeof(int) * args._K, (const char *)args.newCounts); + CHECKIO(out, WriteBinary, sizeof(SizeType) * args._K, (const char *)args.newCounts); CHECKIO(out, WriteBinary, sizeof(float) * args._K, (const char *)args.newWeightedCounts); } out->ShutDown(); @@ -898,7 +940,7 @@ ErrorCode SyncLoadCenter(COMMON::KmeansArgs &args, int rank, int iteration, u } memset(args.newCenters, 0, sizeof(float) * args._K * args._D); - memset(args.counts, 0, sizeof(int) * args._K); + memset(args.counts, 0, sizeof(SizeType) * args._K); memset(args.weightedCounts, 0, sizeof(float) * args._K); std::unique_ptr buf(new char[sizeof(float) * args._K * args._D]); unsigned long long localCount; @@ -926,10 +968,10 @@ ErrorCode SyncLoadCenter(COMMON::KmeansArgs &args, int rank, int iteration, u for (int i = 0; i < args._K * args._D; i++) args.newCenters[i] += *((float *)(buf.get()) + i); - CHECKIO(input, ReadBinary, sizeof(int) * args._K, buf.get()); + CHECKIO(input, ReadBinary, sizeof(SizeType) * args._K, buf.get()); for (int i = 0; i < args._K; i++) { - int partsize = *((int *)(buf.get()) + i); + SizeType partsize = *((SizeType *)(buf.get()) + i); if (partsize >= 0 && args.counts[i] <= MaxSize - partsize) args.counts[i] += partsize; else @@ -1181,7 +1223,16 @@ template void Partition() SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed to read labels.\n"); exit(1); } - + std::shared_ptr> globalids = nullptr; + if (options.m_gidfile.compare("-") != 0) + { + globalids = std::make_shared>(); + if (ErrorCode::Success != globalids->Load(options.m_gidfile, 1024 * 1024, vectors->Count() + 1)) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Failed to read global ID file %s.\n", options.m_gidfile.c_str()); + exit(1); + } + } std::string taskId = options.m_labels.substr(options.m_labels.rfind(".") + 1); for (int i = 0; i < options.m_clusterNum; i++) { @@ -1189,7 +1240,8 @@ template void Partition() std::string metafile = options.m_outdir + "/" + options.m_outmetafile + "." + taskId + "." + std::to_string(i); std::string metaindexfile = options.m_outdir + "/" + options.m_outmetaindexfile + "." + taskId + "." + std::to_string(i); - std::shared_ptr out = f_createIO(), metaout = f_createIO(), metaindexout = f_createIO(); + std::string gidfile = options.m_outdir + "/" + options.m_gidfile + "." + taskId + "." + std::to_string(i); + std::shared_ptr out = f_createIO(), metaout = f_createIO(), metaindexout = f_createIO(), gidout = f_createIO(); if (out == nullptr || !out->Initialize(vecfile.c_str(), std::ios::binary | std::ios::out)) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Cannot open %s to write.\n", vecfile.c_str()); @@ -1206,18 +1258,25 @@ template void Partition() SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Cannot open %s to write.\n", metaindexfile.c_str()); exit(1); } - - int rows = data.R(), cols = data.C(); - CHECKIO(out, WriteBinary, sizeof(int), (char *)(&rows)); - CHECKIO(out, WriteBinary, sizeof(int), (char *)(&cols)); + if (globalids != nullptr && (gidout == nullptr || !gidout->Initialize(gidfile.c_str(), std::ios::binary | std::ios::out))) + { + SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Cannot open %s to write.\n", gidfile.c_str()); + exit(1); + } + SizeType rows = data.R(); + DimensionType cols = data.C(); + CHECKIO(out, WriteBinary, sizeof(SizeType), (char *)(&rows)); + CHECKIO(out, WriteBinary, sizeof(DimensionType), (char *)(&cols)); if (metas != nullptr) - CHECKIO(metaindexout, WriteBinary, sizeof(int), (char *)(&rows)); - + CHECKIO(metaindexout, WriteBinary, sizeof(SizeType), (char *)(&rows)); + if (globalids != nullptr) { + CHECKIO(gidout, WriteBinary, sizeof(SizeType), (char *)(&rows)); + } std::uint64_t offset = 0; - int records = 0; - for (int k = 0; k < data.R(); k++) + SizeType records = 0; + for (SizeType k = 0; k < data.R(); k++) { - for (int kk = 0; kk < label.C(); kk++) + for (DimensionType kk = 0; kk < label.C(); kk++) { if (label[k][kk] == (LabelType)i) { @@ -1229,6 +1288,10 @@ template void Partition() CHECKIO(metaindexout, WriteBinary, sizeof(std::uint64_t), (char *)(&offset)); offset += meta.Length(); } + if (globalids != nullptr) { + SizeType gid = *((*globalids)[k]); + CHECKIO(gidout, WriteBinary, sizeof(SizeType), (char *)(&gid)); + } records++; } } @@ -1238,12 +1301,15 @@ template void Partition() if (metas != nullptr) CHECKIO(metaindexout, WriteBinary, sizeof(std::uint64_t), (char *)(&offset)); - CHECKIO(out, WriteBinary, sizeof(int), (char *)(&records), 0); - CHECKIO(metaindexout, WriteBinary, sizeof(int), (char *)(&records), 0); - + CHECKIO(out, WriteBinary, sizeof(SizeType), (char *)(&records), 0); + CHECKIO(metaindexout, WriteBinary, sizeof(SizeType), (char *)(&records), 0); + if (globalids != nullptr) { + CHECKIO(gidout, WriteBinary, sizeof(SizeType), (char *)(&records), 0); + } out->ShutDown(); metaout->ShutDown(); metaindexout->ShutDown(); + if (globalids != nullptr) gidout->ShutDown(); } } diff --git a/AnnService/src/Core/SPANN/SPANNIndex.cpp b/AnnService/src/Core/SPANN/SPANNIndex.cpp index 47e033ed1..f3f83dca6 100644 --- a/AnnService/src/Core/SPANN/SPANNIndex.cpp +++ b/AnnService/src/Core/SPANN/SPANNIndex.cpp @@ -1101,13 +1101,14 @@ template ErrorCode Index::BuildIndexInternalLayer(std::shared_pt COMMON::Dataset localToGlobalID; { SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "Loading headIDFile for layer %d...\n", currentLayer - 1); + std::string localToGlobalIDPath = (currentLayer == 0)? m_options.m_globalIDPath : m_options.m_indexDirectory + FolderSep + m_options.m_headIDFile; std::shared_ptr ptr = SPTAG::f_createIO(); if (ptr == nullptr || - !ptr->Initialize((m_options.m_indexDirectory + FolderSep + m_options.m_headIDFile).c_str(), + !ptr->Initialize(localToGlobalIDPath.c_str(), std::ios::binary | std::ios::in)) { SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "No headIDFile file:%s\n", - (m_options.m_indexDirectory + FolderSep + m_options.m_headIDFile).c_str()); + localToGlobalIDPath.c_str()); } else { localToGlobalID.Load(ptr, this->m_iDataBlockSize, this->m_iDataCapacity); From 7db01dedfaa99b0659fdfcb44d833effe1cc5a34 Mon Sep 17 00:00:00 2001 From: zqxjjj <471902072@qq.com> Date: Tue, 19 May 2026 00:57:02 +0000 Subject: [PATCH 26/26] Fix TiKV Key_NotFound handling in append path --- AnnService/inc/Core/SPANN/ExtraTiKVController.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AnnService/inc/Core/SPANN/ExtraTiKVController.h b/AnnService/inc/Core/SPANN/ExtraTiKVController.h index 6dd9f2bda..d7528d479 100644 --- a/AnnService/inc/Core/SPANN/ExtraTiKVController.h +++ b/AnnService/inc/Core/SPANN/ExtraTiKVController.h @@ -805,7 +805,7 @@ namespace SPTAG::SPANN { static std::atomic _logOnce{0}; if (_logOnce.fetch_add(1) == 0) SPTAGLIB_LOG(Helper::LogLevel::LL_Info, "[PATH] Append using SINGLE-KEY Get+Put path (no multi-chunk)\n"); } std::string fullPosting; auto ret = Get(key, &fullPosting, MaxTimeout, reqs); - if (ret == ErrorCode::KeyNotFound) { + if (ret == ErrorCode::Key_NotFound) { fullPosting.clear(); } else if (ret != ErrorCode::Success) { SPTAGLIB_LOG(Helper::LogLevel::LL_Error, "Merge failed to read existing posting for %lld before append.\n", (std::int64_t)key);