Skip to content

Commit 3b2d489

Browse files
pbhandar2meta-codesync[bot]
authored andcommitted
Add edge-case tests for AccessTimeMap shm persistence
Summary: Added the following unit tests: - `RecoverMissingDataSegment`: info segment exists but data segment is gone — verifies the data `attachShm` exception path cleans up the orphaned info segment and leaves the map empty. - `RecoverDataSegmentTooSmall`: info segment claims more entries than the data segment can hold — verifies the `dataAddr.size < expectedSize` guard. - `RecoverCleansUpSegments`: verifies that after a successful recovery, both shm segments are removed so no stale segments leak across subsequent warm restarts. - `PersistOverwritesPriorSegments`: verifies that calling `persist()` twice replaces (not appends to) the prior segments. - `AccessTimeMapSurvivesWarmRoll`: verifies that NVM cache is able to persist and recover ATM on warm roll. Reviewed By: rlyerly Differential Revision: D95332178 fbshipit-source-id: c6121c3d7e8f0b1b58c7e3ea4974bdadbd5a1055
1 parent aad07a8 commit 3b2d489

2 files changed

Lines changed: 266 additions & 0 deletions

File tree

cachelib/allocator/nvmcache/tests/AccessTimeMapTest.cpp

Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,202 @@ TEST(AccessTimeMapTest, RecoverNoShm) {
422422
ShmManager::cleanup(shmDir, true);
423423
}
424424

425+
TEST(AccessTimeMapTest, RecoverMissingDataSegment) {
426+
const auto shmDir =
427+
"/tmp/atm_missing_data_test_" + std::to_string(::getpid());
428+
constexpr size_t kNumShards = 4;
429+
430+
// Populate and persist, then remove the data segment
431+
{
432+
ShmManager shm(shmDir, true);
433+
AccessTimeMap m(kNumShards);
434+
435+
m.set(10, 100);
436+
m.set(20, 200);
437+
m.persist(shm);
438+
439+
// Remove only the data segment, leaving the info segment intact
440+
shm.removeShm(std::string(AccessTimeMap::kShmDataName));
441+
shm.shutDown();
442+
}
443+
444+
// Recovery should detect missing data segment and start empty
445+
{
446+
ShmManager shm(shmDir, true);
447+
AccessTimeMap m(kNumShards);
448+
m.recover(shm);
449+
EXPECT_EQ(m.size(), 0);
450+
shm.shutDown();
451+
}
452+
453+
ShmManager::cleanup(shmDir, true);
454+
}
455+
456+
TEST(AccessTimeMapTest, RecoverDataSegmentTooSmall) {
457+
const auto shmDir = "/tmp/atm_small_data_test_" + std::to_string(::getpid());
458+
constexpr size_t kNumShards = 4;
459+
460+
// Populate and persist, then overwrite the info segment to claim more
461+
// entries than the data segment can hold. The data segment created by
462+
// persist() for 3 entries is page-aligned to 4096 bytes. Setting
463+
// entryCount to 500 makes expectedSize = 500 * sizeof(ShmEntry) = 6000,
464+
// which exceeds the 4096-byte data segment.
465+
{
466+
ShmManager shm(shmDir, true);
467+
AccessTimeMap m(kNumShards);
468+
469+
m.set(10, 100);
470+
m.set(20, 200);
471+
m.set(30, 300);
472+
m.persist(shm);
473+
474+
// Overwrite the info segment with an inflated entryCount
475+
shm.removeShm(std::string(AccessTimeMap::kShmInfoName));
476+
navy::serialization::AccessTimeMapConfig cfg;
477+
*cfg.version() = static_cast<int32_t>(AccessTimeMap::kVersion);
478+
*cfg.numShards() = static_cast<int64_t>(kNumShards);
479+
*cfg.maxSize() = 0;
480+
*cfg.entryCount() = 500; // data segment is only 4096 bytes
481+
482+
auto ioBuf = Serializer::serializeToIOBuf(cfg);
483+
auto infoAddr = shm.createShm(std::string(AccessTimeMap::kShmInfoName),
484+
ioBuf->length());
485+
Serializer serializer(
486+
reinterpret_cast<uint8_t*>(infoAddr.addr),
487+
reinterpret_cast<uint8_t*>(infoAddr.addr) + ioBuf->length());
488+
serializer.writeToBuffer(std::move(ioBuf));
489+
490+
shm.shutDown();
491+
}
492+
493+
// Recovery should detect size mismatch and start empty
494+
{
495+
ShmManager shm(shmDir, true);
496+
AccessTimeMap m(kNumShards);
497+
m.recover(shm);
498+
EXPECT_EQ(m.size(), 0);
499+
shm.shutDown();
500+
}
501+
502+
ShmManager::cleanup(shmDir, true);
503+
}
504+
505+
TEST(AccessTimeMapTest, RecoverCleansUpSegments) {
506+
const auto shmDir = "/tmp/atm_cleanup_test_" + std::to_string(::getpid());
507+
constexpr size_t kNumShards = 4;
508+
constexpr size_t kMaxSize = 1000;
509+
510+
// Populate and persist
511+
{
512+
ShmManager shm(shmDir, true);
513+
AccessTimeMap m(kNumShards, kMaxSize);
514+
515+
m.set(10, 100);
516+
m.set(20, 200);
517+
m.persist(shm);
518+
shm.shutDown();
519+
}
520+
521+
// Recover and verify entries loaded, then check segments are gone
522+
{
523+
ShmManager shm(shmDir, true);
524+
AccessTimeMap m(kNumShards, kMaxSize);
525+
m.recover(shm);
526+
ASSERT_EQ(m.size(), 2);
527+
EXPECT_EQ(m.get(10), 100);
528+
EXPECT_EQ(m.get(20), 200);
529+
530+
// Segments should have been removed after successful recovery
531+
EXPECT_THROW(shm.attachShm(std::string(AccessTimeMap::kShmInfoName)),
532+
std::invalid_argument);
533+
EXPECT_THROW(shm.attachShm(std::string(AccessTimeMap::kShmDataName)),
534+
std::invalid_argument);
535+
536+
shm.shutDown();
537+
}
538+
539+
ShmManager::cleanup(shmDir, true);
540+
}
541+
542+
TEST(AccessTimeMapTest, PersistOverwritesPriorSegments) {
543+
const auto shmDir = "/tmp/atm_overwrite_test_" + std::to_string(::getpid());
544+
constexpr size_t kNumShards = 4;
545+
546+
{
547+
ShmManager shm(shmDir, true);
548+
AccessTimeMap m(kNumShards);
549+
550+
// First persist with two entries
551+
m.set(1, 100);
552+
m.set(2, 200);
553+
m.persist(shm);
554+
555+
// Clear the map and add a different entry
556+
m.remove(1);
557+
m.remove(2);
558+
m.set(3, 300);
559+
ASSERT_EQ(m.size(), 1);
560+
561+
// Second persist should replace the prior segments
562+
m.persist(shm);
563+
shm.shutDown();
564+
}
565+
566+
// Recover should only see the second persist's data
567+
{
568+
ShmManager shm(shmDir, true);
569+
AccessTimeMap m(kNumShards);
570+
m.recover(shm);
571+
ASSERT_EQ(m.size(), 1);
572+
EXPECT_EQ(m.get(1), std::nullopt);
573+
EXPECT_EQ(m.get(2), std::nullopt);
574+
EXPECT_EQ(m.get(3), 300);
575+
shm.shutDown();
576+
}
577+
578+
ShmManager::cleanup(shmDir, true);
579+
}
580+
581+
TEST(AccessTimeMapTest, RecoverZeroEntryCount) {
582+
const auto shmDir = "/tmp/atm_zero_count_test_" + std::to_string(::getpid());
583+
constexpr size_t kNumShards = 4;
584+
585+
// Create an info segment with valid version/numShards but entryCount = 0.
586+
// This exercises the entryCount == 0 branch in recover(), which is NOT
587+
// reached by PersistEmptyMap (persist() returns early for empty maps
588+
// without creating segments, so recovery hits the "no shm" path instead).
589+
{
590+
ShmManager shm(shmDir, true);
591+
592+
navy::serialization::AccessTimeMapConfig cfg;
593+
*cfg.version() = static_cast<int32_t>(AccessTimeMap::kVersion);
594+
*cfg.numShards() = static_cast<int64_t>(kNumShards);
595+
*cfg.maxSize() = 0;
596+
*cfg.entryCount() = 0;
597+
598+
auto ioBuf = Serializer::serializeToIOBuf(cfg);
599+
auto infoAddr = shm.createShm(std::string(AccessTimeMap::kShmInfoName),
600+
ioBuf->length());
601+
Serializer serializer(
602+
reinterpret_cast<uint8_t*>(infoAddr.addr),
603+
reinterpret_cast<uint8_t*>(infoAddr.addr) + ioBuf->length());
604+
serializer.writeToBuffer(std::move(ioBuf));
605+
606+
shm.shutDown();
607+
}
608+
609+
// Recovery should hit the entryCount == 0 branch and start empty
610+
{
611+
ShmManager shm(shmDir, true);
612+
AccessTimeMap m(kNumShards);
613+
m.recover(shm);
614+
EXPECT_EQ(m.size(), 0);
615+
shm.shutDown();
616+
}
617+
618+
ShmManager::cleanup(shmDir, true);
619+
}
620+
425621
} // namespace tests
426622
} // namespace cachelib
427623
} // namespace facebook

cachelib/allocator/nvmcache/tests/NvmCacheTests.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3193,6 +3193,76 @@ TEST_F(NvmCacheTest, AccessTimeMapCleanupTest) {
31933193
expectAtmCleared(groups[3], "NVM-evicted item");
31943194
}
31953195

3196+
TEST_F(NvmCacheTest, AccessTimeMapSurvivesWarmRoll) {
3197+
// Verify that ATM entries persist across a warm restart cycle:
3198+
// shutDown() persists ATM → SharedMemAttach recovers ATM
3199+
this->convertToShmCache();
3200+
auto& nvm = this->cache();
3201+
auto pid = this->poolId();
3202+
const uint32_t allocSize = 15 * 1024;
3203+
const uint32_t numKeysPerRegion =
3204+
config_.blockCache().getRegionSize() / allocSize;
3205+
3206+
// Insert a target item and push it to NVM.
3207+
std::string targetKey = "atm_warm_roll_target";
3208+
{
3209+
auto it = nvm.allocate(pid, targetKey, allocSize);
3210+
ASSERT_NE(nullptr, it);
3211+
cache_->insertOrReplace(it);
3212+
}
3213+
ASSERT_TRUE(this->pushToNvmCacheFromRamForTesting(targetKey));
3214+
nvm.flushNvmCache();
3215+
this->removeFromRamForTesting(targetKey);
3216+
3217+
// Promote target from NVM (making it NvmClean in DRAM).
3218+
{
3219+
auto hdl = this->fetch(targetKey, false /* ramOnly */);
3220+
ASSERT_NE(nullptr, hdl);
3221+
ASSERT_TRUE(hdl->isNvmClean());
3222+
}
3223+
3224+
HashedKey targetHk{targetKey};
3225+
auto* atm = this->getAccessTimeMap();
3226+
ASSERT_NE(nullptr, atm);
3227+
3228+
// ATM should be empty after promotion (entry is created on eviction).
3229+
EXPECT_EQ(std::nullopt, atm->get(targetHk.keyHash()));
3230+
3231+
// Insert filler items to trigger DRAM eviction of the NvmClean target.
3232+
auto timeBefore = util::getCurrentTimeSec();
3233+
auto evictBefore = this->evictionCount();
3234+
for (int i = 0; i < 1024; i++) {
3235+
auto key = folly::sformat("atm_warm_roll_filler_{}", i);
3236+
auto it = nvm.allocate(pid, key, allocSize);
3237+
ASSERT_NE(nullptr, it);
3238+
cache_->insertOrReplace(it);
3239+
if (i % numKeysPerRegion == 0) {
3240+
nvm.flushNvmCache();
3241+
}
3242+
}
3243+
nvm.flushNvmCache();
3244+
ASSERT_LT(evictBefore, this->evictionCount());
3245+
3246+
// The target was NvmClean when evicted, so ATM should have its entry.
3247+
auto accessTime = atm->get(targetHk.keyHash());
3248+
ASSERT_NE(std::nullopt, accessTime);
3249+
EXPECT_GE(*accessTime, timeBefore);
3250+
auto savedTime = *accessTime;
3251+
3252+
// Warm restart: shutDown() persists ATM, SharedMemAttach recovers it.
3253+
this->warmRoll();
3254+
3255+
// Re-acquire the ATM pointer (NvmCache instance is recreated).
3256+
auto* atmAfter = this->getAccessTimeMap();
3257+
ASSERT_NE(nullptr, atmAfter);
3258+
3259+
// Verify the entry survived with the same timestamp.
3260+
auto recoveredTime = atmAfter->get(targetHk.keyHash());
3261+
ASSERT_NE(std::nullopt, recoveredTime)
3262+
<< "ATM entry missing after warm restart";
3263+
EXPECT_EQ(savedTime, *recoveredTime);
3264+
}
3265+
31963266
} // namespace tests
31973267
} // namespace cachelib
31983268
} // namespace facebook

0 commit comments

Comments
 (0)