From c6c332b8594b8c14152ee7436f379f8d30184486 Mon Sep 17 00:00:00 2001
From: krazer <caseymcc@krazer.net>
Date: Sat, 28 Mar 2026 11:53:01 -0400
Subject: [PATCH] add support for storage management

---
 .github/copilot-instructions.md   |  10 +
 CMakeLists.txt                    |   3 +
 docs/server.md                    | 315 ++++++++++-
 src/arbiterAI/arbiterAI.cpp       |   6 +
 src/arbiterAI/arbiterAI.h         |   3 +-
 src/arbiterAI/modelDownloader.cpp |  97 +++-
 src/arbiterAI/modelDownloader.h   |  46 +-
 src/arbiterAI/modelRuntime.cpp    |  55 +-
 src/arbiterAI/modelRuntime.h      |   9 +-
 src/arbiterAI/storageManager.cpp  | 862 ++++++++++++++++++++++++++++++
 src/arbiterAI/storageManager.h    | 196 +++++++
 src/server/dashboard.h            | 297 ++++++++++
 src/server/main.cpp               |  69 +++
 src/server/routes.cpp             | 571 +++++++++++++++++++-
 src/server/routes.h               |  18 +
 tests/storageManagerTests.cpp     | 570 ++++++++++++++++++++
 16 files changed, 3107 insertions(+), 20 deletions(-)
 create mode 100644 src/arbiterAI/storageManager.cpp
 create mode 100644 src/arbiterAI/storageManager.h
 create mode 100644 tests/storageManagerTests.cpp

diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
index 92759f1..ca54224 100644
--- a/.github/copilot-instructions.md
+++ b/.github/copilot-instructions.md
@@ -17,6 +17,16 @@ A C++17 library providing a unified interface for multiple LLM providers.
 - **Tests:** Google Test — `./build/linux_x64_debug/arbiterai_tests` (must be run inside Docker)
 - **Language-specific formatting rules** are in `.github/instructions/`
 
+## Important Rules
+
+1. **All commands** must go through `./runDocker.sh ...`.
+2. **All development** (building, testing, running) must be done inside the Docker container. The host environment is not guaranteed to have the correct tools or dependencies.
+3. **Do not** use `python`, `pip`, `pytest` — the host may not have the correct Python version or dependencies.
+4. **Do not** create or use a virtualenv on the host. The container is the virtualenv.
+5. The project source is **bind-mounted** at `/app` inside the container. Edits to files on the host are immediately visible inside the container.
+6. If you change the `Dockerfile`, run `./runDocker.sh --rebuild`.
+7. Don't launch the server yourself; ask the user to launch it so that it's not running in the agent's terminal.
+
 ## Active Tasks
 
 - **[docs/tasks/local_model_management.md](../docs/tasks/local_model_management.md)** — Plan for llama.cpp local model management (hardware detection, model swapping, telemetry, standalone server)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e14de19..38e6bea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -71,6 +71,8 @@ set(arbiterai_src
     ./src/arbiterAI/modelRuntime.cpp
     ./src/arbiterAI/telemetryCollector.h
     ./src/arbiterAI/telemetryCollector.cpp
+    ./src/arbiterAI/storageManager.h
+    ./src/arbiterAI/storageManager.cpp
     ./src/arbiterAI/providers/baseProvider.h
     ./src/arbiterAI/providers/baseProvider.cpp
     ./src/arbiterAI/providers/openai.h
@@ -136,6 +138,7 @@ target_link_libraries(arbiterai
         tests/modelRuntimeTests.cpp
         tests/telemetryCollectorTests.cpp
         tests/llamaProviderTests.cpp
+        tests/storageManagerTests.cpp
     )
     
     target_link_libraries(arbiterai_tests
diff --git a/docs/server.md b/docs/server.md
index fccf1db..99f025c 100644
--- a/docs/server.md
+++ b/docs/server.md
@@ -11,8 +11,9 @@ Standalone HTTP server that wraps the ArbiterAI library, providing an OpenAI-com
    - [Model Management](#32-model-management)
    - [Model Config Injection](#33-model-config-injection)
    - [Telemetry](#34-telemetry)
-   - [Health & Version](#35-health--version)
-   - [Dashboard](#36-dashboard)
+   - [Storage Management](#35-storage-management)
+   - [Health & Version](#36-health--version)
+   - [Dashboard](#37-dashboard)
 4. [Configuration Persistence](#4-configuration-persistence)
 5. [Error Format](#5-error-format)
 
@@ -28,8 +29,9 @@ The server supports:
 - **Streaming** — Server-Sent Events (SSE) for real-time token delivery
 - **Model lifecycle management** — Load, unload, pin, and download models at runtime
 - **Runtime model config injection** — Add, update, or remove model configurations via REST without restarting
+- **Storage management** — Track downloaded model files, set hot ready / protected flags, configure automated cleanup, monitor disk usage and download progress with speed and ETA
 - **Telemetry** — System snapshots, inference history, swap history, and hardware info
-- **Live dashboard** — Browser-based UI at `/dashboard`
+- **Live dashboard** — Browser-based UI at `/dashboard` with storage bar, download progress, and model management
 - **CORS** — All responses include permissive CORS headers
 
 ---
@@ -52,6 +54,11 @@ The server supports:
 | `-v, --variant` | *(none)* | Default quantization variant (e.g., `Q4_K_M`) |
 | `--override-path` | *(none)* | Path to write runtime model config overrides (enables persistence) |
 | `--ram-budget` | `0` (auto 50%) | Ready-model RAM budget in MB |
+| `--models-dir` | `/models` | Directory where downloaded model files are stored |
+| `--storage-limit` | `0` (unlimited) | Maximum storage for model files (e.g., `50G`, `500M`). `0` = use all free disk space. |
+| `--cleanup-enabled` | `true` | Enable automated storage cleanup |
+| `--cleanup-max-age` | `720` | Max age in hours before a variant becomes a cleanup candidate (default: 30 days) |
+| `--cleanup-interval` | `24` | Hours between automated cleanup runs |
 | `--log-level` | `info` | Log level (`trace`, `debug`, `info`, `warn`, `error`) |
 | `-h, --help` | | Print usage |
 
@@ -69,6 +76,9 @@ The server supports:
 
 # Load a local model with a specific variant
 ./arbiterAI-server -m qwen2.5-7b-instruct -v Q4_K_M --ram-budget 8192
+
+# Limit model storage to 50 GB with cleanup every 12 hours
+./arbiterAI-server --models-dir /data/models --storage-limit 50G --cleanup-interval 12
 ```
 
 ---
@@ -328,6 +338,8 @@ Load a model into VRAM for inference.
 
 **Response (202):** `{"status": "downloading", "model": "qwen2.5-7b-instruct"}` — model file is being downloaded.
 
+**Response (507):** Insufficient storage — the model file won't fit within the configured storage limit. Includes `available_bytes`, `required_bytes`, and `storage_limit_bytes` for programmatic decision-making.
+
 #### `POST /api/models/:name/unload`
 
 Unload a model from VRAM. Pinned models move to `Ready` state instead.
@@ -354,16 +366,32 @@ Initiate a model download. Query parameter `variant` selects the quantization va
 
 **Response (202):** `{"status": "downloading", "model": "..."}` — download started.
 
+**Response (507):** Insufficient storage. Same format as the load endpoint.
+
 #### `GET /api/models/:name/download`
 
-Get download status for a model.
+Get download status for a model. Includes progress, speed, and ETA while a download is active.
 
 **Response:**
 
 ```json
 {
   "model": "qwen2.5-7b-instruct",
-  "state": "Downloading"
+  "state": "Downloading",
+  "bytes_downloaded": 1250000000,
+  "total_bytes": 4680000000,
+  "percent_complete": 26.7,
+  "speed_mbps": 85.3,
+  "eta_seconds": 38
+}
+```
+
+When not downloading:
+
+```json
+{
+  "model": "qwen2.5-7b-instruct",
+  "state": "Loaded"
 }
 ```
 
@@ -650,7 +678,273 @@ Current hardware information (refreshed on each call).
 
 ---
 
-### 3.5 Health & Version
+### 3.5 Storage Management
+
+Manage downloaded model files on disk — track usage, set protection flags, configure automated cleanup, and monitor active downloads.
+
+#### Concepts
+
+- **Hot Ready** — Per-variant flag. Keeps model weights in system RAM after VRAM eviction for fast reload. Hot ready variants are protected from deletion.
+- **Protected** — Per-variant flag. Prevents deletion by both manual delete requests and automated cleanup. Must be cleared before the file can be removed.
+- **Guarded** — A variant is "guarded" if either hot ready or protected is set.
+
+#### `GET /api/storage`
+
+Current storage overview.
+
+**Response:**
+
+```json
+{
+  "models_directory": "/models",
+  "total_disk_bytes": 500107862016,
+  "free_disk_bytes": 350000000000,
+  "used_by_models_bytes": 12500000000,
+  "storage_limit_bytes": 53687091200,
+  "available_for_models_bytes": 41187091200,
+  "model_count": 3,
+  "cleanup_enabled": true
+}
+```
+
+#### `GET /api/storage/models`
+
+List all downloaded model files with usage statistics and flags.
+
+**Query parameters:**
+
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| `sort` | `last_used` | Sort by: `last_used`, `size`, `name`, `downloads` |
+
+**Response:**
+
+```json
+{
+  "models": [
+    {
+      "model": "qwen2.5-7b-instruct",
+      "variant": "Q4_K_M",
+      "filename": "Qwen2.5-7B-Instruct-Q4_K_M.gguf",
+      "file_path": "/models/Qwen2.5-7B-Instruct-Q4_K_M.gguf",
+      "file_size_bytes": 4680000000,
+      "file_size_display": "4.4 GB",
+      "downloaded_at": "2025-01-15T10:30:00Z",
+      "last_used_at": "2025-01-20T14:22:00Z",
+      "usage_count": 47,
+      "hot_ready": true,
+      "protected": false,
+      "runtime_state": "Loaded"
+    }
+  ],
+  "total_count": 1,
+  "total_size_bytes": 4680000000
+}
+```
+
+#### `GET /api/storage/models/:name`
+
+Get storage stats for all variants of a model.
+
+**Response:**
+
+```json
+{
+  "model": "qwen2.5-7b-instruct",
+  "variants": [
+    {
+      "variant": "Q4_K_M",
+      "filename": "Qwen2.5-7B-Instruct-Q4_K_M.gguf",
+      "file_size_bytes": 4680000000,
+      "usage_count": 47,
+      "hot_ready": true,
+      "protected": false
+    }
+  ]
+}
+```
+
+#### `GET /api/storage/models/:name/variants/:variant`
+
+Get storage stats for a specific variant.
+
+**Response (200):** Single variant object (same fields as above).
+
+**Response (404):** Variant not found.
+
+#### `PUT /api/storage/limit`
+
+Set the storage limit.
+
+**Request body:**
+
+```json
+{
+  "limit_bytes": 53687091200
+}
+```
+
+**Response (200):**
+
+```json
+{
+  "storage_limit_bytes": 53687091200,
+  "available_for_models_bytes": 41187091200
+}
+```
+
+#### `DELETE /api/models/:name/files`
+
+Delete downloaded files for a model. Specify `variant` query parameter to delete a single variant, or omit to delete all variants.
+
+**Query parameters:**
+
+| Parameter | Description |
+|-----------|-------------|
+| `variant` | Specific variant to delete. Omit to delete all. |
+
+**Response (200):**
+
+```json
+{
+  "status": "deleted",
+  "model": "qwen2.5-7b-instruct",
+  "freed_bytes": 4680000000
+}
+```
+
+**Response (409):** Variant is guarded (hot ready or protected). Clear the flag first.
+
+```json
+{
+  "error": {
+    "message": "Cannot delete: variant is guarded (hot_ready or protected). Clear flags first.",
+    "type": "invalid_request_error",
+    "param": null,
+    "code": null
+  },
+  "hot_ready": true,
+  "protected": false
+}
+```
+
+**Response (404):** Model or variant not found.
+
+#### `POST /api/models/:name/variants/:variant/hot-ready`
+
+Enable hot ready for a variant.
+
+**Response (200):** `{"status": "hot_ready_set", "model": "...", "variant": "..."}`
+
+**Response (404):** Variant not found.
+
+#### `DELETE /api/models/:name/variants/:variant/hot-ready`
+
+Disable hot ready for a variant.
+
+**Response (200):** `{"status": "hot_ready_cleared", "model": "...", "variant": "..."}`
+
+#### `POST /api/models/:name/variants/:variant/protected`
+
+Enable protection for a variant.
+
+**Response (200):** `{"status": "protected_set", "model": "...", "variant": "..."}`
+
+**Response (404):** Variant not found.
+
+#### `DELETE /api/models/:name/variants/:variant/protected`
+
+Disable protection for a variant.
+
+**Response (200):** `{"status": "protected_cleared", "model": "...", "variant": "..."}`
+
+#### `GET /api/storage/cleanup/preview`
+
+Preview what automated cleanup would delete without actually deleting anything.
+
+**Response:**
+
+```json
+{
+  "candidate_count": 2,
+  "total_reclaimable_bytes": 12500000000,
+  "candidates": [
+    {
+      "model": "old-model",
+      "variant": "Q8_0",
+      "filename": "old-model-q8.gguf",
+      "file_size_bytes": 8100000000,
+      "last_used_at": "2024-12-01T00:00:00Z",
+      "usage_count": 3
+    }
+  ]
+}
+```
+
+#### `POST /api/storage/cleanup/run`
+
+Execute cleanup immediately. Deletes unguarded, unloaded variants that exceed the configured max age.
+
+**Response:**
+
+```json
+{
+  "freed_bytes": 8100000000,
+  "deleted_count": 1
+}
+```
+
+#### `GET /api/storage/cleanup/config`
+
+Get the current cleanup policy.
+
+**Response:**
+
+```json
+{
+  "enabled": true,
+  "max_age_hours": 720,
+  "check_interval_hours": 24,
+  "target_free_percent": 20.0,
+  "respect_hot_ready": true,
+  "respect_protected": true
+}
+```
+
+#### `PUT /api/storage/cleanup/config`
+
+Update the cleanup policy.
+
+**Request body:** Same format as the GET response. All fields are optional — only provided fields are updated.
+
+**Response (200):** Updated policy (same format as GET).
+
+#### `GET /api/downloads`
+
+List all active downloads with progress, speed, and ETA.
+
+**Response:**
+
+```json
+{
+  "downloads": [
+    {
+      "model": "qwen2.5-7b-instruct",
+      "variant": "Q4_K_M",
+      "state": "Downloading",
+      "bytes_downloaded": 1250000000,
+      "total_bytes": 4680000000,
+      "percent_complete": 26.7,
+      "speed_mbps": 85.3,
+      "eta_seconds": 38
+    }
+  ]
+}
+```
+
+---
+
+### 3.6 Health & Version
 
 #### `GET /health` (or `/v1/health`)
 
@@ -675,7 +969,7 @@ Library version.
 
 ---
 
-### 3.6 Dashboard
+### 3.7 Dashboard
 
 #### `GET /dashboard`
 
@@ -685,6 +979,10 @@ Returns an HTML page with a live-updating dashboard showing:
 - Loaded models with state, variant, context size, GPU assignment
 - Performance charts (tokens/sec, memory usage)
 - Model management controls (load/unload/pin)
+- **Downloaded models** — Storage bar (used/limit), table of all downloaded GGUF files with size, download date, last used, usage count, runtime state, and toggle buttons for hot ready / protected flags
+- **Download progress** — Active downloads with progress bar, bytes transferred, speed (MB/s), and ETA
+- **Row age coloring** — Fresh (green, <14 days), stale (yellow, 14–30 days), old (red, >30 days)
+- Model deletion (guarded variants show a disabled delete button with a tooltip)
 
 Open in a browser: `http://localhost:8080/dashboard`
 
@@ -760,8 +1058,9 @@ HTTP status codes:
 | `202` | Accepted (model downloading) |
 | `400` | Bad request / validation error |
 | `404` | Not found |
-| `409` | Conflict (model already exists on POST) |
+| `409` | Conflict (model already exists on POST, or variant is guarded on DELETE) |
 | `500` | Internal server error |
+| `507` | Insufficient storage (download or load rejected) |
 
 ---
 
diff --git a/src/arbiterAI/arbiterAI.cpp b/src/arbiterAI/arbiterAI.cpp
index 913e49f..53869fc 100644
--- a/src/arbiterAI/arbiterAI.cpp
+++ b/src/arbiterAI/arbiterAI.cpp
@@ -6,6 +6,7 @@
 #include "arbiterAI/modelManager.h"
 #include "arbiterAI/modelRuntime.h"
 #include "arbiterAI/telemetryCollector.h"
+#include "arbiterAI/storageManager.h"
 #include "arbiterAI/providers/baseProvider.h"
 #include "arbiterAI/providers/openai.h"
 #include "arbiterAI/providers/anthropic.h"
@@ -85,6 +86,10 @@ ErrorCode ArbiterAI::initialize(const std::vector<std::filesystem::path> &config
 
     // Mark global singleton initialized so subsequent operations succeed
     ArbiterAI::instance().initialized = true;
+
+    // Initialize StorageManager with default models directory
+    StorageManager::instance().initialize("/models");
+
     return ErrorCode::Success;
 }
 
@@ -570,6 +575,7 @@ std::vector<InferenceStats> ArbiterAI::getInferenceHistory(std::chrono::minutes
 
 ErrorCode ArbiterAI::shutdown()
 {
+    StorageManager::instance().shutdown();
     providers.clear();
     initialized = false;
     return ErrorCode::Success;
diff --git a/src/arbiterAI/arbiterAI.h b/src/arbiterAI/arbiterAI.h
index ceb5e90..2a6e2cb 100644
--- a/src/arbiterAI/arbiterAI.h
+++ b/src/arbiterAI/arbiterAI.h
@@ -73,7 +73,8 @@ enum class ErrorCode
     ModelNotLoaded,
     ModelLoadError,
     ModelDownloading,
-    ModelDownloadFailed
+    ModelDownloadFailed,
+    InsufficientStorage
 };
 
 /**
diff --git a/src/arbiterAI/modelDownloader.cpp b/src/arbiterAI/modelDownloader.cpp
index 0a9cc33..33d5109 100644
--- a/src/arbiterAI/modelDownloader.cpp
+++ b/src/arbiterAI/modelDownloader.cpp
@@ -184,12 +184,15 @@ std::future<bool> ModelDownloader::downloadModelWithProgress(
     const std::string &filePathStr,
     const std::optional<std::string> &fileHash,
     DownloadProgressCallback progressCallback,
-    const std::string &modelName)
+    const std::string &modelName,
+    const std::string &variant)
 {
     // Create tracking state
     auto downloadState = std::make_shared<ActiveDownload>();
     downloadState->modelName = modelName.empty() ? filePathStr : modelName;
+    downloadState->variant = variant;
     downloadState->status = DownloadStatus::Pending;
+    downloadState->startTime = std::chrono::steady_clock::now();
 
     {
         std::lock_guard<std::mutex> lock(m_downloadsMutex);
@@ -258,6 +261,21 @@ std::future<bool> ModelDownloader::downloadModelWithProgress(
                     percent = (static_cast<float>(downloadNow) / downloadTotal) * 100.0f;
                 }
                 downloadState->percentComplete = percent;
+
+                // Record speed sample
+                {
+                    std::lock_guard<std::mutex> lock(downloadState->speedMutex);
+                    auto now = std::chrono::steady_clock::now();
+
+                    downloadState->speedSamples.push_back({now, downloadNow});
+
+                    // Keep only last 10 seconds of samples
+                    auto cutoff = now - std::chrono::seconds(10);
+                    while(!downloadState->speedSamples.empty() && downloadState->speedSamples.front().first < cutoff)
+                    {
+                        downloadState->speedSamples.pop_front();
+                    }
+                }
                 
                 if (progressCallback)
                 {
@@ -341,4 +359,81 @@ int64_t ModelDownloader::getPartialDownloadSize(const std::string &filePath)
     return 0;
 }
 
+DownloadProgressSnapshot ModelDownloader::buildSnapshot(const std::shared_ptr<ActiveDownload> &download)
+{
+    DownloadProgressSnapshot snap;
+    // Snapshot = the download's atomic progress counters and names, plus speed/ETA derived below.
+    snap.bytesDownloaded=download->bytesDownloaded.load();
+    snap.totalBytes=download->totalBytes.load();
+    snap.percentComplete=download->percentComplete.load();
+    snap.modelName=download->modelName;
+    snap.variant=download->variant;
+
+    // Calculate speed from the rolling window of (timestamp, byte count) samples
+    {
+        std::lock_guard<std::mutex> lock(download->speedMutex);
+        // At least two samples are needed to form a time/byte delta.
+        if(download->speedSamples.size()>=2)
+        {
+            const std::pair<std::chrono::steady_clock::time_point, int64_t> &oldest=download->speedSamples.front();
+            const std::pair<std::chrono::steady_clock::time_point, int64_t> &newest=download->speedSamples.back();
+
+            double elapsedSec=std::chrono::duration<double>(newest.first-oldest.first).count();
+            int64_t byteDelta=newest.second-oldest.second;
+            // With no elapsed time or no forward progress, speedMbps and etaSeconds keep their default of 0.
+            if(elapsedSec>0.0 && byteDelta>0)
+            {
+                double bytesPerSec=static_cast<double>(byteDelta)/elapsedSec;
+                snap.speedMbps=bytesPerSec/(1024.0*1024.0);   // bytes/s -> MiB/s (not megabits, despite the name)
+
+                // ETA from remaining bytes and current speed
+                int64_t remaining=snap.totalBytes-snap.bytesDownloaded;
+                if(remaining>0 && bytesPerSec>0.0)
+                {
+                    snap.etaSeconds=static_cast<int>(static_cast<double>(remaining)/bytesPerSec);   // truncated to whole seconds
+                }
+            }
+        }
+    }
+
+    return snap;
+}
+
+std::optional<DownloadProgressSnapshot> ModelDownloader::getProgressSnapshot(const std::string &modelName)
+{
+    std::lock_guard<std::mutex> lock(m_downloadsMutex);
+    // Look the download up by its tracking name; an unknown name yields nullopt.
+    auto it=m_activeDownloads.find(modelName);
+    if(it==m_activeDownloads.end())
+    {
+        return std::nullopt;
+    }
+    // Only Pending or InProgress downloads are reported; any other status yields nullopt.
+    DownloadStatus status=it->second->status.load();
+    if(status!=DownloadStatus::InProgress && status!=DownloadStatus::Pending)
+    {
+        return std::nullopt;
+    }
+    // m_downloadsMutex is still held while the snapshot is built.
+    return buildSnapshot(it->second);
+}
+
+std::vector<DownloadProgressSnapshot> ModelDownloader::getActiveSnapshots()
+{
+    std::lock_guard<std::mutex> lock(m_downloadsMutex);
+    // Collect one snapshot per tracked download whose status is Pending or InProgress.
+    std::vector<DownloadProgressSnapshot> result;
+
+    for(const std::pair<const std::string, std::shared_ptr<ActiveDownload>> &entry:m_activeDownloads)
+    {
+        DownloadStatus status=entry.second->status.load();
+        if(status==DownloadStatus::InProgress || status==DownloadStatus::Pending)
+        {
+            result.push_back(buildSnapshot(entry.second));
+        }
+    }
+
+    return result;
+}
+
 } // namespace arbiterAI
\ No newline at end of file
diff --git a/src/arbiterAI/modelDownloader.h b/src/arbiterAI/modelDownloader.h
index c692011..9afaf34 100644
--- a/src/arbiterAI/modelDownloader.h
+++ b/src/arbiterAI/modelDownloader.h
@@ -12,6 +12,9 @@
 #include <memory>
 #include <functional>
 #include <atomic>
+#include <chrono>
+#include <deque>
+#include <mutex>
 
 namespace arbiterAI
 {
@@ -27,17 +30,36 @@ using DownloadProgressCallback = std::function<void(int64_t bytesDownloaded,
                                                       float percentComplete)>;
 
 /**
- * @struct DownloadState
+ * @struct ActiveDownload
  * @brief Tracks the state of an active download
  */
-struct ActiveDownload
-{
+struct ActiveDownload {
     std::atomic<int64_t> bytesDownloaded{0};
     std::atomic<int64_t> totalBytes{0};
     std::atomic<float> percentComplete{0.0f};
     std::atomic<DownloadStatus> status{DownloadStatus::NotStarted};
     std::string error;
     std::string modelName;
+    std::string variant;
+
+    // Speed tracking (guarded by speedMutex)
+    mutable std::mutex speedMutex;
+    std::deque<std::pair<std::chrono::steady_clock::time_point, int64_t>> speedSamples;
+    std::chrono::steady_clock::time_point startTime;
+};
+
+/**
+ * @struct DownloadProgressSnapshot
+ * @brief Point-in-time snapshot of a download's progress including speed and ETA
+ */
+struct DownloadProgressSnapshot {
+    int64_t bytesDownloaded=0;  // bytes downloaded so far
+    int64_t totalBytes=0;       // total size of the download in bytes
+    float percentComplete=0.0f; // progress on a 0-100 scale
+    double speedMbps=0.0;       // rolling average in MiB/s (bytes/s divided by 1024*1024); not megabits, despite the name
+    int etaSeconds=0;           // estimated seconds remaining; stays 0 when no speed estimate is available
+    std::string modelName;      // tracking name: the model name, or the file path when no name was given
+    std::string variant;        // quantization variant name
+};
 
 /**
@@ -78,13 +100,15 @@ class ModelDownloader
      * @param fileHash Expected SHA256 hash (optional)
      * @param progressCallback Callback for progress updates
      * @param modelName Name for tracking in active downloads
+     * @param variant Quantization variant name for tracking
      * @return Future that resolves to true on success
      */
     std::future<bool> downloadModelWithProgress(const std::string &downloadUrl,
                                                   const std::string &filePath,
                                                   const std::optional<std::string> &fileHash,
                                                   DownloadProgressCallback progressCallback,
-                                                  const std::string &modelName = "");
+                                                  const std::string &modelName = "",
+                                                  const std::string &variant = "");
 
     /**
      * @brief Get the current download state for a model
@@ -93,6 +117,19 @@ class ModelDownloader
      */
     std::shared_ptr<ActiveDownload> getDownloadState(const std::string &modelName);
 
+    /**
+     * @brief Get a progress snapshot with speed and ETA for a model
+     * @param modelName Name of the model
+     * @return Snapshot with speed/ETA, or nullopt if not downloading
+     */
+    std::optional<DownloadProgressSnapshot> getProgressSnapshot(const std::string &modelName);
+
+    /**
+     * @brief Get progress snapshots for all active downloads
+     * @return Vector of snapshots for all in-progress or pending downloads
+     */
+    std::vector<DownloadProgressSnapshot> getActiveSnapshots();
+
     /**
      * @brief Check if a download can be resumed
      * @param filePath Path to the partial file
@@ -111,6 +148,7 @@ class ModelDownloader
     std::string getCachePath(const std::string &key);
     std::optional<nlohmann::json> loadFromCache(const std::string &key);
     void saveToCache(const std::string &key, const nlohmann::json &config);
+    DownloadProgressSnapshot buildSnapshot(const std::shared_ptr<ActiveDownload> &download);
 
     std::filesystem::path m_cacheDir;
     std::shared_ptr<IFileVerifier> m_fileVerifier;
diff --git a/src/arbiterAI/modelRuntime.cpp b/src/arbiterAI/modelRuntime.cpp
index 826e4e1..e915a9f 100644
--- a/src/arbiterAI/modelRuntime.cpp
+++ b/src/arbiterAI/modelRuntime.cpp
@@ -1,6 +1,7 @@
 #include "arbiterAI/modelRuntime.h"
 #include "arbiterAI/hardwareDetector.h"
 #include "arbiterAI/telemetryCollector.h"
+#include "arbiterAI/storageManager.h"
 
 #include <llama.h>
 #include <spdlog/spdlog.h>
@@ -149,6 +150,20 @@ ErrorCode ModelRuntime::loadModel(
                 std::string filePath="/models/"+selectedVar->download.filename;
                 if(!std::filesystem::exists(filePath)&&!selectedVar->download.url.empty())
                 {
+                    // Check storage quota before downloading
+                    int64_t fileSizeBytes=static_cast<int64_t>(selectedVar->fileSizeMb)*1024*1024;
+                    if(!StorageManager::instance().canDownload(fileSizeBytes))
+                    {
+                        // Try cleanup first
+                        StorageManager::instance().runCleanup();
+                        if(!StorageManager::instance().canDownload(fileSizeBytes))
+                        {
+                            spdlog::error("Insufficient storage to download '{}' variant '{}' ({} MB)",
+                                model, selectedVariant, selectedVar->fileSizeMb);
+                            return ErrorCode::InsufficientStorage;
+                        }
+                    }
+
                     // Mark as downloading
                     LoadedModel &dlEntry=m_models[model];
                     dlEntry.modelName=model;
@@ -163,7 +178,8 @@ ErrorCode ModelRuntime::loadModel(
                         selectedVar->download.url,
                         filePath,
                         selectedVar->download.sha256,
-                        model);
+                        model,
+                        selectedVariant);
                     m_mutex.lock();
 
                     if(!downloadOk)
@@ -171,6 +187,16 @@ ErrorCode ModelRuntime::loadModel(
                         m_models.erase(model);
                         return ErrorCode::ModelDownloadFailed;
                     }
+
+                    // Register the download with StorageManager
+                    int64_t actualSize=0;
+                    std::error_code ec;
+                    if(std::filesystem::exists(filePath, ec))
+                    {
+                        actualSize=static_cast<int64_t>(std::filesystem::file_size(filePath, ec));
+                    }
+                    StorageManager::instance().registerDownload(
+                        model, selectedVariant, selectedVar->download.filename, actualSize);
                 }
             }
 
@@ -395,6 +421,16 @@ std::vector<ModelFit> ModelRuntime::getLocalModelCapabilities() const
     return ModelFitCalculator::calculateFittableModels(allModels, hw);
 }
 
+std::vector<DownloadProgressSnapshot> ModelRuntime::getActiveDownloadSnapshots()
+{
+    return m_downloader.getActiveSnapshots();   // thin pass-through to the downloader member
+}
+
+std::optional<DownloadProgressSnapshot> ModelRuntime::getDownloadSnapshot(const std::string &modelName)
+{
+    return m_downloader.getProgressSnapshot(modelName);   // thin pass-through; nullopt when that model is not downloading
+}
+
 void ModelRuntime::setReadyRamBudget(int mb)
 {
     std::lock_guard<std::mutex> lock(m_mutex);
@@ -490,6 +526,17 @@ void ModelRuntime::beginInference(const std::string &model)
 
 void ModelRuntime::endInference()
 {
+    // Record usage for storage tracking
+    if(!m_inferenceModel.empty())
+    {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        auto it=m_models.find(m_inferenceModel);
+        if(it!=m_models.end())
+        {
+            StorageManager::instance().recordUsage(m_inferenceModel, it->second.variant);
+        }
+    }
+
     m_inferenceActive=false;
     m_inferenceModel.clear();
     drainPendingSwaps();
@@ -735,7 +782,8 @@ bool ModelRuntime::downloadModelFile(
     const std::string &url,
     const std::string &filePath,
     const std::string &sha256,
-    const std::string &modelName)
+    const std::string &modelName,
+    const std::string &variant)
 {
     std::optional<std::string> hash=std::nullopt;
     if(!sha256.empty())
@@ -765,7 +813,8 @@ bool ModelRuntime::downloadModelFile(
                 }
             }
         },
-        modelName);
+        modelName,
+        variant);
 
     bool success=result.get();
 
diff --git a/src/arbiterAI/modelRuntime.h b/src/arbiterAI/modelRuntime.h
index 3e01b82..288ceac 100644
--- a/src/arbiterAI/modelRuntime.h
+++ b/src/arbiterAI/modelRuntime.h
@@ -87,6 +87,12 @@ class ModelRuntime {
     /// Get model fit capabilities for all local models given current hardware.
     std::vector<ModelFit> getLocalModelCapabilities() const;
 
+    /// Get progress snapshots for all active downloads (with speed and ETA).
+    std::vector<DownloadProgressSnapshot> getActiveDownloadSnapshots();
+
+    /// Get a download progress snapshot for a specific model.
+    std::optional<DownloadProgressSnapshot> getDownloadSnapshot(const std::string &modelName);
+
     /// Set the RAM budget for "Ready" tier models (MB).
     void setReadyRamBudget(int mb);
 
@@ -153,7 +159,8 @@ class ModelRuntime {
         const std::string &url,
         const std::string &filePath,
         const std::string &sha256,
-        const std::string &modelName);
+        const std::string &modelName,
+        const std::string &variant);
 
     std::map<std::string, LoadedModel> m_models;
     mutable std::mutex m_mutex;
diff --git a/src/arbiterAI/storageManager.cpp b/src/arbiterAI/storageManager.cpp
new file mode 100644
index 0000000..5d64b8a
--- /dev/null
+++ b/src/arbiterAI/storageManager.cpp
@@ -0,0 +1,862 @@
+#include "arbiterAI/storageManager.h"
+#include "arbiterAI/modelRuntime.h"
+
+#include <spdlog/spdlog.h>
+#include <nlohmann/json.hpp>
+#include <fstream>
+#include <algorithm>
+
+namespace arbiterAI
+{
+
+namespace
+{
+
+// Render a time point as a UTC ISO-8601 string ("YYYY-MM-DDTHH:MM:SSZ").
+std::string timePointToIso(const std::chrono::system_clock::time_point &tp)
+{
+    const std::time_t seconds=std::chrono::system_clock::to_time_t(tp);
+    std::tm utc{};
+    gmtime_r(&seconds, &utc);
+    char text[32];
+    std::strftime(text, sizeof(text), "%Y-%m-%dT%H:%M:%SZ", &utc);
+    return std::string(text);
+}
+
+// Parse a UTC "YYYY-MM-DDTHH:MM:SSZ" string into a time point.
+// Empty or malformed input yields the current time: a zeroed tm would map to
+// a date around 1900 and make the entry look stale to the cleanup pass.
+std::chrono::system_clock::time_point isoToTimePoint(const std::string &iso)
+{
+    std::tm tm{};
+    if(iso.empty()||strptime(iso.c_str(), "%Y-%m-%dT%H:%M:%SZ", &tm)==nullptr)
+    {
+        return std::chrono::system_clock::now();
+    }
+    return std::chrono::system_clock::from_time_t(timegm(&tm));
+}
+
+// Human-readable size: one truncated decimal for GB/MB, raw bytes otherwise.
+std::string formatBytes(int64_t bytes)
+{
+    const auto scaled=[bytes](int64_t unit) { return std::to_string(bytes/unit)+"."+std::to_string((bytes%unit)*10/unit); };
+    if(bytes>=1073741824) return scaled(1073741824)+" GB";
+    if(bytes>=1048576) return scaled(1048576)+" MB";
+    return std::to_string(bytes)+" B";
+}
+
+} // anonymous namespace
+
+StorageManager &StorageManager::instance()
+{
+    static StorageManager singleton; // constructed on first call
+    return singleton;
+}
+
+void StorageManager::reset()
+{
+    // shutdown() locks m_mutex via flush(), so it must run before we lock.
+    StorageManager &self=instance();
+    self.shutdown();
+
+    std::lock_guard<std::mutex> guard(self.m_mutex);
+    self.m_cleanupPolicy=CleanupPolicy{};
+    self.m_dirty=false;
+    self.m_initialized=false;
+    self.m_storageLimitBytes=0;
+    self.m_modelsDir.clear();
+    self.m_entries.clear();
+}
+
+StorageManager::~StorageManager()
+{
+    shutdown(); // joins the timer thread and flushes pending changes
+}
+
+/// Bind to modelsDir, load persisted usage data, rescan disk, start the timer.
+void StorageManager::initialize(const std::filesystem::path &modelsDir)
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+    if(m_initialized&&m_dirty) saveUsageData(); // persist the previous inventory before replacing it
+    m_modelsDir=modelsDir;
+    if(!std::filesystem::exists(m_modelsDir))
+    {
+        std::filesystem::create_directories(m_modelsDir);
+    }
+    // Start from an empty inventory: loadUsageData() appends, so a repeated
+    // initialize() would otherwise duplicate every persisted entry.
+    m_entries.clear();
+    loadUsageData();
+    scanModelsDirectory();
+    m_initialized=true;
+    spdlog::info("StorageManager initialized: modelsDir={}", m_modelsDir.string());
+    startBackgroundTimer();
+}
+
+void StorageManager::shutdown()
+{
+    // Ask the timer loop to exit, then wait for it before the final flush.
+    m_timerRunning.store(false);
+    if(m_timerThread.joinable())
+    {
+        m_timerThread.join();
+    }
+    flush();
+}
+
+void StorageManager::setStorageLimit(int64_t limitBytes)
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+    m_dirty=true; // the limit is persisted with the usage data
+    m_storageLimitBytes=limitBytes;
+}
+
+int64_t StorageManager::getStorageLimit() const
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+    return m_storageLimitBytes; // 0 means no configured limit
+}
+
+/// Build a snapshot of disk capacity, tracked model usage and remaining budget.
+/// Disk totals come from std::filesystem::space() on the models directory;
+/// usage is the sum of the sizes recorded in the inventory.
+StorageInfo StorageManager::getStorageInfo() const
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    StorageInfo snapshot;
+    snapshot.cleanupEnabled=m_cleanupPolicy.enabled;
+    snapshot.storageLimitBytes=m_storageLimitBytes;
+    snapshot.modelsDirectory=m_modelsDir;
+
+    // Partition totals stay at zero when the directory is missing or the query fails.
+    if(!m_modelsDir.empty()&&std::filesystem::exists(m_modelsDir))
+    {
+        std::error_code spaceError;
+        const std::filesystem::space_info partition=std::filesystem::space(m_modelsDir, spaceError);
+
+        if(!spaceError)
+        {
+            snapshot.totalDiskBytes=static_cast<int64_t>(partition.capacity);
+            snapshot.freeDiskBytes=static_cast<int64_t>(partition.available);
+        }
+    }
+
+    // Total size of every tracked model file.
+    int64_t trackedBytes=0;
+    for(const ModelFileEntry &file:m_entries)
+    {
+        trackedBytes+=file.fileSizeBytes;
+    }
+    snapshot.modelCount=static_cast<int>(m_entries.size());
+    snapshot.usedByModelsBytes=trackedBytes;
+
+    // Without a limit all free space is usable; with one, the remaining
+    // budget (never negative) caps it.
+    snapshot.availableForModelsBytes=snapshot.freeDiskBytes;
+    if(m_storageLimitBytes>0)
+    {
+        const int64_t budgetLeft=std::max<int64_t>(m_storageLimitBytes-trackedBytes, 0);
+        snapshot.availableForModelsBytes=std::min(snapshot.freeDiskBytes, budgetLeft);
+    }
+
+    return snapshot;
+}
+
+bool StorageManager::canDownload(int64_t fileSizeBytes) const
+{
+    // getStorageInfo() takes m_mutex itself, so no lock is held here.
+    return getStorageInfo().availableForModelsBytes>=fileSizeBytes;
+}
+
+std::vector<DownloadedModelFile> StorageManager::getDownloadedModels() const
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    // One public record per inventory entry, in inventory order.
+    std::vector<DownloadedModelFile> files;
+    files.reserve(m_entries.size());
+    for(const ModelFileEntry &tracked:m_entries)
+    {
+        files.push_back(entryToPublic(tracked));
+    }
+
+    return files;
+}
+
+void StorageManager::registerDownload(const std::string &modelName,
+    const std::string &variant,
+    const std::string &filename,
+    int64_t fileSizeBytes)
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    // Re-download of a tracked variant: refresh the file details only, the
+    // usage history and guard flags are kept.
+    if(ModelFileEntry *tracked=findEntry(modelName, variant))
+    {
+        tracked->filename=filename;
+        tracked->fileSizeBytes=fileSizeBytes;
+        tracked->downloadedAt=std::chrono::system_clock::now();
+        m_dirty=true;
+        return;
+    }
+
+    // First download of this variant: start a fresh record.
+    ModelFileEntry fresh;
+    fresh.modelName=modelName;
+    fresh.variant=variant;
+    fresh.filename=filename;
+    fresh.fileSizeBytes=fileSizeBytes;
+    fresh.downloadedAt=std::chrono::system_clock::now();
+    fresh.lastUsedAt=std::chrono::system_clock::now();
+    fresh.usageCount=0;
+    fresh.hotReady=false;
+    fresh.isProtected=false;
+    m_entries.push_back(fresh);
+    m_dirty=true;
+
+    spdlog::info("StorageManager: registered download {} variant {} ({})",
+        modelName, variant, formatBytes(fileSizeBytes));
+}
+
+void StorageManager::recordUsage(const std::string &modelName, const std::string &variant)
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    // Usage of a variant that is not in the inventory is silently ignored.
+    ModelFileEntry *tracked=findEntry(modelName, variant);
+    if(!tracked) return;
+
+    ++tracked->usageCount;
+    tracked->lastUsedAt=std::chrono::system_clock::now();
+    m_dirty=true;
+}
+
+// Remove one variant (or every variant when `variant` is empty) from disk and
+// from the inventory. Returns true only if something was removed and no file
+// removal failed; freedBytes reports what was freed even on partial failure.
+bool StorageManager::deleteModelFile(const std::string &modelName, const std::string &variant,
+    int64_t &freedBytes)
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+
+    freedBytes=0;
+    bool removedAny=false; // an entry was erased (its file may have been 0 bytes)
+    bool failed=false;     // a file could not be removed from disk
+
+    if(variant.empty())
+    {
+        // Delete all variants of this model. Refuse up front if any of them is
+        // guarded, before any file has been touched.
+        std::vector<size_t> toRemove;
+        for(size_t i=0; i<m_entries.size(); ++i)
+        {
+            if(m_entries[i].modelName==modelName)
+            {
+                if(m_entries[i].hotReady||m_entries[i].isProtected)
+                {
+                    spdlog::warn("StorageManager: cannot delete {} variant {} — guarded (hotReady={}, protected={})",
+                        modelName, m_entries[i].variant, m_entries[i].hotReady, m_entries[i].isProtected);
+                    return false;
+                }
+                toRemove.push_back(i);
+            }
+        }
+
+        // Delete files and remove entries (in reverse order to preserve indices)
+        for(auto it=toRemove.rbegin(); it!=toRemove.rend(); ++it)
+        {
+            std::filesystem::path filePath=m_modelsDir/m_entries[*it].filename;
+            std::error_code ec;
+            if(std::filesystem::exists(filePath, ec))
+            {
+                std::filesystem::remove(filePath, ec);
+                if(ec)
+                {
+                    spdlog::error("StorageManager: failed to delete file {}: {}", filePath.string(), ec.message());
+                    failed=true;
+                    break;
+                }
+            }
+            freedBytes+=m_entries[*it].fileSizeBytes;
+            removedAny=true;
+            spdlog::info("StorageManager: deleted {} variant {} ({})",
+                m_entries[*it].modelName, m_entries[*it].variant, formatBytes(m_entries[*it].fileSizeBytes));
+            m_entries.erase(m_entries.begin()+static_cast<ptrdiff_t>(*it));
+        }
+    }
+    else
+    {
+        // Delete a specific variant
+        for(auto it=m_entries.begin(); it!=m_entries.end(); ++it)
+        {
+            if(it->modelName==modelName&&it->variant==variant)
+            {
+                if(it->hotReady||it->isProtected)
+                {
+                    spdlog::warn("StorageManager: cannot delete {} variant {} — guarded", modelName, variant);
+                    return false;
+                }
+                std::filesystem::path filePath=m_modelsDir/it->filename;
+                std::error_code ec;
+                if(std::filesystem::exists(filePath, ec))
+                {
+                    std::filesystem::remove(filePath, ec);
+                    if(ec)
+                    {
+                        spdlog::error("StorageManager: failed to delete file {}: {}", filePath.string(), ec.message());
+                        return false;
+                    }
+                }
+                freedBytes=it->fileSizeBytes;
+                removedAny=true;
+                spdlog::info("StorageManager: deleted {} variant {} ({})",
+                    modelName, variant, formatBytes(it->fileSizeBytes));
+                m_entries.erase(it);
+                break;
+            }
+        }
+    }
+
+    // Persist whatever was removed, even when a later variant failed, so the
+    // saved inventory never lists files this call already deleted.
+    if(removedAny)
+    {
+        m_dirty=true;
+        saveUsageData();
+    }
+
+    return removedAny&&!failed;
+}
+
+bool StorageManager::setHotReady(const std::string &modelName, const std::string &variant, bool enabled)
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    // Only tracked variants can be flagged; report unknown ones to the caller.
+    if(ModelFileEntry *tracked=findEntry(modelName, variant))
+    {
+        tracked->hotReady=enabled;
+        m_dirty=true;
+        return true;
+    }
+
+    return false;
+}
+
+bool StorageManager::setProtected(const std::string &modelName, const std::string &variant, bool enabled)
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    // Only tracked variants can be flagged; report unknown ones to the caller.
+    if(ModelFileEntry *tracked=findEntry(modelName, variant))
+    {
+        tracked->isProtected=enabled;
+        m_dirty=true;
+        return true;
+    }
+
+    return false;
+}
+
+std::vector<DownloadedModelFile> StorageManager::getModelStats(const std::string &modelName) const
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    // Every tracked variant of the model; empty when none is tracked.
+    std::vector<DownloadedModelFile> variants;
+    for(const ModelFileEntry &tracked:m_entries)
+    {
+        if(tracked.modelName!=modelName) continue;
+        variants.push_back(entryToPublic(tracked));
+    }
+
+    return variants;
+}
+
+std::optional<DownloadedModelFile> StorageManager::getVariantStats(
+    const std::string &modelName, const std::string &variant) const
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    // Empty optional when the variant is not in the inventory.
+    if(const ModelFileEntry *tracked=findEntry(modelName, variant))
+    {
+        return entryToPublic(*tracked);
+    }
+    return std::nullopt;
+}
+
+bool StorageManager::isGuarded(const std::string &modelName, const std::string &variant) const
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    // Unknown variants are reported as not guarded.
+    const ModelFileEntry *tracked=findEntry(modelName, variant);
+    if(tracked==nullptr) return false;
+
+    const bool guarded=tracked->hotReady||tracked->isProtected;
+    return guarded;
+}
+
+void StorageManager::flush()
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    // Write only when initialized and something changed since the last save.
+    const bool needsSave=m_initialized&&m_dirty;
+    if(needsSave)
+    {
+        saveUsageData();
+        m_dirty=false;
+    }
+}
+
+void StorageManager::scanModelsDirectory()
+{
+    // Reconcile the inventory with the GGUF files on disk. NOTE: caller must hold m_mutex
+    // (this method does not lock; initialize() calls it with the lock already held).
+    if(m_modelsDir.empty()||!std::filesystem::exists(m_modelsDir))
+    {
+        return;
+    }
+
+    std::error_code ec;
+    // Pass 1: add an entry for every .gguf file that no entry references by filename.
+    for(const std::filesystem::directory_entry &dirEntry:std::filesystem::directory_iterator(m_modelsDir, ec))
+    {
+        if(!dirEntry.is_regular_file())
+        {
+            continue;
+        }
+
+        std::string filename=dirEntry.path().filename().string();
+
+        // Only track GGUF files (exact, case-sensitive ".gguf" suffix)
+        if(filename.size()<5||filename.substr(filename.size()-5)!=".gguf")
+        {
+            continue;
+        }
+
+        // Check if already tracked
+        bool found=false;
+        for(const ModelFileEntry &entry:m_entries)
+        {
+            if(entry.filename==filename)
+            {
+                found=true;
+                break;
+            }
+        }
+
+        if(!found)
+        {
+            // New file discovered on disk — add with unknown model/variant
+            ModelFileEntry entry;
+            entry.filename=filename;
+            entry.fileSizeBytes=static_cast<int64_t>(dirEntry.file_size(ec));
+            entry.downloadedAt=std::chrono::system_clock::now();
+            entry.lastUsedAt=std::chrono::system_clock::now();
+
+            // Try to infer model name and variant from filename by splitting at the last '-'
+            // Typical pattern: ModelName-VariantName.gguf (no usable dash -> variant "default")
+            std::string stem=filename.substr(0, filename.size()-5);
+            size_t lastDash=stem.rfind('-');
+            if(lastDash!=std::string::npos&&lastDash>0)
+            {
+                entry.modelName=stem.substr(0, lastDash);
+                entry.variant=stem.substr(lastDash+1);
+            }
+            else
+            {
+                entry.modelName=stem;
+                entry.variant="default";
+            }
+
+            m_entries.push_back(entry);
+            m_dirty=true;
+
+            spdlog::info("StorageManager: discovered untracked GGUF file: {} ({})",
+                filename, formatBytes(entry.fileSizeBytes));
+        }
+    }
+
+    // Pass 2: remove entries for files that no longer exist on disk
+    auto removeIt=std::remove_if(m_entries.begin(), m_entries.end(),
+        [this](const ModelFileEntry &entry)
+        {
+            std::filesystem::path filePath=m_modelsDir/entry.filename;
+            std::error_code ec;
+            bool exists=std::filesystem::exists(filePath, ec);
+            if(!exists)
+            {
+                spdlog::info("StorageManager: removing entry for missing file: {}", entry.filename);
+            }
+            return !exists;
+        });
+
+    if(removeIt!=m_entries.end())
+    {
+        m_entries.erase(removeIt, m_entries.end());
+        m_dirty=true;
+    }
+}
+
+// ========== Cleanup ==========
+void StorageManager::setCleanupPolicy(const CleanupPolicy &policy)
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+    m_cleanupPolicy=policy;
+    m_dirty=true; // the policy is written by saveUsageData(), so schedule a flush
+}
+
+CleanupPolicy StorageManager::getCleanupPolicy() const
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+    return m_cleanupPolicy; // returned by value: a copy taken under the lock
+}
+
+std::vector<CleanupCandidate> StorageManager::previewCleanup() const
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+    return collectCleanupCandidates(); // same selection runCleanup() uses; nothing is deleted
+}
+
+/// Delete every current cleanup candidate; returns the bytes freed.
+int64_t StorageManager::runCleanup()
+{
+    std::lock_guard<std::mutex> guard(m_mutex);
+
+    // A disabled policy turns this into a no-op.
+    if(!m_cleanupPolicy.enabled)
+    {
+        return 0;
+    }
+
+    const std::vector<CleanupCandidate> stale=collectCleanupCandidates();
+    if(stale.empty())
+    {
+        return 0;
+    }
+
+    int64_t reclaimed=0;
+    for(const CleanupCandidate &victim:stale)
+    {
+        const std::filesystem::path target=m_modelsDir/victim.filename;
+
+        // A file that cannot be removed keeps its inventory entry.
+        std::error_code removeError;
+        if(std::filesystem::exists(target, removeError))
+        {
+            std::filesystem::remove(target, removeError);
+            if(removeError)
+            {
+                spdlog::error("StorageManager cleanup: failed to delete {}: {}", target.string(), removeError.message());
+                continue;
+            }
+        }
+
+        // Drop the first inventory entry for this model/variant pair.
+        const auto tracked=std::find_if(m_entries.begin(), m_entries.end(),
+            [&victim](const ModelFileEntry &entry)
+            {
+                return entry.modelName==victim.modelName&&entry.variant==victim.variant;
+            });
+        if(tracked==m_entries.end()) continue;
+
+        reclaimed+=tracked->fileSizeBytes;
+        spdlog::info("StorageManager cleanup: deleted {} variant {} ({})",
+            tracked->modelName, tracked->variant, formatBytes(tracked->fileSizeBytes));
+        m_entries.erase(tracked);
+    }
+
+    if(reclaimed>0)
+    {
+        m_dirty=true;
+        saveUsageData();
+    }
+
+    spdlog::info("StorageManager cleanup: freed {}", formatBytes(reclaimed));
+    return reclaimed;
+}
+
+// ========== Private Methods ==========
+
+StorageManager::ModelFileEntry *StorageManager::findEntry(
+    const std::string &modelName, const std::string &variant)
+{
+    // Linear search; returns the first entry matching both keys, or nullptr.
+    const auto match=std::find_if(m_entries.begin(), m_entries.end(),
+        [&](const ModelFileEntry &candidate)
+        {
+            return candidate.modelName==modelName&&candidate.variant==variant;
+        });
+
+    return match!=m_entries.end()?&*match:nullptr;
+}
+
+const StorageManager::ModelFileEntry *StorageManager::findEntry(
+    const std::string &modelName, const std::string &variant) const
+{
+    // Read-only lookup: first entry whose model name and variant both match.
+    const auto sameKey=[&modelName, &variant](const ModelFileEntry &candidate)
+    {
+        return candidate.modelName==modelName&&candidate.variant==variant;
+    };
+    const auto match=std::find_if(m_entries.begin(), m_entries.end(), sameKey);
+
+    return match==m_entries.end()?nullptr:&*match;
+}
+
+void StorageManager::loadUsageData()
+{
+    // Load limit, cleanup policy and entries from model_usage.json. NOTE: caller must hold m_mutex
+    // Entries are appended to m_entries; nothing already in the list is cleared here.
+    std::filesystem::path usagePath=m_modelsDir/"model_usage.json";
+
+    if(!std::filesystem::exists(usagePath))
+    {
+        return;
+    }
+
+    try
+    {
+        std::ifstream file(usagePath);
+        if(!file.is_open())
+        {
+            return;
+        }
+
+        nlohmann::json data;
+        file>>data;
+        // Each top-level section is optional; an absent key leaves the current value untouched.
+        if(data.contains("storage_limit_bytes"))
+        {
+            m_storageLimitBytes=data["storage_limit_bytes"].get<int64_t>();
+        }
+
+        if(data.contains("cleanup_policy"))
+        {
+            const nlohmann::json &cp=data["cleanup_policy"];
+            if(cp.contains("enabled")) m_cleanupPolicy.enabled=cp["enabled"].get<bool>();
+            if(cp.contains("max_age_hours")) m_cleanupPolicy.maxAge=std::chrono::hours(cp["max_age_hours"].get<int>());
+            if(cp.contains("check_interval_hours")) m_cleanupPolicy.checkInterval=std::chrono::hours(cp["check_interval_hours"].get<int>());
+            if(cp.contains("target_free_percent")) m_cleanupPolicy.targetFreePercent=cp["target_free_percent"].get<double>();
+        }
+
+        if(data.contains("models")&&data["models"].is_array())
+        {
+            for(const nlohmann::json &m:data["models"])
+            {
+                ModelFileEntry entry;
+                entry.modelName=m.value("model", "");
+                entry.variant=m.value("variant", "");
+                entry.filename=m.value("filename", "");
+                entry.fileSizeBytes=m.value("file_size_bytes", int64_t(0));
+                entry.downloadedAt=isoToTimePoint(m.value("downloaded_at", ""));
+                entry.lastUsedAt=isoToTimePoint(m.value("last_used_at", ""));
+                entry.usageCount=m.value("usage_count", 0);
+                entry.hotReady=m.value("hot_ready", false);
+                entry.isProtected=m.value("protected", false);
+                // Entries without a filename cannot be mapped to a file and are dropped.
+                if(!entry.filename.empty())
+                {
+                    m_entries.push_back(entry);
+                }
+            }
+        }
+
+        spdlog::info("StorageManager: loaded {} entries from {}", m_entries.size(), usagePath.string());
+    }
+    catch(const std::exception &e) // parse/type errors: keep whatever was loaded so far and carry on
+    {
+        spdlog::warn("StorageManager: failed to load usage data: {}", e.what());
+    }
+}
+
+void StorageManager::saveUsageData() const
+{
+    // Write limit, cleanup policy and all entries to model_usage.json. NOTE: caller must hold m_mutex
+    if(m_modelsDir.empty())
+    {
+        return;
+    }
+
+    std::filesystem::path usagePath=m_modelsDir/"model_usage.json";
+    nlohmann::json data;
+    data["version"]=1;
+    data["storage_limit_bytes"]=m_storageLimitBytes;
+
+    nlohmann::json cleanupJson;
+    cleanupJson["enabled"]=m_cleanupPolicy.enabled;
+    cleanupJson["max_age_hours"]=m_cleanupPolicy.maxAge.count();
+    cleanupJson["check_interval_hours"]=m_cleanupPolicy.checkInterval.count();
+    cleanupJson["target_free_percent"]=m_cleanupPolicy.targetFreePercent;
+    data["cleanup_policy"]=cleanupJson;
+
+    nlohmann::json models=nlohmann::json::array();
+    for(const ModelFileEntry &entry:m_entries)
+    {
+        nlohmann::json m;
+        m["model"]=entry.modelName;
+        m["variant"]=entry.variant;
+        m["filename"]=entry.filename;
+        m["file_size_bytes"]=entry.fileSizeBytes;
+        m["downloaded_at"]=timePointToIso(entry.downloadedAt);
+        m["last_used_at"]=timePointToIso(entry.lastUsedAt);
+        m["usage_count"]=entry.usageCount;
+        m["hot_ready"]=entry.hotReady;
+        m["protected"]=entry.isProtected;
+        models.push_back(m);
+    }
+    data["models"]=models;
+
+    try
+    {
+        std::ofstream file(usagePath);
+        file<<data.dump(4);
+        file.close();
+        // Streams do not throw by default, so open/write/close failures are reported here.
+        if(!file) spdlog::error("StorageManager: failed to write usage data to {}", usagePath.string());
+    }
+    catch(const std::exception &e)
+    {
+        spdlog::error("StorageManager: failed to save usage data: {}", e.what());
+    }
+}
+
+std::vector<CleanupCandidate> StorageManager::collectCleanupCandidates() const
+{
+    // Select stale, unguarded, not-in-use entries. NOTE: caller must hold m_mutex
+
+    std::vector<CleanupCandidate> candidates;
+
+    auto now=std::chrono::system_clock::now();
+
+    for(const ModelFileEntry &entry:m_entries)
+    {
+        // Skip guarded entries (each guard applies only while the policy's respect* flag is set)
+        if(m_cleanupPolicy.respectHotReady&&entry.hotReady) continue;
+        if(m_cleanupPolicy.respectProtected&&entry.isProtected) continue;
+
+        // Skip entries whose model is Loaded or Ready in ModelRuntime. The lookup is by model name only,
+        // and ModelRuntime's lock is not held here, so this is a best-effort check.
+        std::optional<LoadedModel> runtimeState=ModelRuntime::instance().getModelState(entry.modelName);
+        if(runtimeState.has_value())
+        {
+            ModelState state=runtimeState->state;
+            if(state==ModelState::Loaded||state==ModelState::Ready)
+            {
+                continue;
+            }
+        }
+
+        // Check staleness: keep anything used within maxAge (age is truncated to whole hours)
+        auto age=std::chrono::duration_cast<std::chrono::hours>(now-entry.lastUsedAt);
+        if(age<m_cleanupPolicy.maxAge)
+        {
+            continue;
+        }
+
+        CleanupCandidate candidate;
+        candidate.modelName=entry.modelName;
+        candidate.variant=entry.variant;
+        candidate.filename=entry.filename;
+        candidate.fileSizeBytes=entry.fileSizeBytes;
+        candidate.lastUsedAt=entry.lastUsedAt;
+        candidate.usageCount=entry.usageCount;
+        candidates.push_back(candidate);
+    }
+
+    // Sort by lastUsedAt ascending (oldest first)
+    std::sort(candidates.begin(), candidates.end(),
+        [](const CleanupCandidate &a, const CleanupCandidate &b)
+        {
+            return a.lastUsedAt<b.lastUsedAt;
+        });
+
+    return candidates;
+}
+
+/// Copy an inventory entry into its public form, adding the absolute file
+/// path and the model's current runtime state.
+DownloadedModelFile StorageManager::entryToPublic(const ModelFileEntry &entry) const
+{
+    DownloadedModelFile pub;
+    pub.modelName=entry.modelName;
+    pub.variant=entry.variant;
+    pub.filename=entry.filename;
+    pub.filePath=m_modelsDir/entry.filename;
+    pub.fileSizeBytes=entry.fileSizeBytes;
+    pub.downloadedAt=entry.downloadedAt;
+    pub.lastUsedAt=entry.lastUsedAt;
+    pub.usageCount=entry.usageCount;
+    pub.hotReady=entry.hotReady;
+    pub.isProtected=entry.isProtected;
+
+    // Default to "Unloaded"; only a runtime record for this exact variant
+    // can override it.
+    pub.runtimeState="Unloaded";
+    const std::optional<LoadedModel> live=ModelRuntime::instance().getModelState(entry.modelName);
+    if(live.has_value()&&live->variant==entry.variant)
+    {
+        switch(live->state)
+        {
+            case ModelState::Loaded:      pub.runtimeState="Loaded"; break;
+            case ModelState::Ready:       pub.runtimeState="Ready"; break;
+            case ModelState::Downloading: pub.runtimeState="Downloading"; break;
+            case ModelState::Unloading:   pub.runtimeState="Unloading"; break;
+            default:                      break;
+        }
+    }
+
+    return pub;
+}
+
+void StorageManager::startBackgroundTimer()
+{
+    if(m_timerRunning) // already started: keep the existing thread
+    {
+        return;
+    }
+    // Set the flag before the thread is created so its loop condition is already true.
+    m_timerRunning=true;
+    m_timerThread=std::thread([this]()
+    {
+        // Flush every 5 minutes, cleanup on the cleanup interval
+        constexpr int flushIntervalSeconds=300; // 5 minutes
+        int elapsedSeconds=0;
+
+        while(m_timerRunning)
+        {
+            std::this_thread::sleep_for(std::chrono::seconds(1));
+            elapsedSeconds++;
+            // Re-check after the sleep so a shutdown() request skips this tick's flush/cleanup.
+            if(!m_timerRunning)
+            {
+                break;
+            }
+
+            // Periodic flush
+            if(elapsedSeconds%flushIntervalSeconds==0)
+            {
+                flush();
+            }
+
+            // Periodic cleanup (the interval is re-read under the lock on every tick)
+            int cleanupIntervalSeconds=0;
+            {
+                std::lock_guard<std::mutex> lock(m_mutex);
+                cleanupIntervalSeconds=static_cast<int>(m_cleanupPolicy.checkInterval.count()*3600);
+            }
+            // A non-positive interval disables periodic cleanup.
+            if(cleanupIntervalSeconds>0&&elapsedSeconds%cleanupIntervalSeconds==0)
+            {
+                runCleanup();
+            }
+        }
+    });
+}
+
+} // namespace arbiterAI
diff --git a/src/arbiterAI/storageManager.h b/src/arbiterAI/storageManager.h
new file mode 100644
index 0000000..dcb9fea
--- /dev/null
+++ b/src/arbiterAI/storageManager.h
@@ -0,0 +1,196 @@
+#ifndef _ARBITERAI_STORAGEMANAGER_H_
+#define _ARBITERAI_STORAGEMANAGER_H_
+
+#include <string>
+#include <vector>
+#include <optional>
+#include <filesystem>
+#include <mutex>
+#include <chrono>
+#include <thread>
+#include <atomic>
+
+namespace arbiterAI
+{
+
+struct StorageInfo {
+    std::filesystem::path modelsDirectory; // directory the manager was initialized with
+    int64_t totalDiskBytes=0;           // partition capacity (0 if the space query was skipped or failed)
+    int64_t freeDiskBytes=0;            // partition space available (0 if the query was skipped or failed)
+    int64_t usedByModelsBytes=0;        // sum of all tracked model files
+    int64_t storageLimitBytes=0;        // configured limit (0 = use all free)
+    int64_t availableForModelsBytes=0;  // freeDisk if no limit, else min(freeDisk, max(0, limit - usedByModels))
+    int modelCount=0;                   // number of tracked model files
+    bool cleanupEnabled=true;           // copy of CleanupPolicy::enabled
+};
+
+struct DownloadedModelFile {
+    std::string modelName;
+    std::string variant;             // quantization (e.g., "Q4_K_M")
+    std::string filename;            // file name relative to the models directory
+    std::filesystem::path filePath;  // models directory joined with filename
+    int64_t fileSizeBytes=0;
+    std::chrono::system_clock::time_point downloadedAt;
+    std::chrono::system_clock::time_point lastUsedAt;
+    int usageCount=0;                // number of inference requests served (incremented by recordUsage())
+    bool hotReady=false;             // keep weights in RAM for quick VRAM reload
+    bool isProtected=false;          // protected from deletion (manual and automated)
+    std::string runtimeState;        // "Loaded", "Ready", "Downloading", "Unloading" or "Unloaded" (from ModelRuntime)
+};
+
+struct CleanupPolicy {
+    bool enabled=true;                             // runCleanup() is a no-op when false
+    std::chrono::hours maxAge{30*24};              // 30 days since last use before a file is a candidate
+    std::chrono::hours checkInterval{24};          // run every 24 hours
+    double targetFreePercent=20.0;                 // try to keep 20% free (saved/loaded; not read by runCleanup() in storageManager.cpp)
+    bool respectHotReady=true;                     // never delete hot_ready variants
+    bool respectProtected=true;                    // never delete protected variants
+};
+
+struct CleanupCandidate {
+    std::string modelName;
+    std::string variant;
+    std::string filename;            // file name relative to the models directory
+    int64_t fileSizeBytes=0;         // size recorded in the inventory
+    std::chrono::system_clock::time_point lastUsedAt; // candidates are sorted by this, oldest first
+    int usageCount=0;
+};
+
+class StorageManager {
+public:
+    static StorageManager &instance();
+    static void reset(); // For testing: shuts down, then clears all state back to defaults
+
+    /// Initialize with the models directory path (loads usage data, scans the directory, starts the timer).
+    void initialize(const std::filesystem::path &modelsDir);
+
+    /// Stop the background flush/cleanup timer thread and flush pending changes.
+    void shutdown();
+
+    /// Set the storage limit in bytes. 0 = use all free disk space.
+    void setStorageLimit(int64_t limitBytes);
+
+    /// Get the current storage limit.
+    int64_t getStorageLimit() const;
+
+    /// Get a snapshot of current storage usage.
+    StorageInfo getStorageInfo() const;
+
+    /// Check if a download of the given size can proceed.
+    /// @return true if enough space, false otherwise.
+    bool canDownload(int64_t fileSizeBytes) const;
+
+    /// Get the list of all downloaded model files with usage stats.
+    std::vector<DownloadedModelFile> getDownloadedModels() const;
+
+    /// Register a completed download (updates inventory).
+    void registerDownload(const std::string &modelName,
+        const std::string &variant,
+        const std::string &filename,
+        int64_t fileSizeBytes);
+
+    /// Record a model usage event (inference served).
+    void recordUsage(const std::string &modelName, const std::string &variant);
+
+    /// Delete a downloaded model file from disk.
+    /// Fails if the variant is hot ready or protected (must clear flags first).
+    /// @param modelName Model name.
+    /// @param variant Specific variant to delete. If empty, deletes all variants of the model.
+    /// @param freedBytes [out] Total bytes freed.
+    /// @return true if deleted, false if file not found, deletion failed, or guarded.
+    bool deleteModelFile(const std::string &modelName, const std::string &variant,
+        int64_t &freedBytes);
+
+    /// Set/clear hot ready on a variant (keep weights in RAM for quick VRAM reload).
+    /// @return true if the variant was found, false otherwise.
+    bool setHotReady(const std::string &modelName, const std::string &variant, bool enabled);
+
+    /// Set/clear protected on a variant (prevent deletion, manual or automated).
+    /// @return true if the variant was found, false otherwise.
+    bool setProtected(const std::string &modelName, const std::string &variant, bool enabled);
+
+    /// Get the storage stats for a specific model (all variants).
+    std::vector<DownloadedModelFile> getModelStats(const std::string &modelName) const;
+
+    /// Get the storage stats for a specific model variant.
+    std::optional<DownloadedModelFile> getVariantStats(
+        const std::string &modelName, const std::string &variant) const;
+
+    /// Check if a variant is guarded (hot ready or protected).
+    /// @return true if either flag is set, false otherwise.
+    bool isGuarded(const std::string &modelName, const std::string &variant) const;
+
+    /// Flush usage stats to disk (called periodically and on shutdown).
+    void flush();
+
+    /// Reconcile the inventory with the GGUF files on disk. Does not lock: the caller must hold m_mutex.
+    void scanModelsDirectory();
+
+    // ========== Cleanup ==========
+
+    /// Set the cleanup policy.
+    void setCleanupPolicy(const CleanupPolicy &policy);
+
+    /// Get the current cleanup policy.
+    CleanupPolicy getCleanupPolicy() const;
+
+    /// Preview what automated cleanup would delete (without deleting anything).
+    std::vector<CleanupCandidate> previewCleanup() const;
+
+    /// Run cleanup: remove stale, unguarded, unloaded variants.
+    /// @return Total bytes freed.
+    int64_t runCleanup();
+
+private:
+    StorageManager()=default;
+    ~StorageManager();
+
+    StorageManager(const StorageManager &)=delete;
+    StorageManager &operator=(const StorageManager &)=delete;
+
+    struct ModelFileEntry {
+        std::string modelName;
+        std::string variant;
+        std::string filename;
+        int64_t fileSizeBytes=0;
+        std::chrono::system_clock::time_point downloadedAt;
+        std::chrono::system_clock::time_point lastUsedAt;
+        int usageCount=0;
+        bool hotReady=false;
+        bool isProtected=false;
+    };
+
+    /// Find an entry by model name and variant (nullptr if not tracked; caller holds m_mutex).
+    ModelFileEntry *findEntry(const std::string &modelName, const std::string &variant);
+    const ModelFileEntry *findEntry(const std::string &modelName, const std::string &variant) const;
+
+    /// Read/write model_usage.json inside the models directory (caller holds m_mutex).
+    void loadUsageData();
+    void saveUsageData() const;
+
+    /// Collect cleanup candidates (caller holds m_mutex).
+    std::vector<CleanupCandidate> collectCleanupCandidates() const;
+
+    /// Convert internal entry to public DownloadedModelFile.
+    DownloadedModelFile entryToPublic(const ModelFileEntry &entry) const;
+
+    /// Start the background flush/cleanup timer.
+    void startBackgroundTimer();
+
+    std::filesystem::path m_modelsDir;
+    int64_t m_storageLimitBytes=0;
+
+    std::vector<ModelFileEntry> m_entries;
+    mutable std::mutex m_mutex;
+    bool m_initialized=false;
+    bool m_dirty=false; // has unsaved changes
+
+    CleanupPolicy m_cleanupPolicy;
+
+    // Background timer
+    std::thread m_timerThread;
+    std::atomic<bool> m_timerRunning{false};
+};
+
+} // namespace arbiterAI
+
+#endif//_ARBITERAI_STORAGEMANAGER_H_
diff --git a/src/server/dashboard.h b/src/server/dashboard.h
index 296dd22..fc9b3e6 100644
--- a/src/server/dashboard.h
+++ b/src/server/dashboard.h
@@ -198,6 +198,89 @@ td
     background: #3a1515;
     color: #ff6060;
 }
+.btn-disabled
+{
+    opacity: 0.4;
+    cursor: not-allowed;
+}
+.btn-toggle
+{
+    padding: 2px 8px;
+    font-size: 11px;
+}
+.btn-toggle.active
+{
+    background: #1b3a2a;
+    border-color: #4caf50;
+    color: #4caf50;
+}
+.storage-bar-outer
+{
+    background: #1f2230;
+    border-radius: 4px;
+    height: 24px;
+    margin: 8px 0;
+    overflow: hidden;
+    position: relative;
+}
+.storage-bar-fill
+{
+    height: 100%;
+    border-radius: 4px;
+    background: linear-gradient(90deg, #4a6cf7, #7c8aff);
+    transition: width 0.5s ease;
+}
+.storage-bar-text
+{
+    position: absolute;
+    top: 0;
+    left: 0;
+    right: 0;
+    height: 100%;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    font-size: 12px;
+    color: #e0e0e0;
+    font-weight: 500;
+}
+.storage-info
+{
+    display: flex;
+    justify-content: space-between;
+    font-size: 12px;
+    color: #888;
+    margin-bottom: 4px;
+}
+.row-fresh
+{
+    border-left: 3px solid #4caf50;
+}
+.row-stale
+{
+    border-left: 3px solid #f0c040;
+}
+.row-old
+{
+    border-left: 3px solid #ff4444;
+}
+.progress-inline
+{
+    display: inline-block;
+    width: 120px;
+    height: 12px;
+    background: #1f2230;
+    border-radius: 3px;
+    overflow: hidden;
+    vertical-align: middle;
+    margin-right: 6px;
+}
+.progress-inline-fill
+{
+    height: 100%;
+    background: linear-gradient(90deg, #4a6cf7, #7c8aff);
+    transition: width 0.3s ease;
+}
 .chart-container
 {
     height: 120px;
@@ -319,6 +402,43 @@ td
             </tbody>
         </table>
     </div>
+    <div class="card" style="margin-bottom:20px;">
+        <h2>Downloaded Models</h2>
+        <div id="storageBarSection">
+            <div class="storage-info">
+                <span id="storageUsedLabel">Used: -</span>
+                <span id="storageLimitLabel">Limit: -</span>
+            </div>
+            <div class="storage-bar-outer">
+                <div class="storage-bar-fill" id="storageBarFill" style="width:0%"></div>
+                <div class="storage-bar-text" id="storageBarText">-</div>
+            </div>
+            <div class="storage-info">
+                <span id="storageCleanupLabel">Auto-cleanup: -</span>
+                <span id="storageCandidatesLabel"></span>
+            </div>
+        </div>
+        <div id="downloadProgressSection" style="margin:8px 0;"></div>
+        <table>
+            <thead>
+                <tr>
+                    <th>Model</th>
+                    <th>Variant</th>
+                    <th>Size</th>
+                    <th>Downloaded</th>
+                    <th>Last Used</th>
+                    <th>Uses</th>
+                    <th>State</th>
+                    <th>Hot Ready</th>
+                    <th>Protected</th>
+                    <th>Actions</th>
+                </tr>
+            </thead>
+            <tbody id="downloadedModelTable">
+                <tr><td colspan="10" style="color:#666;text-align:center;">No downloaded models</td></tr>
+            </tbody>
+        </table>
+    </div>
     <div class="grid">
         <div class="card">
             <h2>Recent Inferences</h2>
@@ -342,8 +462,10 @@ td
 </div>
 <script>
 const POLL_INTERVAL=2000;
+const FAST_POLL_INTERVAL=1000;
 let tpsHistory=[];
 const MAX_TPS_POINTS=60;
+let hasActiveDownloads=false;
 
 function formatMb(mb)
 {
@@ -540,6 +662,178 @@ function renderSwaps(swaps)
     el.innerHTML=html;
 }
 
+function formatBytesJs(bytes)
+{
+    if(bytes>=1073741824) return (bytes/1073741824).toFixed(1)+" GB";
+    if(bytes>=1048576) return (bytes/1048576).toFixed(1)+" MB";
+    return bytes+" B";
+}
+
+function formatDate(isoStr)
+{
+    if(!isoStr) return "-";
+    const d=new Date(isoStr);
+    return d.toLocaleDateString();
+}
+
+function daysSince(isoStr)
+{
+    if(!isoStr) return 999;
+    const d=new Date(isoStr);
+    const now=new Date();
+    return Math.floor((now-d)/(1000*60*60*24));
+}
+
+function rowAgeClass(lastUsed)
+{
+    const days=daysSince(lastUsed);
+    if(days>30) return "row-old";
+    if(days>14) return "row-stale";
+    return "row-fresh";
+}
+
+async function toggleHotReady(name, variant, currentlyHotReady)
+{
+    const method=currentlyHotReady?"DELETE":"POST";
+    const url="/api/models/"+encodeURIComponent(name)+"/variants/"+encodeURIComponent(variant)+"/hot-ready";
+    await fetch(url, {method});
+    await refreshStorage();
+}
+
+async function toggleProtected(name, variant, currentlyProtected)
+{
+    const method=currentlyProtected?"DELETE":"POST";
+    const url="/api/models/"+encodeURIComponent(name)+"/variants/"+encodeURIComponent(variant)+"/protected";
+    await fetch(url, {method});
+    await refreshStorage();
+}
+
+async function deleteModelFile(name, variant)
+{
+    if(!confirm("Delete "+name+" "+variant+"? This cannot be undone.")) return;
+    const url="/api/models/"+encodeURIComponent(name)+"/files"+(variant?"?variant="+encodeURIComponent(variant):"");
+    const resp=await fetch(url, {method:"DELETE"});
+    if(resp.status===409)
+    {
+        const data=await resp.json();
+        alert(data.error?.message||"Cannot delete: variant is guarded");
+    }
+    await refreshStorage();
+}
+
+function renderStorageBar(storage)
+{
+    if(!storage) return;
+
+    const used=storage.used_by_models_bytes||0;
+    const limit=storage.storage_limit_bytes;
+    const free=storage.free_disk_bytes||0;
+    const total=limit>0?limit:(used+free);
+    const pct=total>0?(used/total*100):0;
+
+    document.getElementById("storageUsedLabel").textContent="Used: "+formatBytesJs(used);
+    document.getElementById("storageLimitLabel").textContent=limit>0?"Limit: "+formatBytesJs(limit):"Limit: All free space";
+    document.getElementById("storageBarFill").style.width=pct.toFixed(1)+"%";
+    document.getElementById("storageBarText").textContent=formatBytesJs(used)+" / "+formatBytesJs(total)+" ("+pct.toFixed(1)+"%)";
+    document.getElementById("storageCleanupLabel").textContent="Auto-cleanup: "+(storage.cleanup_enabled?"ON":"OFF");
+}
+
+function renderDownloadProgress(downloads)
+{
+    const el=document.getElementById("downloadProgressSection");
+    if(!downloads||downloads.length===0)
+    {
+        el.innerHTML="";
+        hasActiveDownloads=false;
+        return;
+    }
+
+    hasActiveDownloads=true;
+    let html="";
+    for(const dl of downloads)
+    {
+        const pct=dl.percent_complete||0;
+        const downloaded=dl.bytes_downloaded||0;
+        const total=dl.total_bytes||0;
+        const speed=dl.speed_mbps||0;
+        const eta=dl.eta_seconds||0;
+
+        html+=`<div style="padding:6px 0;border-bottom:1px solid #1f2230;">
+            <span style="font-weight:500;">${dl.model}</span>
+            <span style="color:#888;margin-left:4px;">${dl.variant||""}</span>
+            <span class="badge badge-downloading" style="margin-left:8px;">Downloading</span>
+            <div style="margin-top:4px;">
+                <div class="progress-inline"><div class="progress-inline-fill" style="width:${pct.toFixed(1)}%"></div></div>
+                <span style="font-size:12px;color:#ccc;">${pct.toFixed(1)}%</span>
+                ${total>0?`<span style="font-size:12px;color:#888;margin-left:8px;">${formatBytesJs(downloaded)} / ${formatBytesJs(total)}</span>`:""}
+                ${speed>0?`<span style="font-size:12px;color:#888;margin-left:8px;">${speed.toFixed(1)} MB/s</span>`:""}
+                ${eta>0?`<span style="font-size:12px;color:#888;margin-left:8px;">~${eta}s left</span>`:""}
+            </div>
+        </div>`;
+    }
+    el.innerHTML=html;
+}
+
+function renderDownloadedModels(models)
+{
+    const el=document.getElementById("downloadedModelTable");
+
+    if(!models||models.length===0)
+    {
+        el.innerHTML='<tr><td colspan="10" style="color:#666;text-align:center;">No downloaded models</td></tr>';
+        return;
+    }
+
+    let html="";
+    for(const m of models)
+    {
+        const ageClass=rowAgeClass(m.last_used_at);
+        const guarded=m.hot_ready||m.protected;
+        const hrClass=m.hot_ready?"btn-toggle active":"btn-toggle";
+        const prClass=m.protected?"btn-toggle active":"btn-toggle";
+        const deleteDisabled=guarded?"btn-disabled":"";
+        const deleteTitle=guarded?"Clear hot_ready and protected first":"Delete model file";
+
+        html+=`<tr class="${ageClass}">
+            <td>${m.model}</td>
+            <td>${m.variant||"-"}</td>
+            <td>${m.file_size_display||formatBytesJs(m.file_size_bytes)}</td>
+            <td>${formatDate(m.downloaded_at)}</td>
+            <td>${formatDate(m.last_used_at)}</td>
+            <td>${m.usage_count||0}</td>
+            <td><span class="badge ${stateClass(m.runtime_state)}">${m.runtime_state||"Unloaded"}</span></td>
+            <td><button class="btn ${hrClass}" onclick="toggleHotReady('${m.model}','${m.variant}',${m.hot_ready})">${m.hot_ready?"ON":"OFF"}</button></td>
+            <td><button class="btn ${prClass}" onclick="toggleProtected('${m.model}','${m.variant}',${m.protected})">${m.protected?"ON":"OFF"}</button></td>
+            <td><button class="btn btn-danger ${deleteDisabled}" title="${deleteTitle}" onclick="${guarded?"":`deleteModelFile('${m.model}','${m.variant}')`}" ${guarded?"disabled":""}>Delete</button></td>
+        </tr>`;
+    }
+    el.innerHTML=html;
+}
+
+async function refreshStorage()
+{
+    const [storage, storageModels, downloads, cleanupPreview]=await Promise.all([
+        fetchJson("/api/storage"),
+        fetchJson("/api/storage/models"),
+        fetchJson("/api/downloads"),
+        fetchJson("/api/storage/cleanup/preview")
+    ]);
+
+    renderStorageBar(storage);
+
+    if(downloads&&downloads.downloads) renderDownloadProgress(downloads.downloads);
+    else renderDownloadProgress([]);
+
+    if(storageModels&&storageModels.models) renderDownloadedModels(storageModels.models);
+    else renderDownloadedModels([]);
+
+    if(cleanupPreview)
+    {
+        const count=cleanupPreview.candidate_count||0;
+        document.getElementById("storageCandidatesLabel").textContent=count>0?count+" cleanup candidate"+(count>1?"s":""):"";
+    }
+}
+
 async function refresh()
 {
     const [stats, history, swaps, hw]=await Promise.all([
@@ -597,6 +891,9 @@ async function refresh()
 
     // Swaps
     if(swaps) renderSwaps(swaps);
+
+    // Storage (runs in parallel)
+    refreshStorage();
 }
 
 async function loadVersion()
diff --git a/src/server/main.cpp b/src/server/main.cpp
index 96131d7..1f59cb8 100644
--- a/src/server/main.cpp
+++ b/src/server/main.cpp
@@ -2,6 +2,7 @@
 
 #include "arbiterAI/arbiterAI.h"
 #include "arbiterAI/modelRuntime.h"
+#include "arbiterAI/storageManager.h"
 
 #include <httplib.h>
 #include <cxxopts.hpp>
@@ -24,6 +25,11 @@ int main(int argc, char *argv[])
         ("v,variant", "Default variant (e.g., Q4_K_M)", cxxopts::value<std::string>()->default_value(""))
         ("override-path", "Path to write runtime model config overrides (enables persistence)", cxxopts::value<std::string>()->default_value(""))
         ("ram-budget", "Ready model RAM budget in MB (0 = auto 50%)", cxxopts::value<int>()->default_value("0"))
+        ("models-dir", "Path to directory for downloaded model files", cxxopts::value<std::string>()->default_value("/models"))
+        ("storage-limit", "Maximum bytes for model storage (0 = all free, supports suffixes: 10G, 500M)", cxxopts::value<std::string>()->default_value("0"))
+        ("cleanup-enabled", "Enable automated storage cleanup", cxxopts::value<bool>()->default_value("true"))
+        ("cleanup-max-age", "Maximum days since last use before cleanup candidacy", cxxopts::value<int>()->default_value("30"))
+        ("cleanup-interval", "Hours between automated cleanup runs", cxxopts::value<int>()->default_value("24"))
         ("log-level", "Log level (trace, debug, info, warn, error)", cxxopts::value<std::string>()->default_value("info"))
         ("h,help", "Print usage");
 
@@ -62,6 +68,39 @@ int main(int argc, char *argv[])
     std::string defaultVariant=result["variant"].as<std::string>();
     std::string overridePath=result["override-path"].as<std::string>();
     int ramBudget=result["ram-budget"].as<int>();
+    std::string modelsDir=result["models-dir"].as<std::string>();
+    std::string storageLimitStr=result["storage-limit"].as<std::string>();
+    bool cleanupEnabled=result["cleanup-enabled"].as<bool>();
+    int cleanupMaxAgeDays=result["cleanup-max-age"].as<int>();
+    int cleanupIntervalHours=result["cleanup-interval"].as<int>();
+
+    // Parse storage limit (supports suffixes: G, M, K)
+    int64_t storageLimitBytes=0;
+    if(!storageLimitStr.empty()&&storageLimitStr!="0")
+    {
+        char suffix=storageLimitStr.back();
+        std::string numStr=storageLimitStr;
+
+        if(suffix=='G'||suffix=='g')
+        {
+            numStr.pop_back();
+            storageLimitBytes=static_cast<int64_t>(std::stod(numStr)*1073741824);
+        }
+        else if(suffix=='M'||suffix=='m')
+        {
+            numStr.pop_back();
+            storageLimitBytes=static_cast<int64_t>(std::stod(numStr)*1048576);
+        }
+        else if(suffix=='K'||suffix=='k')
+        {
+            numStr.pop_back();
+            storageLimitBytes=static_cast<int64_t>(std::stod(numStr)*1024);
+        }
+        else
+        {
+            storageLimitBytes=std::stoll(storageLimitStr);
+        }
+    }
 
     // Convert config paths
     std::vector<std::filesystem::path> configPaths;
@@ -84,6 +123,24 @@ int main(int argc, char *argv[])
 
     spdlog::info("ArbiterAI initialized successfully");
 
+    // Configure StorageManager
+    arbiterAI::StorageManager &storage=arbiterAI::StorageManager::instance();
+    storage.initialize(modelsDir);
+
+    if(storageLimitBytes>0)
+    {
+        storage.setStorageLimit(storageLimitBytes);
+        spdlog::info("Storage limit set to {} bytes", storageLimitBytes);
+    }
+
+    arbiterAI::CleanupPolicy cleanupPolicy;
+    cleanupPolicy.enabled=cleanupEnabled;
+    cleanupPolicy.maxAge=std::chrono::hours(cleanupMaxAgeDays*24);
+    cleanupPolicy.checkInterval=std::chrono::hours(cleanupIntervalHours);
+    storage.setCleanupPolicy(cleanupPolicy);
+    spdlog::info("Cleanup policy: enabled={}, maxAge={}d, interval={}h",
+        cleanupEnabled, cleanupMaxAgeDays, cleanupIntervalHours);
+
     // Set RAM budget if specified
     if(ramBudget>0)
     {
@@ -145,6 +202,18 @@ int main(int argc, char *argv[])
     spdlog::info("  GET  /api/stats/history      - Inference history");
     spdlog::info("  GET  /api/stats/swaps        - Swap history");
     spdlog::info("  GET  /api/hardware           - Hardware info");
+    spdlog::info("  GET  /api/storage            - Storage overview");
+    spdlog::info("  GET  /api/storage/models     - Downloaded models");
+    spdlog::info("  GET  /api/storage/models/:n  - Model storage stats");
+    spdlog::info("  POST /api/storage/limit       - Set storage limit");
+    spdlog::info("  DEL  /api/models/:n/files     - Delete model files");
+    spdlog::info("  POST /api/models/:n/variants/:v/hot-ready    - Set hot ready");
+    spdlog::info("  DEL  /api/models/:n/variants/:v/hot-ready    - Clear hot ready");
+    spdlog::info("  POST /api/models/:n/variants/:v/protected    - Set protected");
+    spdlog::info("  DEL  /api/models/:n/variants/:v/protected    - Clear protected");
+    spdlog::info("  GET  /api/storage/cleanup/preview - Preview cleanup");
+    spdlog::info("  POST /api/storage/cleanup/run     - Run cleanup");
+    spdlog::info("  GET  /api/downloads          - Active downloads");
     spdlog::info("  GET  /dashboard              - Live dashboard");
 
     spdlog::info("Starting server on {}:{}", host, port);
diff --git a/src/server/routes.cpp b/src/server/routes.cpp
index a3994d6..527a819 100644
--- a/src/server/routes.cpp
+++ b/src/server/routes.cpp
@@ -7,6 +7,7 @@
 #include "arbiterAI/modelFitCalculator.h"
 #include "arbiterAI/hardwareDetector.h"
 #include "arbiterAI/telemetryCollector.h"
+#include "arbiterAI/storageManager.h"
 
 #include <nlohmann/json.hpp>
 #include <spdlog/spdlog.h>
@@ -184,6 +185,7 @@ std::string errorCodeToString(ErrorCode code)
         case ErrorCode::ModelLoadError:      return "model_load_error";
         case ErrorCode::ModelDownloading:    return "model_downloading";
         case ErrorCode::ModelDownloadFailed: return "model_download_failed";
+        case ErrorCode::InsufficientStorage: return "insufficient_storage";
         case ErrorCode::UnknownModel:        return "unknown_model";
         case ErrorCode::UnsupportedProvider: return "unsupported_provider";
         case ErrorCode::InvalidRequest:      return "invalid_request";
@@ -209,6 +211,19 @@ void setOverridePath(const std::string &path)
 
 void registerRoutes(httplib::Server &server)
 {
+    // Ensure POST/PUT/DELETE requests without a body include Content-Length: 0.
+    // httplib's read_content() rejects requests that lack Content-Length,
+    // but many REST clients omit it for bodyless POST (e.g. /api/models/:name/load).
+    server.set_pre_routing_handler([](const httplib::Request &req, httplib::Response &) -> httplib::Server::HandlerResponse {
+        if((req.method=="POST"||req.method=="PUT"||req.method=="DELETE")
+            &&!req.has_header("Content-Length")
+            &&!req.has_header("Transfer-Encoding"))
+        {
+            const_cast<httplib::Request &>(req).set_header("Content-Length", "0");
+        }
+        return httplib::Server::HandlerResponse::Unhandled;
+    });
+
     // CORS preflight handler for all routes
     server.Options(R"(.*)", [](const httplib::Request &, httplib::Response &res)
     {
@@ -264,6 +279,23 @@ void registerRoutes(httplib::Server &server)
     server.Get("/api/stats/swaps", handleGetStatsSwaps);
     server.Get("/api/hardware", handleGetHardware);
 
+    // Storage management
+    server.Get("/api/storage", handleGetStorage);
+    server.Get("/api/storage/models", handleGetStorageModels);
+    server.Get(R"(/api/storage/models/([^/]+)/([^/]+))", handleGetStorageModelVariant);
+    server.Get(R"(/api/storage/models/([^/]+))", handleGetStorageModel);
+    server.Post("/api/storage/limit", handleSetStorageLimit);
+    server.Delete(R"(/api/models/([^/]+)/files)", handleDeleteModelFiles);
+    server.Post(R"(/api/models/([^/]+)/variants/([^/]+)/hot-ready)", handleSetHotReady);
+    server.Delete(R"(/api/models/([^/]+)/variants/([^/]+)/hot-ready)", handleClearHotReady);
+    server.Post(R"(/api/models/([^/]+)/variants/([^/]+)/protected)", handleSetProtected);
+    server.Delete(R"(/api/models/([^/]+)/variants/([^/]+)/protected)", handleClearProtected);
+    server.Get("/api/storage/cleanup/preview", handleGetCleanupPreview);
+    server.Post("/api/storage/cleanup/run", handleRunCleanup);
+    server.Get("/api/storage/cleanup/config", handleGetCleanupConfig);
+    server.Put("/api/storage/cleanup/config", handleSetCleanupConfig);
+    server.Get("/api/downloads", handleGetActiveDownloads);
+
     // Dashboard
     server.Get("/dashboard", handleDashboard);
 
@@ -830,6 +862,24 @@ void handleLoadModel(const httplib::Request &req, httplib::Response &res)
             res.status=202;
             res.set_content(nlohmann::json{{"status", "downloading"}, {"model", modelName}}.dump(), "application/json");
         }
+        else if(err==ErrorCode::InsufficientStorage)
+        {
+            StorageInfo storageInfo=StorageManager::instance().getStorageInfo();
+            nlohmann::json details={
+                {"available_bytes", storageInfo.availableForModelsBytes},
+                {"storage_limit_bytes", storageInfo.storageLimitBytes},
+                {"used_by_models_bytes", storageInfo.usedByModelsBytes},
+                {"suggestion", "Delete unused models or increase the storage limit"}
+            };
+            res.status=507;
+            res.set_content(nlohmann::json{
+                {"error", {
+                    {"message", "Insufficient storage to load model"},
+                    {"type", "insufficient_storage"},
+                    {"details", details}
+                }}
+            }.dump(), "application/json");
+        }
         else
         {
             res.status=400;
@@ -917,8 +967,43 @@ void handleDownloadModel(const httplib::Request &req, httplib::Response &res)
     }
     else
     {
-        res.status=400;
-        res.set_content(errorJson("Download failed: "+errorCodeToString(err), "invalid_request_error", "model", errorCodeToString(err)).dump(), "application/json");
+        if(err==ErrorCode::InsufficientStorage)
+        {
+            StorageInfo storageInfo=StorageManager::instance().getStorageInfo();
+            std::vector<CleanupCandidate> cleanupCandidates=StorageManager::instance().previewCleanup();
+
+            nlohmann::json candidatesJson=nlohmann::json::array();
+            for(const CleanupCandidate &c:cleanupCandidates)
+            {
+                candidatesJson.push_back({
+                    {"model", c.modelName},
+                    {"variant", c.variant},
+                    {"file_size_bytes", c.fileSizeBytes},
+                    {"usage_count", c.usageCount}
+                });
+            }
+
+            nlohmann::json details={
+                {"available_bytes", storageInfo.availableForModelsBytes},
+                {"storage_limit_bytes", storageInfo.storageLimitBytes},
+                {"used_by_models_bytes", storageInfo.usedByModelsBytes},
+                {"cleanup_candidates", candidatesJson},
+                {"suggestion", "Delete unused models or increase the storage limit"}
+            };
+            res.status=507;
+            res.set_content(nlohmann::json{
+                {"error", {
+                    {"message", "Insufficient storage to download model"},
+                    {"type", "insufficient_storage"},
+                    {"details", details}
+                }}
+            }.dump(), "application/json");
+        }
+        else
+        {
+            res.status=400;
+            res.set_content(errorJson("Download failed: "+errorCodeToString(err), "invalid_request_error", "model", errorCodeToString(err)).dump(), "application/json");
+        }
     }
 }
 
@@ -941,6 +1026,17 @@ void handleGetDownloadStatus(const httplib::Request &req, httplib::Response &res
         response["error"]=error;
     }
 
+    // Include speed/ETA snapshot if download is active
+    std::optional<DownloadProgressSnapshot> snap=ModelRuntime::instance().getDownloadSnapshot(modelName);
+    if(snap.has_value())
+    {
+        response["bytes_downloaded"]=snap->bytesDownloaded;
+        response["total_bytes"]=snap->totalBytes;
+        response["percent_complete"]=snap->percentComplete;
+        response["speed_mbps"]=snap->speedMbps;
+        response["eta_seconds"]=snap->etaSeconds;
+    }
+
     res.set_content(response.dump(), "application/json");
 }
 
@@ -1190,6 +1286,477 @@ void handleGetHardware(const httplib::Request &, httplib::Response &res)
     res.set_content(systemInfoToJson(hw).dump(), "application/json");
 }
 
+// ========== Storage Management ==========
+
+namespace
+{
+
+std::string formatBytesDisplay(int64_t bytes)
+{
+    // Human-readable size: one decimal place in GB or MB (binary units); below 1 MB the raw byte count.
+    constexpr int64_t kBytesPerMb=1048576;
+    constexpr int64_t kBytesPerGb=1073741824;
+
+    if(bytes<kBytesPerMb)
+    {
+        return std::to_string(bytes)+" B";
+    }
+
+    const bool useGb=(bytes>=kBytesPerGb);
+    const double scaled=static_cast<double>(bytes)/static_cast<double>(useGb?kBytesPerGb:kBytesPerMb);
+
+    std::ostringstream out;
+    out<<std::fixed<<std::setprecision(1)<<scaled<<(useGb?" GB":" MB");
+    return out.str();
+}
+
+std::string timePointToIsoStr(const std::chrono::system_clock::time_point &tp)
+{
+    // Format tp as a UTC ISO-8601 timestamp with second resolution, e.g. 1970-01-01T00:00:00Z.
+    const std::time_t seconds=std::chrono::system_clock::to_time_t(tp);
+    std::tm utc{};
+    gmtime_r(&seconds, &utc); // reentrant POSIX variant of gmtime
+    char text[32];
+    std::strftime(text, sizeof(text), "%Y-%m-%dT%H:%M:%SZ", &utc);
+    return std::string(text);
+}
+
+nlohmann::json downloadedModelToJson(const DownloadedModelFile &f)
+{
+    nlohmann::json out=nlohmann::json::object(); // one downloaded variant as served by the storage endpoints
+    out["model"]=f.modelName;
+    out["variant"]=f.variant;
+    out["filename"]=f.filename;
+    out["file_size_bytes"]=f.fileSizeBytes;
+    out["file_size_display"]=formatBytesDisplay(f.fileSizeBytes);
+    out["downloaded_at"]=timePointToIsoStr(f.downloadedAt);
+    out["last_used_at"]=timePointToIsoStr(f.lastUsedAt);
+    out["usage_count"]=f.usageCount;
+    out["hot_ready"]=f.hotReady;
+    out["protected"]=f.isProtected;
+    out["runtime_state"]=f.runtimeState;
+    return out;
+}
+
+} // anonymous namespace
+
+void handleGetStorage(const httplib::Request &, httplib::Response &res)
+{
+    // Storage overview: disk totals, bytes used by models, the configured limit and the cleanup switch.
+    const StorageInfo info=StorageManager::instance().getStorageInfo();
+
+    nlohmann::json body=nlohmann::json::object();
+    body["models_directory"]=info.modelsDirectory.string();
+    body["total_disk_bytes"]=info.totalDiskBytes;
+    body["free_disk_bytes"]=info.freeDiskBytes;
+    body["used_by_models_bytes"]=info.usedByModelsBytes;
+    body["storage_limit_bytes"]=info.storageLimitBytes;
+    body["available_for_models_bytes"]=info.availableForModelsBytes;
+    body["model_count"]=info.modelCount;
+    body["cleanup_enabled"]=info.cleanupEnabled;
+
+    res.set_content(body.dump(), "application/json");
+}
+
+void handleGetStorageModels(const httplib::Request &req, httplib::Response &res)
+{
+    // List every downloaded variant. Query parameters: sort=name|size|usage_count|last_used
+    // (anything else falls back to last_used) and order=asc|desc (anything but "asc" is descending).
+    std::vector<DownloadedModelFile> models=StorageManager::instance().getDownloadedModels();
+
+    const std::string sortField=req.has_param("sort")?req.get_param_value("sort"):"last_used";
+    const bool ascending=req.has_param("order")&&req.get_param_value("order")=="asc";
+
+    // std::sort needs a strict weak ordering, so descending order swaps the operands rather than
+    // negating the result (negation would make equal elements compare "less" in both directions).
+    auto compare=[&](const DownloadedModelFile &lhs, const DownloadedModelFile &rhs) -> bool
+    {
+        const DownloadedModelFile &a=ascending?lhs:rhs;
+        const DownloadedModelFile &b=ascending?rhs:lhs;
+        if(sortField=="name") return a.modelName<b.modelName;
+        if(sortField=="size") return a.fileSizeBytes<b.fileSizeBytes;
+        if(sortField=="usage_count") return a.usageCount<b.usageCount;
+        return a.lastUsedAt<b.lastUsedAt; // default: last_used
+    };
+    std::sort(models.begin(), models.end(), compare);
+
+    int64_t totalSize=0;
+    nlohmann::json modelsJson=nlohmann::json::array();
+    for(const DownloadedModelFile &f:models)
+    {
+        modelsJson.push_back(downloadedModelToJson(f));
+        totalSize+=f.fileSizeBytes;
+    }
+
+    nlohmann::json response={
+        {"models", modelsJson},
+        {"total_size_bytes", totalSize},
+        {"total_size_display", formatBytesDisplay(totalSize)}
+    };
+    res.set_content(response.dump(), "application/json");
+}
+
+void handleGetStorageModel(const httplib::Request &req, httplib::Response &res)
+{
+    // Report every downloaded variant of one model plus their combined size; 404 when none exist.
+    const std::string modelName=req.matches[1];
+
+    const std::vector<DownloadedModelFile> files=StorageManager::instance().getModelStats(modelName);
+
+    if(files.empty())
+    {
+        res.status=404;
+        res.set_content(errorJson("No downloaded files found for model: "+modelName, "not_found_error").dump(), "application/json");
+        return;
+    }
+
+    int64_t combinedBytes=0;
+    nlohmann::json variantList=nlohmann::json::array();
+    for(const DownloadedModelFile &file:files)
+    {
+        combinedBytes+=file.fileSizeBytes;
+        variantList.push_back(downloadedModelToJson(file));
+    }
+
+    nlohmann::json body=nlohmann::json::object();
+    body["model"]=modelName;
+    body["variants"]=variantList;
+    body["total_size_bytes"]=combinedBytes;
+    body["total_size_display"]=formatBytesDisplay(combinedBytes);
+
+    res.set_content(body.dump(), "application/json");
+}
+
+void handleGetStorageModelVariant(const httplib::Request &req, httplib::Response &res)
+{
+    // Return the storage record for a single model variant, or 404 when no record is found.
+    const std::string modelName=req.matches[1];
+    const std::string variant=req.matches[2];
+    const std::optional<DownloadedModelFile> record=StorageManager::instance().getVariantStats(modelName, variant);
+    if(record)
+    {
+        res.set_content(downloadedModelToJson(*record).dump(), "application/json");
+    }
+    else
+    {
+        res.status=404;
+        res.set_content(errorJson("Variant not found: "+modelName+" "+variant, "not_found_error").dump(), "application/json");
+    }
+}
+
+void handleSetStorageLimit(const httplib::Request &req, httplib::Response &res)
+{
+    // Body: {"limit_bytes": <integer>}; a missing key is treated as 0. Any std::exception raised on the way (e.g. malformed JSON) yields 400.
+    try
+    {
+        const nlohmann::json payload=nlohmann::json::parse(req.body);
+        const int64_t limitBytes=payload.value("limit_bytes", int64_t(0));
+        StorageManager::instance().setStorageLimit(limitBytes);
+
+        nlohmann::json reply=nlohmann::json::object();
+        reply["status"]="updated";
+        reply["storage_limit_bytes"]=limitBytes;
+        res.set_content(reply.dump(), "application/json");
+    }
+    catch(const std::exception &e)
+    {
+        res.status=400;
+        res.set_content(errorJson("Invalid request: "+std::string(e.what()), "invalid_request_error").dump(), "application/json");
+    }
+}
+
+void handleDeleteModelFiles(const httplib::Request &req, httplib::Response &res)
+{ // DELETE handler: removes a model's files, or only one variant when ?variant= is given
+    std::string modelName=req.matches[1];
+    std::string variant; // stays empty when no "variant" query parameter is supplied
+    if(req.has_param("variant"))
+    {
+        variant=req.get_param_value("variant");
+    }
+
+    // Early guard check: only performed when a specific variant was requested.
+    if(!variant.empty()&&StorageManager::instance().isGuarded(modelName, variant))
+    {
+        std::optional<DownloadedModelFile> stats=StorageManager::instance().getVariantStats(modelName, variant);
+        bool hotReady=stats.has_value()?stats->hotReady:false; // both flags default to false if the stats lookup fails
+        bool isProtected=stats.has_value()?stats->isProtected:false;
+
+        res.status=409; // guarded variant: refuse and report which flags are set
+        res.set_content(nlohmann::json{
+            {"error", {
+                {"message", "Cannot delete variant '"+variant+"' of model '"+modelName+"': variant is "+(isProtected?"protected":"hot_ready")},
+                {"type", "invalid_request_error"},
+                {"details", {
+                    {"hot_ready", hotReady},
+                    {"protected", isProtected}
+                }}
+            }}
+        }.dump(), "application/json");
+        return;
+    }
+
+    // Unload from ModelRuntime if loaded. This runs before the delete attempt, so it happens even if deletion then fails.
+    std::optional<LoadedModel> state=ModelRuntime::instance().getModelState(modelName);
+    if(state.has_value()&&(state->state==ModelState::Loaded||state->state==ModelState::Ready))
+    {
+        if(variant.empty()||state->variant==variant) // no variant given, or the loaded variant is the one being deleted
+        {
+            ArbiterAI::instance().unloadModel(modelName);
+        }
+    }
+
+    int64_t freedBytes=0; // passed to deleteModelFile; echoed as "freed_bytes" on success
+    bool deleted=StorageManager::instance().deleteModelFile(modelName, variant, freedBytes);
+
+    if(!deleted)
+    {
+        if(freedBytes==0) // failure with freedBytes still zero is reported as "not found"
+        {
+            res.status=404;
+            res.set_content(errorJson("Model files not found: "+modelName, "not_found_error").dump(), "application/json");
+        }
+        else // failure with a non-zero freedBytes is reported as a guard conflict
+        {
+            res.status=409;
+            res.set_content(errorJson("Cannot delete: variant is hot_ready or protected", "invalid_request_error").dump(), "application/json");
+        }
+        return;
+    }
+
+    nlohmann::json response={
+        {"status", "deleted"},
+        {"model", modelName},
+        {"freed_bytes", freedBytes}
+    };
+    if(!variant.empty()) // "variant" is only echoed back when one was requested
+    {
+        response["variant"]=variant;
+    }
+
+    res.set_content(response.dump(), "application/json");
+}
+
+void handleSetHotReady(const httplib::Request &req, httplib::Response &res)
+{
+    // Turn the hot-ready flag on for the variant named in the URL.
+    // Answers 404 when StorageManager::setHotReady reports failure.
+    const std::string modelName=req.matches[1];
+    const std::string variant=req.matches[2];
+
+    if(StorageManager::instance().setHotReady(modelName, variant, true))
+    {
+        const nlohmann::json reply={{"status", "hot_ready_set"}, {"model", modelName}, {"variant", variant}};
+        res.set_content(reply.dump(), "application/json");
+    }
+    else
+    {
+        res.status=404;
+        res.set_content(errorJson("Variant not found in downloaded inventory", "not_found_error").dump(), "application/json");
+    }
+}
+
+void handleClearHotReady(const httplib::Request &req, httplib::Response &res)
+{
+    // Route capture 1 is used as the model name, capture 2 as the variant.
+    const std::string model=req.matches[1];
+    const std::string variantName=req.matches[2];
+    const bool updated=StorageManager::instance().setHotReady(model, variantName, false);
+
+    if(updated)
+    {
+        nlohmann::json body={{"status", "hot_ready_cleared"}, {"model", model}, {"variant", variantName}};
+        res.set_content(body.dump(), "application/json");
+        return;
+    }
+
+    // setHotReady() returned false: answer 404.
+    res.status=404;
+    res.set_content(errorJson("Variant not found", "not_found_error").dump(), "application/json");
+}
+
+void handleSetProtected(const httplib::Request &req, httplib::Response &res)
+{
+    // Route capture 1 is used as the model name, capture 2 as the variant.
+    const std::string model=req.matches[1];
+    const std::string variantName=req.matches[2];
+    const bool updated=StorageManager::instance().setProtected(model, variantName, true);
+
+    if(updated)
+    {
+        nlohmann::json body={{"status", "protected_set"}, {"model", model}, {"variant", variantName}};
+        res.set_content(body.dump(), "application/json");
+        return;
+    }
+
+    // setProtected() returned false: answer 404.
+    res.status=404;
+    res.set_content(errorJson("Variant not found in downloaded inventory", "not_found_error").dump(), "application/json");
+}
+
+void handleClearProtected(const httplib::Request &req, httplib::Response &res)
+{
+    // Route capture 1 is used as the model name, capture 2 as the variant.
+    const std::string model=req.matches[1];
+    const std::string variantName=req.matches[2];
+    const bool updated=StorageManager::instance().setProtected(model, variantName, false);
+
+    if(updated)
+    {
+        nlohmann::json body={{"status", "protected_cleared"}, {"model", model}, {"variant", variantName}};
+        res.set_content(body.dump(), "application/json");
+        return;
+    }
+
+    // setProtected() returned false: answer 404.
+    res.status=404;
+    res.set_content(errorJson("Variant not found", "not_found_error").dump(), "application/json");
+}
+
+void handleGetCleanupPreview(const httplib::Request &, httplib::Response &res)
+{
+    // Report the candidates returned by previewCleanup() together with their combined size.
+    const std::vector<CleanupCandidate> stale=StorageManager::instance().previewCleanup();
+
+    nlohmann::json entries=nlohmann::json::array();
+    int64_t freeableBytes=0;
+    for(const CleanupCandidate &candidate:stale)
+    {
+        nlohmann::json entry;
+        entry["model"]=candidate.modelName;
+        entry["variant"]=candidate.variant;
+        entry["filename"]=candidate.filename;
+        entry["file_size_bytes"]=candidate.fileSizeBytes;
+        entry["file_size_display"]=formatBytesDisplay(candidate.fileSizeBytes);
+        entry["last_used_at"]=timePointToIsoStr(candidate.lastUsedAt);
+        entry["usage_count"]=candidate.usageCount;
+        freeableBytes+=candidate.fileSizeBytes;
+        entries.push_back(entry);
+    }
+
+    nlohmann::json summary;
+    summary["candidates"]=entries;
+    summary["candidate_count"]=static_cast<int>(stale.size());
+    summary["total_freeable_bytes"]=freeableBytes;
+    summary["total_freeable_display"]=formatBytesDisplay(freeableBytes);
+
+    res.set_content(summary.dump(), "application/json");
+}
+
+void handleRunCleanup(const httplib::Request &, httplib::Response &res)
+{
+    // The value returned by runCleanup() is reported both raw and formatted for display.
+    const int64_t reclaimed=StorageManager::instance().runCleanup();
+
+    nlohmann::json report={{"status", "completed"}, {"freed_bytes", reclaimed}};
+    report["freed_display"]=formatBytesDisplay(reclaimed);
+
+    res.set_content(report.dump(), "application/json");
+}
+
+void handleGetCleanupConfig(const httplib::Request &, httplib::Response &res)
+{
+    const CleanupPolicy current=StorageManager::instance().getCleanupPolicy();
+
+    nlohmann::json config;
+    config["enabled"]=current.enabled;
+    config["max_age_days"]=current.maxAge.count()/24; // the raw tick count is divided by 24 to report days
+    config["check_interval_hours"]=current.checkInterval.count();
+    config["target_free_percent"]=current.targetFreePercent;
+    config["respect_hot_ready"]=current.respectHotReady;
+    config["respect_protected"]=current.respectProtected;
+    res.set_content(config.dump(), "application/json");
+}
+
+void handleSetCleanupConfig(const httplib::Request &req, httplib::Response &res)
+{
+    try
+    {
+        nlohmann::json body=nlohmann::json::parse(req.body);
+        CleanupPolicy policy=StorageManager::instance().getCleanupPolicy(); // keys absent from the body keep their current value
+        // Range-check as double BEFORE narrowing or multiplying by 24: a negative or huge number must not overflow the hour count or reach the policy (0 stays allowed).
+        auto outOfRange=[&body](const char *key, double hi){ return body.contains(key)&&!(body[key].get<double>()>=0.0&&body[key].get<double>()<=hi); };
+        if(outOfRange("max_age_days", 1000000.0)||outOfRange("check_interval_hours", 24000000.0)||outOfRange("target_free_percent", 100.0))
+        {
+            res.status=400;
+            res.set_content(errorJson("Invalid request: max_age_days must be 0-1000000, check_interval_hours 0-24000000, target_free_percent 0-100", "invalid_request_error").dump(), "application/json");
+            return;
+        }
+        if(body.contains("enabled")) policy.enabled=body["enabled"].get<bool>();
+        if(body.contains("max_age_days")) policy.maxAge=std::chrono::hours(static_cast<std::chrono::hours::rep>(body["max_age_days"].get<int64_t>()*24));
+        if(body.contains("check_interval_hours")) policy.checkInterval=std::chrono::hours(static_cast<std::chrono::hours::rep>(body["check_interval_hours"].get<int64_t>()));
+        if(body.contains("target_free_percent")) policy.targetFreePercent=body["target_free_percent"].get<double>();
+        StorageManager::instance().setCleanupPolicy(policy);
+
+        res.set_content(nlohmann::json{{"status", "updated"}, {"enabled", policy.enabled}, {"max_age_days", policy.maxAge.count()/24},
+            {"check_interval_hours", policy.checkInterval.count()}, {"target_free_percent", policy.targetFreePercent}}.dump(), "application/json");
+    }
+    catch(const std::exception &e) // malformed JSON or a value of the wrong JSON type ends up here
+    {
+        res.status=400;
+        res.set_content(errorJson("Invalid request: "+std::string(e.what()), "invalid_request_error").dump(), "application/json");
+    }
+}
+
+void handleGetActiveDownloads(const httplib::Request &, httplib::Response &res)
+{
+    // First source: progress snapshots from ModelRuntime,
+    // which carry byte counts, speed and ETA.
+    const std::vector<DownloadProgressSnapshot> progress=ModelRuntime::instance().getActiveDownloadSnapshots();
+
+    nlohmann::json listing=nlohmann::json::array();
+    for(const DownloadProgressSnapshot &item:progress)
+    {
+        nlohmann::json row;
+        row["model"]=item.modelName;
+        row["variant"]=item.variant;
+        row["state"]="Downloading";
+        row["bytes_downloaded"]=item.bytesDownloaded;
+        row["total_bytes"]=item.totalBytes;
+        row["percent_complete"]=item.percentComplete;
+        row["speed_mbps"]=item.speedMbps;
+        row["eta_seconds"]=item.etaSeconds;
+        listing.push_back(row);
+    }
+
+    // True when a snapshot exists for the model;
+    // only the model name is compared, not the variant.
+    auto hasSnapshot=[&progress](const LoadedModel &model)
+    {
+        for(const DownloadProgressSnapshot &item:progress)
+        {
+            if(item.modelName==model.modelName)
+            {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    // Second source: models the runtime reports as Downloading but that have no snapshot
+    // (e.g. download hasn't started sending data yet); they are listed with zeroed progress.
+    const std::vector<LoadedModel> runtimeModels=ModelRuntime::instance().getModelStates();
+    for(const LoadedModel &model:runtimeModels)
+    {
+        if(model.state!=ModelState::Downloading||hasSnapshot(model))
+        {
+            continue;
+        }
+
+        nlohmann::json row;
+        row["model"]=model.modelName;
+        row["variant"]=model.variant;
+        row["state"]="Downloading";
+        row["bytes_downloaded"]=0;
+        row["total_bytes"]=0;
+        row["percent_complete"]=0.0;
+        row["speed_mbps"]=0.0;
+        row["eta_seconds"]=0;
+        listing.push_back(row);
+    }
+
+    res.set_content(nlohmann::json{{"downloads", listing}}.dump(), "application/json");
+}
+
 // ========== Dashboard ==========
 
 void handleDashboard(const httplib::Request &, httplib::Response &res)
diff --git a/src/server/routes.h b/src/server/routes.h
index 83e154b..3a120b4 100644
--- a/src/server/routes.h
+++ b/src/server/routes.h
@@ -57,6 +57,24 @@ void handleGetStatsHistory(const httplib::Request &req, httplib::Response &res);
 void handleGetStatsSwaps(const httplib::Request &req, httplib::Response &res);
 void handleGetHardware(const httplib::Request &req, httplib::Response &res);
 
+// ========== Storage Management ==========
+
+void handleGetStorage(const httplib::Request &req, httplib::Response &res);
+void handleGetStorageModels(const httplib::Request &req, httplib::Response &res);
+void handleGetStorageModel(const httplib::Request &req, httplib::Response &res);
+void handleGetStorageModelVariant(const httplib::Request &req, httplib::Response &res);
+void handleSetStorageLimit(const httplib::Request &req, httplib::Response &res);
+void handleDeleteModelFiles(const httplib::Request &req, httplib::Response &res);
+void handleSetHotReady(const httplib::Request &req, httplib::Response &res);
+void handleClearHotReady(const httplib::Request &req, httplib::Response &res);
+void handleSetProtected(const httplib::Request &req, httplib::Response &res);
+void handleClearProtected(const httplib::Request &req, httplib::Response &res);
+void handleGetCleanupPreview(const httplib::Request &req, httplib::Response &res);
+void handleRunCleanup(const httplib::Request &req, httplib::Response &res);
+void handleGetCleanupConfig(const httplib::Request &req, httplib::Response &res);
+void handleSetCleanupConfig(const httplib::Request &req, httplib::Response &res);
+void handleGetActiveDownloads(const httplib::Request &req, httplib::Response &res);
+
 // ========== Dashboard ==========
 
 void handleDashboard(const httplib::Request &req, httplib::Response &res);
diff --git a/tests/storageManagerTests.cpp b/tests/storageManagerTests.cpp
new file mode 100644
index 0000000..eb9f3e8
--- /dev/null
+++ b/tests/storageManagerTests.cpp
@@ -0,0 +1,570 @@
+#include "arbiterAI/storageManager.h"
+#include <gtest/gtest.h>
+#include <fstream>
+#include <filesystem>
+#include <thread>
+#include <chrono>
+
+namespace arbiterAI
+{
+
+class StorageManagerTest : public ::testing::Test
+{
+protected:
+    void SetUp() override
+    {
+        StorageManager::reset();
+        m_testDir="sm_test_models";
+        std::filesystem::remove_all(m_testDir); // a crashed earlier run may have left files that initialize() would scan
+        std::filesystem::create_directories(m_testDir);
+    }
+
+    void TearDown() override
+    {
+        StorageManager::instance().shutdown(); // shut the manager down before its directory is removed
+        std::filesystem::remove_all(m_testDir);
+    }
+
+    void createDummyGguf(const std::string &filename, int64_t sizeBytes) // writes sizeBytes zero bytes to m_testDir/filename
+    {
+        std::ofstream out(m_testDir/filename, std::ios::binary);
+        std::string data(static_cast<size_t>(sizeBytes), '\0');
+        out.write(data.data(), sizeBytes);
+        out.close();
+        ASSERT_TRUE(out.good())<<"could not write "<<filename; // fail here rather than in a later, unrelated expectation
+    }
+
+    std::filesystem::path m_testDir;
+};
+
+// ========== Initialization ==========
+
+TEST_F(StorageManagerTest, InitializeCreatesDirectory)
+{
+    // Nested path below the test directory; SetUp() only creates the test directory itself.
+    const std::filesystem::path nested=m_testDir/"subdir"/"models";
+    StorageManager::instance().initialize(nested);
+    EXPECT_TRUE(std::filesystem::exists(nested));
+}
+
+TEST_F(StorageManagerTest, GetStorageInfoReturnsValidData)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    // No model has been registered: usage and count must be zero while the disk figures are positive.
+    const StorageInfo snapshot=storage.getStorageInfo();
+    EXPECT_EQ(snapshot.modelsDirectory, m_testDir);
+    EXPECT_GT(snapshot.totalDiskBytes, 0);
+    EXPECT_GT(snapshot.freeDiskBytes, 0);
+    EXPECT_EQ(snapshot.usedByModelsBytes, 0);
+    EXPECT_EQ(snapshot.modelCount, 0);
+}
+
+// ========== Storage Limit ==========
+
+TEST_F(StorageManagerTest, SetAndGetStorageLimit)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+
+    const int64_t tenGiB=10LL*1024*1024*1024;
+    storage.setStorageLimit(tenGiB);
+    EXPECT_EQ(storage.getStorageLimit(), tenGiB); // the getter must echo the stored limit
+}
+
+TEST_F(StorageManagerTest, CanDownloadRespectsLimit)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    const int mib=1024*1024;
+
+    storage.setStorageLimit(100*mib); // cap the budget at 100 MiB
+    EXPECT_TRUE(storage.canDownload(50*mib));   // half the cap fits
+    EXPECT_FALSE(storage.canDownload(200*mib)); // twice the cap does not
+}
+
+TEST_F(StorageManagerTest, CanDownloadWithZeroLimitUsesFreeDisk)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+
+    // A limit of 0 is expected to mean "use all free disk space",
+    // so a 1 KiB request must be accepted.
+    storage.setStorageLimit(0);
+    EXPECT_TRUE(storage.canDownload(1024));
+}
+
+// ========== Registration ==========
+
+TEST_F(StorageManagerTest, RegisterDownloadAddsEntry)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    storage.registerDownload("test-model", "Q4_K_M", "test-q4.gguf", 4370*1024*1024LL);
+    // A fresh entry must carry the registered metadata, zero usage and no flags.
+    const std::vector<DownloadedModelFile> inventory=storage.getDownloadedModels();
+    ASSERT_EQ(inventory.size(), 1u);
+    const DownloadedModelFile &entry=inventory.front();
+    EXPECT_EQ(entry.modelName, "test-model");
+    EXPECT_EQ(entry.variant, "Q4_K_M");
+    EXPECT_EQ(entry.filename, "test-q4.gguf");
+    EXPECT_EQ(entry.fileSizeBytes, 4370*1024*1024LL);
+    EXPECT_EQ(entry.usageCount, 0);
+    EXPECT_FALSE(entry.hotReady);
+    EXPECT_FALSE(entry.isProtected);
+}
+
+TEST_F(StorageManagerTest, RegisterDuplicateUpdatesEntry)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    // Registering the same model/variant twice must replace the entry, not add a second one.
+    storage.registerDownload("test-model", "Q4_K_M", "test-q4.gguf", 4000*1024*1024LL);
+    storage.registerDownload("test-model", "Q4_K_M", "test-q4-v2.gguf", 4500*1024*1024LL);
+
+    const std::vector<DownloadedModelFile> inventory=storage.getDownloadedModels();
+    ASSERT_EQ(inventory.size(), 1u);
+    EXPECT_EQ(inventory.front().filename, "test-q4-v2.gguf");
+    EXPECT_EQ(inventory.front().fileSizeBytes, 4500*1024*1024LL);
+}
+
+TEST_F(StorageManagerTest, MultipleVariantsTrackedSeparately)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    storage.registerDownload("test-model", "Q4_K_M", "test-q4.gguf", 4000*1024*1024LL);
+    storage.registerDownload("test-model", "Q8_0", "test-q8.gguf", 8000*1024*1024LL);
+
+    // Each variant is its own inventory entry and both sizes count towards the usage total.
+    EXPECT_EQ(storage.getDownloadedModels().size(), 2u);
+
+    const StorageInfo totals=storage.getStorageInfo();
+    EXPECT_EQ(totals.usedByModelsBytes, (4000+8000)*1024*1024LL);
+    EXPECT_EQ(totals.modelCount, 2);
+}
+
+// ========== Usage Tracking ==========
+
+TEST_F(StorageManagerTest, RecordUsageUpdatesCount)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    storage.registerDownload("test-model", "Q4_K_M", "test-q4.gguf", 1024*1024LL);
+    // Three recorded uses must show up one-for-one in usageCount.
+    for(int call=0; call<3; ++call)
+    {
+        storage.recordUsage("test-model", "Q4_K_M");
+    }
+
+    const std::optional<DownloadedModelFile> entry=storage.getVariantStats("test-model", "Q4_K_M");
+    ASSERT_TRUE(entry.has_value());
+    EXPECT_EQ(entry->usageCount, 3);
+}
+
+TEST_F(StorageManagerTest, RecordUsageUpdatesLastUsedTime)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    storage.registerDownload("test-model", "Q4_K_M", "test-q4.gguf", 1024*1024LL);
+
+    // Take the reference time first, then pause briefly before the usage is recorded.
+    const std::chrono::system_clock::time_point reference=std::chrono::system_clock::now();
+    std::this_thread::sleep_for(std::chrono::milliseconds(10));
+
+    storage.recordUsage("test-model", "Q4_K_M");
+
+    const std::optional<DownloadedModelFile> entry=storage.getVariantStats("test-model", "Q4_K_M");
+    ASSERT_TRUE(entry.has_value());
+    EXPECT_GE(entry->lastUsedAt, reference);
+}
+
+TEST_F(StorageManagerTest, RecordUsageForUnknownVariantIsNoOp)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+
+    // Recording usage for an unregistered variant must neither crash nor create an entry.
+    storage.recordUsage("nonexistent", "Q4_K_M");
+
+    EXPECT_TRUE(storage.getDownloadedModels().empty());
+}
+
+// ========== Model/Variant Stats ==========
+
+TEST_F(StorageManagerTest, GetModelStatsReturnsAllVariants)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    storage.registerDownload("test-model", "Q4_K_M", "q4.gguf", 4000*1024*1024LL);
+    storage.registerDownload("test-model", "Q8_0", "q8.gguf", 8000*1024*1024LL);
+    storage.registerDownload("other-model", "Q4_K_M", "other.gguf", 3000*1024*1024LL);
+
+    // Only the two "test-model" variants may be returned; "other-model" must be left out.
+    const std::vector<DownloadedModelFile> variants=storage.getModelStats("test-model");
+    EXPECT_EQ(variants.size(), 2u);
+}
+
+TEST_F(StorageManagerTest, GetVariantStatsReturnsNulloptForMissing)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+
+    // Nothing was registered, so the lookup has no value to return.
+    EXPECT_FALSE(storage.getVariantStats("nonexistent", "Q4_K_M").has_value());
+}
+
+// ========== Hot Ready Flag ==========
+
+TEST_F(StorageManagerTest, SetHotReadyOnVariant)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    storage.registerDownload("test-model", "Q4_K_M", "q4.gguf", 1024*1024LL);
+
+    // The setter must report success and the flag must be visible through the stats lookup.
+    const bool applied=storage.setHotReady("test-model", "Q4_K_M", true);
+    EXPECT_TRUE(applied);
+
+    const std::optional<DownloadedModelFile> entry=storage.getVariantStats("test-model", "Q4_K_M");
+    ASSERT_TRUE(entry.has_value());
+    EXPECT_TRUE(entry->hotReady);
+}
+
+TEST_F(StorageManagerTest, ClearHotReadyOnVariant)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    storage.registerDownload("test-model", "Q4_K_M", "q4.gguf", 1024*1024LL);
+    // Set, then clear: the flag must end up off.
+    storage.setHotReady("test-model", "Q4_K_M", true);
+    storage.setHotReady("test-model", "Q4_K_M", false);
+    const std::optional<DownloadedModelFile> entry=storage.getVariantStats("test-model", "Q4_K_M");
+    ASSERT_TRUE(entry.has_value());
+    EXPECT_FALSE(entry->hotReady);
+}
+
+TEST_F(StorageManagerTest, SetHotReadyOnUnknownVariantReturnsFalse)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+
+    // No such variant is registered, so the setter must report failure.
+    EXPECT_FALSE(storage.setHotReady("nonexistent", "Q4_K_M", true));
+}
+
+// ========== Protected Flag ==========
+
+TEST_F(StorageManagerTest, SetProtectedOnVariant)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    storage.registerDownload("test-model", "Q4_K_M", "q4.gguf", 1024*1024LL);
+
+    // The setter must report success and the flag must be visible through the stats lookup.
+    const bool applied=storage.setProtected("test-model", "Q4_K_M", true);
+    EXPECT_TRUE(applied);
+
+    const std::optional<DownloadedModelFile> entry=storage.getVariantStats("test-model", "Q4_K_M");
+    ASSERT_TRUE(entry.has_value());
+    EXPECT_TRUE(entry->isProtected);
+}
+
+TEST_F(StorageManagerTest, ClearProtectedOnVariant)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    storage.registerDownload("test-model", "Q4_K_M", "q4.gguf", 1024*1024LL);
+    // Set, then clear: the flag must end up off.
+    storage.setProtected("test-model", "Q4_K_M", true);
+    storage.setProtected("test-model", "Q4_K_M", false);
+    const std::optional<DownloadedModelFile> entry=storage.getVariantStats("test-model", "Q4_K_M");
+    ASSERT_TRUE(entry.has_value());
+    EXPECT_FALSE(entry->isProtected);
+}
+
+// ========== Guarded (hot ready OR protected) ==========
+
+TEST_F(StorageManagerTest, IsGuardedWhenHotReady)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    storage.registerDownload("test-model", "Q4_K_M", "q4.gguf", 1024*1024LL);
+    storage.setHotReady("test-model", "Q4_K_M", true);
+    // hot_ready alone must be enough to guard the variant.
+    EXPECT_TRUE(storage.isGuarded("test-model", "Q4_K_M"));
+}
+
+TEST_F(StorageManagerTest, IsGuardedWhenProtected)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    storage.registerDownload("test-model", "Q4_K_M", "q4.gguf", 1024*1024LL);
+    storage.setProtected("test-model", "Q4_K_M", true);
+    // The protected flag alone must be enough to guard the variant.
+    EXPECT_TRUE(storage.isGuarded("test-model", "Q4_K_M"));
+}
+
+TEST_F(StorageManagerTest, NotGuardedByDefault)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    storage.registerDownload("test-model", "Q4_K_M", "q4.gguf", 1024*1024LL);
+    // Neither flag was set after registration, so the variant must not be guarded.
+    EXPECT_FALSE(storage.isGuarded("test-model", "Q4_K_M"));
+}
+
+TEST_F(StorageManagerTest, IsGuardedReturnsFalseForUnknown)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    EXPECT_FALSE(storage.isGuarded("nonexistent", "Q4_K_M")); // an unregistered variant is never guarded
+}
+
+// ========== Deletion ==========
+
+TEST_F(StorageManagerTest, DeleteModelFileRemovesFromDisk)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    const std::filesystem::path ggufPath=m_testDir/"test-q4.gguf";
+    createDummyGguf("test-q4.gguf", 1024);
+    storage.registerDownload("test-model", "Q4_K_M", "test-q4.gguf", 1024);
+
+    int64_t reclaimed=0;
+    const bool removed=storage.deleteModelFile("test-model", "Q4_K_M", reclaimed);
+
+    // Success is reported, bytes are accounted for, and both the file and its entry are gone.
+    EXPECT_TRUE(removed);
+    EXPECT_GT(reclaimed, 0);
+    EXPECT_FALSE(std::filesystem::exists(ggufPath));
+
+    EXPECT_TRUE(storage.getDownloadedModels().empty());
+}
+
+TEST_F(StorageManagerTest, DeleteGuardedHotReadyVariantFails)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    createDummyGguf("test-q4.gguf", 1024);
+    storage.registerDownload("test-model", "Q4_K_M", "test-q4.gguf", 1024);
+    storage.setHotReady("test-model", "Q4_K_M", true);
+
+    // A hot_ready variant is guarded: the call must fail, free nothing and keep the file.
+    int64_t reclaimed=0;
+    const bool removed=storage.deleteModelFile("test-model", "Q4_K_M", reclaimed);
+    EXPECT_FALSE(removed);
+    EXPECT_EQ(reclaimed, 0);
+    EXPECT_TRUE(std::filesystem::exists(m_testDir/"test-q4.gguf"));
+}
+
+TEST_F(StorageManagerTest, DeleteGuardedProtectedVariantFails)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    createDummyGguf("test-q4.gguf", 1024);
+    storage.registerDownload("test-model", "Q4_K_M", "test-q4.gguf", 1024);
+    storage.setProtected("test-model", "Q4_K_M", true);
+
+    // A protected variant is guarded: the call must fail, free nothing and keep the file.
+    int64_t reclaimed=0;
+    const bool removed=storage.deleteModelFile("test-model", "Q4_K_M", reclaimed);
+    EXPECT_FALSE(removed);
+    EXPECT_EQ(reclaimed, 0);
+    EXPECT_TRUE(std::filesystem::exists(m_testDir/"test-q4.gguf"));
+}
+
+TEST_F(StorageManagerTest, DeleteNonexistentModelReturnsFalse)
+{
+    StorageManager::instance().initialize(m_testDir);
+    int64_t freed=0;
+    bool result=StorageManager::instance().deleteModelFile("nonexistent", "Q4_K_M", freed);
+    // The delete route answers 404 only when the call fails AND nothing was freed, so a miss must leave the counter at zero.
+    EXPECT_FALSE(result);
+    EXPECT_EQ(freed, 0);
+}
+
+// ========== Directory Scanning ==========
+
+TEST_F(StorageManagerTest, ScanPicksUpUntrackedGgufFiles)
+{
+    // The file is created before initialize() and never registered, so only a directory scan can find it.
+    createDummyGguf("untracked-model.gguf", 2048);
+
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+
+    const std::vector<DownloadedModelFile> discovered=storage.getDownloadedModels();
+    ASSERT_EQ(discovered.size(), 1u);
+    EXPECT_EQ(discovered.front().filename, "untracked-model.gguf");
+    EXPECT_EQ(discovered.front().fileSizeBytes, 2048);
+}
+
+// ========== Cleanup ==========
+
+TEST_F(StorageManagerTest, SetAndGetCleanupPolicy)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+
+    CleanupPolicy wanted;
+    wanted.enabled=true;
+    wanted.maxAge=std::chrono::hours{14*24}; // two weeks
+    wanted.checkInterval=std::chrono::hours{12};
+    wanted.targetFreePercent=30.0;
+    storage.setCleanupPolicy(wanted);
+
+    // Every field that was set must come back unchanged.
+    const CleanupPolicy stored=storage.getCleanupPolicy();
+    EXPECT_TRUE(stored.enabled);
+    EXPECT_EQ(stored.maxAge.count(), 14*24);
+    EXPECT_EQ(stored.checkInterval.count(), 12);
+    EXPECT_DOUBLE_EQ(stored.targetFreePercent, 30.0);
+}
+
+TEST_F(StorageManagerTest, PreviewCleanupReturnsNoGuardedCandidates)
+{
+    StorageManager::instance().initialize(m_testDir);
+    CleanupPolicy policy; // same zero-age policy as RunCleanupDeletesStaleCandidates; under the default policy a fresh file is presumably never stale, leaving the loop below nothing to check
+    policy.enabled=true;
+    policy.maxAge=std::chrono::hours{0};
+    policy.targetFreePercent=0.0;
+    StorageManager::instance().setCleanupPolicy(policy);
+    createDummyGguf("old-model.gguf", 1024);
+    StorageManager::instance().registerDownload("old-model", "Q4_K_M", "old-model.gguf", 1024);
+    StorageManager::instance().setProtected("old-model", "Q4_K_M", true); // the only thing keeping the file off the list
+    for(const CleanupCandidate &c:StorageManager::instance().previewCleanup())
+    {
+        EXPECT_NE(c.modelName, "old-model");
+    }
+}
+
+TEST_F(StorageManagerTest, PreviewCleanupReturnsNoHotReadyCandidates)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    createDummyGguf("hot-model.gguf", 1024);
+    storage.registerDownload("hot-model", "Q4_K_M", "hot-model.gguf", 1024);
+    storage.setHotReady("hot-model", "Q4_K_M", true);
+
+    // Whatever the preview proposes, the hot_ready variant must not be part of it.
+    const std::vector<CleanupCandidate> proposed=storage.previewCleanup();
+    for(const CleanupCandidate &candidate:proposed)
+    {
+        EXPECT_NE(candidate.modelName, "hot-model");
+    }
+}
+
+TEST_F(StorageManagerTest, RunCleanupDeletesStaleCandidates)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+
+    // Set a very short max age so everything qualifies.
+    CleanupPolicy aggressive;
+    aggressive.enabled=true;
+    aggressive.maxAge=std::chrono::hours{0};
+    aggressive.targetFreePercent=0.0;
+    storage.setCleanupPolicy(aggressive);
+
+    createDummyGguf("stale.gguf", 2048);
+    storage.registerDownload("stale-model", "Q4_K_M", "stale.gguf", 2048);
+
+    const int64_t reclaimed=storage.runCleanup();
+    EXPECT_GT(reclaimed, 0);
+    EXPECT_FALSE(std::filesystem::exists(m_testDir/"stale.gguf"));
+}
+
+TEST_F(StorageManagerTest, RunCleanupSkipsGuardedVariants)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    CleanupPolicy aggressive;
+    aggressive.enabled=true;
+    aggressive.maxAge=std::chrono::hours{0};
+    aggressive.targetFreePercent=0.0;
+    storage.setCleanupPolicy(aggressive);
+
+    createDummyGguf("guarded.gguf", 2048);
+    storage.registerDownload("guarded-model", "Q4_K_M", "guarded.gguf", 2048);
+    storage.setProtected("guarded-model", "Q4_K_M", true);
+
+    // Same zero-age policy as the stale-file test, but the protected flag must keep the file.
+    const int64_t reclaimed=storage.runCleanup();
+    EXPECT_EQ(reclaimed, 0);
+    EXPECT_TRUE(std::filesystem::exists(m_testDir/"guarded.gguf"));
+}
+
+// ========== Persistence ==========
+
+TEST_F(StorageManagerTest, FlushAndReloadPreservesData)
+{
+    // Phase 1: populate one entry with usage and both flags, then flush it.
+    {
+        StorageManager &writer=StorageManager::instance();
+        writer.initialize(m_testDir);
+        createDummyGguf("test-q4.gguf", 4000);
+        writer.registerDownload("test-model", "Q4_K_M", "test-q4.gguf", 4000LL);
+        writer.recordUsage("test-model", "Q4_K_M");
+        writer.recordUsage("test-model", "Q4_K_M");
+        writer.setHotReady("test-model", "Q4_K_M", true);
+        writer.setProtected("test-model", "Q4_K_M", true);
+        writer.flush();
+    }
+
+    // Phase 2: reset and reinitialize on the same directory — the data should be reloaded from disk.
+    StorageManager::reset();
+    StorageManager &reader=StorageManager::instance(); // fetched again: the earlier reference must not be reused after reset()
+    reader.initialize(m_testDir);
+    const std::optional<DownloadedModelFile> reloaded=reader.getVariantStats("test-model", "Q4_K_M");
+    ASSERT_TRUE(reloaded.has_value());
+    EXPECT_EQ(reloaded->modelName, "test-model");
+    EXPECT_EQ(reloaded->variant, "Q4_K_M");
+    EXPECT_EQ(reloaded->filename, "test-q4.gguf");
+    EXPECT_EQ(reloaded->usageCount, 2);
+    EXPECT_TRUE(reloaded->hotReady);
+    EXPECT_TRUE(reloaded->isProtected);
+}
+
+// ========== Delete All Variants of a Model ==========
+
+TEST_F(StorageManagerTest, DeleteAllVariantsOfModel)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    createDummyGguf("q4.gguf", 1024);
+    createDummyGguf("q8.gguf", 2048);
+    storage.registerDownload("test-model", "Q4_K_M", "q4.gguf", 1024);
+    storage.registerDownload("test-model", "Q8_0", "q8.gguf", 2048);
+
+    // An empty variant name is expected to address every variant of the model.
+    int64_t reclaimed=0;
+    const bool removed=storage.deleteModelFile("test-model", "", reclaimed);
+
+    EXPECT_TRUE(removed);
+    EXPECT_GT(reclaimed, 0);
+    EXPECT_FALSE(std::filesystem::exists(m_testDir/"q4.gguf"));
+    EXPECT_FALSE(std::filesystem::exists(m_testDir/"q8.gguf"));
+
+    EXPECT_TRUE(storage.getDownloadedModels().empty());
+}
+
+TEST_F(StorageManagerTest, DeleteAllVariantsFailsIfAnyGuarded)
+{
+    StorageManager &storage=StorageManager::instance();
+    storage.initialize(m_testDir);
+    createDummyGguf("q4.gguf", 1024);
+    createDummyGguf("q8.gguf", 2048);
+    storage.registerDownload("test-model", "Q4_K_M", "q4.gguf", 1024);
+    storage.registerDownload("test-model", "Q8_0", "q8.gguf", 2048);
+    storage.setProtected("test-model", "Q8_0", true);
+
+    int64_t reclaimed=0;
+    const bool removed=storage.deleteModelFile("test-model", "", reclaimed);
+
+    // One protected variant vetoes the whole request: neither file may be removed.
+    EXPECT_FALSE(removed);
+    EXPECT_TRUE(std::filesystem::exists(m_testDir/"q4.gguf"));
+    EXPECT_TRUE(std::filesystem::exists(m_testDir/"q8.gguf"));
+}
+
+} // namespace arbiterAI