From c6c332b8594b8c14152ee7436f379f8d30184486 Mon Sep 17 00:00:00 2001 From: krazer Date: Sat, 28 Mar 2026 11:53:01 -0400 Subject: [PATCH] add support for storage mtg --- .github/copilot-instructions.md | 10 + CMakeLists.txt | 3 + docs/server.md | 315 ++++++++++- src/arbiterAI/arbiterAI.cpp | 6 + src/arbiterAI/arbiterAI.h | 3 +- src/arbiterAI/modelDownloader.cpp | 97 +++- src/arbiterAI/modelDownloader.h | 46 +- src/arbiterAI/modelRuntime.cpp | 55 +- src/arbiterAI/modelRuntime.h | 9 +- src/arbiterAI/storageManager.cpp | 862 ++++++++++++++++++++++++++++++ src/arbiterAI/storageManager.h | 196 +++++++ src/server/dashboard.h | 297 ++++++++++ src/server/main.cpp | 69 +++ src/server/routes.cpp | 571 +++++++++++++++++++- src/server/routes.h | 18 + tests/storageManagerTests.cpp | 570 ++++++++++++++++++++ 16 files changed, 3107 insertions(+), 20 deletions(-) create mode 100644 src/arbiterAI/storageManager.cpp create mode 100644 src/arbiterAI/storageManager.h create mode 100644 tests/storageManagerTests.cpp diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 92759f1..ca54224 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -17,6 +17,16 @@ A C++17 library providing a unified interface for multiple LLM providers. - **Tests:** Google Test — `./build/linux_x64_debug/arbiterai_tests` (must be run inside Docker) - **Language-specific formatting rules** are in `.github/instructions/` +## Important Rules + +1. **All commands** must go through `./runDocker.sh ...`. +2. **All development** (building, testing, running) must be done inside the Docker container. The host environment is not guaranteed to have the correct tools or dependencies. +3. **Do not** use `python`, `pip`, `pytest` — the host may not have the correct Python version or dependencies. +4. **Do not** create or use a virtualenv on the host. The container is the virtualenv. +5. The project source is **bind-mounted** at `/app` inside the container. 
Edits to files on the host are immediately visible inside the container. +6. If you change the `Dockerfile`, run `./runDocker.sh --rebuild`. +7. Don't launch the server yourself; ask the user to launch it so that it's not running in the agent's terminal. + ## Active Tasks - **[docs/tasks/local_model_management.md](../docs/tasks/local_model_management.md)** — Plan for llama.cpp local model management (hardware detection, model swapping, telemetry, standalone server) diff --git a/CMakeLists.txt b/CMakeLists.txt index e14de19..38e6bea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,8 @@ set(arbiterai_src ./src/arbiterAI/modelRuntime.cpp ./src/arbiterAI/telemetryCollector.h ./src/arbiterAI/telemetryCollector.cpp + ./src/arbiterAI/storageManager.h + ./src/arbiterAI/storageManager.cpp ./src/arbiterAI/providers/baseProvider.h ./src/arbiterAI/providers/baseProvider.cpp ./src/arbiterAI/providers/openai.h @@ -136,6 +138,7 @@ target_link_libraries(arbiterai tests/modelRuntimeTests.cpp tests/telemetryCollectorTests.cpp tests/llamaProviderTests.cpp + tests/storageManagerTests.cpp ) target_link_libraries(arbiterai_tests diff --git a/docs/server.md b/docs/server.md index fccf1db..99f025c 100644 --- a/docs/server.md +++ b/docs/server.md @@ -11,8 +11,9 @@ Standalone HTTP server that wraps the ArbiterAI library, providing an OpenAI-com - [Model Management](#32-model-management) - [Model Config Injection](#33-model-config-injection) - [Telemetry](#34-telemetry) - - [Health & Version](#35-health--version) - - [Dashboard](#36-dashboard) + - [Storage Management](#35-storage-management) + - [Health & Version](#36-health--version) + - [Dashboard](#37-dashboard) 4. [Configuration Persistence](#4-configuration-persistence) 5. 
[Error Format](#5-error-format) @@ -28,8 +29,9 @@ The server supports: - **Streaming** — Server-Sent Events (SSE) for real-time token delivery - **Model lifecycle management** — Load, unload, pin, and download models at runtime - **Runtime model config injection** — Add, update, or remove model configurations via REST without restarting +- **Storage management** — Track downloaded model files, set hot ready / protected flags, configure automated cleanup, monitor disk usage and download progress with speed and ETA - **Telemetry** — System snapshots, inference history, swap history, and hardware info -- **Live dashboard** — Browser-based UI at `/dashboard` +- **Live dashboard** — Browser-based UI at `/dashboard` with storage bar, download progress, and model management - **CORS** — All responses include permissive CORS headers --- @@ -52,6 +54,11 @@ The server supports: | `-v, --variant` | *(none)* | Default quantization variant (e.g., `Q4_K_M`) | | `--override-path` | *(none)* | Path to write runtime model config overrides (enables persistence) | | `--ram-budget` | `0` (auto 50%) | Ready-model RAM budget in MB | +| `--models-dir` | `/models` | Directory where downloaded model files are stored | +| `--storage-limit` | `0` (unlimited) | Maximum storage for model files (e.g., `50G`, `500M`). `0` = use all free disk space. 
| +| `--cleanup-enabled` | `true` | Enable automated storage cleanup | +| `--cleanup-max-age` | `720` | Max age in hours before a variant becomes a cleanup candidate (default: 30 days) | +| `--cleanup-interval` | `24` | Hours between automated cleanup runs | | `--log-level` | `info` | Log level (`trace`, `debug`, `info`, `warn`, `error`) | | `-h, --help` | | Print usage | @@ -69,6 +76,9 @@ The server supports: # Load a local model with a specific variant ./arbiterAI-server -m qwen2.5-7b-instruct -v Q4_K_M --ram-budget 8192 + +# Limit model storage to 50 GB with cleanup every 12 hours +./arbiterAI-server --models-dir /data/models --storage-limit 50G --cleanup-interval 12 ``` --- @@ -328,6 +338,8 @@ Load a model into VRAM for inference. **Response (202):** `{"status": "downloading", "model": "qwen2.5-7b-instruct"}` — model file is being downloaded. +**Response (507):** Insufficient storage — the model file won't fit within the configured storage limit. Includes `available_bytes`, `required_bytes`, and `storage_limit_bytes` for programmatic decision-making. + #### `POST /api/models/:name/unload` Unload a model from VRAM. Pinned models move to `Ready` state instead. @@ -354,16 +366,32 @@ Initiate a model download. Query parameter `variant` selects the quantization va **Response (202):** `{"status": "downloading", "model": "..."}` — download started. +**Response (507):** Insufficient storage. Same format as the load endpoint. + #### `GET /api/models/:name/download` -Get download status for a model. +Get download status for a model. Includes speed and ETA when download is active. 
**Response:** ```json { "model": "qwen2.5-7b-instruct", - "state": "Downloading" + "state": "Downloading", + "bytes_downloaded": 1250000000, + "total_bytes": 4680000000, + "percent_complete": 26.7, + "speed_mbps": 85.3, + "eta_seconds": 38 +} +``` + +When not downloading: + +```json +{ + "model": "qwen2.5-7b-instruct", + "state": "Loaded" } ``` @@ -650,7 +678,273 @@ Current hardware information (refreshed on each call). --- -### 3.5 Health & Version +### 3.5 Storage Management + +Manage downloaded model files on disk — track usage, set protection flags, configure automated cleanup, and monitor active downloads. + +#### Concepts + +- **Hot Ready** — Per-variant flag. Keeps model weights in system RAM after VRAM eviction for fast reload. Hot ready variants are protected from deletion. +- **Protected** — Per-variant flag. Prevents deletion by both manual delete requests and automated cleanup. Must be cleared before the file can be removed. +- **Guarded** — A variant is "guarded" if either hot ready or protected is set. + +#### `GET /api/storage` + +Current storage overview. + +**Response:** + +```json +{ + "models_directory": "/models", + "total_disk_bytes": 500107862016, + "free_disk_bytes": 350000000000, + "used_by_models_bytes": 12500000000, + "storage_limit_bytes": 53687091200, + "available_for_models_bytes": 41187091200, + "model_count": 3, + "cleanup_enabled": true +} +``` + +#### `GET /api/storage/models` + +List all downloaded model files with usage statistics and flags. 
+ +**Query parameters:** + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `sort` | `last_used` | Sort by: `last_used`, `size`, `name`, `downloads` | + +**Response:** + +```json +{ + "models": [ + { + "model": "qwen2.5-7b-instruct", + "variant": "Q4_K_M", + "filename": "Qwen2.5-7B-Instruct-Q4_K_M.gguf", + "file_path": "/models/Qwen2.5-7B-Instruct-Q4_K_M.gguf", + "file_size_bytes": 4680000000, + "file_size_display": "4.4 GB", + "downloaded_at": "2025-01-15T10:30:00Z", + "last_used_at": "2025-01-20T14:22:00Z", + "usage_count": 47, + "hot_ready": true, + "protected": false, + "runtime_state": "Loaded" + } + ], + "total_count": 1, + "total_size_bytes": 4680000000 +} +``` + +#### `GET /api/storage/models/:name` + +Get storage stats for all variants of a model. + +**Response:** + +```json +{ + "model": "qwen2.5-7b-instruct", + "variants": [ + { + "variant": "Q4_K_M", + "filename": "Qwen2.5-7B-Instruct-Q4_K_M.gguf", + "file_size_bytes": 4680000000, + "usage_count": 47, + "hot_ready": true, + "protected": false + } + ] +} +``` + +#### `GET /api/storage/models/:name/variants/:variant` + +Get storage stats for a specific variant. + +**Response (200):** Single variant object (same fields as above). + +**Response (404):** Variant not found. + +#### `PUT /api/storage/limit` + +Set the storage limit. + +**Request body:** + +```json +{ + "limit_bytes": 53687091200 +} +``` + +**Response (200):** + +```json +{ + "storage_limit_bytes": 53687091200, + "available_for_models_bytes": 41187091200 +} +``` + +#### `DELETE /api/models/:name/files` + +Delete downloaded files for a model. Specify `variant` query parameter to delete a single variant, or omit to delete all variants. + +**Query parameters:** + +| Parameter | Description | +|-----------|-------------| +| `variant` | Specific variant to delete. Omit to delete all. 
| + +**Response (200):** + +```json +{ + "status": "deleted", + "model": "qwen2.5-7b-instruct", + "freed_bytes": 4680000000 +} +``` + +**Response (409):** Variant is guarded (hot ready or protected). Clear the flag first. + +```json +{ + "error": { + "message": "Cannot delete: variant is guarded (hot_ready or protected). Clear flags first.", + "type": "invalid_request_error", + "param": null, + "code": null + }, + "hot_ready": true, + "protected": false +} +``` + +**Response (404):** Model or variant not found. + +#### `POST /api/models/:name/variants/:variant/hot-ready` + +Enable hot ready for a variant. + +**Response (200):** `{"status": "hot_ready_set", "model": "...", "variant": "..."}` + +**Response (404):** Variant not found. + +#### `DELETE /api/models/:name/variants/:variant/hot-ready` + +Disable hot ready for a variant. + +**Response (200):** `{"status": "hot_ready_cleared", "model": "...", "variant": "..."}` + +#### `POST /api/models/:name/variants/:variant/protected` + +Enable protection for a variant. + +**Response (200):** `{"status": "protected_set", "model": "...", "variant": "..."}` + +**Response (404):** Variant not found. + +#### `DELETE /api/models/:name/variants/:variant/protected` + +Disable protection for a variant. + +**Response (200):** `{"status": "protected_cleared", "model": "...", "variant": "..."}` + +#### `GET /api/storage/cleanup/preview` + +Preview what automated cleanup would delete without actually deleting anything. + +**Response:** + +```json +{ + "candidate_count": 2, + "total_reclaimable_bytes": 12500000000, + "candidates": [ + { + "model": "old-model", + "variant": "Q8_0", + "filename": "old-model-q8.gguf", + "file_size_bytes": 8100000000, + "last_used_at": "2024-12-01T00:00:00Z", + "usage_count": 3 + } + ] +} +``` + +#### `POST /api/storage/cleanup/run` + +Execute cleanup immediately. Deletes unguarded, unloaded variants that exceed the configured max age. 
+ +**Response:** + +```json +{ + "freed_bytes": 8100000000, + "deleted_count": 1 +} +``` + +#### `GET /api/storage/cleanup/config` + +Get the current cleanup policy. + +**Response:** + +```json +{ + "enabled": true, + "max_age_hours": 720, + "check_interval_hours": 24, + "target_free_percent": 20.0, + "respect_hot_ready": true, + "respect_protected": true +} +``` + +#### `PUT /api/storage/cleanup/config` + +Update the cleanup policy. + +**Request body:** Same format as the GET response. All fields are optional — only provided fields are updated. + +**Response (200):** Updated policy (same format as GET). + +#### `GET /api/downloads` + +List all active downloads with progress, speed, and ETA. + +**Response:** + +```json +{ + "downloads": [ + { + "model": "qwen2.5-7b-instruct", + "variant": "Q4_K_M", + "state": "Downloading", + "bytes_downloaded": 1250000000, + "total_bytes": 4680000000, + "percent_complete": 26.7, + "speed_mbps": 85.3, + "eta_seconds": 38 + } + ] +} +``` + +--- + +### 3.6 Health & Version #### `GET /health` (or `/v1/health`) @@ -675,7 +969,7 @@ Library version. 
--- -### 3.6 Dashboard +### 3.7 Dashboard #### `GET /dashboard` @@ -685,6 +979,10 @@ Returns an HTML page with a live-updating dashboard showing: - Loaded models with state, variant, context size, GPU assignment - Performance charts (tokens/sec, memory usage) - Model management controls (load/unload/pin) +- **Downloaded models** — Storage bar (used/limit), table of all downloaded GGUF files with size, download date, last used, usage count, runtime state, and toggle buttons for hot ready / protected flags +- **Download progress** — Active downloads with progress bar, bytes transferred, speed (MB/s), and ETA +- **Row age coloring** — Fresh (green, <14 days), stale (yellow, 14–30 days), old (red, >30 days) +- Model deletion (guarded variants show disabled delete button with tooltip) Open in a browser: `http://localhost:8080/dashboard` @@ -760,8 +1058,9 @@ HTTP status codes: | `202` | Accepted (model downloading) | | `400` | Bad request / validation error | | `404` | Not found | -| `409` | Conflict (model already exists on POST) | +| `409` | Conflict (model already exists on POST, or variant is guarded on DELETE) | | `500` | Internal server error | +| `507` | Insufficient storage (download or load rejected) | --- diff --git a/src/arbiterAI/arbiterAI.cpp b/src/arbiterAI/arbiterAI.cpp index 913e49f..53869fc 100644 --- a/src/arbiterAI/arbiterAI.cpp +++ b/src/arbiterAI/arbiterAI.cpp @@ -6,6 +6,7 @@ #include "arbiterAI/modelManager.h" #include "arbiterAI/modelRuntime.h" #include "arbiterAI/telemetryCollector.h" +#include "arbiterAI/storageManager.h" #include "arbiterAI/providers/baseProvider.h" #include "arbiterAI/providers/openai.h" #include "arbiterAI/providers/anthropic.h" @@ -85,6 +86,10 @@ ErrorCode ArbiterAI::initialize(const std::vector &config // Mark global singleton initialized so subsequent operations succeed ArbiterAI::instance().initialized = true; + + // Initialize StorageManager with default models directory + StorageManager::instance().initialize("/models"); 
+ return ErrorCode::Success; } @@ -570,6 +575,7 @@ std::vector ArbiterAI::getInferenceHistory(std::chrono::minutes ErrorCode ArbiterAI::shutdown() { + StorageManager::instance().shutdown(); providers.clear(); initialized = false; return ErrorCode::Success; diff --git a/src/arbiterAI/arbiterAI.h b/src/arbiterAI/arbiterAI.h index ceb5e90..2a6e2cb 100644 --- a/src/arbiterAI/arbiterAI.h +++ b/src/arbiterAI/arbiterAI.h @@ -73,7 +73,8 @@ enum class ErrorCode ModelNotLoaded, ModelLoadError, ModelDownloading, - ModelDownloadFailed + ModelDownloadFailed, + InsufficientStorage }; /** diff --git a/src/arbiterAI/modelDownloader.cpp b/src/arbiterAI/modelDownloader.cpp index 0a9cc33..33d5109 100644 --- a/src/arbiterAI/modelDownloader.cpp +++ b/src/arbiterAI/modelDownloader.cpp @@ -184,12 +184,15 @@ std::future ModelDownloader::downloadModelWithProgress( const std::string &filePathStr, const std::optional &fileHash, DownloadProgressCallback progressCallback, - const std::string &modelName) + const std::string &modelName, + const std::string &variant) { // Create tracking state auto downloadState = std::make_shared(); downloadState->modelName = modelName.empty() ? 
filePathStr : modelName; + downloadState->variant = variant; downloadState->status = DownloadStatus::Pending; + downloadState->startTime = std::chrono::steady_clock::now(); { std::lock_guard lock(m_downloadsMutex); @@ -258,6 +261,21 @@ std::future ModelDownloader::downloadModelWithProgress( percent = (static_cast(downloadNow) / downloadTotal) * 100.0f; } downloadState->percentComplete = percent; + + // Record speed sample + { + std::lock_guard lock(downloadState->speedMutex); + auto now = std::chrono::steady_clock::now(); + + downloadState->speedSamples.push_back({now, downloadNow}); + + // Keep only last 10 seconds of samples + auto cutoff = now - std::chrono::seconds(10); + while(!downloadState->speedSamples.empty() && downloadState->speedSamples.front().first < cutoff) + { + downloadState->speedSamples.pop_front(); + } + } if (progressCallback) { @@ -341,4 +359,81 @@ int64_t ModelDownloader::getPartialDownloadSize(const std::string &filePath) return 0; } +DownloadProgressSnapshot ModelDownloader::buildSnapshot(const std::shared_ptr &download) +{ + DownloadProgressSnapshot snap; + + snap.bytesDownloaded=download->bytesDownloaded.load(); + snap.totalBytes=download->totalBytes.load(); + snap.percentComplete=download->percentComplete.load(); + snap.modelName=download->modelName; + snap.variant=download->variant; + + // Calculate speed from rolling window + { + std::lock_guard lock(download->speedMutex); + + if(download->speedSamples.size()>=2) + { + const std::pair &oldest=download->speedSamples.front(); + const std::pair &newest=download->speedSamples.back(); + + double elapsedSec=std::chrono::duration(newest.first-oldest.first).count(); + int64_t byteDelta=newest.second-oldest.second; + + if(elapsedSec>0.0 && byteDelta>0) + { + double bytesPerSec=static_cast(byteDelta)/elapsedSec; + snap.speedMbps=bytesPerSec/(1024.0*1024.0); + + // ETA from remaining bytes and current speed + int64_t remaining=snap.totalBytes-snap.bytesDownloaded; + if(remaining>0 && 
bytesPerSec>0.0) + { + snap.etaSeconds=static_cast(static_cast(remaining)/bytesPerSec); + } + } + } + } + + return snap; +} + +std::optional ModelDownloader::getProgressSnapshot(const std::string &modelName) +{ + std::lock_guard lock(m_downloadsMutex); + + auto it=m_activeDownloads.find(modelName); + if(it==m_activeDownloads.end()) + { + return std::nullopt; + } + + DownloadStatus status=it->second->status.load(); + if(status!=DownloadStatus::InProgress && status!=DownloadStatus::Pending) + { + return std::nullopt; + } + + return buildSnapshot(it->second); +} + +std::vector ModelDownloader::getActiveSnapshots() +{ + std::lock_guard lock(m_downloadsMutex); + + std::vector result; + + for(const std::pair> &entry:m_activeDownloads) + { + DownloadStatus status=entry.second->status.load(); + if(status==DownloadStatus::InProgress || status==DownloadStatus::Pending) + { + result.push_back(buildSnapshot(entry.second)); + } + } + + return result; +} + } // namespace arbiterAI \ No newline at end of file diff --git a/src/arbiterAI/modelDownloader.h b/src/arbiterAI/modelDownloader.h index c692011..9afaf34 100644 --- a/src/arbiterAI/modelDownloader.h +++ b/src/arbiterAI/modelDownloader.h @@ -12,6 +12,9 @@ #include #include #include +#include +#include +#include namespace arbiterAI { @@ -27,17 +30,36 @@ using DownloadProgressCallback = std::function; /** - * @struct DownloadState + * @struct ActiveDownload * @brief Tracks the state of an active download */ -struct ActiveDownload -{ +struct ActiveDownload { std::atomic bytesDownloaded{0}; std::atomic totalBytes{0}; std::atomic percentComplete{0.0f}; std::atomic status{DownloadStatus::NotStarted}; std::string error; std::string modelName; + std::string variant; + + // Speed tracking (guarded by speedMutex) + mutable std::mutex speedMutex; + std::deque> speedSamples; + std::chrono::steady_clock::time_point startTime; +}; + +/** + * @struct DownloadProgressSnapshot + * @brief Point-in-time snapshot of a download's progress 
including speed and ETA + */ +struct DownloadProgressSnapshot { + int64_t bytesDownloaded=0; + int64_t totalBytes=0; + float percentComplete=0.0f; + double speedMbps=0.0; // rolling average MB/s + int etaSeconds=0; // estimated time remaining + std::string modelName; + std::string variant; }; /** @@ -78,13 +100,15 @@ class ModelDownloader * @param fileHash Expected SHA256 hash (optional) * @param progressCallback Callback for progress updates * @param modelName Name for tracking in active downloads + * @param variant Quantization variant name for tracking * @return Future that resolves to true on success */ std::future downloadModelWithProgress(const std::string &downloadUrl, const std::string &filePath, const std::optional &fileHash, DownloadProgressCallback progressCallback, - const std::string &modelName = ""); + const std::string &modelName = "", + const std::string &variant = ""); /** * @brief Get the current download state for a model @@ -93,6 +117,19 @@ class ModelDownloader */ std::shared_ptr getDownloadState(const std::string &modelName); + /** + * @brief Get a progress snapshot with speed and ETA for a model + * @param modelName Name of the model + * @return Snapshot with speed/ETA, or nullopt if not downloading + */ + std::optional getProgressSnapshot(const std::string &modelName); + + /** + * @brief Get progress snapshots for all active downloads + * @return Vector of snapshots for all in-progress or pending downloads + */ + std::vector getActiveSnapshots(); + /** * @brief Check if a download can be resumed * @param filePath Path to the partial file @@ -111,6 +148,7 @@ class ModelDownloader std::string getCachePath(const std::string &key); std::optional loadFromCache(const std::string &key); void saveToCache(const std::string &key, const nlohmann::json &config); + DownloadProgressSnapshot buildSnapshot(const std::shared_ptr &download); std::filesystem::path m_cacheDir; std::shared_ptr m_fileVerifier; diff --git a/src/arbiterAI/modelRuntime.cpp 
b/src/arbiterAI/modelRuntime.cpp index 826e4e1..e915a9f 100644 --- a/src/arbiterAI/modelRuntime.cpp +++ b/src/arbiterAI/modelRuntime.cpp @@ -1,6 +1,7 @@ #include "arbiterAI/modelRuntime.h" #include "arbiterAI/hardwareDetector.h" #include "arbiterAI/telemetryCollector.h" +#include "arbiterAI/storageManager.h" #include #include @@ -149,6 +150,20 @@ ErrorCode ModelRuntime::loadModel( std::string filePath="/models/"+selectedVar->download.filename; if(!std::filesystem::exists(filePath)&&!selectedVar->download.url.empty()) { + // Check storage quota before downloading + int64_t fileSizeBytes=static_cast(selectedVar->fileSizeMb)*1024*1024; + if(!StorageManager::instance().canDownload(fileSizeBytes)) + { + // Try cleanup first + StorageManager::instance().runCleanup(); + if(!StorageManager::instance().canDownload(fileSizeBytes)) + { + spdlog::error("Insufficient storage to download '{}' variant '{}' ({} MB)", + model, selectedVariant, selectedVar->fileSizeMb); + return ErrorCode::InsufficientStorage; + } + } + // Mark as downloading LoadedModel &dlEntry=m_models[model]; dlEntry.modelName=model; @@ -163,7 +178,8 @@ ErrorCode ModelRuntime::loadModel( selectedVar->download.url, filePath, selectedVar->download.sha256, - model); + model, + selectedVariant); m_mutex.lock(); if(!downloadOk) @@ -171,6 +187,16 @@ ErrorCode ModelRuntime::loadModel( m_models.erase(model); return ErrorCode::ModelDownloadFailed; } + + // Register the download with StorageManager + int64_t actualSize=0; + std::error_code ec; + if(std::filesystem::exists(filePath, ec)) + { + actualSize=static_cast(std::filesystem::file_size(filePath, ec)); + } + StorageManager::instance().registerDownload( + model, selectedVariant, selectedVar->download.filename, actualSize); } } @@ -395,6 +421,16 @@ std::vector ModelRuntime::getLocalModelCapabilities() const return ModelFitCalculator::calculateFittableModels(allModels, hw); } +std::vector ModelRuntime::getActiveDownloadSnapshots() +{ + return 
m_downloader.getActiveSnapshots(); +} + +std::optional ModelRuntime::getDownloadSnapshot(const std::string &modelName) +{ + return m_downloader.getProgressSnapshot(modelName); +} + void ModelRuntime::setReadyRamBudget(int mb) { std::lock_guard lock(m_mutex); @@ -490,6 +526,17 @@ void ModelRuntime::beginInference(const std::string &model) void ModelRuntime::endInference() { + // Record usage for storage tracking + if(!m_inferenceModel.empty()) + { + std::lock_guard lock(m_mutex); + auto it=m_models.find(m_inferenceModel); + if(it!=m_models.end()) + { + StorageManager::instance().recordUsage(m_inferenceModel, it->second.variant); + } + } + m_inferenceActive=false; m_inferenceModel.clear(); drainPendingSwaps(); @@ -735,7 +782,8 @@ bool ModelRuntime::downloadModelFile( const std::string &url, const std::string &filePath, const std::string &sha256, - const std::string &modelName) + const std::string &modelName, + const std::string &variant) { std::optional hash=std::nullopt; if(!sha256.empty()) @@ -765,7 +813,8 @@ bool ModelRuntime::downloadModelFile( } } }, - modelName); + modelName, + variant); bool success=result.get(); diff --git a/src/arbiterAI/modelRuntime.h b/src/arbiterAI/modelRuntime.h index 3e01b82..288ceac 100644 --- a/src/arbiterAI/modelRuntime.h +++ b/src/arbiterAI/modelRuntime.h @@ -87,6 +87,12 @@ class ModelRuntime { /// Get model fit capabilities for all local models given current hardware. std::vector getLocalModelCapabilities() const; + /// Get progress snapshots for all active downloads (with speed and ETA). + std::vector getActiveDownloadSnapshots(); + + /// Get a download progress snapshot for a specific model. + std::optional getDownloadSnapshot(const std::string &modelName); + /// Set the RAM budget for "Ready" tier models (MB). 
void setReadyRamBudget(int mb); @@ -153,7 +159,8 @@ class ModelRuntime { const std::string &url, const std::string &filePath, const std::string &sha256, - const std::string &modelName); + const std::string &modelName, + const std::string &variant); std::map m_models; mutable std::mutex m_mutex; diff --git a/src/arbiterAI/storageManager.cpp b/src/arbiterAI/storageManager.cpp new file mode 100644 index 0000000..5d64b8a --- /dev/null +++ b/src/arbiterAI/storageManager.cpp @@ -0,0 +1,862 @@ +#include "arbiterAI/storageManager.h" +#include "arbiterAI/modelRuntime.h" + +#include +#include +#include +#include + +namespace arbiterAI +{ + +namespace +{ + +std::string timePointToIso(const std::chrono::system_clock::time_point &tp) +{ + std::time_t t=std::chrono::system_clock::to_time_t(tp); + std::tm tm{}; + gmtime_r(&t, &tm); + + char buf[32]; + std::strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &tm); + return std::string(buf); +} + +std::chrono::system_clock::time_point isoToTimePoint(const std::string &iso) +{ + if(iso.empty()) + { + return std::chrono::system_clock::now(); + } + + std::tm tm{}; + strptime(iso.c_str(), "%Y-%m-%dT%H:%M:%SZ", &tm); + std::time_t t=timegm(&tm); + return std::chrono::system_clock::from_time_t(t); +} + +std::string formatBytes(int64_t bytes) +{ + if(bytes>=1073741824) + return std::to_string(bytes/1073741824)+"."+std::to_string((bytes%1073741824)*10/1073741824)+" GB"; + if(bytes>=1048576) + return std::to_string(bytes/1048576)+"."+std::to_string((bytes%1048576)*10/1048576)+" MB"; + return std::to_string(bytes)+" B"; +} + +} // anonymous namespace + +StorageManager &StorageManager::instance() +{ + static StorageManager mgr; + return mgr; +} + +void StorageManager::reset() +{ + StorageManager &mgr=instance(); + + mgr.shutdown(); + + std::lock_guard lock(mgr.m_mutex); + mgr.m_entries.clear(); + mgr.m_modelsDir.clear(); + mgr.m_storageLimitBytes=0; + mgr.m_initialized=false; + mgr.m_dirty=false; + mgr.m_cleanupPolicy=CleanupPolicy{}; +} + 
+// Destructor: stops the background timer thread and flushes unsaved usage
+// data by delegating to shutdown().
+StorageManager::~StorageManager()
+{
+    shutdown();
+}
+
+// Point the manager at `modelsDir` (creating the directory if it is missing),
+// load the persisted usage data, reconcile it with the files on disk, and
+// start the background flush/cleanup timer.
+//
+// Safe to call more than once: unsaved changes are written to the previous
+// models directory first and the in-memory inventory is rebuilt from scratch.
+void StorageManager::initialize(const std::filesystem::path &modelsDir)
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+
+    // loadUsageData() appends to m_entries, so a repeated initialize() would
+    // otherwise duplicate every persisted entry. Persist pending changes to
+    // the directory we are leaving before dropping the old inventory.
+    if(m_initialized&&m_dirty)
+    {
+        saveUsageData();
+        m_dirty=false;
+    }
+    m_entries.clear();
+
+    m_modelsDir=modelsDir;
+
+    if(!std::filesystem::exists(m_modelsDir))
+    {
+        std::filesystem::create_directories(m_modelsDir);
+    }
+
+    loadUsageData();
+    scanModelsDirectory();
+    m_initialized=true;
+
+    spdlog::info("StorageManager initialized: modelsDir={}", m_modelsDir.string());
+
+    // No-op when the timer thread is already running.
+    startBackgroundTimer();
+}
+
+// Stop the background timer thread (if any) and write unsaved usage data.
+void StorageManager::shutdown()
+{
+    // Signal the timer loop to exit, then wait for it. This must happen
+    // without holding m_mutex because the timer thread takes m_mutex itself
+    // (flush()/runCleanup()).
+    m_timerRunning=false;
+    if(m_timerThread.joinable())
+    {
+        m_timerThread.join();
+    }
+
+    flush();
+}
+
+// Set the storage limit in bytes (0 = no configured limit) and mark the
+// state dirty so the next flush persists it.
+void StorageManager::setStorageLimit(int64_t limitBytes)
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+    m_storageLimitBytes=limitBytes;
+    m_dirty=true;
+}
+
+// Return the configured storage limit in bytes (0 = no configured limit).
+int64_t StorageManager::getStorageLimit() const
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+    return m_storageLimitBytes;
+}
+
+// Build a snapshot of storage usage: partition capacity/free space for the
+// models directory, the summed size of all tracked model files, and how many
+// bytes are still available for new models under the configured limit.
+// Disk figures stay 0 when the directory is unset, missing, or cannot be
+// queried.
+StorageInfo StorageManager::getStorageInfo() const
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+
+    StorageInfo info;
+    info.modelsDirectory=m_modelsDir;
+    info.storageLimitBytes=m_storageLimitBytes;
+    info.cleanupEnabled=m_cleanupPolicy.enabled;
+
+    // Query disk space. The error_code overloads keep this getter from
+    // throwing on filesystem errors.
+    std::error_code ec;
+    if(!m_modelsDir.empty()&&std::filesystem::exists(m_modelsDir, ec))
+    {
+        std::filesystem::space_info si=std::filesystem::space(m_modelsDir, ec);
+
+        if(!ec)
+        {
+            info.totalDiskBytes=static_cast<int64_t>(si.capacity);
+            info.freeDiskBytes=static_cast<int64_t>(si.available);
+        }
+    }
+
+    // Sum model file sizes
+    int64_t usedBytes=0;
+    for(const ModelFileEntry &entry:m_entries)
+    {
+        usedBytes+=entry.fileSizeBytes;
+    }
+    info.usedByModelsBytes=usedBytes;
+    info.modelCount=static_cast<int>(m_entries.size());
+
+    // Calculate available space: with a limit configured, the remaining
+    // quota (never negative) capped by what the disk actually has free;
+    // without a limit, simply the free disk space.
+    if(m_storageLimitBytes>0)
+    {
+        int64_t limitRemaining=m_storageLimitBytes-usedBytes;
+        if(limitRemaining<0) limitRemaining=0;
+        info.availableForModelsBytes=std::min(info.freeDiskBytes, limitRemaining);
+    }
+    else
+    {
+        info.availableForModelsBytes=info.freeDiskBytes;
+    }
+
+    return info;
+}
+
+bool StorageManager::canDownload(int64_t fileSizeBytes) const +{ + StorageInfo info=getStorageInfo(); + return info.availableForModelsBytes>=fileSizeBytes; +} + +std::vector StorageManager::getDownloadedModels() const +{ + std::lock_guard lock(m_mutex); + + std::vector result; + result.reserve(m_entries.size()); + + for(const ModelFileEntry &entry:m_entries) + { + result.push_back(entryToPublic(entry)); + } + + return result; +} + +void StorageManager::registerDownload(const std::string &modelName, + const std::string &variant, + const std::string &filename, + int64_t fileSizeBytes) +{ + std::lock_guard lock(m_mutex); + + // Check if entry already exists + ModelFileEntry *existing=findEntry(modelName, variant); + if(existing) + { + existing->filename=filename; + existing->fileSizeBytes=fileSizeBytes; + existing->downloadedAt=std::chrono::system_clock::now(); + m_dirty=true; + return; + } + + ModelFileEntry entry; + entry.modelName=modelName; + entry.variant=variant; + entry.filename=filename; + entry.fileSizeBytes=fileSizeBytes; + entry.downloadedAt=std::chrono::system_clock::now(); + entry.lastUsedAt=std::chrono::system_clock::now(); + entry.usageCount=0; + entry.hotReady=false; + entry.isProtected=false; + + m_entries.push_back(entry); + m_dirty=true; + + spdlog::info("StorageManager: registered download {} variant {} ({})", + modelName, variant, formatBytes(fileSizeBytes)); +} + +void StorageManager::recordUsage(const std::string &modelName, const std::string &variant) +{ + std::lock_guard lock(m_mutex); + + ModelFileEntry *entry=findEntry(modelName, variant); + if(entry) + { + entry->lastUsedAt=std::chrono::system_clock::now(); + entry->usageCount++; + m_dirty=true; + } +} + +bool StorageManager::deleteModelFile(const std::string &modelName, const std::string &variant, + int64_t &freedBytes) +{ + std::lock_guard lock(m_mutex); + + freedBytes=0; + + if(variant.empty()) + { + // Delete all variants of this model + std::vector toRemove; + + for(size_t i=0; 
i(*it)); + } + } + else + { + // Delete a specific variant + for(auto it=m_entries.begin(); it!=m_entries.end(); ++it) + { + if(it->modelName==modelName&&it->variant==variant) + { + if(it->hotReady||it->isProtected) + { + spdlog::warn("StorageManager: cannot delete {} variant {} — guarded", modelName, variant); + return false; + } + + std::filesystem::path filePath=m_modelsDir/it->filename; + + std::error_code ec; + if(std::filesystem::exists(filePath, ec)) + { + std::filesystem::remove(filePath, ec); + if(ec) + { + spdlog::error("StorageManager: failed to delete file {}: {}", filePath.string(), ec.message()); + return false; + } + } + + freedBytes=it->fileSizeBytes; + spdlog::info("StorageManager: deleted {} variant {} ({})", + modelName, variant, formatBytes(it->fileSizeBytes)); + m_entries.erase(it); + break; + } + } + } + + if(freedBytes>0) + { + m_dirty=true; + saveUsageData(); + } + + return freedBytes>0; +} + +bool StorageManager::setHotReady(const std::string &modelName, const std::string &variant, bool enabled) +{ + std::lock_guard lock(m_mutex); + + ModelFileEntry *entry=findEntry(modelName, variant); + if(!entry) + { + return false; + } + + entry->hotReady=enabled; + m_dirty=true; + return true; +} + +bool StorageManager::setProtected(const std::string &modelName, const std::string &variant, bool enabled) +{ + std::lock_guard lock(m_mutex); + + ModelFileEntry *entry=findEntry(modelName, variant); + if(!entry) + { + return false; + } + + entry->isProtected=enabled; + m_dirty=true; + return true; +} + +std::vector StorageManager::getModelStats(const std::string &modelName) const +{ + std::lock_guard lock(m_mutex); + + std::vector result; + for(const ModelFileEntry &entry:m_entries) + { + if(entry.modelName==modelName) + { + result.push_back(entryToPublic(entry)); + } + } + return result; +} + +std::optional StorageManager::getVariantStats( + const std::string &modelName, const std::string &variant) const +{ + std::lock_guard lock(m_mutex); + + const 
ModelFileEntry *entry=findEntry(modelName, variant); + if(!entry) + { + return std::nullopt; + } + return entryToPublic(*entry); +} + +bool StorageManager::isGuarded(const std::string &modelName, const std::string &variant) const +{ + std::lock_guard lock(m_mutex); + + const ModelFileEntry *entry=findEntry(modelName, variant); + if(!entry) + { + return false; + } + return entry->hotReady||entry->isProtected; +} + +void StorageManager::flush() +{ + std::lock_guard lock(m_mutex); + + if(!m_initialized||!m_dirty) + { + return; + } + + saveUsageData(); + m_dirty=false; +} + +void StorageManager::scanModelsDirectory() +{ + // NOTE: caller must hold m_mutex + + if(m_modelsDir.empty()||!std::filesystem::exists(m_modelsDir)) + { + return; + } + + std::error_code ec; + + for(const std::filesystem::directory_entry &dirEntry:std::filesystem::directory_iterator(m_modelsDir, ec)) + { + if(!dirEntry.is_regular_file()) + { + continue; + } + + std::string filename=dirEntry.path().filename().string(); + + // Only track GGUF files + if(filename.size()<5||filename.substr(filename.size()-5)!=".gguf") + { + continue; + } + + // Check if already tracked + bool found=false; + for(const ModelFileEntry &entry:m_entries) + { + if(entry.filename==filename) + { + found=true; + break; + } + } + + if(!found) + { + // New file discovered on disk — add with unknown model/variant + ModelFileEntry entry; + entry.filename=filename; + entry.fileSizeBytes=static_cast(dirEntry.file_size(ec)); + entry.downloadedAt=std::chrono::system_clock::now(); + entry.lastUsedAt=std::chrono::system_clock::now(); + + // Try to infer model name and variant from filename + // Typical pattern: ModelName-VariantName.gguf + std::string stem=filename.substr(0, filename.size()-5); + size_t lastDash=stem.rfind('-'); + if(lastDash!=std::string::npos&&lastDash>0) + { + entry.modelName=stem.substr(0, lastDash); + entry.variant=stem.substr(lastDash+1); + } + else + { + entry.modelName=stem; + entry.variant="default"; + } + + 
m_entries.push_back(entry); + m_dirty=true; + + spdlog::info("StorageManager: discovered untracked GGUF file: {} ({})", + filename, formatBytes(entry.fileSizeBytes)); + } + } + + // Remove entries for files that no longer exist on disk + auto removeIt=std::remove_if(m_entries.begin(), m_entries.end(), + [this](const ModelFileEntry &entry) + { + std::filesystem::path filePath=m_modelsDir/entry.filename; + std::error_code ec; + bool exists=std::filesystem::exists(filePath, ec); + if(!exists) + { + spdlog::info("StorageManager: removing entry for missing file: {}", entry.filename); + } + return !exists; + }); + + if(removeIt!=m_entries.end()) + { + m_entries.erase(removeIt, m_entries.end()); + m_dirty=true; + } +} + +// ========== Cleanup ========== + +void StorageManager::setCleanupPolicy(const CleanupPolicy &policy) +{ + std::lock_guard lock(m_mutex); + m_cleanupPolicy=policy; +} + +CleanupPolicy StorageManager::getCleanupPolicy() const +{ + std::lock_guard lock(m_mutex); + return m_cleanupPolicy; +} + +std::vector StorageManager::previewCleanup() const +{ + std::lock_guard lock(m_mutex); + return collectCleanupCandidates(); +} + +int64_t StorageManager::runCleanup() +{ + std::lock_guard lock(m_mutex); + + if(!m_cleanupPolicy.enabled) + { + return 0; + } + + std::vector candidates=collectCleanupCandidates(); + + if(candidates.empty()) + { + return 0; + } + + int64_t totalFreed=0; + + for(const CleanupCandidate &candidate:candidates) + { + // Delete the file + std::filesystem::path filePath=m_modelsDir/candidate.filename; + + std::error_code ec; + if(std::filesystem::exists(filePath, ec)) + { + std::filesystem::remove(filePath, ec); + if(ec) + { + spdlog::error("StorageManager cleanup: failed to delete {}: {}", filePath.string(), ec.message()); + continue; + } + } + + // Remove from entries + for(auto it=m_entries.begin(); it!=m_entries.end(); ++it) + { + if(it->modelName==candidate.modelName&&it->variant==candidate.variant) + { + totalFreed+=it->fileSizeBytes; + 
spdlog::info("StorageManager cleanup: deleted {} variant {} ({})", + it->modelName, it->variant, formatBytes(it->fileSizeBytes)); + m_entries.erase(it); + break; + } + } + } + + if(totalFreed>0) + { + m_dirty=true; + saveUsageData(); + } + + spdlog::info("StorageManager cleanup: freed {}", formatBytes(totalFreed)); + return totalFreed; +} + +// ========== Private Methods ========== + +StorageManager::ModelFileEntry *StorageManager::findEntry( + const std::string &modelName, const std::string &variant) +{ + for(ModelFileEntry &entry:m_entries) + { + if(entry.modelName==modelName&&entry.variant==variant) + { + return &entry; + } + } + return nullptr; +} + +const StorageManager::ModelFileEntry *StorageManager::findEntry( + const std::string &modelName, const std::string &variant) const +{ + for(const ModelFileEntry &entry:m_entries) + { + if(entry.modelName==modelName&&entry.variant==variant) + { + return &entry; + } + } + return nullptr; +} + +void StorageManager::loadUsageData() +{ + // NOTE: caller must hold m_mutex + + std::filesystem::path usagePath=m_modelsDir/"model_usage.json"; + + if(!std::filesystem::exists(usagePath)) + { + return; + } + + try + { + std::ifstream file(usagePath); + if(!file.is_open()) + { + return; + } + + nlohmann::json data; + file>>data; + + if(data.contains("storage_limit_bytes")) + { + m_storageLimitBytes=data["storage_limit_bytes"].get(); + } + + if(data.contains("cleanup_policy")) + { + const nlohmann::json &cp=data["cleanup_policy"]; + if(cp.contains("enabled")) m_cleanupPolicy.enabled=cp["enabled"].get(); + if(cp.contains("max_age_hours")) m_cleanupPolicy.maxAge=std::chrono::hours(cp["max_age_hours"].get()); + if(cp.contains("check_interval_hours")) m_cleanupPolicy.checkInterval=std::chrono::hours(cp["check_interval_hours"].get()); + if(cp.contains("target_free_percent")) m_cleanupPolicy.targetFreePercent=cp["target_free_percent"].get(); + } + + if(data.contains("models")&&data["models"].is_array()) + { + for(const nlohmann::json 
&m:data["models"]) + { + ModelFileEntry entry; + entry.modelName=m.value("model", ""); + entry.variant=m.value("variant", ""); + entry.filename=m.value("filename", ""); + entry.fileSizeBytes=m.value("file_size_bytes", int64_t(0)); + entry.downloadedAt=isoToTimePoint(m.value("downloaded_at", "")); + entry.lastUsedAt=isoToTimePoint(m.value("last_used_at", "")); + entry.usageCount=m.value("usage_count", 0); + entry.hotReady=m.value("hot_ready", false); + entry.isProtected=m.value("protected", false); + + if(!entry.filename.empty()) + { + m_entries.push_back(entry); + } + } + } + + spdlog::info("StorageManager: loaded {} entries from {}", m_entries.size(), usagePath.string()); + } + catch(const std::exception &e) + { + spdlog::warn("StorageManager: failed to load usage data: {}", e.what()); + } +} + +void StorageManager::saveUsageData() const +{ + // NOTE: caller must hold m_mutex + + if(m_modelsDir.empty()) + { + return; + } + + std::filesystem::path usagePath=m_modelsDir/"model_usage.json"; + + nlohmann::json data; + data["version"]=1; + data["storage_limit_bytes"]=m_storageLimitBytes; + + nlohmann::json cleanupJson; + cleanupJson["enabled"]=m_cleanupPolicy.enabled; + cleanupJson["max_age_hours"]=m_cleanupPolicy.maxAge.count(); + cleanupJson["check_interval_hours"]=m_cleanupPolicy.checkInterval.count(); + cleanupJson["target_free_percent"]=m_cleanupPolicy.targetFreePercent; + data["cleanup_policy"]=cleanupJson; + + nlohmann::json models=nlohmann::json::array(); + for(const ModelFileEntry &entry:m_entries) + { + nlohmann::json m; + m["model"]=entry.modelName; + m["variant"]=entry.variant; + m["filename"]=entry.filename; + m["file_size_bytes"]=entry.fileSizeBytes; + m["downloaded_at"]=timePointToIso(entry.downloadedAt); + m["last_used_at"]=timePointToIso(entry.lastUsedAt); + m["usage_count"]=entry.usageCount; + m["hot_ready"]=entry.hotReady; + m["protected"]=entry.isProtected; + models.push_back(m); + } + data["models"]=models; + + try + { + std::ofstream 
file(usagePath); + file< StorageManager::collectCleanupCandidates() const +{ + // NOTE: caller must hold m_mutex + + std::vector candidates; + + auto now=std::chrono::system_clock::now(); + + for(const ModelFileEntry &entry:m_entries) + { + // Skip guarded entries + if(m_cleanupPolicy.respectHotReady&&entry.hotReady) continue; + if(m_cleanupPolicy.respectProtected&&entry.isProtected) continue; + + // Skip entries that are currently Loaded or Ready in ModelRuntime + // Note: we don't hold ModelRuntime's lock here, so this is a best-effort check + std::optional runtimeState=ModelRuntime::instance().getModelState(entry.modelName); + if(runtimeState.has_value()) + { + ModelState state=runtimeState->state; + if(state==ModelState::Loaded||state==ModelState::Ready) + { + continue; + } + } + + // Check staleness + auto age=std::chrono::duration_cast(now-entry.lastUsedAt); + if(age runtimeState=ModelRuntime::instance().getModelState(entry.modelName); + if(runtimeState.has_value()&&runtimeState->variant==entry.variant) + { + switch(runtimeState->state) + { + case ModelState::Loaded: f.runtimeState="Loaded"; break; + case ModelState::Ready: f.runtimeState="Ready"; break; + case ModelState::Downloading: f.runtimeState="Downloading"; break; + case ModelState::Unloading: f.runtimeState="Unloading"; break; + default: f.runtimeState="Unloaded"; break; + } + } + else + { + f.runtimeState="Unloaded"; + } + + return f; +} + +void StorageManager::startBackgroundTimer() +{ + if(m_timerRunning) + { + return; + } + + m_timerRunning=true; + m_timerThread=std::thread([this]() + { + // Flush every 5 minutes, cleanup on the cleanup interval + constexpr int flushIntervalSeconds=300; // 5 minutes + int elapsedSeconds=0; + + while(m_timerRunning) + { + std::this_thread::sleep_for(std::chrono::seconds(1)); + elapsedSeconds++; + + if(!m_timerRunning) + { + break; + } + + // Periodic flush + if(elapsedSeconds%flushIntervalSeconds==0) + { + flush(); + } + + // Periodic cleanup + int 
cleanupIntervalSeconds=0; + { + std::lock_guard lock(m_mutex); + cleanupIntervalSeconds=static_cast(m_cleanupPolicy.checkInterval.count()*3600); + } + + if(cleanupIntervalSeconds>0&&elapsedSeconds%cleanupIntervalSeconds==0) + { + runCleanup(); + } + } + }); +} + +} // namespace arbiterAI diff --git a/src/arbiterAI/storageManager.h b/src/arbiterAI/storageManager.h new file mode 100644 index 0000000..dcb9fea --- /dev/null +++ b/src/arbiterAI/storageManager.h @@ -0,0 +1,196 @@ +#ifndef _ARBITERAI_STORAGEMANAGER_H_ +#define _ARBITERAI_STORAGEMANAGER_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace arbiterAI +{ + +struct StorageInfo { + std::filesystem::path modelsDirectory; + int64_t totalDiskBytes=0; // partition total + int64_t freeDiskBytes=0; // partition free + int64_t usedByModelsBytes=0; // sum of all tracked model files + int64_t storageLimitBytes=0; // configured limit (0 = use all free) + int64_t availableForModelsBytes=0; // min(freeDisk, limit - usedByModels) + int modelCount=0; + bool cleanupEnabled=true; +}; + +struct DownloadedModelFile { + std::string modelName; + std::string variant; // quantization (e.g., "Q4_K_M") + std::string filename; + std::filesystem::path filePath; + int64_t fileSizeBytes=0; + std::chrono::system_clock::time_point downloadedAt; + std::chrono::system_clock::time_point lastUsedAt; + int usageCount=0; // number of inference requests served + bool hotReady=false; // keep weights in RAM for quick VRAM reload + bool isProtected=false; // protected from deletion (manual and automated) + std::string runtimeState; // cross-referenced from ModelRuntime +}; + +struct CleanupPolicy { + bool enabled=true; + std::chrono::hours maxAge{30*24}; // 30 days + std::chrono::hours checkInterval{24}; // run every 24 hours + double targetFreePercent=20.0; // try to keep 20% free + bool respectHotReady=true; // never delete hot_ready variants + bool respectProtected=true; // never delete protected 
variants +}; + +struct CleanupCandidate { + std::string modelName; + std::string variant; + std::string filename; + int64_t fileSizeBytes=0; + std::chrono::system_clock::time_point lastUsedAt; + int usageCount=0; +}; + +class StorageManager { +public: + static StorageManager &instance(); + static void reset(); // For testing + + /// Initialize with the models directory path. + void initialize(const std::filesystem::path &modelsDir); + + /// Shut down the background flush/cleanup timers. + void shutdown(); + + /// Set the storage limit in bytes. 0 = use all free disk space. + void setStorageLimit(int64_t limitBytes); + + /// Get the current storage limit. + int64_t getStorageLimit() const; + + /// Get a snapshot of current storage usage. + StorageInfo getStorageInfo() const; + + /// Check if a download of the given size can proceed. + /// @return true if enough space, false otherwise. + bool canDownload(int64_t fileSizeBytes) const; + + /// Get the list of all downloaded model files with usage stats. + std::vector getDownloadedModels() const; + + /// Register a completed download (updates inventory). + void registerDownload(const std::string &modelName, + const std::string &variant, + const std::string &filename, + int64_t fileSizeBytes); + + /// Record a model usage event (inference served). + void recordUsage(const std::string &modelName, const std::string &variant); + + /// Delete a downloaded model file from disk. + /// Fails if the variant is hot ready or protected (must clear flags first). + /// @param modelName Model name. + /// @param variant Specific variant to delete. If empty, deletes all variants of the model. + /// @param freedBytes [out] Total bytes freed. + /// @return true if deleted, false if file not found, deletion failed, or guarded. + bool deleteModelFile(const std::string &modelName, const std::string &variant, + int64_t &freedBytes); + + /// Set/clear hot ready on a variant (keep weights in RAM for quick VRAM reload). 
+ /// @return true if the variant was found, false otherwise. + bool setHotReady(const std::string &modelName, const std::string &variant, bool enabled); + + /// Set/clear protected on a variant (prevent deletion, manual or automated). + /// @return true if the variant was found, false otherwise. + bool setProtected(const std::string &modelName, const std::string &variant, bool enabled); + + /// Get the storage stats for a specific model (all variants). + std::vector getModelStats(const std::string &modelName) const; + + /// Get the storage stats for a specific model variant. + std::optional getVariantStats( + const std::string &modelName, const std::string &variant) const; + + /// Check if a variant is guarded (hot ready or protected). + /// @return true if either flag is set, false otherwise. + bool isGuarded(const std::string &modelName, const std::string &variant) const; + + /// Flush usage stats to disk (called periodically and on shutdown). + void flush(); + + /// Scan the models directory for GGUF files not yet in the inventory. + void scanModelsDirectory(); + + // ========== Cleanup ========== + + /// Set the cleanup policy. + void setCleanupPolicy(const CleanupPolicy &policy); + + /// Get the current cleanup policy. + CleanupPolicy getCleanupPolicy() const; + + /// Preview what automated cleanup would delete (without deleting anything). + std::vector previewCleanup() const; + + /// Run cleanup: remove stale, unguarded, unloaded variants. + /// @return Total bytes freed. 
+ int64_t runCleanup(); + +private: + StorageManager()=default; + ~StorageManager(); + + StorageManager(const StorageManager &)=delete; + StorageManager &operator=(const StorageManager &)=delete; + + struct ModelFileEntry { + std::string modelName; + std::string variant; + std::string filename; + int64_t fileSizeBytes=0; + std::chrono::system_clock::time_point downloadedAt; + std::chrono::system_clock::time_point lastUsedAt; + int usageCount=0; + bool hotReady=false; + bool isProtected=false; + }; + + /// Find an entry by model name and variant. + ModelFileEntry *findEntry(const std::string &modelName, const std::string &variant); + const ModelFileEntry *findEntry(const std::string &modelName, const std::string &variant) const; + + void loadUsageData(); + void saveUsageData() const; + + /// Collect cleanup candidates (caller holds m_mutex). + std::vector collectCleanupCandidates() const; + + /// Convert internal entry to public DownloadedModelFile. + DownloadedModelFile entryToPublic(const ModelFileEntry &entry) const; + + /// Start the background flush/cleanup timer. 
+ void startBackgroundTimer(); + + std::filesystem::path m_modelsDir; + int64_t m_storageLimitBytes=0; + + std::vector m_entries; + mutable std::mutex m_mutex; + bool m_initialized=false; + bool m_dirty=false; // has unsaved changes + + CleanupPolicy m_cleanupPolicy; + + // Background timer + std::thread m_timerThread; + std::atomic m_timerRunning{false}; +}; + +} // namespace arbiterAI + +#endif//_ARBITERAI_STORAGEMANAGER_H_ diff --git a/src/server/dashboard.h b/src/server/dashboard.h index 296dd22..fc9b3e6 100644 --- a/src/server/dashboard.h +++ b/src/server/dashboard.h @@ -198,6 +198,89 @@ td background: #3a1515; color: #ff6060; } +.btn-disabled +{ + opacity: 0.4; + cursor: not-allowed; +} +.btn-toggle +{ + padding: 2px 8px; + font-size: 11px; +} +.btn-toggle.active +{ + background: #1b3a2a; + border-color: #4caf50; + color: #4caf50; +} +.storage-bar-outer +{ + background: #1f2230; + border-radius: 4px; + height: 24px; + margin: 8px 0; + overflow: hidden; + position: relative; +} +.storage-bar-fill +{ + height: 100%; + border-radius: 4px; + background: linear-gradient(90deg, #4a6cf7, #7c8aff); + transition: width 0.5s ease; +} +.storage-bar-text +{ + position: absolute; + top: 0; + left: 0; + right: 0; + height: 100%; + display: flex; + align-items: center; + justify-content: center; + font-size: 12px; + color: #e0e0e0; + font-weight: 500; +} +.storage-info +{ + display: flex; + justify-content: space-between; + font-size: 12px; + color: #888; + margin-bottom: 4px; +} +.row-fresh +{ + border-left: 3px solid #4caf50; +} +.row-stale +{ + border-left: 3px solid #f0c040; +} +.row-old +{ + border-left: 3px solid #ff4444; +} +.progress-inline +{ + display: inline-block; + width: 120px; + height: 12px; + background: #1f2230; + border-radius: 3px; + overflow: hidden; + vertical-align: middle; + margin-right: 6px; +} +.progress-inline-fill +{ + height: 100%; + background: linear-gradient(90deg, #4a6cf7, #7c8aff); + transition: width 0.3s ease; +} .chart-container { 
height: 120px; @@ -319,6 +402,43 @@ td +
+

Downloaded Models

+
+
+ Used: - + Limit: - +
+
+
+
-
+
+
+ Auto-cleanup: - + +
+
+
+ + + + + + + + + + + + + + + + + + +
ModelVariantSizeDownloadedLast UsedUsesStateHot ReadyProtectedActions
No downloaded models
+

Recent Inferences

@@ -342,8 +462,10 @@ td