azmuth13 · azmuth13 · Mar 22, 2026 · Mar 14, 2026 · Mar 22, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,13 @@
+.git
+.github
+.venv
+.pytest_cache
+.pycache_local
+__pycache__
+*.pyc
+.DS_Store
+.env
+node_modules
+frontend/react-chat/node_modules
+frontend/react-chat/dist
+docs
diff --git a/.env.example b/.env.example
@@ -1,3 +1,8 @@
+APP_ENV=development
+APP_HOST=0.0.0.0
+APP_PORT=8000
+APP_RELOAD=true
+APP_ORIGIN=http://localhost:5173
 GROQ_API_KEY=
 PINECONE_API_KEY=
 PINECONE_INDEX_NAME=ai-observability-agent
@@ -7,4 +12,3 @@ LANGCHAIN_TRACING_V2=true
 LANGCHAIN_PROJECT=ai-observability-agent
 EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 GROQ_MODEL=llama-3.1-8b-instant
-APP_ORIGIN=http://localhost:5173
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
@@ -0,0 +1,59 @@
+name: CD
+
+on:
+  push:
+    branches:
+      - main
+
+permissions:
+  contents: read
+  packages: write
+
+env:
+  IMAGE_NAME: ghcr.io/${{ github.repository_owner }}/ai-observability-agent
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract Docker metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.IMAGE_NAME }}
+          tags: |
+            type=raw,value=latest
+            type=sha
+
+      - name: Build and push image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+  deploy-render:
+    runs-on: ubuntu-latest
+    needs: build-and-push
+    # The `secrets` context is not available in a job-level `if`, so the step itself skips when the hook is unset.
+
+    steps:
+      - name: Trigger Render deploy
+        run: if [ -n "$RENDER_DEPLOY_HOOK_URL" ]; then curl --fail -X POST "$RENDER_DEPLOY_HOOK_URL"; else echo "RENDER_DEPLOY_HOOK_URL is not set; skipping deploy."; fi
+        env:
+          RENDER_DEPLOY_HOOK_URL: ${{ secrets.RENDER_DEPLOY_HOOK_URL }}
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,45 @@
+name: CI
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r backend/requirements.txt
+
+      - name: Run tests
+        run: pytest
+
+  docker-build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build Docker image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: false
+          tags: ai-observability-agent:ci
diff --git a/.gitignore b/.gitignore
@@ -6,3 +6,4 @@ __pycache__/
 *.pyc
 node_modules/
 dist/
+docs
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3.11-slim
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PYTHONPATH=/app
+
+WORKDIR /app
+
+COPY backend/requirements.txt ./backend/requirements.txt
+
+RUN pip install --upgrade pip && \
+    pip install -r backend/requirements.txt
+
+COPY . .
+
+EXPOSE 8000
+# exec replaces the wrapper shell so uvicorn receives stop signals (e.g. SIGTERM) directly.
+CMD ["sh", "-c", "exec uvicorn backend.main:app --host 0.0.0.0 --port ${PORT:-8000}"]
diff --git a/README.md b/README.md
@@ -31,7 +31,15 @@ Small agentic AI project scaffold for learning:
 ### Backend
 
 1. Create a virtual environment
+   ```bash
+   python3.11 -m venv .venv
+   source .venv/bin/activate
+   ```
 2. Install dependencies from `backend/requirements.txt`
+   ```bash
+   pip install --upgrade pip
+   pip install -r backend/requirements.txt
+   ```
 3. Copy `.env.example` to `.env`
 4. Start the API:
 
@@ -52,3 +60,61 @@ cd frontend/react-chat
 npm install
 npm run dev
 ```
+
+### Pinecone
+Create and manage the Pinecone index and API key in the Pinecone console: <https://app.pinecone.io/organizations/>
+
+## Deployment path
+
+This repo now includes a Docker-first deployment baseline:
+
+- `Dockerfile` for packaging the backend
+- `.dockerignore` to keep the image lean
+- `.github/workflows/ci.yml` for tests plus Docker build validation
+- `.github/workflows/cd.yml` for publishing a container to GHCR and triggering Render
+- `render.yaml` as a starter Render blueprint
+
+### Run locally with Docker
+
+```bash
+docker build -t ai-observability-agent .
+docker run --rm -p 8000:8000 --env-file .env ai-observability-agent
+```
+
+Then verify:
+
+```bash
+curl http://localhost:8000/api/health
+```
+
+### CI
+
+CI runs on pull requests and pushes to `main`:
+
+- installs backend dependencies
+- runs `pytest`
+- builds the Docker image
+
+### CD
+
+CD runs on pushes to `main`:
+
+- builds and pushes `ghcr.io/<owner>/ai-observability-agent`
+- tags the image with `latest` and the Git SHA
+- triggers Render through `RENDER_DEPLOY_HOOK_URL`
+
+### Render setup
+
+1. Create a Render web service from an existing image.
+2. Point it at `ghcr.io/<your-user-or-org>/ai-observability-agent:latest`.
+3. Add the environment variables from `.env.example`.
+4. Set the health check path to `/api/health`.
+5. Add `RENDER_DEPLOY_HOOK_URL` as a GitHub Actions secret.
+
+### GitHub Actions secrets
+
+You only need one repository secret for the current CD flow:
+
+- `RENDER_DEPLOY_HOOK_URL`
+
+The workflow uses the built-in `GITHUB_TOKEN` to push to GHCR.
diff --git a/backend/README.md b/backend/README.md
@@ -1,3 +1,13 @@
 # Backend
 
 This folder contains the FastAPI service, LangGraph agent, RAG pipeline, tools, and MCP server.
+
+## MCP quick check
+
+After installing backend dependencies, you can test the local MCP server with:
+
+```bash
+python scripts/test_mcp_client.py --list-tools
+python scripts/test_mcp_client.py --tool search_logs --args '{"query":"error","limit":3}'
+python scripts/test_mcp_client.py --tool get_metrics --args '{"service_name":"checkout-service"}'
+```
diff --git a/backend/agent/nodes.py b/backend/agent/nodes.py
@@ -15,7 +15,20 @@
 def classify_query(state: dict[str, Any]) -> dict[str, Any]:
     """Route queries to the right tooling using lightweight heuristics."""
     query = state["query"].lower()
-    if any(keyword in query for keyword in ("log", "stack trace", "error", "exception")):
+    if any(
+        keyword in query
+        for keyword in (
+            "log",
+            "stack trace",
+            "error",
+            "exception",
+            "trace_id",
+            "trace id",
+            "trace",
+            "request id",
+            "correlation id",
+        )
+    ):
         intent = "logs"
     elif any(keyword in query for keyword in ("cpu", "memory", "latency", "metrics", "slow")):
         intent = "metrics"

diff --git a/backend/tools/log_tool.py b/backend/tools/log_tool.py
@@ -16,6 +16,7 @@
     "database": re.compile(r"SQLException|connection refused|deadlock", re.IGNORECASE),
 }
 TIMESTAMP_PATTERN = re.compile(r"^(?P<timestamp>\S+)")
+TRACE_ID_PATTERN = re.compile(r"\btrace_id=(?P<trace_id>[A-Za-z0-9_-]+)")
 
 
 def _read_log_file() -> str:
@@ -64,28 +65,41 @@ def analyze_logs_tool(log_text: str) -> str:
 
     evidence_lines = [line for line in log_text.splitlines() if line.strip()]
     timestamps = _extract_timestamps(evidence_lines)
+    trace_ids = _extract_trace_ids(evidence_lines)
 
     if not findings:
         if evidence_lines:
-            return (
+            summary = [
                 "No obvious incident signature detected.\n"
-                f"Evidence lines:\n{_format_evidence(evidence_lines)}\n"
+            ]
+            if timestamps:
+                summary.append(f"Relevant timestamps: {', '.join(timestamps)}.")
+            if trace_ids:
+                summary.append(f"Relevant trace IDs: {', '.join(trace_ids)}.")
+            summary.append(f"Evidence lines:\n{_format_evidence(evidence_lines)}")
+            summary.append(
                 "Recommended next step: inspect latency spikes, correlation IDs, and nearby log lines."
             )
+            return "\n".join(summary)
         return "No obvious incident signature detected. Inspect latency spikes and correlation IDs."
 
     summary = [f"Likely issue types: {', '.join(findings)}."]
     if timestamps:
         summary.append(f"Relevant timestamps: {', '.join(timestamps)}.")
+    if trace_ids:
+        summary.append(f"Relevant trace IDs: {', '.join(trace_ids)}.")
     if evidence_lines:
         summary.append(f"Evidence lines:\n{_format_evidence(evidence_lines)}")
     summary.append("Recommended next step: inspect surrounding logs, trace IDs, and recent deploys.")
     return "\n".join(summary)
 
 
 def _expand_query_terms(query: str) -> list[str]:
-    normalized = re.sub(r"[^a-zA-Z0-9]+", " ", query.lower())
+    lowered = query.lower()
+    normalized = re.sub(r"[^a-zA-Z0-9]+", " ", lowered)
     terms = {term for term in normalized.split() if len(term) > 2}
+    raw_terms = {term for term in re.split(r"\s+", lowered) if len(term) > 2}
+    terms.update(raw_terms)
 
     if {"out", "memory"} <= terms or "oom" in terms:
         terms.update({"outofmemoryerror", "java heap space", "heap", "memory"})
@@ -95,6 +109,8 @@ def _expand_query_terms(query: str) -> list[str]:
         terms.update({"error", "exception", "failed"})
     if "time" in terms or "when" in terms:
         terms.update({"timestamp"})
+    if "trace" in terms or "trace_id" in terms or "trace id" in lowered:
+        terms.update({"trace_id", "trace_id=", "trace", "request", "traceid"})
 
     return sorted(terms)
 
@@ -110,3 +126,12 @@ def _extract_timestamps(lines: list[str]) -> list[str]:
 
 def _format_evidence(lines: list[str]) -> str:
     return "\n".join(f"- {line}" for line in lines)
+
+
+def _extract_trace_ids(lines: list[str]) -> list[str]:
+    trace_ids: list[str] = []
+    for line in lines:
+        match = TRACE_ID_PATTERN.search(line)
+        if match:
+            trace_ids.append(match.group("trace_id"))
+    return trace_ids
diff --git a/render.yaml b/render.yaml
@@ -0,0 +1,32 @@
+services:
+  - type: web
+    name: ai-observability-agent
+    runtime: image
+    image:
+      url: ghcr.io/OWNER_OR_ORG/ai-observability-agent:latest
+    plan: free
+    healthCheckPath: /api/health
+    autoDeploy: false
+    envVars:
+      - key: APP_ENV
+        value: production
+      - key: APP_ORIGIN
+        sync: false
+      - key: GROQ_API_KEY
+        sync: false
+      - key: GROQ_MODEL
+        value: llama-3.1-8b-instant
+      - key: PINECONE_API_KEY
+        sync: false
+      - key: PINECONE_INDEX_NAME
+        value: ai-observability-agent
+      - key: PINECONE_NAMESPACE
+        value: observability-docs
+      - key: LANGCHAIN_API_KEY
+        sync: false
+      - key: LANGCHAIN_TRACING_V2
+        value: "true"
+      - key: LANGCHAIN_PROJECT
+        value: ai-observability-agent
+      - key: EMBEDDING_MODEL
+        value: sentence-transformers/all-MiniLM-L6-v2
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,3 +6,4 @@ __pycache__/ @@
     *.pyc
     node_modules/
     dist/
+    docs