From 399352b944068017f99890ac89a6f602e08c8398 Mon Sep 17 00:00:00 2001
From: Suraj Kumar <suraj@Surajs-MacBook-Air.local>
Date: Sun, 15 Mar 2026 04:15:58 +0530
Subject: [PATCH 1/2] feat: trace_id pattern

---
 .gitignore                |  1 +
 README.md                 |  8 ++++++++
 backend/agent/nodes.py    | 15 ++++++++++++++-
 backend/tools/log_tool.py | 31 ++++++++++++++++++++++++++++---
 4 files changed, 51 insertions(+), 4 deletions(-)
diff --git a/.gitignore b/.gitignore
index 4266775..d7a1e7f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ __pycache__/
 *.pyc
 node_modules/
 dist/
+docs
diff --git a/README.md b/README.md
index 6ba9b08..ec518d2 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,15 @@ Small agentic AI project scaffold for learning:
 ### Backend
 
 1. Create a virtual environment
+   ```bash
+   python3.11 -m venv .venv
+   source .venv/bin/activate
+   ```
 2. Install dependencies from `backend/requirements.txt`
+   ```bash
+   pip install --upgrade pip
+   pip install -r backend/requirements.txt
+   ```
 3. Copy `.env.example` to `.env`
 4. Start the API:
 
diff --git a/backend/agent/nodes.py b/backend/agent/nodes.py
index 4c9a719..11b039d 100644
--- a/backend/agent/nodes.py
+++ b/backend/agent/nodes.py
@@ -15,7 +15,20 @@
 def classify_query(state: dict[str, Any]) -> dict[str, Any]:
     """Route queries to the right tooling using lightweight heuristics."""
     query = state["query"].lower()
-    if any(keyword in query for keyword in ("log", "stack trace", "error", "exception")):
+    if any(
+        keyword in query
+        for keyword in (
+            "log",
+            "stack trace",
+            "error",
+            "exception",
+            "trace_id",
+            "trace id",
+            "trace",
+            "request id",
+            "correlation id",
+        )
+    ):
         intent = "logs"
     elif any(keyword in query for keyword in ("cpu", "memory", "latency", "metrics", "slow")):
         intent = "metrics"
diff --git a/backend/tools/log_tool.py b/backend/tools/log_tool.py
index aff4f8a..0ad4d86 100644
--- a/backend/tools/log_tool.py
+++ b/backend/tools/log_tool.py
@@ -16,6 +16,7 @@
     "database": re.compile(r"SQLException|connection refused|deadlock", re.IGNORECASE),
 }
 TIMESTAMP_PATTERN = re.compile(r"^(?P<timestamp>\S+)")
+TRACE_ID_PATTERN = re.compile(r"\btrace_id=(?P<trace_id>[A-Za-z0-9_-]+)")
 
 
 def _read_log_file() -> str:
@@ -64,19 +65,29 @@ def analyze_logs_tool(log_text: str) -> str:
 
     evidence_lines = [line for line in log_text.splitlines() if line.strip()]
     timestamps = _extract_timestamps(evidence_lines)
+    trace_ids = _extract_trace_ids(evidence_lines)
 
     if not findings:
         if evidence_lines:
-            return (
+            summary = [
                 "No obvious incident signature detected.\n"
-                f"Evidence lines:\n{_format_evidence(evidence_lines)}\n"
+            ]
+            if timestamps:
+                summary.append(f"Relevant timestamps: {', '.join(timestamps)}.")
+            if trace_ids:
+                summary.append(f"Relevant trace IDs: {', '.join(trace_ids)}.")
+            summary.append(f"Evidence lines:\n{_format_evidence(evidence_lines)}")
+            summary.append(
                 "Recommended next step: inspect latency spikes, correlation IDs, and nearby log lines."
             )
+            return "\n".join(summary)
         return "No obvious incident signature detected. Inspect latency spikes and correlation IDs."
 
     summary = [f"Likely issue types: {', '.join(findings)}."]
     if timestamps:
         summary.append(f"Relevant timestamps: {', '.join(timestamps)}.")
+    if trace_ids:
+        summary.append(f"Relevant trace IDs: {', '.join(trace_ids)}.")
     if evidence_lines:
         summary.append(f"Evidence lines:\n{_format_evidence(evidence_lines)}")
     summary.append("Recommended next step: inspect surrounding logs, trace IDs, and recent deploys.")
@@ -84,8 +95,11 @@ def analyze_logs_tool(log_text: str) -> str:
 
 
 def _expand_query_terms(query: str) -> list[str]:
-    normalized = re.sub(r"[^a-zA-Z0-9]+", " ", query.lower())
+    lowered = query.lower()
+    normalized = re.sub(r"[^a-zA-Z0-9]+", " ", lowered)
     terms = {term for term in normalized.split() if len(term) > 2}
+    raw_terms = {term for term in re.split(r"\s+", lowered) if len(term) > 2}
+    terms.update(raw_terms)
 
     if {"out", "memory"} <= terms or "oom" in terms:
         terms.update({"outofmemoryerror", "java heap space", "heap", "memory"})
@@ -95,6 +109,8 @@ def _expand_query_terms(query: str) -> list[str]:
         terms.update({"error", "exception", "failed"})
     if "time" in terms or "when" in terms:
         terms.update({"timestamp"})
+    if "trace" in terms or "trace_id" in terms or "trace id" in lowered:
+        terms.update({"trace_id", "trace_id=", "trace", "request", "traceid"})
 
     return sorted(terms)
 
@@ -110,3 +126,12 @@ def _extract_timestamps(lines: list[str]) -> list[str]:
 
 def _format_evidence(lines: list[str]) -> str:
     return "\n".join(f"- {line}" for line in lines)
+
+
+def _extract_trace_ids(lines: list[str]) -> list[str]:
+    """Collect the `trace_id=` value from every line that carries one.
+
+    Only the first match on each line is used; order and duplicates are kept.
+    """
+    hits = (TRACE_ID_PATTERN.search(entry) for entry in lines)
+    return [hit.group("trace_id") for hit in hits if hit]

From 46d6e9dbb22e48fb59e39bdbd51daddc02b8089a Mon Sep 17 00:00:00 2001
From: Suraj Kumar <suraj@Surajs-MacBook-Air.local>
Date: Mon, 23 Mar 2026 02:02:48 +0530
Subject: [PATCH 2/2] add cicd placeholders

---
 .dockerignore              | 13 +++++
 .env.example               |  6 ++-
 .github/workflows/cd.yml   | 59 +++++++++++++++++++++++
 .github/workflows/ci.yml   | 45 ++++++++++++++++++
 Dockerfile                 | 19 ++++++++
 README.md                  | 58 +++++++++++++++++++++++
 backend/README.md          | 10 ++++
 render.yaml                | 32 +++++++++++++
 scripts/test_mcp_client.py | 97 ++++++++++++++++++++++++++++++++++++++
 tests/conftest.py          |  8 ++++
 tests/test_health.py       | 13 +++++
 11 files changed, 359 insertions(+), 1 deletion(-)
 create mode 100644 .dockerignore
 create mode 100644 .github/workflows/cd.yml
 create mode 100644 .github/workflows/ci.yml
 create mode 100644 Dockerfile
 create mode 100644 render.yaml
 create mode 100644 scripts/test_mcp_client.py
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_health.py

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..f9cd150
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,13 @@
+.git
+.github
+.venv
+.pytest_cache
+.pycache_local
+__pycache__
+*.pyc
+.DS_Store
+.env
+node_modules
+frontend/react-chat/node_modules
+frontend/react-chat/dist
+docs
diff --git a/.env.example b/.env.example
index 6323db4..2fb3cd2 100644
--- a/.env.example
+++ b/.env.example
@@ -1,3 +1,8 @@
+APP_ENV=development
+APP_HOST=0.0.0.0
+APP_PORT=8000
+APP_RELOAD=true
+APP_ORIGIN=http://localhost:5173
 GROQ_API_KEY=
 PINECONE_API_KEY=
 PINECONE_INDEX_NAME=ai-observability-agent
@@ -7,4 +12,3 @@ LANGCHAIN_TRACING_V2=true
 LANGCHAIN_PROJECT=ai-observability-agent
 EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
 GROQ_MODEL=llama-3.1-8b-instant
-APP_ORIGIN=http://localhost:5173
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
new file mode 100644
index 0000000..b84ae09
--- /dev/null
+++ b/.github/workflows/cd.yml
@@ -0,0 +1,59 @@
+name: CD
+
+on:
+  push:
+    branches:
+      - main
+
+permissions:
+  contents: read
+  packages: write
+
+env:
+  IMAGE_NAME: ghcr.io/${{ github.repository_owner }}/ai-observability-agent
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Extract Docker metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.IMAGE_NAME }}
+          tags: |
+            type=raw,value=latest
+            type=sha
+
+      - name: Build and push image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+  deploy-render:
+    runs-on: ubuntu-latest
+    needs: build-and-push
+    # `secrets` is not available in a job-level `if`; expose it as env and gate the step.
+    env:
+      RENDER_DEPLOY_HOOK_URL: ${{ secrets.RENDER_DEPLOY_HOOK_URL }}
+    steps:
+      - name: Trigger Render deploy
+        if: ${{ env.RENDER_DEPLOY_HOOK_URL != '' }}
+        run: curl --fail --silent --show-error -X POST "$RENDER_DEPLOY_HOOK_URL"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..4fe4108
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,45 @@
+name: CI
+
+on:
+  pull_request:
+  push:
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r backend/requirements.txt
+
+      - name: Run tests
+        run: pytest
+
+  docker-build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build Docker image
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: false
+          tags: ai-observability-agent:ci
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..fdd160e
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3.11-slim
+
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PYTHONPATH=/app
+
+WORKDIR /app
+
+COPY backend/requirements.txt ./backend/requirements.txt
+
+RUN pip install --upgrade pip && \
+    pip install -r backend/requirements.txt
+
+COPY . .
+
+EXPOSE 8000
+
+CMD ["sh", "-c", "exec uvicorn backend.main:app --host 0.0.0.0 --port ${PORT:-8000}"]
diff --git a/README.md b/README.md
index ec518d2..8475800 100644
--- a/README.md
+++ b/README.md
@@ -60,3 +60,61 @@ cd frontend/react-chat
 npm install
 npm run dev
 ```
+
+### Pinecone
+Create the index and API key used in `.env` from the Pinecone console: <https://app.pinecone.io/organizations/>
+
+## Deployment path
+
+This repo now includes a Docker-first deployment baseline:
+
+- `Dockerfile` for packaging the backend
+- `.dockerignore` to keep the image lean
+- `.github/workflows/ci.yml` for tests plus Docker build validation
+- `.github/workflows/cd.yml` for publishing a container to GHCR and triggering Render
+- `render.yaml` as a starter Render blueprint
+
+### Run locally with Docker
+
+```bash
+docker build -t ai-observability-agent .
+docker run --rm -p 8000:8000 --env-file .env ai-observability-agent
+```
+
+Then verify:
+
+```bash
+curl http://localhost:8000/api/health
+```
+
+### CI
+
+CI runs on pull requests and pushes to `main`:
+
+- installs backend dependencies
+- runs `pytest`
+- builds the Docker image
+
+### CD
+
+CD runs on pushes to `main`:
+
+- builds and pushes `ghcr.io/<owner>/ai-observability-agent`
+- tags the image with `latest` and the Git SHA
+- triggers Render through `RENDER_DEPLOY_HOOK_URL`
+
+### Render setup
+
+1. Create a Render web service from an existing image.
+2. Point it at `ghcr.io/<your-user-or-org>/ai-observability-agent:latest`.
+3. Add the environment variables from `.env.example`.
+4. Set the health check path to `/api/health`.
+5. Add `RENDER_DEPLOY_HOOK_URL` as a GitHub Actions secret.
+
+### GitHub Actions secrets
+
+You only need one repository secret for the current CD flow:
+
+- `RENDER_DEPLOY_HOOK_URL`
+
+The workflow uses the built-in `GITHUB_TOKEN` to push to GHCR.
diff --git a/backend/README.md b/backend/README.md
index 14e6e87..8aaaded 100644
--- a/backend/README.md
+++ b/backend/README.md
@@ -1,3 +1,13 @@
 # Backend
 
 This folder contains the FastAPI service, LangGraph agent, RAG pipeline, tools, and MCP server.
+
+## MCP quick check
+
+After installing backend dependencies, run these commands from the repository root to test the local MCP server:
+
+```bash
+python scripts/test_mcp_client.py --list-tools
+python scripts/test_mcp_client.py --tool search_logs --args '{"query":"error","limit":3}'
+python scripts/test_mcp_client.py --tool get_metrics --args '{"service_name":"checkout-service"}'
+```
diff --git a/render.yaml b/render.yaml
new file mode 100644
index 0000000..17df5df
--- /dev/null
+++ b/render.yaml
@@ -0,0 +1,32 @@
+services:
+  - type: web
+    name: ai-observability-agent
+    runtime: image
+    image:
+      url: ghcr.io/OWNER_OR_ORG/ai-observability-agent:latest
+    plan: free
+    healthCheckPath: /api/health
+    autoDeploy: false
+    envVars:
+      - key: APP_ENV
+        value: production
+      - key: APP_ORIGIN
+        sync: false
+      - key: GROQ_API_KEY
+        sync: false
+      - key: GROQ_MODEL
+        value: llama-3.1-8b-instant
+      - key: PINECONE_API_KEY
+        sync: false
+      - key: PINECONE_INDEX_NAME
+        value: ai-observability-agent
+      - key: PINECONE_NAMESPACE
+        value: observability-docs
+      - key: LANGCHAIN_API_KEY
+        sync: false
+      - key: LANGCHAIN_TRACING_V2
+        value: "true"
+      - key: LANGCHAIN_PROJECT
+        value: ai-observability-agent
+      - key: EMBEDDING_MODEL
+        value: sentence-transformers/all-MiniLM-L6-v2
diff --git a/scripts/test_mcp_client.py b/scripts/test_mcp_client.py
new file mode 100644
index 0000000..0fc2ab5
--- /dev/null
+++ b/scripts/test_mcp_client.py
@@ -0,0 +1,97 @@
+"""Tiny local MCP client for exercising the observability MCP server."""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import json
+import sys
+from pathlib import Path
+from typing import Any
+
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.stdio import stdio_client
+
+
+REPO_ROOT = Path(__file__).resolve().parents[1]
+
+
+def build_parser() -> argparse.ArgumentParser:
+    """Create the command-line parser for this MCP test client."""
+    cli = argparse.ArgumentParser(
+        description="Connect to the local MCP server, list tools, or call one tool."
+    )
+    cli.add_argument(
+        "--python",
+        default=sys.executable,
+        help="Python executable used to launch the MCP server.",
+    )
+    cli.add_argument(
+        "--server-module",
+        default="backend.mcp.server",
+        help="Python module path for the MCP server entrypoint.",
+    )
+    cli.add_argument(
+        "--list-tools",
+        action="store_true",
+        help="List all tools exposed by the MCP server.",
+    )
+    cli.add_argument(
+        "--tool", help="Tool name to call, for example `search_logs` or `get_metrics`."
+    )
+    cli.add_argument(
+        "--args",
+        default="{}",
+        help='JSON object with tool arguments, for example \'{"query":"error","limit":3}\'.',
+    )
+    return cli
+
+
+async def run_client(args: argparse.Namespace) -> int:
+    """Run one MCP session; return 0 on success, 1 on tool error, 2 on bad --args."""
+    try:
+        tool_args: dict[str, Any] = json.loads(args.args)
+    except json.JSONDecodeError as exc:
+        print(f"Invalid JSON passed to --args: {exc}", file=sys.stderr)
+        return 2
+
+    if not isinstance(tool_args, dict):
+        print("--args must decode to a JSON object.", file=sys.stderr)
+        return 2
+
+    server = StdioServerParameters(
+        command=args.python,
+        args=["-m", args.server_module],
+        cwd=str(REPO_ROOT),
+    )
+
+    async with stdio_client(server) as (read_stream, write_stream):
+        async with ClientSession(read_stream, write_stream) as session:
+            await session.initialize()
+
+            if args.list_tools or not args.tool:
+                tools = await session.list_tools()
+                print("Available MCP tools:")
+                for tool in tools.tools:
+                    print(f"- {tool.name}: {tool.description}")
+
+            if args.tool:
+                result = await session.call_tool(args.tool, tool_args)
+                print(f"\nTool result for `{args.tool}`:")
+                for content in result.content:
+                    print(getattr(content, "text", None) or content)
+                if getattr(result, "isError", False):
+                    # The server reports tool failures in-band, not as exceptions.
+                    return 1
+
+    return 0
+
+
+def main() -> int:
+    """Parse the CLI arguments, run the async client, and return its exit code."""
+    namespace = build_parser().parse_args()
+    return asyncio.run(run_client(namespace))
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..6f5b9f8
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,8 @@
+from pathlib import Path
+import sys
+
+# Put the repository root on sys.path so tests can import the `backend` package.
+ROOT = Path(__file__).resolve().parents[1]
+
+if str(ROOT) not in sys.path:
+    sys.path.insert(0, str(ROOT))
diff --git a/tests/test_health.py b/tests/test_health.py
new file mode 100644
index 0000000..69278b1
--- /dev/null
+++ b/tests/test_health.py
@@ -0,0 +1,13 @@
+from fastapi.testclient import TestClient
+
+from backend.main import app
+
+
+client = TestClient(app)
+
+
+def test_healthcheck_returns_ok() -> None:
+    """GET /api/health answers 200 and reports an ok status."""
+    reply = client.get("/api/health")
+    assert reply.status_code == 200
+    assert reply.json()["status"] == "ok"