From 399352b944068017f99890ac89a6f602e08c8398 Mon Sep 17 00:00:00 2001 From: Suraj Kumar Date: Sun, 15 Mar 2026 04:15:58 +0530 Subject: [PATCH 1/2] feat: trace_id pattern --- .gitignore | 1 + README.md | 8 ++++++++ backend/agent/nodes.py | 15 ++++++++++++++- backend/tools/log_tool.py | 31 ++++++++++++++++++++++++++++--- 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 4266775..d7a1e7f 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ __pycache__/ *.pyc node_modules/ dist/ +docs diff --git a/README.md b/README.md index 6ba9b08..ec518d2 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,15 @@ Small agentic AI project scaffold for learning: ### Backend 1. Create a virtual environment +```bash + python3.11 -m venv .venv + source .venv/bin/activate +``` 2. Install dependencies from `backend/requirements.txt` +```bash + pip install --upgrade pip + pip install -r backend/requirements.txt +``` 3. Copy `.env.example` to `.env` 4. Start the API: diff --git a/backend/agent/nodes.py b/backend/agent/nodes.py index 4c9a719..11b039d 100644 --- a/backend/agent/nodes.py +++ b/backend/agent/nodes.py @@ -15,7 +15,20 @@ def classify_query(state: dict[str, Any]) -> dict[str, Any]: """Route queries to the right tooling using lightweight heuristics.""" query = state["query"].lower() - if any(keyword in query for keyword in ("log", "stack trace", "error", "exception")): + if any( + keyword in query + for keyword in ( + "log", + "stack trace", + "error", + "exception", + "trace_id", + "trace id", + "trace", + "request id", + "correlation id", + ) + ): intent = "logs" elif any(keyword in query for keyword in ("cpu", "memory", "latency", "metrics", "slow")): intent = "metrics" diff --git a/backend/tools/log_tool.py b/backend/tools/log_tool.py index aff4f8a..0ad4d86 100644 --- a/backend/tools/log_tool.py +++ b/backend/tools/log_tool.py @@ -16,6 +16,7 @@ "database": re.compile(r"SQLException|connection refused|deadlock", re.IGNORECASE), } 
TIMESTAMP_PATTERN = re.compile(r"^(?P<timestamp>\S+)") +TRACE_ID_PATTERN = re.compile(r"\btrace_id=(?P<trace_id>[A-Za-z0-9_-]+)") def _read_log_file() -> str: @@ -64,19 +65,29 @@ def analyze_logs_tool(log_text: str) -> str: evidence_lines = [line for line in log_text.splitlines() if line.strip()] timestamps = _extract_timestamps(evidence_lines) + trace_ids = _extract_trace_ids(evidence_lines) if not findings: if evidence_lines: - return ( + summary = [ "No obvious incident signature detected.\n" - f"Evidence lines:\n{_format_evidence(evidence_lines)}\n" + ] + if timestamps: + summary.append(f"Relevant timestamps: {', '.join(timestamps)}.") + if trace_ids: + summary.append(f"Relevant trace IDs: {', '.join(trace_ids)}.") + summary.append(f"Evidence lines:\n{_format_evidence(evidence_lines)}") + summary.append( "Recommended next step: inspect latency spikes, correlation IDs, and nearby log lines." ) + return "\n".join(summary) return "No obvious incident signature detected. Inspect latency spikes and correlation IDs." 
summary = [f"Likely issue types: {', '.join(findings)}."] if timestamps: summary.append(f"Relevant timestamps: {', '.join(timestamps)}.") + if trace_ids: + summary.append(f"Relevant trace IDs: {', '.join(trace_ids)}.") if evidence_lines: summary.append(f"Evidence lines:\n{_format_evidence(evidence_lines)}") summary.append("Recommended next step: inspect surrounding logs, trace IDs, and recent deploys.") @@ -84,8 +95,11 @@ def analyze_logs_tool(log_text: str) -> str: def _expand_query_terms(query: str) -> list[str]: - normalized = re.sub(r"[^a-zA-Z0-9]+", " ", query.lower()) + lowered = query.lower() + normalized = re.sub(r"[^a-zA-Z0-9]+", " ", lowered) terms = {term for term in normalized.split() if len(term) > 2} + raw_terms = {term for term in re.split(r"\s+", lowered) if len(term) > 2} + terms.update(raw_terms) if {"out", "memory"} <= terms or "oom" in terms: terms.update({"outofmemoryerror", "java heap space", "heap", "memory"}) @@ -95,6 +109,8 @@ def _expand_query_terms(query: str) -> list[str]: terms.update({"error", "exception", "failed"}) if "time" in terms or "when" in terms: terms.update({"timestamp"}) + if "trace" in terms or "trace_id" in terms or "trace id" in lowered: + terms.update({"trace_id", "trace_id=", "trace", "request", "traceid"}) return sorted(terms) @@ -110,3 +126,12 @@ def _extract_timestamps(lines: list[str]) -> list[str]: def _format_evidence(lines: list[str]) -> str: return "\n".join(f"- {line}" for line in lines) + + +def _extract_trace_ids(lines: list[str]) -> list[str]: + trace_ids: list[str] = [] + for line in lines: + match = TRACE_ID_PATTERN.search(line) + if match: + trace_ids.append(match.group("trace_id")) + return trace_ids From 46d6e9dbb22e48fb59e39bdbd51daddc02b8089a Mon Sep 17 00:00:00 2001 From: Suraj Kumar Date: Mon, 23 Mar 2026 02:02:48 +0530 Subject: [PATCH 2/2] add cicd placeholders --- .dockerignore | 13 +++++ .env.example | 6 ++- .github/workflows/cd.yml | 59 +++++++++++++++++++++++ .github/workflows/ci.yml | 45 
++++++++++++++++++ Dockerfile | 19 ++++++++ README.md | 58 +++++++++++++++++++++++ backend/README.md | 10 ++++ render.yaml | 32 +++++++++++++ scripts/test_mcp_client.py | 97 ++++++++++++++++++++++++++++++++++++++ tests/conftest.py | 8 ++++ tests/test_health.py | 13 +++++ 11 files changed, 359 insertions(+), 1 deletion(-) create mode 100644 .dockerignore create mode 100644 .github/workflows/cd.yml create mode 100644 .github/workflows/ci.yml create mode 100644 Dockerfile create mode 100644 render.yaml create mode 100644 scripts/test_mcp_client.py create mode 100644 tests/conftest.py create mode 100644 tests/test_health.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..f9cd150 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,13 @@ +.git +.github +.venv +.pytest_cache +.pycache_local +__pycache__ +*.pyc +.DS_Store +.env +node_modules +frontend/react-chat/node_modules +frontend/react-chat/dist +docs diff --git a/.env.example b/.env.example index 6323db4..2fb3cd2 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,8 @@ +APP_ENV=development +APP_HOST=0.0.0.0 +APP_PORT=8000 +APP_RELOAD=true +APP_ORIGIN=http://localhost:5173 GROQ_API_KEY= PINECONE_API_KEY= PINECONE_INDEX_NAME=ai-observability-agent @@ -7,4 +12,3 @@ LANGCHAIN_TRACING_V2=true LANGCHAIN_PROJECT=ai-observability-agent EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2 GROQ_MODEL=llama-3.1-8b-instant -APP_ORIGIN=http://localhost:5173 diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml new file mode 100644 index 0000000..b84ae09 --- /dev/null +++ b/.github/workflows/cd.yml @@ -0,0 +1,59 @@ +name: CD + +on: + push: + branches: + - main + +permissions: + contents: read + packages: write + +env: + IMAGE_NAME: ghcr.io/${{ github.repository_owner }}/ai-observability-agent + +jobs: + build-and-push: + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: 
Log in to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.IMAGE_NAME }} + tags: | + type=raw,value=latest + type=sha + + - name: Build and push image + uses: docker/build-push-action@v6 + with: + context: . + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + deploy-render: + runs-on: ubuntu-latest + needs: build-and-push + env: + RENDER_DEPLOY_HOOK_URL: ${{ secrets.RENDER_DEPLOY_HOOK_URL }} + + steps: + - name: Trigger Render deploy + if: ${{ env.RENDER_DEPLOY_HOOK_URL != '' }} + run: curl --fail -X POST "$RENDER_DEPLOY_HOOK_URL" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..4fe4108 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,45 @@ +name: CI + +on: + pull_request: + push: + branches: + - main + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r backend/requirements.txt + + - name: Run tests + run: pytest + + docker-build: + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build Docker image + uses: docker/build-push-action@v6 + with: + context: . 
+ push: false + tags: ai-observability-agent:ci diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..fdd160e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.11-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + PIP_NO_CACHE_DIR=1 \ + PYTHONPATH=/app + +WORKDIR /app + +COPY backend/requirements.txt ./backend/requirements.txt + +RUN pip install --upgrade pip && \ + pip install -r backend/requirements.txt + +COPY . . + +EXPOSE 8000 + +CMD ["sh", "-c", "uvicorn backend.main:app --host 0.0.0.0 --port ${PORT:-8000}"] diff --git a/README.md b/README.md index ec518d2..8475800 100644 --- a/README.md +++ b/README.md @@ -60,3 +60,61 @@ cd frontend/react-chat npm install npm run dev ``` + +### Pinecone +https://app.pinecone.io/organizations/ + +## Deployment path + +This repo now includes a Docker-first deployment baseline: + +- `Dockerfile` for packaging the backend +- `.dockerignore` to keep the image lean +- `.github/workflows/ci.yml` for tests plus Docker build validation +- `.github/workflows/cd.yml` for publishing a container to GHCR and triggering Render +- `render.yaml` as a starter Render blueprint + +### Run locally with Docker + +```bash +docker build -t ai-observability-agent . +docker run --rm -p 8000:8000 --env-file .env ai-observability-agent +``` + +Then verify: + +```bash +curl http://localhost:8000/api/health +``` + +### CI + +CI runs on pull requests and pushes to `main`: + +- installs backend dependencies +- runs `pytest` +- builds the Docker image + +### CD + +CD runs on pushes to `main`: + +- builds and pushes `ghcr.io/OWNER_OR_ORG/ai-observability-agent` +- tags the image with `latest` and the Git SHA +- triggers Render through `RENDER_DEPLOY_HOOK_URL` + +### Render setup + +1. Create a Render web service from an existing image. +2. Point it at `ghcr.io/OWNER_OR_ORG/ai-observability-agent:latest`. +3. Add the environment variables from `.env.example`. +4. Set the health check path to `/api/health`. +5. 
Add `RENDER_DEPLOY_HOOK_URL` as a GitHub Actions secret. + +### GitHub Actions secrets + +You only need one repository secret for the current CD flow: + +- `RENDER_DEPLOY_HOOK_URL` + +The workflow uses the built-in `GITHUB_TOKEN` to push to GHCR. diff --git a/backend/README.md b/backend/README.md index 14e6e87..8aaaded 100644 --- a/backend/README.md +++ b/backend/README.md @@ -1,3 +1,13 @@ # Backend This folder contains the FastAPI service, LangGraph agent, RAG pipeline, tools, and MCP server. + +## MCP quick check + +After installing backend dependencies, you can test the local MCP server with: + +```bash +python scripts/test_mcp_client.py --list-tools +python scripts/test_mcp_client.py --tool search_logs --args '{"query":"error","limit":3}' +python scripts/test_mcp_client.py --tool get_metrics --args '{"service_name":"checkout-service"}' +``` diff --git a/render.yaml b/render.yaml new file mode 100644 index 0000000..17df5df --- /dev/null +++ b/render.yaml @@ -0,0 +1,32 @@ +services: + - type: web + name: ai-observability-agent + runtime: image + image: + url: ghcr.io/OWNER_OR_ORG/ai-observability-agent:latest + plan: free + healthCheckPath: /api/health + autoDeploy: false + envVars: + - key: APP_ENV + value: production + - key: APP_ORIGIN + sync: false + - key: GROQ_API_KEY + sync: false + - key: GROQ_MODEL + value: llama-3.1-8b-instant + - key: PINECONE_API_KEY + sync: false + - key: PINECONE_INDEX_NAME + value: ai-observability-agent + - key: PINECONE_NAMESPACE + value: observability-docs + - key: LANGCHAIN_API_KEY + sync: false + - key: LANGCHAIN_TRACING_V2 + value: "true" + - key: LANGCHAIN_PROJECT + value: ai-observability-agent + - key: EMBEDDING_MODEL + value: sentence-transformers/all-MiniLM-L6-v2 diff --git a/scripts/test_mcp_client.py b/scripts/test_mcp_client.py new file mode 100644 index 0000000..0fc2ab5 --- /dev/null +++ b/scripts/test_mcp_client.py @@ -0,0 +1,97 @@ +"""Tiny local MCP client for exercising the observability MCP server.""" + +from 
__future__ import annotations + +import argparse +import asyncio +import json +import sys +from pathlib import Path +from typing import Any + +from mcp import ClientSession, StdioServerParameters +from mcp.client.stdio import stdio_client + + +REPO_ROOT = Path(__file__).resolve().parents[1] + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + description="Connect to the local MCP server, list tools, or call one tool." + ) + parser.add_argument( + "--python", + default=sys.executable, + help="Python executable used to launch the MCP server.", + ) + parser.add_argument( + "--server-module", + default="backend.mcp.server", + help="Python module path for the MCP server entrypoint.", + ) + parser.add_argument( + "--list-tools", + action="store_true", + help="List all tools exposed by the MCP server.", + ) + parser.add_argument( + "--tool", + help="Tool name to call, for example `search_logs` or `get_metrics`.", + ) + parser.add_argument( + "--args", + default="{}", + help='JSON object with tool arguments, for example \'{"query":"error","limit":3}\'.', + ) + return parser + + +async def run_client(args: argparse.Namespace) -> int: + try: + tool_args: dict[str, Any] = json.loads(args.args) + except json.JSONDecodeError as exc: + print(f"Invalid JSON passed to --args: {exc}", file=sys.stderr) + return 2 + + if not isinstance(tool_args, dict): + print("--args must decode to a JSON object.", file=sys.stderr) + return 2 + + server = StdioServerParameters( + command=args.python, + args=["-m", args.server_module], + cwd=str(REPO_ROOT), + ) + + async with stdio_client(server) as (read_stream, write_stream): + async with ClientSession(read_stream, write_stream) as session: + await session.initialize() + + if args.list_tools or not args.tool: + tools = await session.list_tools() + print("Available MCP tools:") + for tool in tools.tools: + print(f"- {tool.name}: {tool.description}") + + if args.tool: + result = await session.call_tool(args.tool, 
tool_args) + print(f"\nTool result for `{args.tool}`:") + for content in result.content: + text = getattr(content, "text", None) + if text: + print(text) + else: + print(content) + + return 0 + + +def main() -> int: + parser = build_parser() + args = parser.parse_args() + return asyncio.run(run_client(args)) + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..6f5b9f8 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,8 @@ +from pathlib import Path +import sys + + +ROOT = Path(__file__).resolve().parents[1] + +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) diff --git a/tests/test_health.py b/tests/test_health.py new file mode 100644 index 0000000..69278b1 --- /dev/null +++ b/tests/test_health.py @@ -0,0 +1,13 @@ +from fastapi.testclient import TestClient + +from backend.main import app + + +client = TestClient(app) + + +def test_healthcheck_returns_ok() -> None: + response = client.get("/api/health") + + assert response.status_code == 200 + assert response.json()["status"] == "ok"