-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrequirements.txt
More file actions
44 lines (35 loc) · 2.54 KB
/
requirements.txt
File metadata and controls
44 lines (35 loc) · 2.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# ─── Core Big-Data / Spark ecosystem ─────────────────────────────────────────
pyspark==3.4.2
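# GraphFrames Python bindings only — the matching JVM JAR is not installed by pip;
# Spark fetches it at session start-up when it is passed via --packages.
# NOTE(review): confirm that this exact version is published on PyPI under the
# name "graphframes" — verify with `pip download` before relying on this pin.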
graphframes==0.8.3
# Delta Lake (ACID writes, time-travel, MERGE) — optional but recommended
delta-spark==2.4.0
# ─── ML / feature engineering ────────────────────────────────────────────────
xgboost==2.0.3
scikit-learn==1.4.0
lightgbm==4.2.0
shap==0.44.0
# ─── Data handling ────────────────────────────────────────────────────────────
pandas==2.1.4
numpy==1.26.3
pyarrow==14.0.2 # Parquet I/O + Arrow ↔ pandas bridge
ogb==1.3.6 # Open Graph Benchmark datasets
duckdb==1.5.1 # Native TPC-H data generation fallback (no C toolchain)
# ─── Statistics / collinearity analysis ──────────────────────────────────────
statsmodels==0.14.1 # VIF computation (model/feature_analysis.py)
# ─── Visualisation ────────────────────────────────────────────────────────────
matplotlib==3.8.2
seaborn==0.13.1
# ─── Experiment tracking & reporting ─────────────────────────────────────────
jsonschema==4.21.1 # DSL query validation
# ─── Interactive development ──────────────────────────────────────────────────
jupyter==1.0.0 # Jupyter metapackage: classic Notebook, console, nbconvert, ipykernel (JupyterLab is pinned separately)
jupyterlab==4.1.2
ipywidgets==8.1.2 # Interactive widgets for plots in notebooks
nbformat==5.9.2
streamlit==1.44.1 # Lightweight evaluation dashboard for demo videos
# ─── Streaming / Kafka integration (optional — for online retraining loop) ───
# kafka-python==2.0.2 # uncomment when implementing Task 3.2
# ─── Testing ─────────────────────────────────────────────────────────────────
pytest==7.4.4
pytest-cov==4.1.0