Skip to content

Commit 103e136

Browse files
authored
Create credit_risk_system.py
1 parent d8d9ecf commit 103e136

1 file changed

Lines changed: 200 additions & 0 deletions

File tree

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
"""
CreditRiskPro v1.1
Enterprise Credit Risk Prediction System
Single-File | Industry-Style | Beginner-Friendly

Run modes:
    1) python generate_sample_data.py
    2) python credit_risk_system.py --mode train --data train_data.csv
"""
10+
11+
# =========================
12+
# Imports
13+
# =========================
14+
import os
15+
import sys
16+
import argparse
17+
import joblib
18+
import pandas as pd
19+
import numpy as np
20+
21+
from sklearn.model_selection import train_test_split
22+
from sklearn.preprocessing import StandardScaler
23+
from sklearn.pipeline import Pipeline
24+
from sklearn.compose import ColumnTransformer
25+
from sklearn.linear_model import LogisticRegression
26+
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix
27+
28+
# =========================
29+
# Configuration
30+
# =========================
31+
# Where the fitted pipeline is persisted by training and read back by prediction.
MODEL_PATH = "credit_risk_model.joblib"
# Input files used by auto mode when the caller supplies no arguments.
DEFAULT_TRAIN_DATA = "train_data.csv"
DEFAULT_PREDICT_DATA = "new_applicants.csv"
# Name of the label column in the training data.
TARGET_COL = "default"
# Seed shared by the train/test split and the classifier for reproducibility.
RANDOM_STATE = 42
36+
37+
38+
# =========================
39+
# Utilities
40+
# =========================
41+
def log(msg: object) -> None:
    """Print *msg* to stdout, prefixed with the ``[CreditRiskPro]`` tag.

    Args:
        msg: Message to display; it is formatted with ``str()`` semantics.
    """
    print(f"[CreditRiskPro] {msg}")
43+
44+
45+
# =========================
46+
# Validation
47+
# =========================
48+
def validate_dataset(df: pd.DataFrame) -> None:
    """Check that a training DataFrame is usable before a model is fitted.

    Args:
        df: Training data that should contain the ``TARGET_COL`` column.

    Raises:
        ValueError: If the target column is missing, the dataset has no
            rows, the target contains missing values, or the target holds
            fewer than two distinct classes.
    """
    if TARGET_COL not in df.columns:
        raise ValueError(f"Missing target column '{TARGET_COL}'")

    if df.empty:
        raise ValueError("Dataset is empty")

    target = df[TARGET_COL]

    # Fail early with a clear message: missing labels would otherwise only
    # surface later as an error raised from inside scikit-learn.
    if target.isna().any():
        raise ValueError(f"Target column '{TARGET_COL}' contains missing values")

    # A binary classifier (and the stratified split) needs both classes.
    if target.nunique() < 2:
        raise ValueError(
            f"Target column '{TARGET_COL}' must contain at least two classes"
        )
54+
55+
56+
# =========================
57+
# Preprocessing
58+
# =========================
59+
def build_preprocessor(df: pd.DataFrame) -> "ColumnTransformer":
    """Build the feature preprocessor for the numeric columns of *df*.

    Every numeric column except ``TARGET_COL`` is standardised with
    ``StandardScaler``. The ``ColumnTransformer`` is created without a
    ``remainder`` argument, so any other column is left to its default
    handling.

    Args:
        df: Training data; only its column names and dtypes are inspected.

    Returns:
        An unfitted ``ColumnTransformer`` with a single ``"num"`` step.

    Raises:
        ValueError: If *df* has no numeric feature columns.
    """
    # Filter the target out instead of calling list.remove(), which raises
    # when the target is not among the selected columns (e.g. a boolean or
    # string label). "number" also covers numeric dtypes other than
    # int64/float64.
    numeric_features = [
        column
        for column in df.select_dtypes(include="number").columns
        if column != TARGET_COL
    ]

    if not numeric_features:
        raise ValueError("No numeric feature columns found in dataset")

    return ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), numeric_features),
        ]
    )
70+
71+
72+
# =========================
73+
# Model
74+
# =========================
75+
def build_model() -> "LogisticRegression":
    """Create the unfitted classifier used by the training pipeline.

    Returns:
        A ``LogisticRegression`` configured with ``max_iter=1000``,
        ``class_weight="balanced"`` and the module-wide ``RANDOM_STATE``.
    """
    return LogisticRegression(
        max_iter=1000,
        class_weight="balanced",
        random_state=RANDOM_STATE,
    )
81+
82+
83+
# =========================
84+
# Training
85+
# =========================
86+
def train_model(data_path: str) -> None:
    """Train, evaluate and persist the credit-risk pipeline.

    Reads the CSV at *data_path*, validates it, fits a preprocessing +
    logistic-regression pipeline on 75% of the rows, prints evaluation
    metrics for the held-out 25%, and saves the fitted pipeline to
    ``MODEL_PATH``.

    Args:
        data_path: Path to a CSV file that includes the ``TARGET_COL`` column.

    Raises:
        ValueError: If ``validate_dataset`` rejects the data.
    """
    log(f"Loading training data: {data_path}")
    df = pd.read_csv(data_path)

    validate_dataset(df)

    X = df.drop(columns=[TARGET_COL])
    y = df[TARGET_COL]

    pipeline = Pipeline(
        steps=[
            ("preprocessor", build_preprocessor(df)),
            ("model", build_model()),
        ]
    )

    # Stratify so both splits keep the same class proportions as the full data.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.25,
        stratify=y,
        random_state=RANDOM_STATE,
    )

    log("Training model...")
    pipeline.fit(X_train, y_train)

    log("Evaluating model...")
    # Column 1 of predict_proba is the score thresholded into a 0/1 prediction.
    probs = pipeline.predict_proba(X_test)[:, 1]
    preds = (probs >= 0.5).astype(int)

    print("\nAUC:", round(roc_auc_score(y_test, probs), 4))
    print("\nClassification Report:")
    print(classification_report(y_test, preds))
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, preds))

    joblib.dump(pipeline, MODEL_PATH)
    log(f"Model saved → {MODEL_PATH}")
125+
126+
127+
# =========================
128+
# Prediction
129+
# =========================
130+
def predict(
    data_path: str,
    *,
    output_file: str = "credit_risk_predictions.csv",
    threshold: float = 0.5,
) -> None:
    """Score applicants with the saved pipeline and write the results to CSV.

    The output contains every input column plus ``risk_score`` (column 1 of
    the pipeline's ``predict_proba``) and ``risk_class`` (``"HIGH"`` when the
    score is at or above *threshold*, otherwise ``"LOW"``). The first rows of
    the two new columns are also printed.

    Args:
        data_path: Path to the CSV file of applicants to score.
        output_file: Destination CSV path for the scored rows.
        threshold: Score at or above which an applicant is labelled ``"HIGH"``.

    Raises:
        FileNotFoundError: If no trained model exists at ``MODEL_PATH``.
        ValueError: If the prediction dataset has no rows.
    """
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError("Model not found. Train the model first.")

    log(f"Loading model: {MODEL_PATH}")
    # SECURITY: joblib.load unpickles arbitrary Python objects, so MODEL_PATH
    # must only ever point at a file produced by a trusted training run.
    pipeline = joblib.load(MODEL_PATH)

    log(f"Loading prediction data: {data_path}")
    df = pd.read_csv(data_path)

    # Reject empty input up front rather than failing inside the pipeline.
    if df.empty:
        raise ValueError("Prediction dataset is empty")

    probs = pipeline.predict_proba(df)[:, 1]

    results = df.copy()
    results["risk_score"] = probs
    results["risk_class"] = np.where(probs >= threshold, "HIGH", "LOW")

    results.to_csv(output_file, index=False)

    log(f"Predictions saved → {output_file}")
    print(results[["risk_score", "risk_class"]].head())
151+
152+
153+
# =========================
154+
# Main Logic (FIXED)
155+
# =========================
156+
def _run_auto_mode(data_path=None):
    """Train when no saved model exists, otherwise run prediction.

    Uses *data_path* when given, else the default file for the chosen step.
    Exits with status 1 when the selected input file does not exist.
    """
    if data_path is None:
        log("No arguments provided → Auto mode enabled")
    else:
        log("No --mode provided → Auto mode enabled")

    if not os.path.exists(MODEL_PATH):
        train_path = data_path or DEFAULT_TRAIN_DATA
        if not os.path.exists(train_path):
            log(f"ERROR: '{train_path}' not found")
            sys.exit(1)

        log("Model not found → Training new model")
        train_model(train_path)
        return

    predict_path = data_path or DEFAULT_PREDICT_DATA
    if not os.path.exists(predict_path):
        log(f"ERROR: '{predict_path}' not found")
        sys.exit(1)

    log("Model found → Running prediction")
    predict(predict_path)


def main() -> None:
    """Command-line entry point.

    With ``--mode train|predict`` the given ``--data`` file is used for that
    step. Without ``--mode`` the script runs in auto mode: it trains when no
    model file exists and predicts otherwise, using ``--data`` if supplied or
    the default file names if not.

    File and validation errors are reported through ``log`` and end the
    process with exit status 1 instead of a traceback.
    """
    # Unrecognised arguments are ignored on purpose (parse_known_args) and no
    # -h/--help option is registered (add_help=False).
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument("--mode", choices=["train", "predict"])
    parser.add_argument("--data")
    args, _ = parser.parse_known_args()

    try:
        # ---------- AUTO MODE ----------
        if args.mode is None:
            _run_auto_mode(args.data)
            return

        # ---------- MANUAL MODE ----------
        if not args.data:
            log("ERROR: --data is required when --mode is specified")
            sys.exit(1)

        if args.mode == "train":
            train_model(args.data)
        elif args.mode == "predict":
            predict(args.data)
    except (FileNotFoundError, ValueError) as exc:
        # Report missing files and invalid data the same way in both modes.
        log(f"ERROR: {exc}")
        sys.exit(1)
194+
195+
196+
# =========================
197+
# Entry Point
198+
# =========================
199+
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)