Skip to content

Commit 3a65f30

Browse files
authored
fix: add ci_generate_data.py for CI model training (no infinite loop)
1 parent d3f6233 commit 3a65f30

1 file changed

Lines changed: 54 additions & 0 deletions

File tree

ai-model/ci_generate_data.py

Lines changed: 54 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,54 @@
1+
"""CI Data Generator - generates a fixed batch of logs for model training in CI.
2+
Runs once and exits (no infinite loop). Safe for GitHub Actions.
3+
"""
4+
import csv
5+
import os
6+
import random
7+
from datetime import datetime
8+
9+
# Anchor every path to this script's own location so the output lands in the
# same place regardless of the caller's working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "../data")
LOG_FILE = os.path.join(DATA_DIR, "generated_logs.csv")

# Make sure the output directory exists; an already-present directory is fine.
os.makedirs(DATA_DIR, exist_ok=True)

# Column names, in the order they are written to the CSV.
headers = [
    "timestamp",
    "source_ip",
    "destination_ip",
    "bytes",
    "protocol",
    "event_type",
    "details",
]

# Candidate values for the "protocol" column.
protocols = [
    "TCP",
    "UDP",
    "ICMP",
    "HTTP",
    "HTTPS",
]

# Candidate values for the "event_type" column; `weights` is positionally
# aligned with this list (one relative weight per event).
events = [
    "Normal",
    "Failed Login",
    "Port Scan",
    "Malware Detected",
    "File Access",
]
weights = [
    0.7,
    0.1,
    0.1,
    0.05,
    0.05,
]
20+
21+
22+
def generate_ip():
    """Return a random dotted-quad IPv4-style address as a string.

    The first octet is drawn from 10-192, the two middle octets from 0-255
    and the last octet from 1-255 (all bounds inclusive).
    """
    # Octets are drawn left to right so the random stream is consumed in a
    # fixed order.
    octets = [
        random.randint(10, 192),
        random.randint(0, 255),
        random.randint(0, 255),
        random.randint(1, 255),
    ]
    return ".".join(str(octet) for octet in octets)
24+
25+
26+
def generate_row():
    """Build one synthetic log record.

    Returns:
        A list whose items line up with the CSV columns: timestamp string
        (``YYYY-MM-DD HH:MM:SS``, current local time), source IP, destination
        IP, byte count (100-50000), protocol, event type and a details string.
    """
    # Pick the event first, then the source address, so the details text can
    # mention the same source IP that ends up in the row.
    (kind,) = random.choices(events, weights=weights, k=1)
    source = generate_ip()

    if kind == "Failed Login":
        note = f"Failed attempt from {source}"
    elif kind == "Port Scan":
        note = f"Multiple ports scanned by {source}"
    elif kind == "Malware Detected":
        note = "Signature match: Trojan.Win32"
    elif kind == "File Access":
        note = "Accessed /etc/passwd"
    else:
        # Any event without a dedicated message is treated as ordinary traffic.
        note = "Routine traffic"

    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    destination = generate_ip()
    size = random.randint(100, 50000)
    proto = random.choice(protocols)
    return [stamp, source, destination, size, proto, kind, note]
44+
45+
46+
# Fixed batch size: the script writes exactly this many rows and then exits.
NUM_ROWS = 500
print(f"Generating {NUM_ROWS} log entries -> {LOG_FILE}")
# newline="" hands line-ending control to the csv module; the explicit
# encoding keeps the output independent of the runner's locale settings.
with open(LOG_FILE, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    # Header first, then NUM_ROWS generated records streamed in one call.
    writer.writerows(generate_row() for _ in range(NUM_ROWS))

print(f"Done. {NUM_ROWS} rows written to {LOG_FILE}")

0 commit comments

Comments
 (0)