import tkinter as tk
from tkinter import messagebox, filedialog
import ttkbootstrap as tb
from ttkbootstrap.scrolled import ScrolledText  # ScrolledText lives in ttkbootstrap.scrolled
import threading
import time
import json
import csv
import requests
import re
import os
import sys
from collections import defaultdict
from bs4 import BeautifulSoup

HEADERS = {"User-Agent": "Mozilla/5.0"}
SEARCH_URL = "https://www.google.com/search"

emails_found = set()
sources = defaultdict(list)
stop_event = threading.Event()
scrape_completed = False

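# Pragmatic pattern: matches most real-world addresses, not the full RFC 5322 grammar.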
EMAIL_REGEX = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")

# =================== Utility ===================
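# Resolves bundled asset paths; sys._MEIPASS is set when the app is frozen with PyInstaller.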
def resource_path(file_name):
    base_path = getattr(sys, '_MEIPASS', os.path.dirname(os.path.abspath(__file__)))
    return os.path.join(base_path, file_name)

def show_error(t, m): messagebox.showerror(t, m)
def show_info(t, m): messagebox.showinfo(t, m)

# ================= APP =================
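# Root window; "superhero" is one of ttkbootstrap's built-in dark themes.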
app = tb.Window("EmailScout – Public Contact Finder", themename="superhero", size=(1300, 680))
app.grid_columnconfigure(0, weight=1)
app.grid_rowconfigure(1, weight=1)

# ================= INPUT =================
input_card = tb.Labelframe(app, text="Search Keywords", padding=15)
input_card.grid(row=0, column=0, sticky="nsew", padx=10, pady=10)

tb.Label(input_card, text="One search per line (e.g. 'AI developer contact email')").pack(anchor="w")
keywords_input = ScrolledText(input_card, height=7)
keywords_input.pack(fill="both", expand=True)

# ================= OUTPUT =================
output_card = tb.Labelframe(app, text="Live Results", padding=15)
output_card.grid(row=1, column=0, sticky="nsew", padx=10, pady=10)

log = ScrolledText(output_card)
log.pack(fill="both", expand=True)
log.text.config(state="disabled")

# ================= FOOTER =================
footer = tb.Frame(app)
footer.grid(row=2, column=0, sticky="ew", padx=10, pady=5)

start_btn = tb.Button(footer, text="Start", bootstyle="success", width=18,
                      command=lambda: start_scraping())  # start_scraping spawns the worker thread itself
start_btn.pack(side="left", padx=5)

stop_btn = tb.Button(footer, text="Stop", bootstyle="danger", width=15, command=lambda: stop_scraping())
stop_btn.pack(side="left", padx=5)
stop_btn.config(state="disabled")

export_txt = tb.Button(footer, text="Export TXT", width=15, command=lambda: export_file("txt"))
export_txt.pack(side="left", padx=5)

export_csv = tb.Button(footer, text="Export CSV", width=15, command=lambda: export_file("csv"))
export_csv.pack(side="left", padx=5)

export_json = tb.Button(footer, text="Export JSON", width=15, command=lambda: export_file("json"))
export_json.pack(side="left", padx=5)

# ================= LOG =================
def log_line(t):
    # Tkinter widgets are not thread-safe; marshal updates onto the main event loop.
    def _append():
        log.text.config(state="normal")
        log.text.insert("end", t + "\n")
        log.text.see("end")
        log.text.config(state="disabled")
    app.after(0, _append)

# ================= SCRAPER =================
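# Scrapes the public Google results page; Google may throttle or CAPTCHA
# automated requests, so expect empty result lists at times.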
def google_search(query):
    params = {"q": query, "num": 5}
    try:
        r = requests.get(SEARCH_URL, params=params, headers=HEADERS, timeout=10)
        r.raise_for_status()
    except requests.RequestException:
        return []
    soup = BeautifulSoup(r.text, "html.parser")
    return [a["href"] for a in soup.select("a") if a.get("href", "").startswith("http")]

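# Fetch a page and pull out anything matching the e-mail pattern.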
def scrape_page(url):
    try:
        r = requests.get(url, headers=HEADERS, timeout=10)
        return set(EMAIL_REGEX.findall(r.text))
    except requests.RequestException:
        return set()

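# Worker loop: runs off the main thread; every widget update goes through app.after.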
def run_scraper(queries):
    global scrape_completed

    for q in queries:
        if stop_event.is_set():
            return
        log_line(f"🔍 Searching: {q}")

        urls = google_search(q)
        for url in urls:
            if stop_event.is_set():
                return

            emails = scrape_page(url)
            for e in emails:
                if e not in emails_found:
                    emails_found.add(e)
                    sources[e].append(url)
                    log_line(e)

            time.sleep(0.6)  # throttle between page fetches

    scrape_completed = True

    def _finish():
        show_info("Done", f"Found {len(emails_found)} public emails.")
        stop_btn.config(state="disabled")
        start_btn.config(state="normal")
    app.after(0, _finish)  # UI updates must run on the main thread

def start_scraping():
    global scrape_completed
    scrape_completed = False
    stop_event.clear()
    emails_found.clear()
    sources.clear()

    queries = [q.strip() for q in keywords_input.text.get("1.0", "end").splitlines() if q.strip()]
    if not queries:
        show_error("Input Error", "Please enter at least one search query.")
        return

    log.text.config(state="normal")
    log.text.delete("1.0", "end")
    log.text.config(state="disabled")

    stop_btn.config(state="normal")
    start_btn.config(state="disabled")

    threading.Thread(target=run_scraper, args=(queries,), daemon=True).start()

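# Stopping is cooperative: the worker checks stop_event between requests,
# so a request already in flight finishes before the loop exits.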
def stop_scraping():
    stop_event.set()
    log_line("⛔ Stopped by user")
    stop_btn.config(state="disabled")
    start_btn.config(state="normal")

# ================= EXPORT =================
def export_file(fmt):
    if not emails_found or not scrape_completed:
        show_error("Export Error", "Nothing to export.")
        return

    path = filedialog.asksaveasfilename(defaultextension=f".{fmt}")
    if not path:
        return

    if fmt == "txt":
        with open(path, "w", encoding="utf-8") as f:
            for e in sorted(emails_found):
                f.write(e + "\n")

    elif fmt == "csv":
        with open(path, "w", newline="", encoding="utf-8") as f:
            w = csv.writer(f)
            w.writerow(["email", "source"])
            for e, s in sources.items():
                w.writerow([e, ", ".join(s)])

    elif fmt == "json":
        with open(path, "w", encoding="utf-8") as f:
            json.dump(sources, f, indent=2)

    show_info("Exported", "File saved successfully.")

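# ================= RUN =================
# Dependencies: pip install ttkbootstrap requests beautifulsoup4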
app.mainloop()