From 646fa5d3b9804ea33e8e43bbcd68aeae3d18b830 Mon Sep 17 00:00:00 2001
From: Alexandre DO-O ALMEIDA
Date: Sat, 11 Apr 2026 10:54:39 +0200
Subject: [PATCH] feat: guard user interactions during scraping

---
Review notes (text in this section is not part of the commit message):
* Line breaks were restored. Indentation of context lines and the position of
  blank context lines in ScrapingContentScript.ts are reconstructed from the
  hunk line counts and Prettier conventions; verify them against the target
  file before applying.
* findScrapingGuardOverlayElement() now maps the `null` returned by
  querySelector to `undefined`, matching its declared return type.
* Generic type arguments that had been stripped from the test files
  (`Promise<void>`, `Map<string, string>`) were restored.
* ScrapingInteractionGuard.ts gained doc comments, so its line count and the
  diffstat changed; its `index` blob hash below is stale.

 .../ScrapingContentScript.ts                  |  16 ++
 .../ScrapingInteractionGuard.ts               | 160 +++++++++++++++++
 .../__tests__/ScrapingContentScript.test.ts   | 170 ++++++++++++++++++
 .../ScrapingInteractionGuard.test.ts          | 134 ++++++++++++++
 4 files changed, 480 insertions(+)
 create mode 100644 browser-extension/src/shared/scraping-content-script/ScrapingInteractionGuard.ts
 create mode 100644 browser-extension/src/shared/scraping-content-script/__tests__/ScrapingContentScript.test.ts
 create mode 100644 browser-extension/src/shared/scraping-content-script/__tests__/ScrapingInteractionGuard.test.ts

diff --git a/browser-extension/src/shared/scraping-content-script/ScrapingContentScript.ts b/browser-extension/src/shared/scraping-content-script/ScrapingContentScript.ts
index 3952505e..3f1cda6c 100644
--- a/browser-extension/src/shared/scraping-content-script/ScrapingContentScript.ts
+++ b/browser-extension/src/shared/scraping-content-script/ScrapingContentScript.ts
@@ -15,6 +15,7 @@ import {
   ScrapingStatus,
 } from "./ScrapingStatus";
 import { ProgressManager } from "./ProgressManager";
+import { ScrapingInteractionGuard } from "./ScrapingInteractionGuard";
 
 const ABORT_CANCEL_SCRAPING_REASON = Symbol("CANCEL_SCRAPING");
 
@@ -79,12 +80,23 @@ export class ScrapingContentScript {
     }
     console.info("[SCS] - Start scraping");
     this.scrapAbortController = new AbortController();
+    const interactionGuard = new ScrapingInteractionGuard();
+    const startUrl = window.location.href;
+    const throwIfNavigationDetected = () => {
+      if (window.location.href !== startUrl) {
+        throw new Error(
+          `Navigation detected during scraping: ${startUrl} -> ${window.location.href}`,
+        );
+      }
+    };
     try {
       this.scrapingStatus = {
         type: "running",
         progress: 0,
       };
+      interactionGuard.activate();
       const start = Date.now();
+      throwIfNavigationDetected();
       const postSnapshot = await this.scraper.scrapPagePost(
         this.scrapAbortController.signal,
         new ProgressManager((progress) => {
@@ -93,6 +105,7 @@ export class ScrapingContentScript {
             // Probably canceling
             return;
           }
+          throwIfNavigationDetected();
 
           const roundedProgress = Math.round(progress);
           const durationSec = Math.round((Date.now() - start) / 1000);
@@ -105,6 +118,7 @@ export class ScrapingContentScript {
           };
         }),
       );
+      throwIfNavigationDetected();
       console.info("[SCS] - Scraping completed");
 
       // Store post snapshot
@@ -146,6 +160,8 @@ export class ScrapingContentScript {
         this.scrapingStatus = scrapingFailed(errorMessage);
         return this.scrapingStatus;
       }
+    } finally {
+      interactionGuard.deactivate();
     }
   }
 
diff --git a/browser-extension/src/shared/scraping-content-script/ScrapingInteractionGuard.ts b/browser-extension/src/shared/scraping-content-script/ScrapingInteractionGuard.ts
new file mode 100644
index 00000000..fc1a7be2
--- /dev/null
+++ b/browser-extension/src/shared/scraping-content-script/ScrapingInteractionGuard.ts
@@ -0,0 +1,160 @@
+/** Attribute set on the overlay element so it can be located in the DOM. */
+const SCRAPING_GUARD_OVERLAY_DATA_ATTRIBUTE = "data-bth-scraping-guard";
+
+type GuardedEventType =
+  | "auxclick"
+  | "click"
+  | "contextmenu"
+  | "dblclick"
+  | "keydown"
+  | "keypress"
+  | "keyup"
+  | "mousedown"
+  | "mouseup"
+  | "pointerdown"
+  | "pointerup"
+  | "touchend"
+  | "touchmove"
+  | "touchstart"
+  | "wheel";
+
+/** Event types intercepted on `window` while the guard is active. */
+const GUARDED_EVENT_TYPES: GuardedEventType[] = [
+  "auxclick",
+  "click",
+  "contextmenu",
+  "dblclick",
+  "keydown",
+  "keypress",
+  "keyup",
+  "mousedown",
+  "mouseup",
+  "pointerdown",
+  "pointerup",
+  "touchend",
+  "touchmove",
+  "touchstart",
+  "wheel",
+];
+
+/**
+ * Listen in the capture phase, and non-passively so that `preventDefault()`
+ * takes effect on touch and wheel events.
+ */
+const GUARDED_EVENT_LISTENER_OPTIONS: AddEventListenerOptions = {
+  capture: true,
+  passive: false,
+};
+
+/**
+ * Prevent accidental user interaction with the host page during scraping.
+ *
+ * This blocks only trusted user-driven input events (event.isTrusted === true),
+ * so scraper-triggered synthetic events continue to work.
+ */
+export class ScrapingInteractionGuard {
+  private active = false;
+  private overlayElement: HTMLDivElement | null = null;
+
+  /** Ask the browser to confirm before unloading the page while active. */
+  private onBeforeUnload = (event: BeforeUnloadEvent) => {
+    if (!this.active) {
+      return;
+    }
+    event.preventDefault();
+    // Required for legacy browser support.
+    event.returnValue = true;
+  };
+
+  /** Cancel trusted input events; synthetic events are left untouched. */
+  private onPotentiallyUserInput = (event: Event) => {
+    if (!this.active || !event.isTrusted) {
+      return;
+    }
+    event.preventDefault();
+    event.stopImmediatePropagation();
+    event.stopPropagation();
+  };
+
+  /** Add the overlay and the listeners. No-op when already active. */
+  activate(): void {
+    if (this.active) {
+      return;
+    }
+    this.active = true;
+    this.overlayElement = this.createOverlayElement();
+    if (this.overlayElement) {
+      document.documentElement?.appendChild(this.overlayElement);
+    }
+
+    for (const eventType of GUARDED_EVENT_TYPES) {
+      window.addEventListener(
+        eventType,
+        this.onPotentiallyUserInput,
+        GUARDED_EVENT_LISTENER_OPTIONS,
+      );
+    }
+    window.addEventListener("beforeunload", this.onBeforeUnload);
+  }
+
+  /** Remove the overlay and the listeners. No-op when not active. */
+  deactivate(): void {
+    if (!this.active) {
+      return;
+    }
+    this.active = false;
+
+    for (const eventType of GUARDED_EVENT_TYPES) {
+      window.removeEventListener(
+        eventType,
+        this.onPotentiallyUserInput,
+        GUARDED_EVENT_LISTENER_OPTIONS,
+      );
+    }
+    window.removeEventListener("beforeunload", this.onBeforeUnload);
+
+    this.overlayElement?.remove();
+    this.overlayElement = null;
+  }
+
+  /**
+   * Build the transparent, full-viewport overlay element.
+   *
+   * @returns the overlay, or `null` when `document.createElement` is missing.
+   */
+  private createOverlayElement(): HTMLDivElement | null {
+    if (!document?.createElement) {
+      return null;
+    }
+    const overlay = document.createElement("div");
+    overlay.setAttribute(SCRAPING_GUARD_OVERLAY_DATA_ATTRIBUTE, "true");
+    overlay.setAttribute("aria-hidden", "true");
+    Object.assign(overlay.style, {
+      position: "fixed",
+      inset: "0",
+      width: "100vw",
+      height: "100vh",
+      cursor: "progress",
+      background: "transparent",
+      pointerEvents: "auto",
+      touchAction: "none",
+      zIndex: "2147483647",
+    });
+    return overlay;
+  }
+}
+
+/**
+ * Look up the overlay injected by an active guard.
+ *
+ * @returns the overlay element, or `undefined` when none is present.
+ */
+export function findScrapingGuardOverlayElement(): HTMLDivElement | undefined {
+  // `querySelector` yields `null` on a miss; normalise it so the value
+  // matches the declared return type.
+  return (
+    document.querySelector<HTMLDivElement>(
+      `[${SCRAPING_GUARD_OVERLAY_DATA_ATTRIBUTE}]`,
+    ) ?? undefined
+  );
+}
diff --git a/browser-extension/src/shared/scraping-content-script/__tests__/ScrapingContentScript.test.ts b/browser-extension/src/shared/scraping-content-script/__tests__/ScrapingContentScript.test.ts
new file mode 100644
index 00000000..7c8f7f1e
--- /dev/null
+++ b/browser-extension/src/shared/scraping-content-script/__tests__/ScrapingContentScript.test.ts
@@ -0,0 +1,170 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import { SocialNetwork } from "@/shared/model/SocialNetworkName";
+import { ScrapingContentScript } from "../ScrapingContentScript";
+import { SocialNetworkScraper } from "../SocialNetworkScraper";
+import { PostSnapshot } from "@/shared/model/PostSnapshot";
+
+const mocks = vi.hoisted(() => ({
+  insertPostSnapshot: vi.fn<() => Promise<void>>(),
+  guardActivate: vi.fn(),
+  guardDeactivate: vi.fn(),
+}));
+
+vi.mock("@/shared/storage/post-snapshot-storage", () => ({
+  insertPostSnapshot: mocks.insertPostSnapshot,
+}));
+
+vi.mock("../ScrapingInteractionGuard", () => ({
+  ScrapingInteractionGuard: class {
+    activate() {
+      mocks.guardActivate();
+    }
+    deactivate() {
+      mocks.guardDeactivate();
+    }
+  },
+}));
+
+function buildSnapshot(): PostSnapshot {
+  return {
+    id: "00000000-0000-4000-8000-000000000001",
+    postId: "post-id",
+    socialNetwork: SocialNetwork.YouTube,
+    url: "https://www.youtube.com/watch?v=post-id",
+    publishedAt: {
+      type: "absolute",
+      date: "2026-01-01T00:00:00.000Z",
+    },
+    author: {
+      name: "@channel",
+      accountHref: "https://www.youtube.com/@channel",
+    },
+    textContent: "text",
+    comments: [],
+    scrapedAt: "2026-01-02T00:00:00.000Z",
+    title: "title",
+  };
+}
+
+function createScraper(
+  scrapPagePost: SocialNetworkScraper["scrapPagePost"],
+): SocialNetworkScraper {
+  return {
+    getSocialNetworkPageInfo: () =>
+      Promise.resolve({
+        isScrapablePost: true as const,
+        socialNetwork: SocialNetwork.YouTube,
+        postId: "post-id",
+      }),
+    scrapPagePost,
+  };
+}
+
+describe("ScrapingContentScript", () => {
+  const initialWindow = globalThis.window;
+
+  beforeEach(() => {
+    mocks.insertPostSnapshot.mockReset();
+    mocks.insertPostSnapshot.mockResolvedValue();
+    mocks.guardActivate.mockReset();
+    mocks.guardDeactivate.mockReset();
+    globalThis.window = {
+      location: {
+        href: "https://www.youtube.com/watch?v=post-id",
+      },
+    } as unknown as Window & typeof globalThis;
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+    globalThis.window = initialWindow;
+  });
+
+  it("deactivates interaction guard after successful scraping", async () => {
+    const scraper = createScraper((_, progress) => {
+      progress.setProgress(10);
+      progress.setProgress(100);
+      return Promise.resolve(buildSnapshot());
+    });
+    const subject = new ScrapingContentScript(scraper);
+
+    const result = await (
+      subject as unknown as {
+        scrapPost: () => Promise<{ type: string }>;
+      }
+    ).scrapPost();
+
+    expect(result.type).toBe("succeeded");
+    expect(mocks.guardActivate).toHaveBeenCalledOnce();
+    expect(mocks.guardDeactivate).toHaveBeenCalledOnce();
+    expect(mocks.insertPostSnapshot).toHaveBeenCalledOnce();
+  });
+
+  it("deactivates interaction guard when scraping fails", async () => {
+    const scraper = createScraper(() => Promise.reject(new Error("boom")));
+    const subject = new ScrapingContentScript(scraper);
+
+    const result = await (
+      subject as unknown as {
+        scrapPost: () => Promise<{ type: string; errorMessage: string }>;
+      }
+    ).scrapPost();
+
+    expect(result.type).toBe("failed");
+    expect(result.errorMessage).toContain("boom");
+    expect(mocks.guardActivate).toHaveBeenCalledOnce();
+    expect(mocks.guardDeactivate).toHaveBeenCalledOnce();
+  });
+
+  it("deactivates interaction guard when scraping is canceled", async () => {
+    const scraper = createScraper(async (abortSignal) => {
+      await new Promise<void>((resolve) => {
+        abortSignal.addEventListener("abort", () => {
+          resolve();
+        });
+      });
+      abortSignal.throwIfAborted();
+      return buildSnapshot();
+    });
+    const subject = new ScrapingContentScript(scraper);
+
+    const scrapPostPromise = (
+      subject as unknown as {
+        scrapPost: () => Promise<{ type: string }>;
+      }
+    ).scrapPost();
+    await Promise.resolve();
+    (
+      subject as unknown as {
+        cancelScraping: () => void;
+      }
+    ).cancelScraping();
+
+    const result = await scrapPostPromise;
+    expect(result.type).toBe("canceled");
+    expect(mocks.guardActivate).toHaveBeenCalledOnce();
+    expect(mocks.guardDeactivate).toHaveBeenCalledOnce();
+  });
+
+  it("fails scraping when url changes during progress callback", async () => {
+    const scraper = createScraper((_, progress) => {
+      window.location.href = "https://www.youtube.com/watch?v=other-post";
+      progress.setProgress(42);
+      return Promise.resolve(buildSnapshot());
+    });
+    const subject = new ScrapingContentScript(scraper);
+
+    const result = await (
+      subject as unknown as {
+        scrapPost: () => Promise<{ type: string; errorMessage: string }>;
+      }
+    ).scrapPost();
+
+    expect(result.type).toBe("failed");
+    expect(result.errorMessage).toContain(
+      "Navigation detected during scraping",
+    );
+    expect(mocks.guardActivate).toHaveBeenCalledOnce();
+    expect(mocks.guardDeactivate).toHaveBeenCalledOnce();
+  });
+});
diff --git a/browser-extension/src/shared/scraping-content-script/__tests__/ScrapingInteractionGuard.test.ts b/browser-extension/src/shared/scraping-content-script/__tests__/ScrapingInteractionGuard.test.ts
new file mode 100644
index 00000000..8be11a83
--- /dev/null
+++ b/browser-extension/src/shared/scraping-content-script/__tests__/ScrapingInteractionGuard.test.ts
@@ -0,0 +1,134 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import {
+  findScrapingGuardOverlayElement,
+  ScrapingInteractionGuard,
+} from "../ScrapingInteractionGuard";
+
+describe("ScrapingInteractionGuard", () => {
+  const initialWindow = globalThis.window;
+  const initialDocument = globalThis.document;
+  let currentOverlay: { remove: () => void } | undefined;
+
+  beforeEach(() => {
+    currentOverlay = undefined;
+    const documentElement = {
+      appendChild: vi.fn((element: { remove: () => void }) => {
+        currentOverlay = element;
+      }),
+    };
+    const createElement = vi.fn(() => {
+      const style = {};
+      const attributes = new Map<string, string>();
+      return {
+        style,
+        setAttribute: (key: string, value: string) => {
+          attributes.set(key, value);
+        },
+        remove: () => {
+          currentOverlay = undefined;
+        },
+      };
+    });
+    const querySelector = vi.fn(() => currentOverlay);
+
+    globalThis.window = {
+      addEventListener: vi.fn(),
+      removeEventListener: vi.fn(),
+    } as unknown as Window & typeof globalThis;
+    globalThis.document = {
+      documentElement,
+      createElement,
+      querySelector,
+    } as unknown as Document;
+  });
+
+  afterEach(() => {
+    globalThis.window = initialWindow;
+    globalThis.document = initialDocument;
+  });
+
+  it("activate should inject an overlay and deactivate should remove it", () => {
+    const guard = new ScrapingInteractionGuard();
+
+    expect(findScrapingGuardOverlayElement()).toBeUndefined();
+    guard.activate();
+    expect(findScrapingGuardOverlayElement()).toBeDefined();
+    guard.deactivate();
+    expect(findScrapingGuardOverlayElement()).toBeUndefined();
+  });
+
+  it("should block trusted user events", () => {
+    const guard = new ScrapingInteractionGuard();
+    guard.activate();
+    const preventDefault = vi.fn();
+    const stopImmediatePropagation = vi.fn();
+    const stopPropagation = vi.fn();
+    const event = {
+      isTrusted: true,
+      preventDefault,
+      stopImmediatePropagation,
+      stopPropagation,
+    } as unknown as Event;
+
+    // Accessing private method intentionally to isolate the event-filter logic.
+    (
+      guard as unknown as { onPotentiallyUserInput: (event: Event) => void }
+    ).onPotentiallyUserInput(event);
+
+    expect(preventDefault).toHaveBeenCalledOnce();
+    expect(stopImmediatePropagation).toHaveBeenCalledOnce();
+    expect(stopPropagation).toHaveBeenCalledOnce();
+  });
+
+  it("should not block synthetic events", () => {
+    const guard = new ScrapingInteractionGuard();
+    guard.activate();
+    const preventDefault = vi.fn();
+    const stopImmediatePropagation = vi.fn();
+    const stopPropagation = vi.fn();
+    const event = {
+      isTrusted: false,
+      preventDefault,
+      stopImmediatePropagation,
+      stopPropagation,
+    } as unknown as Event;
+
+    // Accessing private method intentionally to isolate the event-filter logic.
+    (
+      guard as unknown as { onPotentiallyUserInput: (event: Event) => void }
+    ).onPotentiallyUserInput(event);
+
+    expect(preventDefault).not.toHaveBeenCalled();
+    expect(stopImmediatePropagation).not.toHaveBeenCalled();
+    expect(stopPropagation).not.toHaveBeenCalled();
+  });
+
+  it("should activate and deactivate beforeunload confirmation", () => {
+    const guard = new ScrapingInteractionGuard();
+    const activePreventDefault = vi.fn();
+
+    const activeEvent = {
+      preventDefault: activePreventDefault,
+      returnValue: undefined,
+    } as unknown as BeforeUnloadEvent;
+
+    guard.activate();
+    (
+      guard as unknown as { onBeforeUnload: (event: BeforeUnloadEvent) => void }
+    ).onBeforeUnload(activeEvent);
+    expect(activePreventDefault).toHaveBeenCalledOnce();
+    expect(activeEvent.returnValue).toBe(true);
+
+    const inactivePreventDefault = vi.fn();
+    const inactiveEvent = {
+      preventDefault: inactivePreventDefault,
+      returnValue: undefined,
+    } as unknown as BeforeUnloadEvent;
+    guard.deactivate();
+    (
+      guard as unknown as { onBeforeUnload: (event: BeforeUnloadEvent) => void }
+    ).onBeforeUnload(inactiveEvent);
+    expect(inactivePreventDefault).not.toHaveBeenCalled();
+    expect(inactiveEvent.returnValue).toBeUndefined();
+  });
+});