Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import {
ScrapingStatus,
} from "./ScrapingStatus";
import { ProgressManager } from "./ProgressManager";
import { ScrapingInteractionGuard } from "./ScrapingInteractionGuard";

const ABORT_CANCEL_SCRAPING_REASON = Symbol("CANCEL_SCRAPING");

Expand Down Expand Up @@ -79,12 +80,23 @@ export class ScrapingContentScript {
}
console.info("[SCS] - Start scraping");
this.scrapAbortController = new AbortController();
const interactionGuard = new ScrapingInteractionGuard();
const startUrl = window.location.href;
const throwIfNavigationDetected = () => {
if (window.location.href !== startUrl) {
throw new Error(
`Navigation detected during scraping: ${startUrl} -> ${window.location.href}`,
);
}
};
try {
this.scrapingStatus = {
type: "running",
progress: 0,
};
interactionGuard.activate();
const start = Date.now();
throwIfNavigationDetected();
const postSnapshot = await this.scraper.scrapPagePost(
this.scrapAbortController.signal,
new ProgressManager((progress) => {
Expand All @@ -93,6 +105,7 @@ export class ScrapingContentScript {
// Probably canceling
return;
}
throwIfNavigationDetected();
const roundedProgress = Math.round(progress);
const durationSec = Math.round((Date.now() - start) / 1000);

Expand All @@ -105,6 +118,7 @@ export class ScrapingContentScript {
};
}),
);
throwIfNavigationDetected();
console.info("[SCS] - Scraping completed");

// Store post snapshot
Expand Down Expand Up @@ -146,6 +160,8 @@ export class ScrapingContentScript {
this.scrapingStatus = scrapingFailed(errorMessage);
return this.scrapingStatus;
}
} finally {
interactionGuard.deactivate();
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/**
 * Marker attribute placed on the guard overlay element so it can be located
 * again (see findScrapingGuardOverlayElement).
 */
const SCRAPING_GUARD_OVERLAY_DATA_ATTRIBUTE = "data-bth-scraping-guard";

/**
 * Every user-input event type suppressed while the guard is active.
 * Declared once with `as const` so the GuardedEventType union below is
 * derived from this list instead of being maintained as a duplicate.
 */
const GUARDED_EVENT_TYPES = [
  "auxclick",
  "click",
  "contextmenu",
  "dblclick",
  "keydown",
  "keypress",
  "keyup",
  "mousedown",
  "mouseup",
  "pointerdown",
  "pointerup",
  "touchend",
  "touchmove",
  "touchstart",
  "wheel",
] as const;

// Exported so callers can name a guarded event type without re-listing them.
export type GuardedEventType = (typeof GUARDED_EVENT_TYPES)[number];

/**
 * Shared add/removeEventListener options:
 * - capture: intercept events before the page's own handlers run.
 * - passive: false so preventDefault() is honored (required for
 *   touchmove/wheel, which default to passive on window).
 */
const GUARDED_EVENT_LISTENER_OPTIONS: AddEventListenerOptions = {
  capture: true,
  passive: false,
};

/**
 * Blocks accidental user interaction with the host page while scraping runs.
 *
 * Only trusted user-generated input (event.isTrusted === true) is suppressed;
 * synthetic events dispatched by the scraper itself pass through untouched.
 * A transparent full-viewport overlay additionally swallows pointer
 * interaction and shows a "progress" cursor, and a beforeunload prompt
 * guards against accidentally leaving the page mid-scrape.
 */
export class ScrapingInteractionGuard {
  private active = false;
  private overlayElement: HTMLDivElement | null = null;

  // Kept as an arrow-function property so the same reference can be passed
  // to both addEventListener and removeEventListener.
  private onBeforeUnload = (event: BeforeUnloadEvent) => {
    if (this.active) {
      event.preventDefault();
      // Required for legacy browser support.
      event.returnValue = true;
    }
  };

  // Suppresses trusted (user-generated) input while the guard is active.
  private onPotentiallyUserInput = (event: Event) => {
    if (!this.active) {
      return;
    }
    if (!event.isTrusted) {
      // Synthetic events (e.g. dispatched by the scraper) go through.
      return;
    }
    event.preventDefault();
    event.stopImmediatePropagation();
    event.stopPropagation();
  };

  /** Installs the overlay and input suppression. Safe to call twice. */
  activate(): void {
    if (this.active) {
      return;
    }
    this.active = true;

    this.overlayElement = this.createOverlayElement();
    if (this.overlayElement !== null) {
      document.documentElement?.appendChild(this.overlayElement);
    }

    GUARDED_EVENT_TYPES.forEach((eventType) => {
      window.addEventListener(
        eventType,
        this.onPotentiallyUserInput,
        GUARDED_EVENT_LISTENER_OPTIONS,
      );
    });
    window.addEventListener("beforeunload", this.onBeforeUnload);
  }

  /** Removes the overlay and input suppression. Safe to call twice. */
  deactivate(): void {
    if (!this.active) {
      return;
    }
    this.active = false;

    GUARDED_EVENT_TYPES.forEach((eventType) => {
      window.removeEventListener(
        eventType,
        this.onPotentiallyUserInput,
        GUARDED_EVENT_LISTENER_OPTIONS,
      );
    });
    window.removeEventListener("beforeunload", this.onBeforeUnload);

    if (this.overlayElement !== null) {
      this.overlayElement.remove();
      this.overlayElement = null;
    }
  }

  /**
   * Builds the transparent, full-viewport, top-most overlay element, or
   * returns null when no usable `document` is available (e.g. stripped-down
   * test environments).
   */
  private createOverlayElement(): HTMLDivElement | null {
    if (!document?.createElement) {
      return null;
    }
    const overlay = document.createElement("div");
    overlay.setAttribute(SCRAPING_GUARD_OVERLAY_DATA_ATTRIBUTE, "true");
    overlay.setAttribute("aria-hidden", "true");
    const style = overlay.style;
    style.position = "fixed";
    style.inset = "0";
    style.width = "100vw";
    style.height = "100vh";
    style.cursor = "progress";
    style.background = "transparent";
    style.pointerEvents = "auto";
    style.touchAction = "none";
    // Maximum 32-bit signed int so the overlay sits above page content.
    style.zIndex = "2147483647";
    return overlay;
  }
}

/**
 * Finds the scraping-guard overlay currently attached to the document.
 *
 * @returns the overlay element, or undefined when the guard is not active.
 */
export function findScrapingGuardOverlayElement(): HTMLDivElement | undefined {
  // querySelector returns null (not undefined) on no match; normalize so the
  // value actually matches the declared return type. The previous `as
  // HTMLDivElement | undefined` cast silently let null escape to callers.
  return (
    document.querySelector<HTMLDivElement>(
      `[${SCRAPING_GUARD_OVERLAY_DATA_ATTRIBUTE}]`,
    ) ?? undefined
  );
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { SocialNetwork } from "@/shared/model/SocialNetworkName";
import { ScrapingContentScript } from "../ScrapingContentScript";
import { SocialNetworkScraper } from "../SocialNetworkScraper";
import { PostSnapshot } from "@/shared/model/PostSnapshot";

// Mock functions shared between the vi.mock factories below and the tests.
// vi.hoisted ensures they exist before the hoisted vi.mock calls execute.
const mocks = vi.hoisted(() => ({
  insertPostSnapshot: vi.fn<() => Promise<void>>(),
  guardActivate: vi.fn(),
  guardDeactivate: vi.fn(),
}));

// Replace snapshot storage so persistence can be observed without real I/O.
vi.mock("@/shared/storage/post-snapshot-storage", () => ({
  insertPostSnapshot: mocks.insertPostSnapshot,
}));

// Stub the interaction guard: these tests only assert that
// activate/deactivate are invoked, not the guard's DOM side effects.
vi.mock("../ScrapingInteractionGuard", () => ({
  ScrapingInteractionGuard: class {
    activate() {
      mocks.guardActivate();
    }
    deactivate() {
      mocks.guardDeactivate();
    }
  },
}));

/** Builds a fixed, fully-populated YouTube post snapshot for assertions. */
function buildSnapshot(): PostSnapshot {
  return {
    id: "00000000-0000-4000-8000-000000000001",
    socialNetwork: SocialNetwork.YouTube,
    postId: "post-id",
    url: "https://www.youtube.com/watch?v=post-id",
    title: "title",
    textContent: "text",
    comments: [],
    author: {
      name: "@channel",
      accountHref: "https://www.youtube.com/@channel",
    },
    publishedAt: { type: "absolute", date: "2026-01-01T00:00:00.000Z" },
    scrapedAt: "2026-01-02T00:00:00.000Z",
  };
}

/**
 * Wraps a scrapPagePost implementation in a scraper whose page info always
 * reports a scrapable YouTube post.
 */
function createScraper(
  scrapPagePost: SocialNetworkScraper["scrapPagePost"],
): SocialNetworkScraper {
  const getSocialNetworkPageInfo = async () => ({
    isScrapablePost: true as const,
    socialNetwork: SocialNetwork.YouTube,
    postId: "post-id",
  });
  return { getSocialNetworkPageInfo, scrapPagePost };
}

describe("ScrapingContentScript", () => {
  // Capture the real window so afterEach can restore it.
  const initialWindow = globalThis.window;

  beforeEach(() => {
    mocks.insertPostSnapshot.mockReset();
    mocks.insertPostSnapshot.mockResolvedValue();
    mocks.guardActivate.mockReset();
    mocks.guardDeactivate.mockReset();
    // Minimal window stub: scrapPost only needs location.href, which it
    // compares before/during scraping to detect navigation.
    globalThis.window = {
      location: {
        href: "https://www.youtube.com/watch?v=post-id",
      },
    } as unknown as Window & typeof globalThis;
  });

  afterEach(() => {
    vi.restoreAllMocks();
    globalThis.window = initialWindow;
  });

  it("deactivates interaction guard after successful scraping", async () => {
    // Scraper reports progress twice, then resolves with a snapshot.
    const scraper = createScraper((_, progress) => {
      progress.setProgress(10);
      progress.setProgress(100);
      return Promise.resolve(buildSnapshot());
    });
    const subject = new ScrapingContentScript(scraper);

    // scrapPost is not public; cast to invoke it directly.
    const result = await (
      subject as unknown as {
        scrapPost: () => Promise<{ type: string }>;
      }
    ).scrapPost();

    expect(result.type).toBe("succeeded");
    expect(mocks.guardActivate).toHaveBeenCalledOnce();
    expect(mocks.guardDeactivate).toHaveBeenCalledOnce();
    expect(mocks.insertPostSnapshot).toHaveBeenCalledOnce();
  });

  it("deactivates interaction guard when scraping fails", async () => {
    const scraper = createScraper(() => Promise.reject(new Error("boom")));
    const subject = new ScrapingContentScript(scraper);

    const result = await (
      subject as unknown as {
        scrapPost: () => Promise<{ type: string; errorMessage: string }>;
      }
    ).scrapPost();

    expect(result.type).toBe("failed");
    expect(result.errorMessage).toContain("boom");
    expect(mocks.guardActivate).toHaveBeenCalledOnce();
    // Guard must be released even on failure (finally path in scrapPost).
    expect(mocks.guardDeactivate).toHaveBeenCalledOnce();
  });

  it("deactivates interaction guard when scraping is canceled", async () => {
    // Scraper blocks until its abort signal fires, then throws via
    // throwIfAborted so scrapPost observes the cancellation.
    const scraper = createScraper(async (abortSignal) => {
      await new Promise<void>((resolve) => {
        abortSignal.addEventListener("abort", () => {
          resolve();
        });
      });
      abortSignal.throwIfAborted();
      return buildSnapshot();
    });
    const subject = new ScrapingContentScript(scraper);

    const scrapPostPromise = (
      subject as unknown as {
        scrapPost: () => Promise<{ type: string }>;
      }
    ).scrapPost();
    // Yield one microtask so the scraper registers its abort listener
    // before cancellation is issued (otherwise the promise never resolves).
    await Promise.resolve();
    (
      subject as unknown as {
        cancelScraping: () => void;
      }
    ).cancelScraping();

    const result = await scrapPostPromise;
    expect(result.type).toBe("canceled");
    expect(mocks.guardActivate).toHaveBeenCalledOnce();
    expect(mocks.guardDeactivate).toHaveBeenCalledOnce();
  });

  it("fails scraping when url changes during progress callback", async () => {
    // Simulate navigation by mutating the stubbed location before reporting
    // progress; scrapPost should detect the URL change and fail.
    const scraper = createScraper((_, progress) => {
      window.location.href = "https://www.youtube.com/watch?v=other-post";
      progress.setProgress(42);
      return Promise.resolve(buildSnapshot());
    });
    const subject = new ScrapingContentScript(scraper);

    const result = await (
      subject as unknown as {
        scrapPost: () => Promise<{ type: string; errorMessage: string }>;
      }
    ).scrapPost();

    expect(result.type).toBe("failed");
    expect(result.errorMessage).toContain(
      "Navigation detected during scraping",
    );
    expect(mocks.guardActivate).toHaveBeenCalledOnce();
    expect(mocks.guardDeactivate).toHaveBeenCalledOnce();
  });
});
Loading
Loading