diff --git a/eu_fact_force/ingestion/data_collection/download_ground_truth.py b/eu_fact_force/ingestion/data_collection/download_ground_truth.py
new file mode 100644
index 0000000..38dc2f2
--- /dev/null
+++ b/eu_fact_force/ingestion/data_collection/download_ground_truth.py
@@ -0,0 +1,239 @@
+"""
+Download PDFs and extract ground truth text for articles in verified_ground_truth.csv.
+
+For each arXiv article:
+  - Downloads the PDF to {output_dir}/pdf/{article_id}.pdf
+  - Downloads the LaTeX source tar, extracts all .tex files, and writes
+    cleaned text to {output_dir}/text/{article_id}.txt
+
+The extracted text is used as the reference ("ground truth") when measuring
+how well the PDF parser reproduces the original content.
+
+Usage:
+    python -m eu_fact_force.ingestion.data_collection.download_ground_truth \\
+        --csv verified_ground_truth.csv \\
+        --output-dir ./verified_ground_truth_data \\
+        --workers 4
+"""
+
+import argparse
+import csv
+import json
+import logging
+import re
+import sys
+import tarfile
+import tempfile
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+_TIMEOUT = 60  # seconds — source tarballs can be large
+
+
+def download_article(article: dict, pdf_dir: Path, text_dir: Path) -> dict:
+    """
+    Download PDF and extract LaTeX text for one article row from the CSV.
+
+    Returns a result dict with keys: article_id and status, plus
+    pdf_path/text_path when available, reason for skips, and error on failure.
+    """
+    article_id = article["article_id"]
+    source = article["source"]
+
+    if source != "arxiv":
+        logger.warning("download.unsupported_source id=%s source=%s", article_id, source)
+        return {"article_id": article_id, "status": "skipped", "reason": f"unsupported source: {source}"}
+
+    safe_id = article_id.replace(":", "_").replace("/", "_")
+    pdf_path = pdf_dir / f"{safe_id}.pdf"
+    text_path = text_dir / f"{safe_id}.txt"
+
+    # Skip if both already present
+    if pdf_path.exists() and text_path.exists():
+        logger.info("download.skip id=%s reason=already_exists", article_id)
+        return {"article_id": article_id, "status": "skipped", "reason": "already_exists",
+                "pdf_path": str(pdf_path), "text_path": str(text_path)}
+
+    pdf_ok = _download_pdf(article["pdf_url"], pdf_path)
+    text_ok = _download_arxiv_latex(article["text_url"], text_path)
+
+    status = "success" if pdf_ok and text_ok else ("partial" if pdf_ok or text_ok else "failed")
+    result = {
+        "article_id": article_id,
+        "status": status,
+        "pdf_path": str(pdf_path) if pdf_ok else None,
+        "text_path": str(text_path) if text_ok else None,
+    }
+    if status != "success":
+        # Record which half failed so the summary can report it.
+        failed_parts = [name for name, ok in (("pdf", pdf_ok), ("latex", text_ok)) if not ok]
+        result["error"] = f"download failed: {', '.join(failed_parts)}"
+    logger.info("download.done id=%s status=%s", article_id, status)
+    return result
+
+
+def download_all(
+    csv_path: str,
+    output_dir: str,
+    workers: int = 4,
+) -> list[dict]:
+    """
+    Download all articles from the ground truth CSV in parallel.
+
+    Writes a download_manifest.json into output_dir summarising results.
+    """
+    pdf_dir = Path(output_dir) / "pdf"
+    text_dir = Path(output_dir) / "text"
+    pdf_dir.mkdir(parents=True, exist_ok=True)
+    text_dir.mkdir(parents=True, exist_ok=True)
+
+    with open(csv_path, encoding="utf-8") as f:
+        articles = list(csv.DictReader(f))
+
+    logger.info("download.start total=%d workers=%d", len(articles), workers)
+
+    results = []
+    with ThreadPoolExecutor(max_workers=workers) as pool:
+        futures = {
+            pool.submit(download_article, a, pdf_dir, text_dir): a["article_id"]
+            for a in articles
+        }
+        for future in as_completed(futures):
+            results.append(future.result())
+
+    successful = [r for r in results if r["status"] in ("success", "partial")]
+    failed = [r for r in results if r["status"] == "failed"]
+
+    manifest = {
+        "csv": csv_path,
+        "total": len(articles),
+        "successful": len(successful),
+        "failed": len(failed),
+        "results": results,
+    }
+    manifest_path = Path(output_dir) / "download_manifest.json"
+    manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
+
+    print(f"\nDownload complete: {len(successful)}/{len(articles)} succeeded (fully or partially)")
+    if failed:
+        print(f"Failed ({len(failed)}):")
+        for r in failed:
+            print(f"  {r['article_id']}: {r.get('error', 'unknown')}")
+    print(f"Manifest: {manifest_path}")
+
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+def _download_pdf(url: str, dest: Path) -> bool:
+    """Download a PDF file. Returns True on success."""
+    if dest.exists():
+        return True
+    try:
+        resp = requests.get(url, timeout=_TIMEOUT)
+        resp.raise_for_status()
+        if not resp.content.startswith(b"%PDF"):
+            logger.warning("download.not_a_pdf url=%s", url)
+            return False
+        dest.write_bytes(resp.content)
+        logger.info("download.pdf_ok url=%s size=%d", url, len(resp.content))
+        return True
+    except Exception as e:
+        logger.warning("download.pdf_failed url=%s error=%s", url, e)
+        return False
+
+
+def _download_arxiv_latex(source_url: str, dest: Path) -> bool:
+    """
+    Download an arXiv source tarball, extract all .tex files, clean and
+    concatenate them, then write plain text to dest. Returns True on success.
+    """
+    if dest.exists():
+        return True
+    try:
+        resp = requests.get(source_url, timeout=_TIMEOUT)
+        resp.raise_for_status()
+    except Exception as e:
+        logger.warning("download.latex_fetch_failed url=%s error=%s", source_url, e)
+        return False
+
+    try:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            tar_path = Path(tmpdir) / "source.tar.gz"
+            tar_path.write_bytes(resp.content)
+
+            try:
+                # "r:*" auto-detects compression; arXiv source archives are
+                # not always gzip'd.
+                with tarfile.open(tar_path, "r:*") as tar:
+                    tar.extractall(tmpdir)
+            except tarfile.TarError:
+                # Some arXiv sources are a bare .tex file, not tar'd
+                text = _clean_latex(resp.content)
+                dest.write_text(text, encoding="utf-8")
+                return bool(text.strip())
+
+            tex_files = sorted(Path(tmpdir).rglob("*.tex"))
+            if not tex_files:
+                logger.warning("download.no_tex_found url=%s", source_url)
+                return False
+
+            # Concatenate all .tex files, sorted by path for a stable order
+            parts = []
+            for tex in tex_files:
+                try:
+                    parts.append(_clean_latex(tex.read_bytes()))
+                except Exception:
+                    pass
+
+            text = "\n\n".join(p for p in parts if p.strip())
+            dest.write_text(text, encoding="utf-8")
+            logger.info("download.latex_ok url=%s chars=%d", source_url, len(text))
+            return bool(text.strip())
+
+    except Exception as e:
+        logger.warning("download.latex_extract_failed url=%s error=%s", source_url, e)
+        return False
+
+
+def _clean_latex(raw: bytes) -> str:
+    """Strip LaTeX markup and return readable plain text."""
+    text = raw.decode("utf-8", errors="ignore")
+    # Remove comments
+    text = re.sub(r"%[^\n]*", "", text)
+    # Unwrap common commands that enclose text: \cmd{content} → content
+    text = re.sub(r"\\(?:textbf|textit|emph|textrm|texttt|text|mbox)\{([^}]*)\}", r"\1", text)
+    # Remove remaining LaTeX commands (with or without braces)
+    text = re.sub(r"\\[a-zA-Z]+\*?\{[^}]*\}", "", text)
+    text = re.sub(r"\\[a-zA-Z]+\*?", " ", text)
+    # Remove leftover braces and math delimiters
+    text = re.sub(r"[{}$]", " ", text)
+    # Collapse whitespace
+    text = re.sub(r"\n{3,}", "\n\n", text)
+    text = re.sub(r"[ \t]+", " ", text)
+    return text.strip()
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s %(levelname)s %(name)s — %(message)s",
+    )
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--csv", required=True, help="Path to verified_ground_truth.csv")
+    parser.add_argument("--output-dir", default="./verified_ground_truth_data")
+    parser.add_argument("--workers", type=int, default=4)
+    args = parser.parse_args()
+
+    results = download_all(args.csv, args.output_dir, args.workers)
+    failed = sum(1 for r in results if r["status"] == "failed")
+    if failed:
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/eu_fact_force/ingestion/data_collection/ground_truth.py b/eu_fact_force/ingestion/data_collection/ground_truth.py
new file mode 100644
index 0000000..86ed7a2
--- /dev/null
+++ b/eu_fact_force/ingestion/data_collection/ground_truth.py
@@ -0,0 +1,204 @@
+"""
+Collect verified ground truth articles from arXiv.
+
+"Verified" means the ground truth text is the official LaTeX source that the
+authors submitted — not extracted from a PDF. This gives clean, artifact-free
+reference text for parser quality evaluation.
+
+Usage:
+    python -m eu_fact_force.ingestion.data_collection.ground_truth \\
+        --output verified_ground_truth.csv \\
+        --vaccine-limit 30 \\
+        --other-limit 30
+"""
+
+import argparse
+import csv
+import logging
+import sys
+import xml.etree.ElementTree as ET
+from dataclasses import dataclass
+from typing import Optional
+
+import requests
+
+logger = logging.getLogger(__name__)
+
+_ARXIV_API = "https://export.arxiv.org/api/query"
+_ATOM_NS = "http://www.w3.org/2005/Atom"
+
+CSV_FIELDS = [
+    "category",
+    "article_id",
+    "title",
+    "source",
+    "ground_truth_format",
+    "pdf_url",
+    "text_url",
+    "verification",
+]
+
+
+@dataclass
+class ArxivGroundTruth:
+    arxiv_id: str
+    title: str
+    category: str  # "vaccine_autism" | "other"
+
+    @property
+    def article_id(self) -> str:
+        return f"arxiv:{self.arxiv_id}"
+
+    @property
+    def pdf_url(self) -> str:
+        return f"https://arxiv.org/pdf/{self.arxiv_id}.pdf"
+
+    @property
+    def text_url(self) -> str:
+        # Official LaTeX source archive
+        return f"https://arxiv.org/src/{self.arxiv_id}"
+
+    def to_row(self) -> dict:
+        return {
+            "category": self.category,
+            "article_id": self.article_id,
+            "title": self.title,
+            "source": "arxiv",
+            "ground_truth_format": "arxiv_latex_source",
+            "pdf_url": self.pdf_url,
+            "text_url": self.text_url,
+            "verification": "Official arXiv LaTeX source (authors' original)",
+        }
+
+
+class ArxivGroundTruthCollector:
+    """Query the arXiv Atom API and return ArxivGroundTruth records."""
+
+    def search(
+        self,
+        query: str,
+        category: str,
+        limit: int,
+    ) -> list[ArxivGroundTruth]:
+        params = {
+            "search_query": query,
+            "max_results": limit * 2,  # fetch extra to account for parse failures
+            "sortBy": "submittedDate",
+            "sortOrder": "descending",
+        }
+        try:
+            resp = requests.get(_ARXIV_API, params=params, timeout=30)
+            resp.raise_for_status()
+        except Exception:
+            logger.exception("arxiv.request_failed query=%s", query[:60])
+            return []
+
+        try:
+            root = ET.fromstring(resp.content)
+        except ET.ParseError:
+            logger.exception("arxiv.response_unparseable query=%s", query[:60])
+            return []
+        articles: list[ArxivGroundTruth] = []
+
+        for entry in root.findall(f"{{{_ATOM_NS}}}entry"):
+            try:
+                arxiv_id = _parse_arxiv_id(entry)
+                title = _parse_title(entry)
+                if arxiv_id and title:
+                    articles.append(ArxivGroundTruth(arxiv_id, title, category))
+                    logger.debug("arxiv.found id=%s title=%s", arxiv_id, title[:60])
+            except Exception as e:
+                logger.debug("arxiv.parse_error error=%s", e)
+
+        result = articles[:limit]
+        logger.info("arxiv.search_done query=%s found=%d", query[:60], len(result))
+        return result
+
+
+def collect(
+    output_csv: str,
+    vaccine_limit: int = 30,
+    other_limit: int = 30,
+) -> list[ArxivGroundTruth]:
+    """
+    Collect verified ground truth articles and write to a CSV.
+
+    Searches for two categories:
+      - vaccine_autism: papers on vaccine safety / autism link
+      - other: general biomedical / ML-in-health papers (as contrast set)
+
+    Returns the list of collected articles.
+    """
+    collector = ArxivGroundTruthCollector()
+
+    # arXiv API queries: field prefixes apply to single terms, so categories
+    # are OR'd individually; the negation operator is ANDNOT.
+    vaccine_query = (
+        '(cat:q-bio.QM OR cat:q-bio.CB OR cat:stat.AP OR cat:cs.CY) AND '
+        '((abs:"vaccine" AND abs:"autism") OR '
+        '(abs:"vaccination" AND abs:"autism spectrum") OR '
+        '(abs:"vaccine safety" AND abs:"autism"))'
+    )
+    other_query = (
+        '(cat:q-bio.QM OR cat:stat.AP) AND '
+        '(abs:"clinical trial" OR abs:"efficacy" OR abs:"treatment") '
+        'ANDNOT abs:vaccine'
+    )
+
+    vaccine_articles = collector.search(vaccine_query, "vaccine_autism", vaccine_limit)
+    other_articles = collector.search(other_query, "other", other_limit)
+
+    all_articles = vaccine_articles + other_articles
+
+    with open(output_csv, "w", newline="", encoding="utf-8") as f:
+        writer = csv.DictWriter(f, fieldnames=CSV_FIELDS)
+        writer.writeheader()
+        for article in all_articles:
+            writer.writerow(article.to_row())
+
+    logger.info(
+        "ground_truth.saved csv=%s vaccine=%d other=%d total=%d",
+        output_csv, len(vaccine_articles), len(other_articles), len(all_articles),
+    )
+    print(f"Saved {len(all_articles)} articles to {output_csv}")
+    print(f"  vaccine_autism : {len(vaccine_articles)}")
+    print(f"  other          : {len(other_articles)}")
+    return all_articles
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _parse_arxiv_id(entry: ET.Element) -> Optional[str]:
+    id_elem = entry.find(f"{{{_ATOM_NS}}}id")
+    if id_elem is None or not id_elem.text:
+        return None
+    # URL form: https://arxiv.org/abs/2301.00001v1
+    return id_elem.text.strip().split("/abs/")[-1]
+
+
+def _parse_title(entry: ET.Element) -> Optional[str]:
+    title_elem = entry.find(f"{{{_ATOM_NS}}}title")
+    if title_elem is None or not title_elem.text:
+        return None
+    return " ".join(title_elem.text.split())  # normalise whitespace
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s %(levelname)s %(name)s — %(message)s",
+    )
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--output", default="verified_ground_truth.csv")
+    parser.add_argument("--vaccine-limit", type=int, default=30)
+    parser.add_argument("--other-limit", type=int, default=30)
+    args = parser.parse_args()
+
+    articles = collect(args.output, args.vaccine_limit, args.other_limit)
+    if not articles:
+        print("No articles collected.", file=sys.stderr)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/verified_ground_truth.csv b/verified_ground_truth.csv
new file mode 100644
index 0000000..1c917b7
--- /dev/null
+++ b/verified_ground_truth.csv
@@ -0,0 +1,33 @@
+category,article_id,title,source,ground_truth_format,pdf_url,text_url,verification
+vaccine_autism,arxiv:1905.12616v3,Defending Against Neural Fake News,arxiv,arxiv_latex_source,https://arxiv.org/pdf/1905.12616v3.pdf,https://arxiv.org/src/1905.12616v3,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:1409.2651v1,Social determinants of content selection in the age of (mis)information,arxiv,arxiv_latex_source,https://arxiv.org/pdf/1409.2651v1.pdf,https://arxiv.org/src/1409.2651v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2105.05134v2,COVID-19 Vaccine Hesitancy on Social Media: Building a Public Twitter Dataset of Anti-vaccine Content Vaccine Misinformation and Conspiracies,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2105.05134v2.pdf,https://arxiv.org/src/2105.05134v2,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2106.08423v1,COVID-19 Vaccines: Characterizing Misinformation Campaigns and Vaccine Hesitancy on Twitter,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2106.08423v1.pdf,https://arxiv.org/src/2106.08423v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2311.18195v1,COVID-19 Vaccine Misinformation in Middle Income Countries,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2311.18195v1.pdf,https://arxiv.org/src/2311.18195v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2304.06858v1,Vax-Culture: A Dataset for Studying Vaccine Discourse on Twitter,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2304.06858v1.pdf,https://arxiv.org/src/2304.06858v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2402.01783v1,Hierarchical Multi-Label Classification of Online Vaccine Concerns,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2402.01783v1.pdf,https://arxiv.org/src/2402.01783v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2404.01669v1,How COVID-19 has Impacted the Anti-Vaccine Discourse: A Large-Scale Twitter Study Spanning Pre-COVID and Post-COVID Era,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2404.01669v1.pdf,https://arxiv.org/src/2404.01669v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2402.11351v2,Modeling the amplification of epidemic spread by individuals exposed to misinformation on social media,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2402.11351v2.pdf,https://arxiv.org/src/2402.11351v2,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2503.04572v1,Social Imitation Dynamics of Vaccination Driven by Vaccine Effectiveness and Beliefs,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2503.04572v1.pdf,https://arxiv.org/src/2503.04572v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2603.05626v1,The Impact of Neglecting Vaccine Unwillingness in Epidemiology Models,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2603.05626v1.pdf,https://arxiv.org/src/2603.05626v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2411.11813v1,Heterogeneous population and its resilience to misinformation in vaccination uptake: A dual ODE and network approach,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2411.11813v1.pdf,https://arxiv.org/src/2411.11813v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2410.18670v1,Health Misinformation in Social Networks: A Survey of IT Approaches,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2410.18670v1.pdf,https://arxiv.org/src/2410.18670v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2403.09349v1,From Pro Anti to Informative and Hesitant: An Infoveillance Study of COVID-19 Vaccines and Vaccination Discourse on Twitter,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2403.09349v1.pdf,https://arxiv.org/src/2403.09349v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2407.03190v2,Cutting Through the Noise to Motivate People: A Comprehensive Analysis of COVID-19 Social Media Posts De/motivating Vaccination,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2407.03190v2.pdf,https://arxiv.org/src/2407.03190v2,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2510.16359v1,Utilising Large Language Models for Generating Effective Counter Arguments to Anti-Vaccine Tweets,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2510.16359v1.pdf,https://arxiv.org/src/2510.16359v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2402.18335v1,Detecting Anti-vaccine Content on Twitter using Multiple Message-Based Network Representations,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2402.18335v1.pdf,https://arxiv.org/src/2402.18335v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2208.04491v1,Improving Vaccine Stance Detection by Combining Online and Offline Data,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2208.04491v1.pdf,https://arxiv.org/src/2208.04491v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2312.10626v1,Decoding Concerns: Multi-label Classification of Vaccine Sentiments in Social Media,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2312.10626v1.pdf,https://arxiv.org/src/2312.10626v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2110.11333v1,Detecting Anti-Vaccine Users on Twitter,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2110.11333v1.pdf,https://arxiv.org/src/2110.11333v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2106.04081v1,Insight from NLP Analysis: COVID-19 Vaccines Sentiments on Social Media,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2106.04081v1.pdf,https://arxiv.org/src/2106.04081v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2601.18377v1,Socioeconomic Determinants of the COVID-19 Infodemics,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2601.18377v1.pdf,https://arxiv.org/src/2601.18377v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2303.06433v1,Reinforcement Learning-based Counter-Misinformation Response Generation: A Case Study of COVID-19 Vaccine Misinformation,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2303.06433v1.pdf,https://arxiv.org/src/2303.06433v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2211.11495v1,Global misinformation spillovers in the online vaccination debate before and during COVID-19,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2211.11495v1.pdf,https://arxiv.org/src/2211.11495v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2208.01509v1,Characterizing Vaccination Movements on YouTube in the United States and Brazil,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2208.01509v1.pdf,https://arxiv.org/src/2208.01509v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2309.08503v1,HealthFC: Verifying Health Claims with Evidence-Based Medical Fact-Checking,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2309.08503v1.pdf,https://arxiv.org/src/2309.08503v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2010.09926v1,Explainable Automated Fact-Checking for Public Health Claims,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2010.09926v1.pdf,https://arxiv.org/src/2010.09926v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2310.19834v2,AMIR: An Automated MisInformation Rebuttal System — A COVID-19 Vaccination Datasets based Exposition,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2310.19834v2.pdf,https://arxiv.org/src/2310.19834v2,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2602.15476v1,How to Detect Information Voids Using Longitudinal Data from Social Media and Web Searches,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2602.15476v1.pdf,https://arxiv.org/src/2602.15476v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2311.11435v1,Unveiling Public Perceptions: Machine Learning-Based Sentiment Analysis of COVID-19 Vaccines in India,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2311.11435v1.pdf,https://arxiv.org/src/2311.11435v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2306.13797v1,An Analysis of Vaccine-Related Sentiments from Development to Deployment of COVID-19 Vaccines,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2306.13797v1.pdf,https://arxiv.org/src/2306.13797v1,Official arXiv LaTeX source (authors' original)
+vaccine_autism,arxiv:2107.10648v1,DEAP-FAKED: Knowledge Graph based Approach for Fake News Detection,arxiv,arxiv_latex_source,https://arxiv.org/pdf/2107.10648v1.pdf,https://arxiv.org/src/2107.10648v1,Official arXiv LaTeX source (authors' original)
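
Note that every committed row is in the vaccine_autism category, even though collect also queries an "other" contrast set. A small read-back check, not part of this change, that reports per-category counts and flags duplicate article_ids:

    import csv
    from collections import Counter

    with open("verified_ground_truth.csv", encoding="utf-8") as f:
        rows = list(csv.DictReader(f))

    categories = Counter(row["category"] for row in rows)
    duplicates = [aid for aid, n in Counter(row["article_id"] for row in rows).items() if n > 1]

    print(f"{len(rows)} rows: {dict(categories)}")
    if duplicates:
        print(f"duplicate article_ids: {duplicates}")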