From 12817e8e7617df0c06142c685c697b04e257dc30 Mon Sep 17 00:00:00 2001 From: Anne <125669618+agddrv@users.noreply.github.com> Date: Mon, 4 May 2026 15:57:49 +0200 Subject: [PATCH 1/5] =?UTF-8?q?Graphs=20plotly=20donn=C3=A9es=20PGP?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit feat: ajout code python plotly vaccins à partir des données PGP --- pgpxd4g_graphs.py | 143 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 pgpxd4g_graphs.py diff --git a/pgpxd4g_graphs.py b/pgpxd4g_graphs.py new file mode 100644 index 0000000..9f677a1 --- /dev/null +++ b/pgpxd4g_graphs.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +"""PGPxD4G_graphs.ipynb + +Automatically generated by Colab. + +Original file is located at + https://colab.research.google.com/drive/1A3xnBoI-FUp62eFTT0RQPpYPU-TrubZ0 +""" + +import numpy as np +import pandas as pd +import plotly.graph_objects as go +import plotly.express as px +from wordcloud import WordCloud + +DATA_PATH = "/content/PGP x D4G- Exported Vaccine Data.xlsx" # Correct to the actual Excel file + +# ── Load all sheets ──────────────────────────────────────────────── +xl = pd.ExcelFile(DATA_PATH) + +df_trend = pd.read_excel(xl, "Trendline") +df_trend.columns = ["date", "posts"] +df_trend["date"] = pd.to_datetime(df_trend["date"]) + +df_lang = pd.read_excel(xl, "Language") +df_lang.columns = ["language", "posts", "share"] +df_lang = df_lang.dropna(subset=["language", "posts"]) +df_lang = df_lang[df_lang["posts"].apply(lambda x: str(x).isdigit() or isinstance(x, (int, float)))] +df_lang["posts"] = pd.to_numeric(df_lang["posts"], errors="coerce") +df_lang = df_lang.dropna(subset=["posts"]).head(8) + +df_source = pd.read_excel(xl, "Source") +df_source.columns = ["source", "mentions"] +df_source["mentions"] = pd.to_numeric(df_source["mentions"], errors="coerce").fillna(0) +df_source = df_source.dropna(subset=["source", "mentions"]) + +df_themes_raw = pd.read_excel(xl, "Crosstab- themes across all vac") + +# Corrected processing for df_th +df_th = df_themes_raw.drop(columns=['Topics \\ Themes']).T.reset_index() +df_th.columns = ['theme', 'mentions'] +df_th['mentions'] = pd.to_numeric(df_th['mentions'], errors='coerce') +df_th = df_th.dropna(subset=["mentions"]) +df_th = df_th.sort_values("mentions", ascending=False) + +PALETTE = ["#1E5AA8", "#2F6FB6", "#5C8FD6", "#E6EEF8"] +BG = "#FFFFFF" +PAPER = "#FFFFFF" +FONT_COLOR = "#333333" +GRID_COLOR = "#E0E0E0" + + +def base_layout(title): + return dict( + title=dict(text=title, font=dict(size=16, color=FONT_COLOR), x=0.02), + paper_bgcolor=PAPER, + plot_bgcolor=BG, + font=dict(family="Inter, sans-serif", color=FONT_COLOR), + margin=dict(l=50, r=30, t=60, b=50), + ) + + +# ── 1. Daily post volume trendline ───────────────────────────── +fig_trend = go.Figure() +fig_trend.add_trace(go.Scatter( + x=df_trend["date"], y=df_trend["posts"], + mode="lines+markers", + line=dict(color=PALETTE[0], width=2), + marker=dict(size=5, color=PALETTE[0]), + fill="tozeroy", + fillcolor="rgba(30,90,168,0.15)", + name="Posts", + hovertemplate="%{x|%b %d}
%{y:,} posts", +)) +fig_trend.update_layout( + **base_layout("Daily Post Volume — March–April 2026"), + xaxis=dict(showgrid=False, tickformat="%b %d", tickcolor=GRID_COLOR), + yaxis=dict(showgrid=True, gridcolor=GRID_COLOR, tickformat=","), +) + +# ── 2. Language donut ─────────────────────────────────── +fig_lang = go.Figure(go.Pie( + labels=df_lang["language"], + values=df_lang["posts"], + hole=0.55, + marker=dict(colors=PALETTE), + textinfo="label+percent", + hovertemplate="%{label}
%{value:,} posts (%{percent})", +)) +fig_lang.update_layout( + **base_layout("Post Distribution by Language"), + showlegend=False, +) + +# Create a color list for df_source that matches its length +source_colors = [] +for i in range(len(df_source)): + source_colors.append(PALETTE[i % len(PALETTE)]) + +# ── 3. Source donut chart ─────────────────────────────────── +fig_source = go.Figure(go.Pie( + labels=df_source["source"].tolist(), + values=df_source["mentions"].tolist(), + hole=0.55, + marker=dict(colors=source_colors), # Use the explicit color list + textinfo="label", # Simplified from "label+percent" + hovertemplate="%{label}
%{value:,} mentions", # Simplified +)) +fig_source.update_layout( + **base_layout("Mentions by Platform"), + showlegend=False, +) + +# ── 4. Anti-vaccine themes treemap ───────────────────── +# Define a colorscale using the PALETTE in reverse order to map higher mentions to darker blues +gradient_colors = [[0, PALETTE[3]], [0.33, PALETTE[2]], [0.66, PALETTE[1]], [1, PALETTE[0]]] +fig_themes = go.Figure(go.Treemap( + labels=df_th["theme"], + parents=[""] * len(df_th), + values=df_th["mentions"], + # Assign the 'mentions' column to marker.colors for the gradient effect + marker=dict(colors=df_th["mentions"], colorscale=gradient_colors, showscale=False), + hovertemplate="%{label}
%{value:,} mentions (%{percent parent})", + textinfo="label+percent parent" +)) +fig_themes.update_layout(**base_layout("Anti-vaccine Themes — All Vaccines")) +fig_themes.update_layout(margin=dict(l=10, r=10, t=60, b=10)) + +# ── Preview all figures ──────────────────────────────── +if __name__ == "__main__": + for name, fig in [ + ("Trendline", fig_trend), + ("Language", fig_lang), + ("Source", fig_source), + ("Themes Treemap", fig_themes) + ]: + print(f"Attempting to display: {name}") + try: + fig.show() + print(f"Opened: {name}") + except Exception as e: + print(f"Error opening {name}: {e}") + print(f"Skipping {name} due to error.") \ No newline at end of file From 3aa5abcab39c36bb450a9701a40d01ecf6f85279 Mon Sep 17 00:00:00 2001 From: Anne <125669618+agddrv@users.noreply.github.com> Date: Mon, 4 May 2026 16:26:45 +0200 Subject: [PATCH 2/5] =?UTF-8?q?Plotly=20donn=C3=A9es=20PGP?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ajout du code python avec les graphs plotly à partir des données PGP --- eu_fact_force/dash-app/pgpxd4g_graphs.py | 143 +++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 eu_fact_force/dash-app/pgpxd4g_graphs.py diff --git a/eu_fact_force/dash-app/pgpxd4g_graphs.py b/eu_fact_force/dash-app/pgpxd4g_graphs.py new file mode 100644 index 0000000..9f677a1 --- /dev/null +++ b/eu_fact_force/dash-app/pgpxd4g_graphs.py @@ -0,0 +1,143 @@ +# -*- coding: utf-8 -*- +"""PGPxD4G_graphs.ipynb + +Automatically generated by Colab. + +Original file is located at + https://colab.research.google.com/drive/1A3xnBoI-FUp62eFTT0RQPpYPU-TrubZ0 +""" + +import numpy as np +import pandas as pd +import plotly.graph_objects as go +import plotly.express as px +from wordcloud import WordCloud + +DATA_PATH = "/content/PGP x D4G- Exported Vaccine Data.xlsx" # Correct to the actual Excel file + +# ── Load all sheets ──────────────────────────────────────────────── +xl = pd.ExcelFile(DATA_PATH) + +df_trend = pd.read_excel(xl, "Trendline") +df_trend.columns = ["date", "posts"] +df_trend["date"] = pd.to_datetime(df_trend["date"]) + +df_lang = pd.read_excel(xl, "Language") +df_lang.columns = ["language", "posts", "share"] +df_lang = df_lang.dropna(subset=["language", "posts"]) +df_lang = df_lang[df_lang["posts"].apply(lambda x: str(x).isdigit() or isinstance(x, (int, float)))] +df_lang["posts"] = pd.to_numeric(df_lang["posts"], errors="coerce") +df_lang = df_lang.dropna(subset=["posts"]).head(8) + +df_source = pd.read_excel(xl, "Source") +df_source.columns = ["source", "mentions"] +df_source["mentions"] = pd.to_numeric(df_source["mentions"], errors="coerce").fillna(0) +df_source = df_source.dropna(subset=["source", "mentions"]) + +df_themes_raw = pd.read_excel(xl, "Crosstab- themes across all vac") + +# Corrected processing for df_th +df_th = df_themes_raw.drop(columns=['Topics \\ Themes']).T.reset_index() +df_th.columns = ['theme', 'mentions'] +df_th['mentions'] = pd.to_numeric(df_th['mentions'], errors='coerce') +df_th = df_th.dropna(subset=["mentions"]) +df_th = df_th.sort_values("mentions", ascending=False) + +PALETTE = ["#1E5AA8", "#2F6FB6", "#5C8FD6", "#E6EEF8"] +BG = "#FFFFFF" +PAPER = "#FFFFFF" +FONT_COLOR = "#333333" +GRID_COLOR = "#E0E0E0" + + +def base_layout(title): + return dict( + title=dict(text=title, font=dict(size=16, color=FONT_COLOR), x=0.02), + paper_bgcolor=PAPER, + plot_bgcolor=BG, + font=dict(family="Inter, sans-serif", color=FONT_COLOR), + margin=dict(l=50, r=30, t=60, b=50), + ) + + +# ── 1. Daily post volume trendline ───────────────────────────── +fig_trend = go.Figure() +fig_trend.add_trace(go.Scatter( + x=df_trend["date"], y=df_trend["posts"], + mode="lines+markers", + line=dict(color=PALETTE[0], width=2), + marker=dict(size=5, color=PALETTE[0]), + fill="tozeroy", + fillcolor="rgba(30,90,168,0.15)", + name="Posts", + hovertemplate="%{x|%b %d}
%{y:,} posts", +)) +fig_trend.update_layout( + **base_layout("Daily Post Volume — March–April 2026"), + xaxis=dict(showgrid=False, tickformat="%b %d", tickcolor=GRID_COLOR), + yaxis=dict(showgrid=True, gridcolor=GRID_COLOR, tickformat=","), +) + +# ── 2. Language donut ─────────────────────────────────── +fig_lang = go.Figure(go.Pie( + labels=df_lang["language"], + values=df_lang["posts"], + hole=0.55, + marker=dict(colors=PALETTE), + textinfo="label+percent", + hovertemplate="%{label}
%{value:,} posts (%{percent})", +)) +fig_lang.update_layout( + **base_layout("Post Distribution by Language"), + showlegend=False, +) + +# Create a color list for df_source that matches its length +source_colors = [] +for i in range(len(df_source)): + source_colors.append(PALETTE[i % len(PALETTE)]) + +# ── 3. Source donut chart ─────────────────────────────────── +fig_source = go.Figure(go.Pie( + labels=df_source["source"].tolist(), + values=df_source["mentions"].tolist(), + hole=0.55, + marker=dict(colors=source_colors), # Use the explicit color list + textinfo="label", # Simplified from "label+percent" + hovertemplate="%{label}
%{value:,} mentions", # Simplified +)) +fig_source.update_layout( + **base_layout("Mentions by Platform"), + showlegend=False, +) + +# ── 4. Anti-vaccine themes treemap ───────────────────── +# Define a colorscale using the PALETTE in reverse order to map higher mentions to darker blues +gradient_colors = [[0, PALETTE[3]], [0.33, PALETTE[2]], [0.66, PALETTE[1]], [1, PALETTE[0]]] +fig_themes = go.Figure(go.Treemap( + labels=df_th["theme"], + parents=[""] * len(df_th), + values=df_th["mentions"], + # Assign the 'mentions' column to marker.colors for the gradient effect + marker=dict(colors=df_th["mentions"], colorscale=gradient_colors, showscale=False), + hovertemplate="%{label}
%{value:,} mentions (%{percent parent})", + textinfo="label+percent parent" +)) +fig_themes.update_layout(**base_layout("Anti-vaccine Themes — All Vaccines")) +fig_themes.update_layout(margin=dict(l=10, r=10, t=60, b=10)) + +# ── Preview all figures ──────────────────────────────── +if __name__ == "__main__": + for name, fig in [ + ("Trendline", fig_trend), + ("Language", fig_lang), + ("Source", fig_source), + ("Themes Treemap", fig_themes) + ]: + print(f"Attempting to display: {name}") + try: + fig.show() + print(f"Opened: {name}") + except Exception as e: + print(f"Error opening {name}: {e}") + print(f"Skipping {name} due to error.") \ No newline at end of file From eab04b2397bd073fb29e413d32c4fe5a9032dc9b Mon Sep 17 00:00:00 2001 From: Anne <125669618+agddrv@users.noreply.github.com> Date: Mon, 4 May 2026 16:28:18 +0200 Subject: [PATCH 3/5] Delete pgpxd4g_graphs.py File deleted because added in another folder --- pgpxd4g_graphs.py | 143 ---------------------------------------------- 1 file changed, 143 deletions(-) delete mode 100644 pgpxd4g_graphs.py diff --git a/pgpxd4g_graphs.py b/pgpxd4g_graphs.py deleted file mode 100644 index 9f677a1..0000000 --- a/pgpxd4g_graphs.py +++ /dev/null @@ -1,143 +0,0 @@ -# -*- coding: utf-8 -*- -"""PGPxD4G_graphs.ipynb - -Automatically generated by Colab. - -Original file is located at - https://colab.research.google.com/drive/1A3xnBoI-FUp62eFTT0RQPpYPU-TrubZ0 -""" - -import numpy as np -import pandas as pd -import plotly.graph_objects as go -import plotly.express as px -from wordcloud import WordCloud - -DATA_PATH = "/content/PGP x D4G- Exported Vaccine Data.xlsx" # Correct to the actual Excel file - -# ── Load all sheets ──────────────────────────────────────────────── -xl = pd.ExcelFile(DATA_PATH) - -df_trend = pd.read_excel(xl, "Trendline") -df_trend.columns = ["date", "posts"] -df_trend["date"] = pd.to_datetime(df_trend["date"]) - -df_lang = pd.read_excel(xl, "Language") -df_lang.columns = ["language", "posts", "share"] -df_lang = df_lang.dropna(subset=["language", "posts"]) -df_lang = df_lang[df_lang["posts"].apply(lambda x: str(x).isdigit() or isinstance(x, (int, float)))] -df_lang["posts"] = pd.to_numeric(df_lang["posts"], errors="coerce") -df_lang = df_lang.dropna(subset=["posts"]).head(8) - -df_source = pd.read_excel(xl, "Source") -df_source.columns = ["source", "mentions"] -df_source["mentions"] = pd.to_numeric(df_source["mentions"], errors="coerce").fillna(0) -df_source = df_source.dropna(subset=["source", "mentions"]) - -df_themes_raw = pd.read_excel(xl, "Crosstab- themes across all vac") - -# Corrected processing for df_th -df_th = df_themes_raw.drop(columns=['Topics \\ Themes']).T.reset_index() -df_th.columns = ['theme', 'mentions'] -df_th['mentions'] = pd.to_numeric(df_th['mentions'], errors='coerce') -df_th = df_th.dropna(subset=["mentions"]) -df_th = df_th.sort_values("mentions", ascending=False) - -PALETTE = ["#1E5AA8", "#2F6FB6", "#5C8FD6", "#E6EEF8"] -BG = "#FFFFFF" -PAPER = "#FFFFFF" -FONT_COLOR = "#333333" -GRID_COLOR = "#E0E0E0" - - -def base_layout(title): - return dict( - title=dict(text=title, font=dict(size=16, color=FONT_COLOR), x=0.02), - paper_bgcolor=PAPER, - plot_bgcolor=BG, - font=dict(family="Inter, sans-serif", color=FONT_COLOR), - margin=dict(l=50, r=30, t=60, b=50), - ) - - -# ── 1. Daily post volume trendline ───────────────────────────── -fig_trend = go.Figure() -fig_trend.add_trace(go.Scatter( - x=df_trend["date"], y=df_trend["posts"], - mode="lines+markers", - line=dict(color=PALETTE[0], width=2), - marker=dict(size=5, color=PALETTE[0]), - fill="tozeroy", - fillcolor="rgba(30,90,168,0.15)", - name="Posts", - hovertemplate="%{x|%b %d}
%{y:,} posts", -)) -fig_trend.update_layout( - **base_layout("Daily Post Volume — March–April 2026"), - xaxis=dict(showgrid=False, tickformat="%b %d", tickcolor=GRID_COLOR), - yaxis=dict(showgrid=True, gridcolor=GRID_COLOR, tickformat=","), -) - -# ── 2. Language donut ─────────────────────────────────── -fig_lang = go.Figure(go.Pie( - labels=df_lang["language"], - values=df_lang["posts"], - hole=0.55, - marker=dict(colors=PALETTE), - textinfo="label+percent", - hovertemplate="%{label}
%{value:,} posts (%{percent})", -)) -fig_lang.update_layout( - **base_layout("Post Distribution by Language"), - showlegend=False, -) - -# Create a color list for df_source that matches its length -source_colors = [] -for i in range(len(df_source)): - source_colors.append(PALETTE[i % len(PALETTE)]) - -# ── 3. Source donut chart ─────────────────────────────────── -fig_source = go.Figure(go.Pie( - labels=df_source["source"].tolist(), - values=df_source["mentions"].tolist(), - hole=0.55, - marker=dict(colors=source_colors), # Use the explicit color list - textinfo="label", # Simplified from "label+percent" - hovertemplate="%{label}
%{value:,} mentions", # Simplified -)) -fig_source.update_layout( - **base_layout("Mentions by Platform"), - showlegend=False, -) - -# ── 4. Anti-vaccine themes treemap ───────────────────── -# Define a colorscale using the PALETTE in reverse order to map higher mentions to darker blues -gradient_colors = [[0, PALETTE[3]], [0.33, PALETTE[2]], [0.66, PALETTE[1]], [1, PALETTE[0]]] -fig_themes = go.Figure(go.Treemap( - labels=df_th["theme"], - parents=[""] * len(df_th), - values=df_th["mentions"], - # Assign the 'mentions' column to marker.colors for the gradient effect - marker=dict(colors=df_th["mentions"], colorscale=gradient_colors, showscale=False), - hovertemplate="%{label}
%{value:,} mentions (%{percent parent})", - textinfo="label+percent parent" -)) -fig_themes.update_layout(**base_layout("Anti-vaccine Themes — All Vaccines")) -fig_themes.update_layout(margin=dict(l=10, r=10, t=60, b=10)) - -# ── Preview all figures ──────────────────────────────── -if __name__ == "__main__": - for name, fig in [ - ("Trendline", fig_trend), - ("Language", fig_lang), - ("Source", fig_source), - ("Themes Treemap", fig_themes) - ]: - print(f"Attempting to display: {name}") - try: - fig.show() - print(f"Opened: {name}") - except Exception as e: - print(f"Error opening {name}: {e}") - print(f"Skipping {name} due to error.") \ No newline at end of file From 81d12754829219656a741ce423cea4d447bb8e3e Mon Sep 17 00:00:00 2001 From: Anne <125669618+agddrv@users.noreply.github.com> Date: Mon, 4 May 2026 16:40:05 +0200 Subject: [PATCH 4/5] Update pgpxd4g_graphs.py Modified the file so we can create the pull request --- eu_fact_force/dash-app/pgpxd4g_graphs.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/eu_fact_force/dash-app/pgpxd4g_graphs.py b/eu_fact_force/dash-app/pgpxd4g_graphs.py index 9f677a1..4933673 100644 --- a/eu_fact_force/dash-app/pgpxd4g_graphs.py +++ b/eu_fact_force/dash-app/pgpxd4g_graphs.py @@ -1,19 +1,7 @@ -# -*- coding: utf-8 -*- -"""PGPxD4G_graphs.ipynb - -Automatically generated by Colab. - -Original file is located at - https://colab.research.google.com/drive/1A3xnBoI-FUp62eFTT0RQPpYPU-TrubZ0 -""" - -import numpy as np import pandas as pd import plotly.graph_objects as go -import plotly.express as px -from wordcloud import WordCloud -DATA_PATH = "/content/PGP x D4G- Exported Vaccine Data.xlsx" # Correct to the actual Excel file +DATA_PATH = "data/PGP x D4G- Exported Vaccine Data.xlsx" # Correct to the actual Excel file # ── Load all sheets ──────────────────────────────────────────────── xl = pd.ExcelFile(DATA_PATH) @@ -103,8 +91,8 @@ def base_layout(title): values=df_source["mentions"].tolist(), hole=0.55, marker=dict(colors=source_colors), # Use the explicit color list - textinfo="label", # Simplified from "label+percent" - hovertemplate="%{label}
%{value:,} mentions", # Simplified + textinfo="label+percent", # Reverted to include percentage + hovertemplate="%{label}
%{value:,} mentions (%{percent})", # Reverted to include percentage )) fig_source.update_layout( **base_layout("Mentions by Platform"), @@ -140,4 +128,4 @@ def base_layout(title): print(f"Opened: {name}") except Exception as e: print(f"Error opening {name}: {e}") - print(f"Skipping {name} due to error.") \ No newline at end of file + print(f"Skipping {name} due to error.") From c1855d4f686f02f510cbbd7de25b2a9e4a29d5ec Mon Sep 17 00:00:00 2001 From: mahautdd Date: Mon, 4 May 2026 23:46:48 +0200 Subject: [PATCH 5/5] Create visualisation_trendline.py Visualisation of the trendline data from excel source --- visualisation_trendline.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 visualisation_trendline.py diff --git a/visualisation_trendline.py b/visualisation_trendline.py new file mode 100644 index 0000000..3bd6013 --- /dev/null +++ b/visualisation_trendline.py @@ -0,0 +1,33 @@ +# trendline data +file_path = "PGP x D4G- Exported Vaccine Data.xlsx" # excel source to be modified + +trendline = pd.read_excel(file_path, sheet_name="Trendline") + +# colors creation +primary = "#1E5AA8" +secondary = "#2F6FB6" +background = "#E6EEF8" + +# graph creation +import plotly.graph_objects as go + +fig = go.Figure() + +fig.add_trace(go.Scatter( + x=trendline["Publication Date (GMT+01:00) London"], + y=trendline["Posts"], + name="Posts count evolution", + line=dict(color=primary, width=2), + marker=dict(size=6, color=secondary), + hovertemplate="Date: %{x}
Posts: %{y}" +)) + +fig.update_layout( + title="Posts count evolution in the last month", + plot_bgcolor="white", + paper_bgcolor=background, + xaxis_title="Date", + yaxis_title="Number of posts", +) + +fig.show()