From 9ab882640573d969dfddf4a88804ee206c4bbc95 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sun, 23 Mar 2025 14:28:47 +0100 Subject: [PATCH 01/11] fix: config handling --- spec/support/shared_examples/config.yml_spec.rb | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spec/support/shared_examples/config.yml_spec.rb b/spec/support/shared_examples/config.yml_spec.rb index 31f6478..e29b312 100644 --- a/spec/support/shared_examples/config.yml_spec.rb +++ b/spec/support/shared_examples/config.yml_spec.rb @@ -18,9 +18,11 @@ end let(:config) do feed_name = file_path.split(File::Separator)[-2..].join(File::Separator) - feed_config = Html2rss::Configs.find_by_name(feed_name) + config = Html2rss::Configs.find_by_name(feed_name) - Html2rss::Config.new(feed_config, global_config, (params || {})) + config.merge!(global_config) + config[:params] = params if params + config end context 'with the file' do From caa5a6e5a212a214af307c79bcad1cb0a08e19f5 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sun, 23 Mar 2025 16:21:35 +0100 Subject: [PATCH 02/11] test: improve error messages for fetched config --- .../shared_examples/config.yml_spec.rb | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/spec/support/shared_examples/config.yml_spec.rb b/spec/support/shared_examples/config.yml_spec.rb index e29b312..9b91d62 100644 --- a/spec/support/shared_examples/config.yml_spec.rb +++ b/spec/support/shared_examples/config.yml_spec.rb @@ -100,14 +100,29 @@ subject(:feed) { Html2rss.feed(config) } it 'has positive amount of items' do - expect(feed.items.count).to be_positive + expect(feed.items.count).to be_positive, <<~MSG + No items fetched. + Check the feed URL and selectors in `#{file_name}`. 
+ + # #{file_name} + #{config} + + # resulted in RSS: + #{feed} + MSG end end context "when fetching #{params} / item", :fetch do - subject(:item) { Html2rss.feed(config).items.first } + subject(:item) do + items = Html2rss.feed(config).items + + expect(items.count).not_to be_zero, "Zero items fetched for `#{file_name}`" + + items.shift + end - let(:specified_attributes) { config.item_selector_names & %w[title description author category] } + let(:specified_attributes) { Html2rss::Selectors::ITEM_TAGS & %w[title description author category] } let(:text_attributes) { specified_attributes & %w[title description author] } it 'has no empty text attributes', :aggregate_failures do From 9a9101f448d1b418b705d5051ba4b67e16cd0964 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sun, 23 Mar 2025 14:29:48 +0100 Subject: [PATCH 03/11] feat: migrate to published_at --- lib/html2rss/configs/cleanenergywire.org/news.yml | 2 +- lib/html2rss/configs/dfs.de/pressemitteilungen.yml | 2 +- lib/html2rss/configs/dsw-info.de/presse.yml | 2 +- lib/html2rss/configs/ifo.de/newsroom.yml | 2 +- .../pankow.lebensmittel-kontrollergebnisse.de/search.yml | 2 +- lib/html2rss/configs/spiegel.de/impressum_autor.yml | 2 +- lib/html2rss/configs/steuerzahler.de/news.yml | 2 +- lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml | 2 +- lib/html2rss/configs/support.apple.com/exchange_repair.yml | 2 +- spec/support/shared_examples/config.yml_spec.rb | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/lib/html2rss/configs/cleanenergywire.org/news.yml b/lib/html2rss/configs/cleanenergywire.org/news.yml index 74a320a..f429adf 100644 --- a/lib/html2rss/configs/cleanenergywire.org/news.yml +++ b/lib/html2rss/configs/cleanenergywire.org/news.yml @@ -10,7 +10,7 @@ selectors: link: selector: "h3 a" extractor: "href" - updated: + published_at: selector: ".date-display-single" post_process: name: parse_time diff --git a/lib/html2rss/configs/dfs.de/pressemitteilungen.yml 
b/lib/html2rss/configs/dfs.de/pressemitteilungen.yml index ab4b26b..48561d6 100644 --- a/lib/html2rss/configs/dfs.de/pressemitteilungen.yml +++ b/lib/html2rss/configs/dfs.de/pressemitteilungen.yml @@ -11,7 +11,7 @@ selectors: link: selector: "a" extractor: "href" - updated: + published_at: selector: "time" extractor: "attribute" attribute: "datetime" diff --git a/lib/html2rss/configs/dsw-info.de/presse.yml b/lib/html2rss/configs/dsw-info.de/presse.yml index bf85d39..a2838af 100644 --- a/lib/html2rss/configs/dsw-info.de/presse.yml +++ b/lib/html2rss/configs/dsw-info.de/presse.yml @@ -11,7 +11,7 @@ selectors: link: selector: "a" extractor: "href" - updated: + published_at: selector: ".vd small" post_process: - name: parse_time diff --git a/lib/html2rss/configs/ifo.de/newsroom.yml b/lib/html2rss/configs/ifo.de/newsroom.yml index d8e77cd..dfc20a1 100644 --- a/lib/html2rss/configs/ifo.de/newsroom.yml +++ b/lib/html2rss/configs/ifo.de/newsroom.yml @@ -23,7 +23,7 @@ selectors: selector: "field_kurztext" post_process: - name: sanitize_html - updated: + published_at: selector: "field_date default" post_process: name: parse_time diff --git a/lib/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml b/lib/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml index d4a270b..0a75b22 100644 --- a/lib/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml +++ b/lib/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml @@ -32,7 +32,7 @@ selectors: post_process: - name: substring start: 10 - update: + published_at: selector: ".bewertung-panel .row:nth-child(1)" post_process: - name: substring diff --git a/lib/html2rss/configs/spiegel.de/impressum_autor.yml b/lib/html2rss/configs/spiegel.de/impressum_autor.yml index fb89563..2ef1f02 100644 --- a/lib/html2rss/configs/spiegel.de/impressum_autor.yml +++ b/lib/html2rss/configs/spiegel.de/impressum_autor.yml @@ -15,7 +15,7 @@ selectors: extractor: "href" descripton: selector: 
".leading-loose" - updated: + published_at: selector: "footer" extractor: text post_process: diff --git a/lib/html2rss/configs/steuerzahler.de/news.yml b/lib/html2rss/configs/steuerzahler.de/news.yml index fcc84d7..5c80f69 100644 --- a/lib/html2rss/configs/steuerzahler.de/news.yml +++ b/lib/html2rss/configs/steuerzahler.de/news.yml @@ -13,7 +13,7 @@ selectors: extractor: "href" description: selector: ".bdst_presslist__block__teaser" - updated: + published_at: selector: ".bdst_presslist__block__date" post_process: name: "parse_time" diff --git a/lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml b/lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml index 3cd9e0f..5418935 100644 --- a/lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml +++ b/lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml @@ -14,7 +14,7 @@ selectors: extractor: href description: selector: td:nth-child(2) - updated: + published_at: selector: td:nth-child(3) post_process: - name: parse_time diff --git a/lib/html2rss/configs/support.apple.com/exchange_repair.yml b/lib/html2rss/configs/support.apple.com/exchange_repair.yml index 164960d..4ee70cb 100644 --- a/lib/html2rss/configs/support.apple.com/exchange_repair.yml +++ b/lib/html2rss/configs/support.apple.com/exchange_repair.yml @@ -14,7 +14,7 @@ selectors: selector: "img" extractor: "attribute" attribute: "src" - updated: + published_at: selector: ".note" post_process: - name: parse_time diff --git a/spec/support/shared_examples/config.yml_spec.rb b/spec/support/shared_examples/config.yml_spec.rb index 9b91d62..7dc8337 100644 --- a/spec/support/shared_examples/config.yml_spec.rb +++ b/spec/support/shared_examples/config.yml_spec.rb @@ -18,7 +18,7 @@ end let(:config) do feed_name = file_path.split(File::Separator)[-2..].join(File::Separator) - config = Html2rss::Configs.find_by_name(feed_name) + config = {}.merge Html2rss::Configs.find_by_name(feed_name) config.merge!(global_config) config[:params] = params if params From 
aba8d8680901c1ace057dcf7e1ea035ce701ece8 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sun, 23 Mar 2025 14:35:16 +0100 Subject: [PATCH 04/11] feat: migrate from link to url --- lib/html2rss/configs/adfc.de/pressemitteilungen.yml | 2 +- lib/html2rss/configs/apnews.com/hub.yml | 2 +- lib/html2rss/configs/avherald.com/index.yml | 2 +- lib/html2rss/configs/bbc.co.uk/available_episodes.yml | 2 +- lib/html2rss/configs/bbc.com/mundo.yml | 2 +- lib/html2rss/configs/bbc.com/news_stories.yml | 2 +- lib/html2rss/configs/canarianweekly.com/front.yml | 2 +- lib/html2rss/configs/cinemascore.com/index.yml | 2 +- lib/html2rss/configs/cleanenergywire.org/news.yml | 2 +- lib/html2rss/configs/cnet.com/section_sub.yml | 2 +- lib/html2rss/configs/computerbase.de/meistgelesen.yml | 2 +- lib/html2rss/configs/cutle.fish/index.yml | 2 +- lib/html2rss/configs/deraktionaer.de/meistgelesen.yml | 2 +- .../tutorials_data_documentation_technotes_json.yml | 2 +- lib/html2rss/configs/dfs.de/pressemitteilungen.yml | 2 +- lib/html2rss/configs/dsw-info.de/presse.yml | 2 +- lib/html2rss/configs/espn.com/f1.yml | 2 +- lib/html2rss/configs/fia.com/documents.yml | 2 +- lib/html2rss/configs/formula1.com/latest.yml | 2 +- lib/html2rss/configs/github.com/releases.yml | 2 +- lib/html2rss/configs/iaapa.org/news.yml | 2 +- lib/html2rss/configs/ifo.de/newsroom.yml | 2 +- lib/html2rss/configs/imdb.com/ratings.yml | 2 +- .../configs/ingenieur.de/karriere_arbeitsleben_heiko_mell.yml | 2 +- lib/html2rss/configs/kinocheck.de/filmstarts.yml | 2 +- lib/html2rss/configs/metacritic.com/movies_release_date.yml | 2 +- lib/html2rss/configs/newyorker.com/magazine.yml | 2 +- lib/html2rss/configs/nomanssky.com/news.yml | 2 +- .../pankow.lebensmittel-kontrollergebnisse.de/search.yml | 2 +- lib/html2rss/configs/philomag.de/index.yml | 2 +- lib/html2rss/configs/phys.org/weekly.yml | 2 +- lib/html2rss/configs/pinboard.in/user.yml | 2 +- lib/html2rss/configs/rbb24.de/meistgeklickt.yml | 2 +- 
lib/html2rss/configs/rnd.de/themen.yml | 2 +- lib/html2rss/configs/robinwood.de/aktuelles.yml | 2 +- lib/html2rss/configs/s3.amazonaws.com/popular_movies.yml | 2 +- lib/html2rss/configs/softwareleadweekly.com/issues.yml | 2 +- lib/html2rss/configs/solarthermalworld.org/news.yml | 2 +- lib/html2rss/configs/spektrum.de/meistgelesen.yml | 2 +- lib/html2rss/configs/spiegel.de/impressum_autor.yml | 2 +- .../configs/stackoverflow.com/hot_network_questions.yml | 2 +- lib/html2rss/configs/steuerzahler.de/news.yml | 2 +- lib/html2rss/configs/stripes.com/index.yml | 2 +- lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml | 2 +- lib/html2rss/configs/support.apple.com/exchange_repair.yml | 2 +- lib/html2rss/configs/teneriffa-news.com/news.yml | 2 +- lib/html2rss/configs/test.de/archiv.yml | 2 +- .../configs/theguardian.com/international_mostpopular.yml | 2 +- lib/html2rss/configs/thoughtworks.com/insights.yml | 2 +- .../tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml | 2 +- lib/html2rss/configs/webentwickler-jobs.de/in.yml | 2 +- spec/support/shared_examples/config.yml_spec.rb | 2 +- 52 files changed, 52 insertions(+), 52 deletions(-) diff --git a/lib/html2rss/configs/adfc.de/pressemitteilungen.yml b/lib/html2rss/configs/adfc.de/pressemitteilungen.yml index d903823..9faac76 100644 --- a/lib/html2rss/configs/adfc.de/pressemitteilungen.yml +++ b/lib/html2rss/configs/adfc.de/pressemitteilungen.yml @@ -8,7 +8,7 @@ selectors: selector: ".articleteasercontainer .articleteaser" title: selector: ".articleteaser-header-container h4" - link: + url: selector: "a:first" extractor: "href" description: diff --git a/lib/html2rss/configs/apnews.com/hub.yml b/lib/html2rss/configs/apnews.com/hub.yml index 9cc0f87..07d3a66 100644 --- a/lib/html2rss/configs/apnews.com/hub.yml +++ b/lib/html2rss/configs/apnews.com/hub.yml @@ -9,7 +9,7 @@ selectors: selector: ".PagePromo" title: selector: h3 - link: + url: selector: a:first extractor: href description: diff --git 
a/lib/html2rss/configs/avherald.com/index.yml b/lib/html2rss/configs/avherald.com/index.yml index 400ca33..dfb6444 100644 --- a/lib/html2rss/configs/avherald.com/index.yml +++ b/lib/html2rss/configs/avherald.com/index.yml @@ -9,5 +9,5 @@ selectors: selector: "table table a" title: selector: span - link: + url: extractor: href diff --git a/lib/html2rss/configs/bbc.co.uk/available_episodes.yml b/lib/html2rss/configs/bbc.co.uk/available_episodes.yml index 746898d..76056e8 100644 --- a/lib/html2rss/configs/bbc.co.uk/available_episodes.yml +++ b/lib/html2rss/configs/bbc.co.uk/available_episodes.yml @@ -7,7 +7,7 @@ selectors: selector: ".highlight-box-wrapper > div" title: selector: ".programme__title" - link: + url: selector: ".programme__titles > a" extractor: "href" description: diff --git a/lib/html2rss/configs/bbc.com/mundo.yml b/lib/html2rss/configs/bbc.com/mundo.yml index a510a4a..8dcb8e4 100644 --- a/lib/html2rss/configs/bbc.com/mundo.yml +++ b/lib/html2rss/configs/bbc.com/mundo.yml @@ -9,7 +9,7 @@ selectors: selector: '[data-testid="topic-promos"] > li' title: selector: a - link: + url: selector: a:first extractor: href description: diff --git a/lib/html2rss/configs/bbc.com/news_stories.yml b/lib/html2rss/configs/bbc.com/news_stories.yml index 1fba0af..bbdd396 100644 --- a/lib/html2rss/configs/bbc.com/news_stories.yml +++ b/lib/html2rss/configs/bbc.com/news_stories.yml @@ -8,6 +8,6 @@ selectors: selector: ".gel-layout__item" title: selector: "h3" - link: + url: selector: "a" extractor: "href" diff --git a/lib/html2rss/configs/canarianweekly.com/front.yml b/lib/html2rss/configs/canarianweekly.com/front.yml index 48ba33a..6e147aa 100644 --- a/lib/html2rss/configs/canarianweekly.com/front.yml +++ b/lib/html2rss/configs/canarianweekly.com/front.yml @@ -8,6 +8,6 @@ selectors: selector: ".article__entry" title: selector: "h5 > a" - link: + url: selector: "h5 > a" extractor: "href" diff --git a/lib/html2rss/configs/cinemascore.com/index.yml 
b/lib/html2rss/configs/cinemascore.com/index.yml index 3f832ea..f08c046 100644 --- a/lib/html2rss/configs/cinemascore.com/index.yml +++ b/lib/html2rss/configs/cinemascore.com/index.yml @@ -20,7 +20,7 @@ selectors: selector: "grade" movie_title: selector: "movie_title" - link: + url: post_process: - name: "template" string: "https://www.metacritic.com/search/movie/%{movie_title}/results" diff --git a/lib/html2rss/configs/cleanenergywire.org/news.yml b/lib/html2rss/configs/cleanenergywire.org/news.yml index f429adf..efec1c9 100644 --- a/lib/html2rss/configs/cleanenergywire.org/news.yml +++ b/lib/html2rss/configs/cleanenergywire.org/news.yml @@ -7,7 +7,7 @@ selectors: selector: ".view-content article" title: selector: "h3 a" - link: + url: selector: "h3 a" extractor: "href" published_at: diff --git a/lib/html2rss/configs/cnet.com/section_sub.yml b/lib/html2rss/configs/cnet.com/section_sub.yml index b61b21c..64d17e6 100644 --- a/lib/html2rss/configs/cnet.com/section_sub.yml +++ b/lib/html2rss/configs/cnet.com/section_sub.yml @@ -9,7 +9,7 @@ selectors: selector: ".c-storiesNeonHighlightsCard" title: selector: ".g-text-bold" - link: + url: selector: a:first extractor: href description: diff --git a/lib/html2rss/configs/computerbase.de/meistgelesen.yml b/lib/html2rss/configs/computerbase.de/meistgelesen.yml index f8a89f1..6e397a5 100644 --- a/lib/html2rss/configs/computerbase.de/meistgelesen.yml +++ b/lib/html2rss/configs/computerbase.de/meistgelesen.yml @@ -14,6 +14,6 @@ selectors: string: "%{context}: %{self}" context: selector: ".charts__context" - link: + url: selector: ".charts__link" extractor: "href" diff --git a/lib/html2rss/configs/cutle.fish/index.yml b/lib/html2rss/configs/cutle.fish/index.yml index 8508782..95ac828 100644 --- a/lib/html2rss/configs/cutle.fish/index.yml +++ b/lib/html2rss/configs/cutle.fish/index.yml @@ -8,7 +8,7 @@ selectors: selector: main > div > div title: selector: a - link: + url: selector: a extractor: href description: diff --git 
a/lib/html2rss/configs/deraktionaer.de/meistgelesen.yml b/lib/html2rss/configs/deraktionaer.de/meistgelesen.yml index e7a9579..c338b25 100644 --- a/lib/html2rss/configs/deraktionaer.de/meistgelesen.yml +++ b/lib/html2rss/configs/deraktionaer.de/meistgelesen.yml @@ -9,7 +9,7 @@ selectors: selector: "#most-viewed ol > li" title: selector: "> a" - link: + url: selector: "> a" extractor: "href" isin: diff --git a/lib/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml b/lib/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml index 1b4da48..81dc42b 100644 --- a/lib/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml +++ b/lib/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml @@ -9,7 +9,7 @@ selectors: selector: " references > object > * > object" title: selector: title - link: + url: selector: url post_process: - name: parse_uri diff --git a/lib/html2rss/configs/dfs.de/pressemitteilungen.yml b/lib/html2rss/configs/dfs.de/pressemitteilungen.yml index 48561d6..3f10ed4 100644 --- a/lib/html2rss/configs/dfs.de/pressemitteilungen.yml +++ b/lib/html2rss/configs/dfs.de/pressemitteilungen.yml @@ -8,7 +8,7 @@ selectors: selector: ".article-teaser-list article" title: selector: ".headline" - link: + url: selector: "a" extractor: "href" published_at: diff --git a/lib/html2rss/configs/dsw-info.de/presse.yml b/lib/html2rss/configs/dsw-info.de/presse.yml index a2838af..a8e5d05 100644 --- a/lib/html2rss/configs/dsw-info.de/presse.yml +++ b/lib/html2rss/configs/dsw-info.de/presse.yml @@ -8,7 +8,7 @@ selectors: selector: ".cesprop-0 .container.zw-01 .frame:nth-child(2) ul > li" title: selector: "a" - link: + url: selector: "a" extractor: "href" published_at: diff --git a/lib/html2rss/configs/espn.com/f1.yml b/lib/html2rss/configs/espn.com/f1.yml index c7470f0..68f32a8 100644 --- a/lib/html2rss/configs/espn.com/f1.yml +++ 
b/lib/html2rss/configs/espn.com/f1.yml @@ -7,6 +7,6 @@ selectors: selector: ".headlineStack__list > li" title: selector: "a" - link: + url: selector: "a" extractor: "href" diff --git a/lib/html2rss/configs/fia.com/documents.yml b/lib/html2rss/configs/fia.com/documents.yml index 87a2c98..f44e392 100644 --- a/lib/html2rss/configs/fia.com/documents.yml +++ b/lib/html2rss/configs/fia.com/documents.yml @@ -8,7 +8,7 @@ selectors: selector: ".decision-document-list .document-row" title: selector: ".title" - link: + url: selector: a extractor: href description: diff --git a/lib/html2rss/configs/formula1.com/latest.yml b/lib/html2rss/configs/formula1.com/latest.yml index d1a08f8..c95070a 100644 --- a/lib/html2rss/configs/formula1.com/latest.yml +++ b/lib/html2rss/configs/formula1.com/latest.yml @@ -7,7 +7,7 @@ selectors: selector: "#article-list > li" title: selector: "p" - link: + url: selector: "a" extractor: "href" categories: diff --git a/lib/html2rss/configs/github.com/releases.yml b/lib/html2rss/configs/github.com/releases.yml index 9e1fa30..902ca7d 100644 --- a/lib/html2rss/configs/github.com/releases.yml +++ b/lib/html2rss/configs/github.com/releases.yml @@ -8,7 +8,7 @@ selectors: selector: ".repository-content section" title: selector: "h2" - link: + url: selector: "a" extractor: "href" description: diff --git a/lib/html2rss/configs/iaapa.org/news.yml b/lib/html2rss/configs/iaapa.org/news.yml index 4e8ba7a..2999799 100644 --- a/lib/html2rss/configs/iaapa.org/news.yml +++ b/lib/html2rss/configs/iaapa.org/news.yml @@ -9,6 +9,6 @@ selectors: selector: ".home-latest-title" description: selector: ".home-latest-teaser" - link: + url: selector: "a" extractor: "href" diff --git a/lib/html2rss/configs/ifo.de/newsroom.yml b/lib/html2rss/configs/ifo.de/newsroom.yml index dfc20a1..a5f8a22 100644 --- a/lib/html2rss/configs/ifo.de/newsroom.yml +++ b/lib/html2rss/configs/ifo.de/newsroom.yml @@ -9,7 +9,7 @@ selectors: selector: "fields > array > object" title: selector: "title" - 
link: + url: selector: "url" post_process: - name: template diff --git a/lib/html2rss/configs/imdb.com/ratings.yml b/lib/html2rss/configs/imdb.com/ratings.yml index eebdd17..3fffc50 100644 --- a/lib/html2rss/configs/imdb.com/ratings.yml +++ b/lib/html2rss/configs/imdb.com/ratings.yml @@ -13,7 +13,7 @@ selectors: replacement: "" - name: template string: 'Rated: "%{self}" %{user_rating} [IMDb: %{global_rating}]' - link: + url: selector: "a.ipc-title-link-wrapper" extractor: "href" description: diff --git a/lib/html2rss/configs/ingenieur.de/karriere_arbeitsleben_heiko_mell.yml b/lib/html2rss/configs/ingenieur.de/karriere_arbeitsleben_heiko_mell.yml index 954675a..e88712f 100644 --- a/lib/html2rss/configs/ingenieur.de/karriere_arbeitsleben_heiko_mell.yml +++ b/lib/html2rss/configs/ingenieur.de/karriere_arbeitsleben_heiko_mell.yml @@ -9,7 +9,7 @@ selectors: selector: ".section > .ing-teaser" title: selector: ".ing-teaser__headline" - link: + url: selector: a:first extractor: href description: diff --git a/lib/html2rss/configs/kinocheck.de/filmstarts.yml b/lib/html2rss/configs/kinocheck.de/filmstarts.yml index 267ba99..22d95a4 100644 --- a/lib/html2rss/configs/kinocheck.de/filmstarts.yml +++ b/lib/html2rss/configs/kinocheck.de/filmstarts.yml @@ -11,7 +11,7 @@ selectors: post_process: name: "template" string: "%{self} (%{start_date})" - link: + url: selector: "a" extractor: "href" description: diff --git a/lib/html2rss/configs/metacritic.com/movies_release_date.yml b/lib/html2rss/configs/metacritic.com/movies_release_date.yml index 1a7cb0a..f5d94bd 100644 --- a/lib/html2rss/configs/metacritic.com/movies_release_date.yml +++ b/lib/html2rss/configs/metacritic.com/movies_release_date.yml @@ -14,7 +14,7 @@ selectors: score: selector: ".clamp-score-wrap > .metascore_anchor > .metascore_w" extractor: "text" - link: + url: selector: ".metascore_anchor" extractor: "href" description: diff --git a/lib/html2rss/configs/newyorker.com/magazine.yml 
b/lib/html2rss/configs/newyorker.com/magazine.yml index c4047ff..b6e8d8d 100644 --- a/lib/html2rss/configs/newyorker.com/magazine.yml +++ b/lib/html2rss/configs/newyorker.com/magazine.yml @@ -9,7 +9,7 @@ selectors: selector: .summary-item title: selector: h3 - link: + url: selector: a:first extractor: href description: diff --git a/lib/html2rss/configs/nomanssky.com/news.yml b/lib/html2rss/configs/nomanssky.com/news.yml index 4f8e51f..505a6d4 100644 --- a/lib/html2rss/configs/nomanssky.com/news.yml +++ b/lib/html2rss/configs/nomanssky.com/news.yml @@ -9,7 +9,7 @@ selectors: selector: article title: selector: ".post-title" - link: + url: selector: a.view-article extractor: href description: diff --git a/lib/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml b/lib/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml index 0a75b22..67f42f3 100644 --- a/lib/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml +++ b/lib/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml @@ -13,7 +13,7 @@ selectors: - name: template string: | %{self}: %{ergebnis} (%{punkte}/80 Punkte) - link: + url: selector: a extractor: href description: diff --git a/lib/html2rss/configs/philomag.de/index.yml b/lib/html2rss/configs/philomag.de/index.yml index 3a7bd4a..3f6d8c8 100644 --- a/lib/html2rss/configs/philomag.de/index.yml +++ b/lib/html2rss/configs/philomag.de/index.yml @@ -9,7 +9,7 @@ selectors: selector: ".content article" title: selector: h2 - link: + url: selector: h2 a extractor: href description: diff --git a/lib/html2rss/configs/phys.org/weekly.yml b/lib/html2rss/configs/phys.org/weekly.yml index 51bb0ef..429f70a 100644 --- a/lib/html2rss/configs/phys.org/weekly.yml +++ b/lib/html2rss/configs/phys.org/weekly.yml @@ -11,7 +11,7 @@ selectors: selector: ".text-info" categories: - category - link: + url: selector: ".news-link" extractor: "href" post_process: diff --git a/lib/html2rss/configs/pinboard.in/user.yml 
b/lib/html2rss/configs/pinboard.in/user.yml index e6f22eb..70e28a7 100644 --- a/lib/html2rss/configs/pinboard.in/user.yml +++ b/lib/html2rss/configs/pinboard.in/user.yml @@ -8,6 +8,6 @@ selectors: selector: "#bookmarks .bookmark" title: selector: ".bookmark_title" - link: + url: selector: ".bookmark_title" extractor: "href" diff --git a/lib/html2rss/configs/rbb24.de/meistgeklickt.yml b/lib/html2rss/configs/rbb24.de/meistgeklickt.yml index b4cd24c..9e8ab00 100644 --- a/lib/html2rss/configs/rbb24.de/meistgeklickt.yml +++ b/lib/html2rss/configs/rbb24.de/meistgeklickt.yml @@ -9,6 +9,6 @@ selectors: selector: ".tabmodul_container > li:last-child > ul > li" title: selector: "a" - link: + url: selector: "a" extractor: "href" diff --git a/lib/html2rss/configs/rnd.de/themen.yml b/lib/html2rss/configs/rnd.de/themen.yml index b5e9341..1351c06 100644 --- a/lib/html2rss/configs/rnd.de/themen.yml +++ b/lib/html2rss/configs/rnd.de/themen.yml @@ -8,7 +8,7 @@ selectors: selector: "div[class^='DoubleChainstyled']:first-child a[class*='ContentTeaser']" title: selector: "h2" - link: + url: extractor: "href" description: selector: "p" diff --git a/lib/html2rss/configs/robinwood.de/aktuelles.yml b/lib/html2rss/configs/robinwood.de/aktuelles.yml index 9e5f33c..5b749b6 100644 --- a/lib/html2rss/configs/robinwood.de/aktuelles.yml +++ b/lib/html2rss/configs/robinwood.de/aktuelles.yml @@ -8,7 +8,7 @@ selectors: selector: "article[data-history-node-id]" title: selector: "h2" - link: + url: selector: "a" extractor: "href" description: diff --git a/lib/html2rss/configs/s3.amazonaws.com/popular_movies.yml b/lib/html2rss/configs/s3.amazonaws.com/popular_movies.yml index 8e536fa..7b8a4e0 100644 --- a/lib/html2rss/configs/s3.amazonaws.com/popular_movies.yml +++ b/lib/html2rss/configs/s3.amazonaws.com/popular_movies.yml @@ -14,7 +14,7 @@ selectors: selector: "imdb_id" poster_url: selector: "poster_url" - link: + url: post_process: - name: template string: "https://imdb.com/title/%{imdb_id}" diff 
--git a/lib/html2rss/configs/softwareleadweekly.com/issues.yml b/lib/html2rss/configs/softwareleadweekly.com/issues.yml index d347eab..248f272 100644 --- a/lib/html2rss/configs/softwareleadweekly.com/issues.yml +++ b/lib/html2rss/configs/softwareleadweekly.com/issues.yml @@ -7,6 +7,6 @@ selectors: selector: ".table-issue" title: selector: "a" - link: + url: selector: "a" extractor: "href" diff --git a/lib/html2rss/configs/solarthermalworld.org/news.yml b/lib/html2rss/configs/solarthermalworld.org/news.yml index e58265b..c264e1d 100644 --- a/lib/html2rss/configs/solarthermalworld.org/news.yml +++ b/lib/html2rss/configs/solarthermalworld.org/news.yml @@ -7,7 +7,7 @@ selectors: selector: "article.post-wrapper" title: selector: "h2 a" - link: + url: selector: "h2 a" extractor: "href" description: diff --git a/lib/html2rss/configs/spektrum.de/meistgelesen.yml b/lib/html2rss/configs/spektrum.de/meistgelesen.yml index 3cf3b24..4c6b3c0 100644 --- a/lib/html2rss/configs/spektrum.de/meistgelesen.yml +++ b/lib/html2rss/configs/spektrum.de/meistgelesen.yml @@ -9,6 +9,6 @@ selectors: selector: ".teaser.teaser__list_box ol > li" title: selector: "a" - link: + url: selector: "a" extractor: "href" diff --git a/lib/html2rss/configs/spiegel.de/impressum_autor.yml b/lib/html2rss/configs/spiegel.de/impressum_autor.yml index 2ef1f02..1320bd7 100644 --- a/lib/html2rss/configs/spiegel.de/impressum_autor.yml +++ b/lib/html2rss/configs/spiegel.de/impressum_autor.yml @@ -10,7 +10,7 @@ selectors: selector: "h2" prefixed_title: selector: "h2" - link: + url: selector: "section > a" extractor: "href" descripton: diff --git a/lib/html2rss/configs/stackoverflow.com/hot_network_questions.yml b/lib/html2rss/configs/stackoverflow.com/hot_network_questions.yml index 4fc6093..0e05e4e 100644 --- a/lib/html2rss/configs/stackoverflow.com/hot_network_questions.yml +++ b/lib/html2rss/configs/stackoverflow.com/hot_network_questions.yml @@ -8,7 +8,7 @@ selectors: selector: "#hot-network-questions ul > li" 
title: selector: a - link: + url: selector: a extractor: href network: diff --git a/lib/html2rss/configs/steuerzahler.de/news.yml b/lib/html2rss/configs/steuerzahler.de/news.yml index 5c80f69..cbbc176 100644 --- a/lib/html2rss/configs/steuerzahler.de/news.yml +++ b/lib/html2rss/configs/steuerzahler.de/news.yml @@ -8,7 +8,7 @@ selectors: selector: ".bdst_presslist .bdst_presslist__block" title: selector: "h4" - link: + url: selector: "a" extractor: "href" description: diff --git a/lib/html2rss/configs/stripes.com/index.yml b/lib/html2rss/configs/stripes.com/index.yml index b6006c0..177c241 100644 --- a/lib/html2rss/configs/stripes.com/index.yml +++ b/lib/html2rss/configs/stripes.com/index.yml @@ -9,7 +9,7 @@ selectors: selector: '[class$="teaser"]' title: selector: a - link: + url: selector: a extractor: href description: diff --git a/lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml b/lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml index 5418935..7e997ee 100644 --- a/lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml +++ b/lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml @@ -9,7 +9,7 @@ selectors: selector: "#tableWraper table tbody > tr:not(:first-child)" title: selector: a - link: + url: selector: a:first extractor: href description: diff --git a/lib/html2rss/configs/support.apple.com/exchange_repair.yml b/lib/html2rss/configs/support.apple.com/exchange_repair.yml index 4ee70cb..e205b76 100644 --- a/lib/html2rss/configs/support.apple.com/exchange_repair.yml +++ b/lib/html2rss/configs/support.apple.com/exchange_repair.yml @@ -7,7 +7,7 @@ selectors: selector: ".main .as-columns--2up-extended" title: selector: "a" - link: + url: selector: "a" extractor: "href" img: diff --git a/lib/html2rss/configs/teneriffa-news.com/news.yml b/lib/html2rss/configs/teneriffa-news.com/news.yml index 1cd1b63..a170313 100644 --- a/lib/html2rss/configs/teneriffa-news.com/news.yml +++ b/lib/html2rss/configs/teneriffa-news.com/news.yml @@ -8,7 +8,7 @@ 
selectors: selector: ".main-content article" title: selector: "h2" - link: + url: selector: "a" extractor: "href" img_url: diff --git a/lib/html2rss/configs/test.de/archiv.yml b/lib/html2rss/configs/test.de/archiv.yml index 3f71c70..1bc19ae 100644 --- a/lib/html2rss/configs/test.de/archiv.yml +++ b/lib/html2rss/configs/test.de/archiv.yml @@ -9,7 +9,7 @@ selectors: selector: ".archive__list > li" title: selector: a - link: + url: selector: a:first extractor: href description: diff --git a/lib/html2rss/configs/theguardian.com/international_mostpopular.yml b/lib/html2rss/configs/theguardian.com/international_mostpopular.yml index 6d4470a..0126e7a 100644 --- a/lib/html2rss/configs/theguardian.com/international_mostpopular.yml +++ b/lib/html2rss/configs/theguardian.com/international_mostpopular.yml @@ -8,6 +8,6 @@ selectors: selector: '[data-link-name="most-viewed"] > ol > li' title: selector: "h4 span" - link: + url: selector: "a" extractor: "href" diff --git a/lib/html2rss/configs/thoughtworks.com/insights.yml b/lib/html2rss/configs/thoughtworks.com/insights.yml index 1c5a5f4..dfa1bdb 100644 --- a/lib/html2rss/configs/thoughtworks.com/insights.yml +++ b/lib/html2rss/configs/thoughtworks.com/insights.yml @@ -8,7 +8,7 @@ selectors: selector: ".collectionOfMedia .aem-GridColumn" title: selector: ".cmp-collection-of-media__card__text__title" - link: + url: selector: a:first extractor: href description: diff --git a/lib/html2rss/configs/tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml b/lib/html2rss/configs/tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml index 00bcb57..436023a 100644 --- a/lib/html2rss/configs/tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml +++ b/lib/html2rss/configs/tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml @@ -8,7 +8,7 @@ selectors: selector: "article.article" title: selector: "h3" - link: + url: selector: "a" extractor: "href" topic: diff --git a/lib/html2rss/configs/webentwickler-jobs.de/in.yml 
b/lib/html2rss/configs/webentwickler-jobs.de/in.yml index d31d168..6ba65ac 100644 --- a/lib/html2rss/configs/webentwickler-jobs.de/in.yml +++ b/lib/html2rss/configs/webentwickler-jobs.de/in.yml @@ -9,7 +9,7 @@ selectors: selector: ".job-item" title: selector: ".job-title" - link: + url: selector: a:first extractor: href description: diff --git a/spec/support/shared_examples/config.yml_spec.rb b/spec/support/shared_examples/config.yml_spec.rb index 7dc8337..fd170a2 100644 --- a/spec/support/shared_examples/config.yml_spec.rb +++ b/spec/support/shared_examples/config.yml_spec.rb @@ -138,7 +138,7 @@ end it 'has link content beginning with "http" when config has a link selector' do - expect(item&.link&.to_s).to start_with('http') if config.item_selector_names.include?(:link) + expect(item&.link&.to_s).to start_with('http') if Html2rss::Selectors::ITEM_TAGS.include?(:url) end end end From 957ff31c9a3f551e539e362836fe2c00d5a40152 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sun, 23 Mar 2025 14:41:59 +0100 Subject: [PATCH 05/11] feat: migrate to post_process array syntax --- .../configs/adfc.de/pressemitteilungen.yml | 2 -- .../configs/bbc.co.uk/available_episodes.yml | 2 -- lib/html2rss/configs/cinemascore.com/index.yml | 14 +++++++------- lib/html2rss/configs/cleanenergywire.org/news.yml | 2 +- .../configs/computerbase.de/meistgelesen.yml | 4 ++-- ...tutorials_data_documentation_technotes_json.yml | 2 +- lib/html2rss/configs/github.com/releases.yml | 2 -- lib/html2rss/configs/iaapa.org/news.yml | 6 +++--- lib/html2rss/configs/ifo.de/newsroom.yml | 4 +--- lib/html2rss/configs/kinocheck.de/filmstarts.yml | 6 ++---- .../configs/metacritic.com/movies_release_date.yml | 4 ++-- lib/html2rss/configs/nomanssky.com/news.yml | 6 +++--- lib/html2rss/configs/phys.org/weekly.yml | 2 -- lib/html2rss/configs/rnd.de/themen.yml | 2 +- .../configs/solarthermalworld.org/news.yml | 2 -- lib/html2rss/configs/steuerzahler.de/news.yml | 2 +- 
.../configs/support.apple.com/en_gb_ht201222.yml | 2 +- lib/html2rss/configs/thoughtworks.com/insights.yml | 4 ++-- lib/html2rss/configs/webentwickler-jobs.de/in.yml | 2 -- 19 files changed, 27 insertions(+), 43 deletions(-) diff --git a/lib/html2rss/configs/adfc.de/pressemitteilungen.yml b/lib/html2rss/configs/adfc.de/pressemitteilungen.yml index 9faac76..a56071e 100644 --- a/lib/html2rss/configs/adfc.de/pressemitteilungen.yml +++ b/lib/html2rss/configs/adfc.de/pressemitteilungen.yml @@ -14,5 +14,3 @@ selectors: description: selector: ".articleteaser-header-container p" extractor: "html" - post_process: - - name: sanitize_html diff --git a/lib/html2rss/configs/bbc.co.uk/available_episodes.yml b/lib/html2rss/configs/bbc.co.uk/available_episodes.yml index 76056e8..ac07b6f 100644 --- a/lib/html2rss/configs/bbc.co.uk/available_episodes.yml +++ b/lib/html2rss/configs/bbc.co.uk/available_episodes.yml @@ -12,5 +12,3 @@ selectors: extractor: "href" description: selector: ".programme__synopsis" - post_process: - name: sanitize_html diff --git a/lib/html2rss/configs/cinemascore.com/index.yml b/lib/html2rss/configs/cinemascore.com/index.yml index f08c046..446b66a 100644 --- a/lib/html2rss/configs/cinemascore.com/index.yml +++ b/lib/html2rss/configs/cinemascore.com/index.yml @@ -9,13 +9,13 @@ selectors: title: selector: "title" post_process: - name: "template" - string: "%s (%s) [Rated %s] (%s)" - methods: - - self - - grade - - mpaa_rating - - year + - name: "template" + string: "%s (%s) [Rated %s] (%s)" + methods: + - self + - grade + - mpaa_rating + - year grade: selector: "grade" movie_title: diff --git a/lib/html2rss/configs/cleanenergywire.org/news.yml b/lib/html2rss/configs/cleanenergywire.org/news.yml index efec1c9..bb53efe 100644 --- a/lib/html2rss/configs/cleanenergywire.org/news.yml +++ b/lib/html2rss/configs/cleanenergywire.org/news.yml @@ -13,4 +13,4 @@ selectors: published_at: selector: ".date-display-single" post_process: - name: parse_time + - name: parse_time 
diff --git a/lib/html2rss/configs/computerbase.de/meistgelesen.yml b/lib/html2rss/configs/computerbase.de/meistgelesen.yml index 6e397a5..b5c18e9 100644 --- a/lib/html2rss/configs/computerbase.de/meistgelesen.yml +++ b/lib/html2rss/configs/computerbase.de/meistgelesen.yml @@ -10,8 +10,8 @@ selectors: title: selector: ".charts__title" post_process: - name: "template" - string: "%{context}: %{self}" + - name: "template" + string: "%{context}: %{self}" context: selector: ".charts__context" url: diff --git a/lib/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml b/lib/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml index 81dc42b..fe55c8d 100644 --- a/lib/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml +++ b/lib/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml @@ -6,7 +6,7 @@ channel: time_zone: UTC selectors: items: - selector: " references > object > * > object" + selector: "references > object > * > object:has(url)" title: selector: title url: diff --git a/lib/html2rss/configs/github.com/releases.yml b/lib/html2rss/configs/github.com/releases.yml index 902ca7d..9019703 100644 --- a/lib/html2rss/configs/github.com/releases.yml +++ b/lib/html2rss/configs/github.com/releases.yml @@ -14,5 +14,3 @@ selectors: description: selector: '[data-test-selector="body-content"]' extractor: "html" - post_process: - name: "sanitize_html" diff --git a/lib/html2rss/configs/iaapa.org/news.yml b/lib/html2rss/configs/iaapa.org/news.yml index 2999799..bf35635 100644 --- a/lib/html2rss/configs/iaapa.org/news.yml +++ b/lib/html2rss/configs/iaapa.org/news.yml @@ -4,11 +4,11 @@ channel: ttl: 720 selectors: items: - selector: ".nh-row-home .views-row" + selector: ".views-row > article" title: - selector: ".home-latest-title" + selector: h3 description: - selector: ".home-latest-teaser" + selector: ".event-card__summary" url: selector: "a" extractor: 
"href" diff --git a/lib/html2rss/configs/ifo.de/newsroom.yml b/lib/html2rss/configs/ifo.de/newsroom.yml index a5f8a22..d7b749b 100644 --- a/lib/html2rss/configs/ifo.de/newsroom.yml +++ b/lib/html2rss/configs/ifo.de/newsroom.yml @@ -21,9 +21,7 @@ selectors: selector: ".type" description: selector: "field_kurztext" - post_process: - - name: sanitize_html published_at: selector: "field_date default" post_process: - name: parse_time + - name: parse_time diff --git a/lib/html2rss/configs/kinocheck.de/filmstarts.yml b/lib/html2rss/configs/kinocheck.de/filmstarts.yml index 22d95a4..7b38776 100644 --- a/lib/html2rss/configs/kinocheck.de/filmstarts.yml +++ b/lib/html2rss/configs/kinocheck.de/filmstarts.yml @@ -9,15 +9,13 @@ selectors: title: selector: "h2" post_process: - name: "template" - string: "%{self} (%{start_date})" + - name: "template" + string: "%{self} (%{start_date})" url: selector: "a" extractor: "href" description: selector: ".information" extractor: "html" - post_process: - name: "sanitize_html" start_date: selector: ".release-date .highlight" diff --git a/lib/html2rss/configs/metacritic.com/movies_release_date.yml b/lib/html2rss/configs/metacritic.com/movies_release_date.yml index f5d94bd..a815564 100644 --- a/lib/html2rss/configs/metacritic.com/movies_release_date.yml +++ b/lib/html2rss/configs/metacritic.com/movies_release_date.yml @@ -9,8 +9,8 @@ selectors: title: selector: "h3" post_process: - name: "template" - string: "%{self} (Score: %{score})" + - name: "template" + string: "%{self} (Score: %{score})" score: selector: ".clamp-score-wrap > .metascore_anchor > .metascore_w" extractor: "text" diff --git a/lib/html2rss/configs/nomanssky.com/news.yml b/lib/html2rss/configs/nomanssky.com/news.yml index 505a6d4..511fa4d 100644 --- a/lib/html2rss/configs/nomanssky.com/news.yml +++ b/lib/html2rss/configs/nomanssky.com/news.yml @@ -15,6 +15,6 @@ selectors: description: selector: p:first post_process: - name: substring - start: 0 - end: -14 + - name: substring 
+ start: 0 + end: -14 diff --git a/lib/html2rss/configs/phys.org/weekly.yml b/lib/html2rss/configs/phys.org/weekly.yml index 429f70a..3002a9f 100644 --- a/lib/html2rss/configs/phys.org/weekly.yml +++ b/lib/html2rss/configs/phys.org/weekly.yml @@ -14,5 +14,3 @@ selectors: url: selector: ".news-link" extractor: "href" - post_process: - name: "parse_uri" diff --git a/lib/html2rss/configs/rnd.de/themen.yml b/lib/html2rss/configs/rnd.de/themen.yml index 1351c06..4788d7f 100644 --- a/lib/html2rss/configs/rnd.de/themen.yml +++ b/lib/html2rss/configs/rnd.de/themen.yml @@ -14,4 +14,4 @@ selectors: selector: "p" extractor: html post_process: - name: sanitize_html + - name: sanitize_html diff --git a/lib/html2rss/configs/solarthermalworld.org/news.yml b/lib/html2rss/configs/solarthermalworld.org/news.yml index c264e1d..af186b1 100644 --- a/lib/html2rss/configs/solarthermalworld.org/news.yml +++ b/lib/html2rss/configs/solarthermalworld.org/news.yml @@ -13,5 +13,3 @@ selectors: description: selector: '[property="content:encoded"]' extractor: html - post_process: - name: "sanitize_html" diff --git a/lib/html2rss/configs/steuerzahler.de/news.yml b/lib/html2rss/configs/steuerzahler.de/news.yml index cbbc176..42513ed 100644 --- a/lib/html2rss/configs/steuerzahler.de/news.yml +++ b/lib/html2rss/configs/steuerzahler.de/news.yml @@ -16,4 +16,4 @@ selectors: published_at: selector: ".bdst_presslist__block__date" post_process: - name: "parse_time" + - name: "parse_time" diff --git a/lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml b/lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml index 7e997ee..3c621c6 100644 --- a/lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml +++ b/lib/html2rss/configs/support.apple.com/en_gb_ht201222.yml @@ -6,7 +6,7 @@ channel: time_zone: UTC selectors: items: - selector: "#tableWraper table tbody > tr:not(:first-child)" + selector: ".table-wrapper table tbody > tr:not(:first-child)" title: selector: a url: diff --git 
a/lib/html2rss/configs/thoughtworks.com/insights.yml b/lib/html2rss/configs/thoughtworks.com/insights.yml index dfa1bdb..cf53552 100644 --- a/lib/html2rss/configs/thoughtworks.com/insights.yml +++ b/lib/html2rss/configs/thoughtworks.com/insights.yml @@ -5,9 +5,9 @@ channel: time_zone: UTC selectors: items: - selector: ".collectionOfMedia .aem-GridColumn" + selector: ".cmp-content-card" title: - selector: ".cmp-collection-of-media__card__text__title" + selector: ".cmp-content-card__text__para-text" url: selector: a:first extractor: href diff --git a/lib/html2rss/configs/webentwickler-jobs.de/in.yml b/lib/html2rss/configs/webentwickler-jobs.de/in.yml index 6ba65ac..e37f19b 100644 --- a/lib/html2rss/configs/webentwickler-jobs.de/in.yml +++ b/lib/html2rss/configs/webentwickler-jobs.de/in.yml @@ -15,5 +15,3 @@ selectors: description: selector: "> div" extractor: html - post_process: - - name: sanitize_html From dfbd4728a24cc0583f88d1dd245782479325d1cf Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Sun, 23 Mar 2025 17:19:12 +0100 Subject: [PATCH 06/11] feat: remove broken configs --- lib/html2rss/configs/bbc.com/news_stories.yml | 13 --------- lib/html2rss/configs/ifo.de/newsroom.yml | 27 ------------------- .../metacritic.com/movies_release_date.yml | 22 --------------- lib/html2rss/configs/philomag.de/index.yml | 23 ---------------- lib/html2rss/configs/pinboard.in/user.yml | 13 --------- lib/html2rss/configs/rnd.de/themen.yml | 17 ------------ .../configs/bbc.com/news_stories.yml_spec.rb | 5 ---- .../configs/ifo.de/newsroom.yml_spec.rb | 5 ---- .../movies_release_date.yml_spec.rb | 5 ---- .../configs/philomag.de/index.yml_spec.rb | 5 ---- .../configs/pinboard.in/user.yml_spec.rb | 5 ---- .../configs/rnd.de/themen.yml_spec.rb | 5 ---- spec/html2rss/configs_spec.rb | 2 +- 13 files changed, 1 insertion(+), 146 deletions(-) delete mode 100644 lib/html2rss/configs/bbc.com/news_stories.yml delete mode 100644 lib/html2rss/configs/ifo.de/newsroom.yml delete mode 100644 
lib/html2rss/configs/metacritic.com/movies_release_date.yml delete mode 100644 lib/html2rss/configs/philomag.de/index.yml delete mode 100644 lib/html2rss/configs/pinboard.in/user.yml delete mode 100644 lib/html2rss/configs/rnd.de/themen.yml delete mode 100644 spec/html2rss/configs/bbc.com/news_stories.yml_spec.rb delete mode 100644 spec/html2rss/configs/ifo.de/newsroom.yml_spec.rb delete mode 100644 spec/html2rss/configs/metacritic.com/movies_release_date.yml_spec.rb delete mode 100644 spec/html2rss/configs/philomag.de/index.yml_spec.rb delete mode 100644 spec/html2rss/configs/pinboard.in/user.yml_spec.rb delete mode 100644 spec/html2rss/configs/rnd.de/themen.yml_spec.rb diff --git a/lib/html2rss/configs/bbc.com/news_stories.yml b/lib/html2rss/configs/bbc.com/news_stories.yml deleted file mode 100644 index bbdd396..0000000 --- a/lib/html2rss/configs/bbc.com/news_stories.yml +++ /dev/null @@ -1,13 +0,0 @@ -channel: - url: https://bbc.com/news/stories - time_zone: Europe/London - ttl: 720 - language: en -selectors: - items: - selector: ".gel-layout__item" - title: - selector: "h3" - url: - selector: "a" - extractor: "href" diff --git a/lib/html2rss/configs/ifo.de/newsroom.yml b/lib/html2rss/configs/ifo.de/newsroom.yml deleted file mode 100644 index d7b749b..0000000 --- a/lib/html2rss/configs/ifo.de/newsroom.yml +++ /dev/null @@ -1,27 +0,0 @@ -channel: - url: https://www.ifo.de/get-template/pressemitteilungen/false/de - time_zone: Europe/Berlin - ttl: 720 - language: de - json: true -selectors: - items: - selector: "fields > array > object" - title: - selector: "title" - url: - selector: "url" - post_process: - - name: template - string: "https://www.ifo.de%{self}" - - name: "parse_uri" - categories: - - type - type: - selector: ".type" - description: - selector: "field_kurztext" - published_at: - selector: "field_date default" - post_process: - - name: parse_time diff --git a/lib/html2rss/configs/metacritic.com/movies_release_date.yml 
b/lib/html2rss/configs/metacritic.com/movies_release_date.yml deleted file mode 100644 index a815564..0000000 --- a/lib/html2rss/configs/metacritic.com/movies_release_date.yml +++ /dev/null @@ -1,22 +0,0 @@ -channel: - title: "metacritic.com: movies popular" - url: https://www.metacritic.com/browse/movies/release-date/theaters/date - time_zone: America/Los_Angeles - ttl: 180 -selectors: - items: - selector: ".clamp-list tr .clamp-summary-wrap" - title: - selector: "h3" - post_process: - - name: "template" - string: "%{self} (Score: %{score})" - score: - selector: ".clamp-score-wrap > .metascore_anchor > .metascore_w" - extractor: "text" - url: - selector: ".metascore_anchor" - extractor: "href" - description: - selector: ".summary" - extractor: "html" diff --git a/lib/html2rss/configs/philomag.de/index.yml b/lib/html2rss/configs/philomag.de/index.yml deleted file mode 100644 index 3f6d8c8..0000000 --- a/lib/html2rss/configs/philomag.de/index.yml +++ /dev/null @@ -1,23 +0,0 @@ ---- -channel: - url: https://www.philomag.de/ - language: de - ttl: 360 - time_zone: Europe/Berlin -selectors: - items: - selector: ".content article" - title: - selector: h2 - url: - selector: h2 a - extractor: href - description: - selector: p - extractor: html - post_process: - - name: sanitize_html - category: - selector: ".field" - categories: - - category diff --git a/lib/html2rss/configs/pinboard.in/user.yml b/lib/html2rss/configs/pinboard.in/user.yml deleted file mode 100644 index 70e28a7..0000000 --- a/lib/html2rss/configs/pinboard.in/user.yml +++ /dev/null @@ -1,13 +0,0 @@ -channel: - url: https://pinboard.in/u:%s?per_page=160 - time_zone: UTC - ttl: 720 - language: en -selectors: - items: - selector: "#bookmarks .bookmark" - title: - selector: ".bookmark_title" - url: - selector: ".bookmark_title" - extractor: "href" diff --git a/lib/html2rss/configs/rnd.de/themen.yml b/lib/html2rss/configs/rnd.de/themen.yml deleted file mode 100644 index 4788d7f..0000000 --- 
a/lib/html2rss/configs/rnd.de/themen.yml +++ /dev/null @@ -1,17 +0,0 @@ -channel: - url: https://www.rnd.de/themen/%s/ - time_zone: Europe/Berlin - ttl: 60 - language: de -selectors: - items: - selector: "div[class^='DoubleChainstyled']:first-child a[class*='ContentTeaser']" - title: - selector: "h2" - url: - extractor: "href" - description: - selector: "p" - extractor: html - post_process: - - name: sanitize_html diff --git a/spec/html2rss/configs/bbc.com/news_stories.yml_spec.rb b/spec/html2rss/configs/bbc.com/news_stories.yml_spec.rb deleted file mode 100644 index 102f25e..0000000 --- a/spec/html2rss/configs/bbc.com/news_stories.yml_spec.rb +++ /dev/null @@ -1,5 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe 'bbc.com/news_stories.yml' do - include_examples 'config.yml', 'bbc.com/news_stories.yml' -end diff --git a/spec/html2rss/configs/ifo.de/newsroom.yml_spec.rb b/spec/html2rss/configs/ifo.de/newsroom.yml_spec.rb deleted file mode 100644 index faf0202..0000000 --- a/spec/html2rss/configs/ifo.de/newsroom.yml_spec.rb +++ /dev/null @@ -1,5 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe 'ifo.de/newsroom.yml' do - include_examples 'config.yml', 'ifo.de/newsroom.yml' -end diff --git a/spec/html2rss/configs/metacritic.com/movies_release_date.yml_spec.rb b/spec/html2rss/configs/metacritic.com/movies_release_date.yml_spec.rb deleted file mode 100644 index bc938f4..0000000 --- a/spec/html2rss/configs/metacritic.com/movies_release_date.yml_spec.rb +++ /dev/null @@ -1,5 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe 'metacritic.com/movies_release_date.yml' do - include_examples 'config.yml', 'metacritic.com/movies_release_date.yml' -end diff --git a/spec/html2rss/configs/philomag.de/index.yml_spec.rb b/spec/html2rss/configs/philomag.de/index.yml_spec.rb deleted file mode 100644 index 821bdd4..0000000 --- a/spec/html2rss/configs/philomag.de/index.yml_spec.rb +++ /dev/null @@ -1,5 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe 
'philomag.de/index.yml' do - include_examples 'config.yml', 'philomag.de/index.yml' -end diff --git a/spec/html2rss/configs/pinboard.in/user.yml_spec.rb b/spec/html2rss/configs/pinboard.in/user.yml_spec.rb deleted file mode 100644 index 8a6407a..0000000 --- a/spec/html2rss/configs/pinboard.in/user.yml_spec.rb +++ /dev/null @@ -1,5 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe 'pinboard.in/user.yml' do - include_examples 'config.yml', 'pinboard.in/user.yml', username: :marcin -end diff --git a/spec/html2rss/configs/rnd.de/themen.yml_spec.rb b/spec/html2rss/configs/rnd.de/themen.yml_spec.rb deleted file mode 100644 index 3ff9a4b..0000000 --- a/spec/html2rss/configs/rnd.de/themen.yml_spec.rb +++ /dev/null @@ -1,5 +0,0 @@ -# frozen_string_literal: true - -RSpec.describe 'rnd.de/themen.yml' do - include_examples 'config.yml', 'rnd.de/themen.yml', thema: 'hamburg' -end diff --git a/spec/html2rss/configs_spec.rb b/spec/html2rss/configs_spec.rb index 4214368..825c441 100644 --- a/spec/html2rss/configs_spec.rb +++ b/spec/html2rss/configs_spec.rb @@ -15,7 +15,7 @@ describe '.find_by_name' do context 'with valid name' do - subject(:find_by_name) { described_class.find_by_name('ifo.de/newsroom') } + subject(:find_by_name) { described_class.find_by_name('adfc.de/pressemitteilungen') } specify(:aggregate_failures) do expect(find_by_name).to be_a(Hash) & be_frozen From e1a8e966874f08e950370d5545faad158dbaae0e Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Thu, 29 May 2025 13:45:11 +0200 Subject: [PATCH 07/11] fix: syntax changed on adfc pms --- lib/html2rss/configs/adfc.de/pressemitteilungen.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/html2rss/configs/adfc.de/pressemitteilungen.yml b/lib/html2rss/configs/adfc.de/pressemitteilungen.yml index a56071e..8592aad 100644 --- a/lib/html2rss/configs/adfc.de/pressemitteilungen.yml +++ b/lib/html2rss/configs/adfc.de/pressemitteilungen.yml @@ -5,12 +5,11 @@ channel: language: de selectors: 
items: - selector: ".articleteasercontainer .articleteaser" + selector: "#articleteasercontainer-columns .articleteaser" title: - selector: ".articleteaser-header-container h4" + selector: ".articleteaser-header-container a" url: selector: "a:first" extractor: "href" description: selector: ".articleteaser-header-container p" - extractor: "html" From e229d2f840ad2590c823ddb76d10955103ceb544 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Thu, 29 May 2025 13:47:00 +0200 Subject: [PATCH 08/11] chore: use html2rss from master branch --- Gemfile | 2 + Gemfile.lock | 174 +++++++++++++++++++++++++++++++++------------------ 2 files changed, 115 insertions(+), 61 deletions(-) diff --git a/Gemfile b/Gemfile index 88c890b..5795448 100644 --- a/Gemfile +++ b/Gemfile @@ -4,6 +4,8 @@ source 'https://rubygems.org' git_source(:github) { |repo_name| "https://github.com/#{repo_name}" } +gem 'html2rss', github: 'html2rss/html2rss', branch: :master + group :development do # gem 'html2rss-generator', path: '../generator' gem 'html2rss-generator', github: 'html2rss/generator', branch: :main diff --git a/Gemfile.lock b/Gemfile.lock index 19b0b0f..a149134 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,6 +1,6 @@ GIT remote: https://github.com/html2rss/generator - revision: 3a00f3c618ba6e548334180e53f3a3506da090a3 + revision: 05bca799648034e8937e08baaa47b03af4aed5b0 branch: main specs: html2rss-generator (0.1.0) @@ -10,6 +10,29 @@ GIT tty-markdown tty-prompt +GIT + remote: https://github.com/html2rss/html2rss + revision: 70540c7d2accdfab85fb52d3839b31a615ffe3b3 + branch: master + specs: + html2rss (0.17.0) + addressable (~> 2.7) + dry-validation + faraday (> 2.0.1, < 3.0) + faraday-follow_redirects + kramdown + mime-types (> 3.0) + nokogiri (>= 1.10, < 2.0) + parallel + puppeteer-ruby + regexp_parser + reverse_markdown (~> 3.0) + rss + sanitize + thor + tzinfo + zeitwerk + PATH remote: . 
specs: @@ -21,11 +44,48 @@ GEM specs: addressable (2.8.7) public_suffix (>= 2.0.2, < 7.0) - ast (2.4.2) - concurrent-ruby (1.3.4) + ast (2.4.3) + base64 (0.2.0) + bigdecimal (3.1.9) + concurrent-ruby (1.3.5) crass (1.0.6) - diff-lcs (1.5.1) - faraday (2.12.2) + diff-lcs (1.6.2) + dry-configurable (1.3.0) + dry-core (~> 1.1) + zeitwerk (~> 2.6) + dry-core (1.1.0) + concurrent-ruby (~> 1.0) + logger + zeitwerk (~> 2.6) + dry-inflector (1.2.0) + dry-initializer (3.2.0) + dry-logic (1.6.0) + bigdecimal + concurrent-ruby (~> 1.0) + dry-core (~> 1.1) + zeitwerk (~> 2.6) + dry-schema (1.14.1) + concurrent-ruby (~> 1.0) + dry-configurable (~> 1.0, >= 1.0.1) + dry-core (~> 1.1) + dry-initializer (~> 3.2) + dry-logic (~> 1.5) + dry-types (~> 1.8) + zeitwerk (~> 2.6) + dry-types (1.8.2) + bigdecimal (~> 3.0) + concurrent-ruby (~> 1.0) + dry-core (~> 1.0) + dry-inflector (~> 1.0) + dry-logic (~> 1.4) + zeitwerk (~> 2.6) + dry-validation (1.11.1) + concurrent-ruby (~> 1.0) + dry-core (~> 1.1) + dry-initializer (~> 3.2) + dry-schema (~> 1.14) + zeitwerk (~> 2.6) + faraday (2.13.1) faraday-net_http (>= 2.0, < 3.5) json logger @@ -34,95 +94,84 @@ GEM faraday-net_http (3.4.0) net-http (>= 0.5.0) hashie (5.0.0) - html2rss (0.16.0) - addressable (~> 2.7) - faraday (> 2.0.1, < 3.0) - faraday-follow_redirects - kramdown - mime-types (> 3.0) - nokogiri (>= 1.10, < 2.0) - parallel - puppeteer-ruby - regexp_parser - reverse_markdown (~> 3.0) - rss - sanitize (~> 6.0) - thor - tzinfo - zeitwerk (~> 2.6.0) htmlbeautifier (1.4.3) - json (2.9.1) + json (2.12.2) kramdown (2.5.1) rexml (>= 3.3.9) - language_server-protocol (3.17.0.3) - logger (1.6.4) - mime-types (3.6.0) + language_server-protocol (3.17.0.5) + lint_roller (1.1.0) + logger (1.7.0) + mime-types (3.7.0) logger - mime-types-data (~> 3.2015) - mime-types-data (3.2024.1203) + mime-types-data (~> 3.2025, >= 3.2025.0507) + mime-types-data (3.2025.0527) net-http (0.6.0) uri - nokogiri (1.16.7-arm64-darwin) + nokogiri 
(1.18.8-arm64-darwin) racc (~> 1.4) - nokogiri (1.16.7-x86_64-darwin) + nokogiri (1.18.8-x86_64-darwin) racc (~> 1.4) - nokogiri (1.16.7-x86_64-linux) + nokogiri (1.18.8-x86_64-linux-gnu) racc (~> 1.4) - parallel (1.26.3) - parser (3.3.4.2) + parallel (1.27.0) + parser (3.3.8.0) ast (~> 2.4.1) racc pastel (0.8.0) tty-color (~> 0.5) - public_suffix (6.0.1) + prism (1.4.0) + public_suffix (6.0.2) puppeteer-ruby (0.45.6) concurrent-ruby (>= 1.1, < 1.4) mime-types (>= 3.0) websocket-driver (>= 0.6.0) racc (1.8.1) rainbow (3.1.1) - regexp_parser (2.9.3) + regexp_parser (2.10.0) reverse_markdown (3.0.0) nokogiri - rexml (3.4.0) - rouge (4.3.0) - rspec (3.13.0) + rexml (3.4.1) + rouge (4.5.2) + rspec (3.13.1) rspec-core (~> 3.13.0) rspec-expectations (~> 3.13.0) rspec-mocks (~> 3.13.0) - rspec-core (3.13.0) + rspec-core (3.13.4) rspec-support (~> 3.13.0) - rspec-expectations (3.13.1) + rspec-expectations (3.13.5) diff-lcs (>= 1.2.0, < 2.0) rspec-support (~> 3.13.0) - rspec-mocks (3.13.1) + rspec-mocks (3.13.5) diff-lcs (>= 1.2.0, < 2.0) rspec-support (~> 3.13.0) - rspec-support (3.13.1) + rspec-support (3.13.4) rss (0.3.1) rexml - rubocop (1.65.1) + rubocop (1.75.8) json (~> 2.3) - language_server-protocol (>= 3.17.0) + language_server-protocol (~> 3.17.0.2) + lint_roller (~> 1.1.0) parallel (~> 1.10) parser (>= 3.3.0.2) rainbow (>= 2.2.2, < 4.0) - regexp_parser (>= 2.4, < 3.0) - rexml (>= 3.2.5, < 4.0) - rubocop-ast (>= 1.31.1, < 2.0) + regexp_parser (>= 2.9.3, < 3.0) + rubocop-ast (>= 1.44.0, < 2.0) ruby-progressbar (~> 1.7) - unicode-display_width (>= 2.4.0, < 3.0) - rubocop-ast (1.32.0) - parser (>= 3.3.1.0) - rubocop-performance (1.21.1) - rubocop (>= 1.48.1, < 2.0) - rubocop-ast (>= 1.31.1, < 2.0) - rubocop-rspec (3.0.4) - rubocop (~> 1.61) + unicode-display_width (>= 2.4.0, < 4.0) + rubocop-ast (1.44.1) + parser (>= 3.3.7.2) + prism (~> 1.4) + rubocop-performance (1.25.0) + lint_roller (~> 1.1) + rubocop (>= 1.75.0, < 2.0) + rubocop-ast (>= 1.38.0, < 2.0) + 
rubocop-rspec (3.6.0) + lint_roller (~> 1.1) + rubocop (~> 1.72, >= 1.72.1) ruby-progressbar (1.13.0) - sanitize (6.1.3) + sanitize (7.0.0) crass (~> 1.0.2) - nokogiri (>= 1.12.0) + nokogiri (>= 1.16.8) strings (0.2.1) strings-ansi (~> 0.2) unicode-display_width (>= 1.5, < 3.0) @@ -148,21 +197,24 @@ GEM tty-screen (0.8.2) tzinfo (2.0.6) concurrent-ruby (~> 1.0) - unicode-display_width (2.5.0) + unicode-display_width (2.6.0) unicode_utils (1.4.0) - uri (1.0.2) - websocket-driver (0.7.6) + uri (1.0.3) + websocket-driver (0.8.0) + base64 websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) wisper (2.0.1) - zeitwerk (2.6.18) + zeitwerk (2.7.3) PLATFORMS arm64-darwin-23 + arm64-darwin-24 x86_64-darwin-20 x86_64-linux DEPENDENCIES + html2rss! html2rss-configs! html2rss-generator! nokogiri @@ -172,4 +224,4 @@ DEPENDENCIES rubocop-rspec BUNDLED WITH - 2.5.17 + 2.6.6 From 624511a300c7cf034603e0d0d070960806c2fa3f Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Thu, 29 May 2025 13:49:03 +0200 Subject: [PATCH 09/11] style: apply rubocop autocorrections --- spec/html2rss/configs/adfc.de/pressemitteilungen.yml_spec.rb | 2 +- spec/html2rss/configs/apnews.com/hub.yml_spec.rb | 2 +- spec/html2rss/configs/avherald.com/index.yml_spec.rb | 2 +- spec/html2rss/configs/bbc.co.uk/available_episodes.yml_spec.rb | 2 +- spec/html2rss/configs/bbc.com/mundo.yml_spec.rb | 2 +- spec/html2rss/configs/canarianweekly.com/front.yml_spec.rb | 2 +- spec/html2rss/configs/cinemascore.com/index.yml_spec.rb | 2 +- spec/html2rss/configs/cleanenergywire.org/news.yml_spec.rb | 2 +- spec/html2rss/configs/cnet.com/section_sub.yml_spec.rb | 2 +- spec/html2rss/configs/computerbase.de/meistgelesen.yml_spec.rb | 2 +- spec/html2rss/configs/cutle.fish/index.yml_spec.rb | 2 +- spec/html2rss/configs/deraktionaer.de/meistgelesen.yml_spec.rb | 2 +- .../tutorials_data_documentation_technotes_json.yml_spec.rb | 2 +- spec/html2rss/configs/dfs.de/pressemitteilungen.yml_spec.rb | 2 +- 
spec/html2rss/configs/dsw-info.de/presse.yml_spec.rb | 2 +- spec/html2rss/configs/espn.com/f1.yml_spec.rb | 2 +- spec/html2rss/configs/fia.com/documents.yml_spec.rb | 2 +- spec/html2rss/configs/formula1.com/latest.yml_spec.rb | 2 +- spec/html2rss/configs/github.com/releases.yml_spec.rb | 2 +- spec/html2rss/configs/iaapa.org/news.yml_spec.rb | 2 +- spec/html2rss/configs/imdb.com/ratings.yml_spec.rb | 2 +- .../ingenieur.de/karriere_arbeitsleben_heiko_mell.yml_spec.rb | 2 +- spec/html2rss/configs/kinocheck.de/filmstarts.yml_spec.rb | 2 +- spec/html2rss/configs/newyorker.com/magazine.yml_spec.rb | 2 +- spec/html2rss/configs/nomanssky.com/news.yml_spec.rb | 2 +- .../search.yml_spec.rb | 2 +- spec/html2rss/configs/phys.org/weekly.yml_spec.rb | 2 +- spec/html2rss/configs/rbb24.de/meistgeklickt.yml_spec.rb | 2 +- spec/html2rss/configs/robinwood.de/aktuelles.yml_spec.rb | 2 +- .../configs/s3.amazonaws.com/popular_movies.yml_spec.rb | 2 +- spec/html2rss/configs/sebastianvettel.de/news.yml_spec.rb | 2 +- spec/html2rss/configs/softwareleadweekly.com/issues.yml_spec.rb | 2 +- spec/html2rss/configs/solarthermalworld.org/news.yml_spec.rb | 2 +- spec/html2rss/configs/spektrum.de/meistgelesen.yml_spec.rb | 2 +- spec/html2rss/configs/spiegel.de/impressum_autor.yml_spec.rb | 2 +- .../configs/stackoverflow.com/hot_network_questions.yml_spec.rb | 2 +- spec/html2rss/configs/steuerzahler.de/news.yml_spec.rb | 2 +- spec/html2rss/configs/stripes.com/index.yml_spec.rb | 2 +- .../configs/support.apple.com/en_gb_ht201222.yml_spec.rb | 2 +- .../configs/support.apple.com/exchange_repair.yml_spec.rb | 2 +- spec/html2rss/configs/teneriffa-news.com/news.yml_spec.rb | 2 +- spec/html2rss/configs/test.de/archiv.yml_spec.rb | 2 +- .../theguardian.com/international_mostpopular.yml_spec.rb | 2 +- spec/html2rss/configs/thoughtworks.com/insights.yml_spec.rb | 2 +- .../aktuelle_nachrichten.yml_spec.rb | 2 +- spec/html2rss/configs/webentwickler-jobs.de/in.yml_spec.rb | 2 +- 46 files changed, 46 
insertions(+), 46 deletions(-) diff --git a/spec/html2rss/configs/adfc.de/pressemitteilungen.yml_spec.rb b/spec/html2rss/configs/adfc.de/pressemitteilungen.yml_spec.rb index 395d35c..d315fcf 100644 --- a/spec/html2rss/configs/adfc.de/pressemitteilungen.yml_spec.rb +++ b/spec/html2rss/configs/adfc.de/pressemitteilungen.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'adfc.de/pressemitteilungen.yml' do - include_examples 'config.yml', 'adfc.de/pressemitteilungen.yml' + it_behaves_like 'config.yml', 'adfc.de/pressemitteilungen.yml' end diff --git a/spec/html2rss/configs/apnews.com/hub.yml_spec.rb b/spec/html2rss/configs/apnews.com/hub.yml_spec.rb index ba0942e..1170446 100644 --- a/spec/html2rss/configs/apnews.com/hub.yml_spec.rb +++ b/spec/html2rss/configs/apnews.com/hub.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'apnews.com/hub.yml' do - include_examples 'config.yml', 'apnews.com/hub.yml', section: 'world-news' + it_behaves_like 'config.yml', 'apnews.com/hub.yml', section: 'world-news' end diff --git a/spec/html2rss/configs/avherald.com/index.yml_spec.rb b/spec/html2rss/configs/avherald.com/index.yml_spec.rb index e7aed18..7312fc2 100644 --- a/spec/html2rss/configs/avherald.com/index.yml_spec.rb +++ b/spec/html2rss/configs/avherald.com/index.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'avherald.com/index.yml' do - include_examples 'config.yml', 'avherald.com/index.yml' + it_behaves_like 'config.yml', 'avherald.com/index.yml' end diff --git a/spec/html2rss/configs/bbc.co.uk/available_episodes.yml_spec.rb b/spec/html2rss/configs/bbc.co.uk/available_episodes.yml_spec.rb index 7600d7e..79d63f3 100644 --- a/spec/html2rss/configs/bbc.co.uk/available_episodes.yml_spec.rb +++ b/spec/html2rss/configs/bbc.co.uk/available_episodes.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'bbc.co.uk/available-episodes.yml' do - include_examples 'config.yml', 
'bbc.co.uk/available_episodes.yml', id: 'b006wkfp' + it_behaves_like 'config.yml', 'bbc.co.uk/available_episodes.yml', id: 'b006wkfp' end diff --git a/spec/html2rss/configs/bbc.com/mundo.yml_spec.rb b/spec/html2rss/configs/bbc.com/mundo.yml_spec.rb index 4e8b499..757994f 100644 --- a/spec/html2rss/configs/bbc.com/mundo.yml_spec.rb +++ b/spec/html2rss/configs/bbc.com/mundo.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'bbc.com/mundo.yml' do - include_examples 'config.yml', 'bbc.com/mundo.yml' + it_behaves_like 'config.yml', 'bbc.com/mundo.yml' end diff --git a/spec/html2rss/configs/canarianweekly.com/front.yml_spec.rb b/spec/html2rss/configs/canarianweekly.com/front.yml_spec.rb index 3561366..ea2b915 100644 --- a/spec/html2rss/configs/canarianweekly.com/front.yml_spec.rb +++ b/spec/html2rss/configs/canarianweekly.com/front.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'canarianweekly.com/front.yml' do - include_examples 'config.yml', 'canarianweekly.com/front.yml' + it_behaves_like 'config.yml', 'canarianweekly.com/front.yml' end diff --git a/spec/html2rss/configs/cinemascore.com/index.yml_spec.rb b/spec/html2rss/configs/cinemascore.com/index.yml_spec.rb index 269ac3e..f4149b6 100644 --- a/spec/html2rss/configs/cinemascore.com/index.yml_spec.rb +++ b/spec/html2rss/configs/cinemascore.com/index.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'cinemascore.com/index.yml' do - include_examples 'config.yml', 'cinemascore.com/index.yml' + it_behaves_like 'config.yml', 'cinemascore.com/index.yml' end diff --git a/spec/html2rss/configs/cleanenergywire.org/news.yml_spec.rb b/spec/html2rss/configs/cleanenergywire.org/news.yml_spec.rb index 949b680..313d0d5 100644 --- a/spec/html2rss/configs/cleanenergywire.org/news.yml_spec.rb +++ b/spec/html2rss/configs/cleanenergywire.org/news.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'cleanenergywire.org/news.yml' do - 
include_examples 'config.yml', 'cleanenergywire.org/news.yml' + it_behaves_like 'config.yml', 'cleanenergywire.org/news.yml' end diff --git a/spec/html2rss/configs/cnet.com/section_sub.yml_spec.rb b/spec/html2rss/configs/cnet.com/section_sub.yml_spec.rb index 8dceaf9..7940c97 100644 --- a/spec/html2rss/configs/cnet.com/section_sub.yml_spec.rb +++ b/spec/html2rss/configs/cnet.com/section_sub.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'cnet.com/section_sub.yml' do - include_examples 'config.yml', 'cnet.com/section_sub.yml', section: 'culture', sub: 'internet' + it_behaves_like 'config.yml', 'cnet.com/section_sub.yml', section: 'culture', sub: 'internet' end diff --git a/spec/html2rss/configs/computerbase.de/meistgelesen.yml_spec.rb b/spec/html2rss/configs/computerbase.de/meistgelesen.yml_spec.rb index 2ede28e..fb34173 100644 --- a/spec/html2rss/configs/computerbase.de/meistgelesen.yml_spec.rb +++ b/spec/html2rss/configs/computerbase.de/meistgelesen.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'computerbase.de/meistgelesen.yml' do - include_examples 'config.yml', 'computerbase.de/meistgelesen.yml' + it_behaves_like 'config.yml', 'computerbase.de/meistgelesen.yml' end diff --git a/spec/html2rss/configs/cutle.fish/index.yml_spec.rb b/spec/html2rss/configs/cutle.fish/index.yml_spec.rb index fc46aac..d8f0aeb 100644 --- a/spec/html2rss/configs/cutle.fish/index.yml_spec.rb +++ b/spec/html2rss/configs/cutle.fish/index.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'cutle.fish/index.yml' do - include_examples 'config.yml', 'cutle.fish/index.yml' + it_behaves_like 'config.yml', 'cutle.fish/index.yml' end diff --git a/spec/html2rss/configs/deraktionaer.de/meistgelesen.yml_spec.rb b/spec/html2rss/configs/deraktionaer.de/meistgelesen.yml_spec.rb index b8cf0b1..695a082 100644 --- a/spec/html2rss/configs/deraktionaer.de/meistgelesen.yml_spec.rb +++ 
b/spec/html2rss/configs/deraktionaer.de/meistgelesen.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'deraktionaer.de/meistgelesen.yml' do - include_examples 'config.yml', 'deraktionaer.de/meistgelesen.yml' + it_behaves_like 'config.yml', 'deraktionaer.de/meistgelesen.yml' end diff --git a/spec/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml_spec.rb b/spec/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml_spec.rb index baab382..ddeb4e1 100644 --- a/spec/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml_spec.rb +++ b/spec/html2rss/configs/developer.apple.com/tutorials_data_documentation_technotes_json.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'developer.apple.com/tutorials_data_documentation_technotes_json.yml' do - include_examples 'config.yml', 'developer.apple.com/tutorials_data_documentation_technotes_json.yml' + it_behaves_like 'config.yml', 'developer.apple.com/tutorials_data_documentation_technotes_json.yml' end diff --git a/spec/html2rss/configs/dfs.de/pressemitteilungen.yml_spec.rb b/spec/html2rss/configs/dfs.de/pressemitteilungen.yml_spec.rb index bc5c79f..750426e 100644 --- a/spec/html2rss/configs/dfs.de/pressemitteilungen.yml_spec.rb +++ b/spec/html2rss/configs/dfs.de/pressemitteilungen.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'dfs.de/pressemitteilungen.yml' do - include_examples 'config.yml', 'dfs.de/pressemitteilungen.yml' + it_behaves_like 'config.yml', 'dfs.de/pressemitteilungen.yml' end diff --git a/spec/html2rss/configs/dsw-info.de/presse.yml_spec.rb b/spec/html2rss/configs/dsw-info.de/presse.yml_spec.rb index 6933f57..99eb736 100644 --- a/spec/html2rss/configs/dsw-info.de/presse.yml_spec.rb +++ b/spec/html2rss/configs/dsw-info.de/presse.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'dsw-info.de/presse.yml' do - include_examples 
'config.yml', 'dsw-info.de/presse.yml' + it_behaves_like 'config.yml', 'dsw-info.de/presse.yml' end diff --git a/spec/html2rss/configs/espn.com/f1.yml_spec.rb b/spec/html2rss/configs/espn.com/f1.yml_spec.rb index 5548d20..50ebed5 100644 --- a/spec/html2rss/configs/espn.com/f1.yml_spec.rb +++ b/spec/html2rss/configs/espn.com/f1.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'espn.com/f1.yml' do - include_examples 'config.yml', 'espn.com/f1.yml' + it_behaves_like 'config.yml', 'espn.com/f1.yml' end diff --git a/spec/html2rss/configs/fia.com/documents.yml_spec.rb b/spec/html2rss/configs/fia.com/documents.yml_spec.rb index e0dab6e..ae6ad64 100644 --- a/spec/html2rss/configs/fia.com/documents.yml_spec.rb +++ b/spec/html2rss/configs/fia.com/documents.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'fia.com/documents.yml' do - include_examples 'config.yml', 'fia.com/documents.yml' + it_behaves_like 'config.yml', 'fia.com/documents.yml' end diff --git a/spec/html2rss/configs/formula1.com/latest.yml_spec.rb b/spec/html2rss/configs/formula1.com/latest.yml_spec.rb index 4ce7a8a..8841bd7 100644 --- a/spec/html2rss/configs/formula1.com/latest.yml_spec.rb +++ b/spec/html2rss/configs/formula1.com/latest.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'formula1.com/latest.yml' do - include_examples 'config.yml', 'formula1.com/latest.yml' + it_behaves_like 'config.yml', 'formula1.com/latest.yml' end diff --git a/spec/html2rss/configs/github.com/releases.yml_spec.rb b/spec/html2rss/configs/github.com/releases.yml_spec.rb index 9fa2f87..319ee6a 100644 --- a/spec/html2rss/configs/github.com/releases.yml_spec.rb +++ b/spec/html2rss/configs/github.com/releases.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'github.com/releases.yml' do - include_examples 'config.yml', 'github.com/releases.yml', username: 'nuxt', repository: 'nuxt.js' + it_behaves_like 'config.yml', 
'github.com/releases.yml', username: 'nuxt', repository: 'nuxt.js' end diff --git a/spec/html2rss/configs/iaapa.org/news.yml_spec.rb b/spec/html2rss/configs/iaapa.org/news.yml_spec.rb index fb39cd3..0736743 100644 --- a/spec/html2rss/configs/iaapa.org/news.yml_spec.rb +++ b/spec/html2rss/configs/iaapa.org/news.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'iaapa.org/news.yml' do - include_examples 'config.yml', 'iaapa.org/news.yml' + it_behaves_like 'config.yml', 'iaapa.org/news.yml' end diff --git a/spec/html2rss/configs/imdb.com/ratings.yml_spec.rb b/spec/html2rss/configs/imdb.com/ratings.yml_spec.rb index f6d9346..b233986 100644 --- a/spec/html2rss/configs/imdb.com/ratings.yml_spec.rb +++ b/spec/html2rss/configs/imdb.com/ratings.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'imdb.com/ratings.yml' do - include_examples 'config.yml', 'imdb.com/ratings.yml', user_id: 'ur7019649' + it_behaves_like 'config.yml', 'imdb.com/ratings.yml', user_id: 'ur7019649' end diff --git a/spec/html2rss/configs/ingenieur.de/karriere_arbeitsleben_heiko_mell.yml_spec.rb b/spec/html2rss/configs/ingenieur.de/karriere_arbeitsleben_heiko_mell.yml_spec.rb index 2e2bcc3..4a1ea47 100644 --- a/spec/html2rss/configs/ingenieur.de/karriere_arbeitsleben_heiko_mell.yml_spec.rb +++ b/spec/html2rss/configs/ingenieur.de/karriere_arbeitsleben_heiko_mell.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'ingenieur.de/karriere_arbeitsleben_heiko_mell.yml' do - include_examples 'config.yml', 'ingenieur.de/karriere_arbeitsleben_heiko_mell.yml' + it_behaves_like 'config.yml', 'ingenieur.de/karriere_arbeitsleben_heiko_mell.yml' end diff --git a/spec/html2rss/configs/kinocheck.de/filmstarts.yml_spec.rb b/spec/html2rss/configs/kinocheck.de/filmstarts.yml_spec.rb index a128cbe..24bec98 100644 --- a/spec/html2rss/configs/kinocheck.de/filmstarts.yml_spec.rb +++ b/spec/html2rss/configs/kinocheck.de/filmstarts.yml_spec.rb @@ -1,5 +1,5 @@ # 
frozen_string_literal: true RSpec.describe 'kinocheck.de/filmstarts.yml' do - include_examples 'config.yml', 'kinocheck.de/filmstarts.yml' + it_behaves_like 'config.yml', 'kinocheck.de/filmstarts.yml' end diff --git a/spec/html2rss/configs/newyorker.com/magazine.yml_spec.rb b/spec/html2rss/configs/newyorker.com/magazine.yml_spec.rb index 8d93ab7..ddf84ba 100644 --- a/spec/html2rss/configs/newyorker.com/magazine.yml_spec.rb +++ b/spec/html2rss/configs/newyorker.com/magazine.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'newyorker.com/magazine.yml' do - include_examples 'config.yml', 'newyorker.com/magazine.yml' + it_behaves_like 'config.yml', 'newyorker.com/magazine.yml' end diff --git a/spec/html2rss/configs/nomanssky.com/news.yml_spec.rb b/spec/html2rss/configs/nomanssky.com/news.yml_spec.rb index c697ee8..260600b 100644 --- a/spec/html2rss/configs/nomanssky.com/news.yml_spec.rb +++ b/spec/html2rss/configs/nomanssky.com/news.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'nomanssky.com/news.yml' do - include_examples 'config.yml', 'nomanssky.com/news.yml' + it_behaves_like 'config.yml', 'nomanssky.com/news.yml' end diff --git a/spec/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml_spec.rb b/spec/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml_spec.rb index 720d901..07eeb3c 100644 --- a/spec/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml_spec.rb +++ b/spec/html2rss/configs/pankow.lebensmittel-kontrollergebnisse.de/search.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'pankow.lebensmittel-kontrollergebnisse.de/search.yml' do - include_examples 'config.yml', 'pankow.lebensmittel-kontrollergebnisse.de/search.yml' + it_behaves_like 'config.yml', 'pankow.lebensmittel-kontrollergebnisse.de/search.yml' end diff --git a/spec/html2rss/configs/phys.org/weekly.yml_spec.rb b/spec/html2rss/configs/phys.org/weekly.yml_spec.rb index 
dd9340c..6392571 100644 --- a/spec/html2rss/configs/phys.org/weekly.yml_spec.rb +++ b/spec/html2rss/configs/phys.org/weekly.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'phys.org/weekly.yml' do - include_examples 'config.yml', 'phys.org/weekly.yml' + it_behaves_like 'config.yml', 'phys.org/weekly.yml' end diff --git a/spec/html2rss/configs/rbb24.de/meistgeklickt.yml_spec.rb b/spec/html2rss/configs/rbb24.de/meistgeklickt.yml_spec.rb index 10a434a..2c3211d 100644 --- a/spec/html2rss/configs/rbb24.de/meistgeklickt.yml_spec.rb +++ b/spec/html2rss/configs/rbb24.de/meistgeklickt.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'rbb24.de/meistgeklickt.yml' do - include_examples 'config.yml', 'rbb24.de/meistgeklickt.yml' + it_behaves_like 'config.yml', 'rbb24.de/meistgeklickt.yml' end diff --git a/spec/html2rss/configs/robinwood.de/aktuelles.yml_spec.rb b/spec/html2rss/configs/robinwood.de/aktuelles.yml_spec.rb index 40cb264..86ae0e8 100644 --- a/spec/html2rss/configs/robinwood.de/aktuelles.yml_spec.rb +++ b/spec/html2rss/configs/robinwood.de/aktuelles.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'robinwood.de/aktuelles.yml' do - include_examples 'config.yml', 'robinwood.de/aktuelles.yml' + it_behaves_like 'config.yml', 'robinwood.de/aktuelles.yml' end diff --git a/spec/html2rss/configs/s3.amazonaws.com/popular_movies.yml_spec.rb b/spec/html2rss/configs/s3.amazonaws.com/popular_movies.yml_spec.rb index 937b916..045b767 100644 --- a/spec/html2rss/configs/s3.amazonaws.com/popular_movies.yml_spec.rb +++ b/spec/html2rss/configs/s3.amazonaws.com/popular_movies.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 's3.amaoznaws.com/popular_movies.yml' do - include_examples 'config.yml', 's3.amazonaws.com/popular_movies.yml' + it_behaves_like 'config.yml', 's3.amazonaws.com/popular_movies.yml' end diff --git a/spec/html2rss/configs/sebastianvettel.de/news.yml_spec.rb 
b/spec/html2rss/configs/sebastianvettel.de/news.yml_spec.rb index 0eb24bf..4faaafb 100644 --- a/spec/html2rss/configs/sebastianvettel.de/news.yml_spec.rb +++ b/spec/html2rss/configs/sebastianvettel.de/news.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'sebastianvettel.de/news.yml' do - include_examples 'config.yml', 'sebastianvettel.de/news.yml' + it_behaves_like 'config.yml', 'sebastianvettel.de/news.yml' end diff --git a/spec/html2rss/configs/softwareleadweekly.com/issues.yml_spec.rb b/spec/html2rss/configs/softwareleadweekly.com/issues.yml_spec.rb index e90354b..59191df 100644 --- a/spec/html2rss/configs/softwareleadweekly.com/issues.yml_spec.rb +++ b/spec/html2rss/configs/softwareleadweekly.com/issues.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'softwareleadweekly.com/issues.yml' do - include_examples 'config.yml', 'softwareleadweekly.com/issues.yml' + it_behaves_like 'config.yml', 'softwareleadweekly.com/issues.yml' end diff --git a/spec/html2rss/configs/solarthermalworld.org/news.yml_spec.rb b/spec/html2rss/configs/solarthermalworld.org/news.yml_spec.rb index 9a12b9d..65e3df0 100644 --- a/spec/html2rss/configs/solarthermalworld.org/news.yml_spec.rb +++ b/spec/html2rss/configs/solarthermalworld.org/news.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'solarthermalworld.org/news.yml' do - include_examples 'config.yml', 'solarthermalworld.org/news.yml' + it_behaves_like 'config.yml', 'solarthermalworld.org/news.yml' end diff --git a/spec/html2rss/configs/spektrum.de/meistgelesen.yml_spec.rb b/spec/html2rss/configs/spektrum.de/meistgelesen.yml_spec.rb index 83656b2..7b963e4 100644 --- a/spec/html2rss/configs/spektrum.de/meistgelesen.yml_spec.rb +++ b/spec/html2rss/configs/spektrum.de/meistgelesen.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'spektrum.de/meistgelesen.yml' do - include_examples 'config.yml', 'spektrum.de/meistgelesen.yml' + it_behaves_like 
'config.yml', 'spektrum.de/meistgelesen.yml' end diff --git a/spec/html2rss/configs/spiegel.de/impressum_autor.yml_spec.rb b/spec/html2rss/configs/spiegel.de/impressum_autor.yml_spec.rb index 5bb5d8d..db7272b 100644 --- a/spec/html2rss/configs/spiegel.de/impressum_autor.yml_spec.rb +++ b/spec/html2rss/configs/spiegel.de/impressum_autor.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'spiegel.de/impressum_autor.yml' do - include_examples 'config.yml', 'spiegel.de/impressum_autor.yml', id: '975b6ae0-0001-0003-0000-000000018282' + it_behaves_like 'config.yml', 'spiegel.de/impressum_autor.yml', id: '975b6ae0-0001-0003-0000-000000018282' end diff --git a/spec/html2rss/configs/stackoverflow.com/hot_network_questions.yml_spec.rb b/spec/html2rss/configs/stackoverflow.com/hot_network_questions.yml_spec.rb index e4967b0..b705193 100644 --- a/spec/html2rss/configs/stackoverflow.com/hot_network_questions.yml_spec.rb +++ b/spec/html2rss/configs/stackoverflow.com/hot_network_questions.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'stackoverflow.com/hot_network_questions.yml' do - include_examples 'config.yml', 'stackoverflow.com/hot_network_questions.yml' + it_behaves_like 'config.yml', 'stackoverflow.com/hot_network_questions.yml' end diff --git a/spec/html2rss/configs/steuerzahler.de/news.yml_spec.rb b/spec/html2rss/configs/steuerzahler.de/news.yml_spec.rb index f1d9700..b7400c8 100644 --- a/spec/html2rss/configs/steuerzahler.de/news.yml_spec.rb +++ b/spec/html2rss/configs/steuerzahler.de/news.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'steuerzahler.de/news.yml' do - include_examples 'config.yml', 'steuerzahler.de/news.yml' + it_behaves_like 'config.yml', 'steuerzahler.de/news.yml' end diff --git a/spec/html2rss/configs/stripes.com/index.yml_spec.rb b/spec/html2rss/configs/stripes.com/index.yml_spec.rb index ea0877a..281c8d5 100644 --- a/spec/html2rss/configs/stripes.com/index.yml_spec.rb +++ 
b/spec/html2rss/configs/stripes.com/index.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'stripes.com/index.yml' do - include_examples 'config.yml', 'stripes.com/index.yml' + it_behaves_like 'config.yml', 'stripes.com/index.yml' end diff --git a/spec/html2rss/configs/support.apple.com/en_gb_ht201222.yml_spec.rb b/spec/html2rss/configs/support.apple.com/en_gb_ht201222.yml_spec.rb index 566b89c..a17f5ef 100644 --- a/spec/html2rss/configs/support.apple.com/en_gb_ht201222.yml_spec.rb +++ b/spec/html2rss/configs/support.apple.com/en_gb_ht201222.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'support.apple.com/en_gb_ht201222.yml' do - include_examples 'config.yml', 'support.apple.com/en_gb_ht201222.yml' + it_behaves_like 'config.yml', 'support.apple.com/en_gb_ht201222.yml' end diff --git a/spec/html2rss/configs/support.apple.com/exchange_repair.yml_spec.rb b/spec/html2rss/configs/support.apple.com/exchange_repair.yml_spec.rb index 57f1df7..36a3f49 100644 --- a/spec/html2rss/configs/support.apple.com/exchange_repair.yml_spec.rb +++ b/spec/html2rss/configs/support.apple.com/exchange_repair.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'support.apple.com/exchange_repair.yml' do - include_examples 'config.yml', 'support.apple.com/exchange_repair.yml' + it_behaves_like 'config.yml', 'support.apple.com/exchange_repair.yml' end diff --git a/spec/html2rss/configs/teneriffa-news.com/news.yml_spec.rb b/spec/html2rss/configs/teneriffa-news.com/news.yml_spec.rb index ba4bb60..5eeda50 100644 --- a/spec/html2rss/configs/teneriffa-news.com/news.yml_spec.rb +++ b/spec/html2rss/configs/teneriffa-news.com/news.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'teneriffa-news.com/news.yml' do - include_examples 'config.yml', 'teneriffa-news.com/news.yml' + it_behaves_like 'config.yml', 'teneriffa-news.com/news.yml' end diff --git a/spec/html2rss/configs/test.de/archiv.yml_spec.rb 
b/spec/html2rss/configs/test.de/archiv.yml_spec.rb index 7b151db..36847a8 100644 --- a/spec/html2rss/configs/test.de/archiv.yml_spec.rb +++ b/spec/html2rss/configs/test.de/archiv.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'test.de/archiv.yml' do - include_examples 'config.yml', 'test.de/archiv.yml' + it_behaves_like 'config.yml', 'test.de/archiv.yml' end diff --git a/spec/html2rss/configs/theguardian.com/international_mostpopular.yml_spec.rb b/spec/html2rss/configs/theguardian.com/international_mostpopular.yml_spec.rb index 094a7ed..d535a0e 100644 --- a/spec/html2rss/configs/theguardian.com/international_mostpopular.yml_spec.rb +++ b/spec/html2rss/configs/theguardian.com/international_mostpopular.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'theguardian.com/international_mostpopular.yml' do - include_examples 'config.yml', 'theguardian.com/international_mostpopular.yml' + it_behaves_like 'config.yml', 'theguardian.com/international_mostpopular.yml' end diff --git a/spec/html2rss/configs/thoughtworks.com/insights.yml_spec.rb b/spec/html2rss/configs/thoughtworks.com/insights.yml_spec.rb index 21de5d4..9832aa5 100644 --- a/spec/html2rss/configs/thoughtworks.com/insights.yml_spec.rb +++ b/spec/html2rss/configs/thoughtworks.com/insights.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'thoughtworks.com/insights.yml' do - include_examples 'config.yml', 'thoughtworks.com/insights.yml' + it_behaves_like 'config.yml', 'thoughtworks.com/insights.yml' end diff --git a/spec/html2rss/configs/tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml_spec.rb b/spec/html2rss/configs/tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml_spec.rb index bfd376e..04ad77c 100644 --- a/spec/html2rss/configs/tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml_spec.rb +++ b/spec/html2rss/configs/tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml_spec.rb @@ -1,5 +1,5 @@ # 
frozen_string_literal: true RSpec.describe 'tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml' do - include_examples 'config.yml', 'tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml' + it_behaves_like 'config.yml', 'tourismusnetzwerk-brandenburg.de/aktuelle_nachrichten.yml' end diff --git a/spec/html2rss/configs/webentwickler-jobs.de/in.yml_spec.rb b/spec/html2rss/configs/webentwickler-jobs.de/in.yml_spec.rb index 3b88942..c7bd2d3 100644 --- a/spec/html2rss/configs/webentwickler-jobs.de/in.yml_spec.rb +++ b/spec/html2rss/configs/webentwickler-jobs.de/in.yml_spec.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true RSpec.describe 'webentwickler-jobs.de/in.yml' do - include_examples 'config.yml', 'webentwickler-jobs.de/in.yml', region: 'berlin' + it_behaves_like 'config.yml', 'webentwickler-jobs.de/in.yml', region: 'berlin' end From 8addc84e0c751f23b76c25e56cf0eb9705166a60 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Thu, 29 May 2025 14:17:02 +0200 Subject: [PATCH 10/11] spec: prevent frozen string error --- spec/spec_helper.rb | 2 ++ .../shared_examples/config.yml_spec.rb | 24 +++++++------------ 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 3cba95f..0c977d5 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -7,6 +7,8 @@ Dir['./spec/support/**/*.rb'].each { |f| require f } +Zeitwerk::Loader.eager_load_all + RSpec.configure do |config| # Enable flags like --only-failures and --next-failure config.example_status_persistence_file_path = '.rspec_status' diff --git a/spec/support/shared_examples/config.yml_spec.rb b/spec/support/shared_examples/config.yml_spec.rb index fd170a2..1f556a7 100644 --- a/spec/support/shared_examples/config.yml_spec.rb +++ b/spec/support/shared_examples/config.yml_spec.rb @@ -12,7 +12,12 @@ let(:global_config) do { 'headers' => { - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:67.0) Gecko/20100101 Firefox/67.0' + 'User-Agent': 
<<~UA.tr("\n", ' ').strip + Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) + AppleWebKit/537.36 (KHTML, like Gecko) + Chrome/134.0.0.0 + Safari/537.36 + UA } } end @@ -20,7 +25,7 @@ feed_name = file_path.split(File::Separator)[-2..].join(File::Separator) config = {}.merge Html2rss::Configs.find_by_name(feed_name) - config.merge!(global_config) + config.merge!(global_config.dup) config[:params] = params if params config end @@ -63,17 +68,6 @@ end end - context 'with sanitize_html post_processor' do - it 'is used for description selector' do - if (description_selector = yaml['selectors']['description']) - post_processors = [description_selector['post_process']].flatten.compact - sanitize_html = post_processors.select { |p| p['name'] == 'sanitize_html' } - - expect(sanitize_html).not_to be_nil - end - end - end - context 'with template post_processor' do it 'references available selectors only', :aggregate_failures do Helper.referenced_selectors_in_template(yaml['selectors']).each do |referenced_selector| @@ -97,7 +91,7 @@ end context "when fetching #{params}", :fetch do - subject(:feed) { Html2rss.feed(config) } + subject(:feed) { Html2rss.feed(config.dup) } it 'has positive amount of items' do expect(feed.items.count).to be_positive, <<~MSG @@ -115,7 +109,7 @@ context "when fetching #{params} / item", :fetch do subject(:item) do - items = Html2rss.feed(config).items + items = Html2rss.feed(config.dup).items expect(items.count).not_to be_zero, "Zero items fetched for `#{file_name}`" From cc6f61f4ac6259337f174f8fc573d9a7fc706d45 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Thu, 29 May 2025 14:44:05 +0200 Subject: [PATCH 11/11] feat: sanitize_html is called automatically for description --- lib/html2rss/configs/dsw-info.de/presse.yml | 2 -- lib/html2rss/configs/fia.com/documents.yml | 2 -- lib/html2rss/configs/sebastianvettel.de/news.yml | 2 -- 3 files changed, 6 deletions(-) diff --git a/lib/html2rss/configs/dsw-info.de/presse.yml
b/lib/html2rss/configs/dsw-info.de/presse.yml index a8e5d05..afab1ec 100644 --- a/lib/html2rss/configs/dsw-info.de/presse.yml +++ b/lib/html2rss/configs/dsw-info.de/presse.yml @@ -17,5 +17,3 @@ selectors: - name: parse_time description: selector: ".bt small" - post_process: - - name: "sanitize_html" diff --git a/lib/html2rss/configs/fia.com/documents.yml b/lib/html2rss/configs/fia.com/documents.yml index f44e392..4b085c7 100644 --- a/lib/html2rss/configs/fia.com/documents.yml +++ b/lib/html2rss/configs/fia.com/documents.yml @@ -14,5 +14,3 @@ selectors: description: selector: a extractor: html - post_process: - - name: sanitize_html diff --git a/lib/html2rss/configs/sebastianvettel.de/news.yml b/lib/html2rss/configs/sebastianvettel.de/news.yml index f368a33..be4deb2 100644 --- a/lib/html2rss/configs/sebastianvettel.de/news.yml +++ b/lib/html2rss/configs/sebastianvettel.de/news.yml @@ -12,5 +12,3 @@ selectors: description: selector: ".row-text-bild-modul" extractor: html - post_process: - - name: sanitize_html