From 535ad80c29bf27da29a41a302d07dedc13bbd23a Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Fri, 2 Jan 2026 14:26:43 +0100 Subject: [PATCH 1/2] dev: relax config folder convention for subdomains to allow top-level --- Gemfile | 1 + Gemfile.lock | 5 ++++ README.md | 2 ++ spec/helper_spec.rb | 29 +++++++++++++++++++ spec/support/helper.rb | 27 +++++++++++++++-- .../shared_examples/config.yml_spec.rb | 9 +++--- 6 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 spec/helper_spec.rb diff --git a/Gemfile b/Gemfile index 5795448..be260aa 100644 --- a/Gemfile +++ b/Gemfile @@ -11,6 +11,7 @@ group :development do gem 'html2rss-generator', github: 'html2rss/generator', branch: :main gem 'nokogiri' + gem 'public_suffix' gem 'rspec', '~> 3.0' gem 'rubocop' gem 'rubocop-performance' diff --git a/Gemfile.lock b/Gemfile.lock index 7017eb5..aa39ddd 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -111,8 +111,12 @@ GEM logger mime-types-data (~> 3.2025, >= 3.2025.0507) mime-types-data (3.2025.0924) + mini_portile2 (2.8.9) net-http (0.9.1) uri (>= 0.11.1) + nokogiri (1.18.8) + mini_portile2 (~> 2.8.2) + racc (~> 1.4) nokogiri (1.18.8-arm64-darwin) racc (~> 1.4) nokogiri (1.18.8-x86_64-darwin) @@ -225,6 +229,7 @@ DEPENDENCIES html2rss-configs! html2rss-generator! nokogiri + public_suffix rspec (~> 3.0) rubocop rubocop-performance diff --git a/README.md b/README.md index ed18d98..9610a96 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,8 @@ make test-domain DOMAIN=github.com **Adding new configs**: Just create the YAML file and run tests. No spec file needed. +**Config folder convention**: Place configs under the registrable domain folder (e.g., `example.com/` or `bbc.co.uk/`). Legacy subdomain folders (e.g., `news.example.com/`) are allowed but not preferred. + ## Documentation - [Main Documentation](https://html2rss.github.io/html2rss-configs/) diff --git a/spec/helper_spec.rb b/spec/helper_spec.rb new file mode 100644 index 0000000..01a1aa6 --- /dev/null +++ b/spec/helper_spec.rb @@ -0,0 +1,29 @@ +# frozen_string_literal: true + +RSpec.describe Helper do + describe '.url_to_registrable_domain' do + it 'collapses subdomains to the registrable domain' do + expect(described_class.url_to_registrable_domain('https://blog.example.com/posts')).to eq('example.com') + end + + it 'keeps multi-part TLDs intact for registrable domain' do + expect(described_class.url_to_registrable_domain('https://news.bbc.co.uk/world')).to eq('bbc.co.uk') + end + + it 'preserves single-host domains' do + expect(described_class.url_to_registrable_domain('https://example.com')).to eq('example.com') + end + + it 'returns nil for blank or invalid URLs', :aggregate_failures do + expect(described_class.url_to_registrable_domain(nil)).to be_nil + expect(described_class.url_to_registrable_domain('')).to be_nil + expect(described_class.url_to_registrable_domain('not a url')).to be_nil + end + end + + describe '.url_to_host_name' do + it 'returns the full host' do + expect(described_class.url_to_host_name('https://news.bbc.co.uk/world')).to eq('news.bbc.co.uk') + end + end +end diff --git a/spec/support/helper.rb b/spec/support/helper.rb index 76ccc32..d01a651 100644 --- a/spec/support/helper.rb +++ b/spec/support/helper.rb @@ -3,7 +3,7 @@ require 'json' require 'nokogiri' require 'yaml' -require 'uri' +require 'public_suffix' ## # A collection of helper methods. @@ -11,8 +11,29 @@ module Helper ## # @param url [String] # @return [String] - def self.url_to_directory_name(url) - URI(url.split('/')[0..2].join('/')).host.gsub(/^(api|www|webapp)\./, '') + def self.url_to_registrable_domain(url) + host = url_to_host_name(url) + return host unless host + + registrable_domain(host) + end + + ## + # @param url [String] + # @return [String, nil] + def self.url_to_host_name(url) + Html2rss::Url.for_channel(url)&.host + rescue ArgumentError + nil + end + + ## + # @param host [String] + # @return [String] + def self.registrable_domain(host) + PublicSuffix.domain(host) || host + rescue PublicSuffix::DomainInvalid + host end ## diff --git a/spec/support/shared_examples/config.yml_spec.rb b/spec/support/shared_examples/config.yml_spec.rb index fa57427..0b7a788 100644 --- a/spec/support/shared_examples/config.yml_spec.rb +++ b/spec/support/shared_examples/config.yml_spec.rb @@ -41,16 +41,17 @@ config end - context 'with the file' do - let(:host_name) { Helper.url_to_directory_name yaml['channel']['url'] } + context 'with the file' do # rubocop:disable RSpec/MultipleMemoizedHelpers + let(:host_name) { Helper.url_to_host_name yaml['channel']['url'] } + let(:domain_name) { Helper.url_to_registrable_domain yaml['channel']['url'] } let(:dirname) { File.dirname(file_path).split(File::Separator).last } it 'is parseable' do expect { yaml }.not_to raise_error end - it "resides in a folder named after channel.url's host" do - expect(dirname).to eq(host_name) + it "resides in a folder named after channel.url's host or domain" do + expect([domain_name, host_name]).to include(dirname) end end From 25f50e8db2b292f83ad9fdb83693e665b3d2d4f3 Mon Sep 17 00:00:00 2001 From: Gil Desmarais Date: Fri, 2 Jan 2026 14:43:02 +0100 Subject: [PATCH 2/2] fix: yard docs & add missing --- spec/helper_spec.rb | 27 +++++++++++++++++++++++++++ spec/support/helper.rb | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/spec/helper_spec.rb b/spec/helper_spec.rb index 01a1aa6..05527c1 100644 --- a/spec/helper_spec.rb +++ b/spec/helper_spec.rb @@ -25,5 +25,32 @@ it 'returns the full host' do expect(described_class.url_to_host_name('https://news.bbc.co.uk/world')).to eq('news.bbc.co.uk') end + + it 'returns nil for blank or invalid URLs', :aggregate_failures do + expect(described_class.url_to_host_name(nil)).to be_nil + expect(described_class.url_to_host_name('')).to be_nil + expect(described_class.url_to_host_name('not a url')).to be_nil + end + end + + describe 'legacy naming guardrail' do + it 'does not expose url_to_directory_name' do + expect(described_class).not_to respond_to(:url_to_directory_name) + end + end + + describe '.registrable_domain' do + it 'falls back to host when PublicSuffix returns nil' do + allow(PublicSuffix).to receive(:domain).with('example.local').and_return(nil) + + expect(described_class.send(:registrable_domain, 'example.local')).to eq('example.local') + end + + it 'falls back to host when PublicSuffix raises DomainInvalid' do + allow(PublicSuffix).to receive(:domain).with('invalid..host') + .and_raise(PublicSuffix::DomainInvalid) + + expect(described_class.send(:registrable_domain, 'invalid..host')).to eq('invalid..host') + end end end diff --git a/spec/support/helper.rb b/spec/support/helper.rb index d01a651..e97e88a 100644 --- a/spec/support/helper.rb +++ b/spec/support/helper.rb @@ -10,7 +10,7 @@ module Helper ## # @param url [String] - # @return [String] + # @return [String, nil] def self.url_to_registrable_domain(url) host = url_to_host_name(url) return host unless host