diff --git a/Gemfile b/Gemfile index 5795448..be260aa 100644 --- a/Gemfile +++ b/Gemfile @@ -11,6 +11,7 @@ group :development do gem 'html2rss-generator', github: 'html2rss/generator', branch: :main gem 'nokogiri' + gem 'public_suffix' gem 'rspec', '~> 3.0' gem 'rubocop' gem 'rubocop-performance' diff --git a/Gemfile.lock b/Gemfile.lock index 7017eb5..aa39ddd 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -111,8 +111,12 @@ GEM logger mime-types-data (~> 3.2025, >= 3.2025.0507) mime-types-data (3.2025.0924) + mini_portile2 (2.8.9) net-http (0.9.1) uri (>= 0.11.1) + nokogiri (1.18.8) + mini_portile2 (~> 2.8.2) + racc (~> 1.4) nokogiri (1.18.8-arm64-darwin) racc (~> 1.4) nokogiri (1.18.8-x86_64-darwin) @@ -225,6 +229,7 @@ DEPENDENCIES html2rss-configs! html2rss-generator! nokogiri + public_suffix rspec (~> 3.0) rubocop rubocop-performance diff --git a/README.md b/README.md index ed18d98..9610a96 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,8 @@ make test-domain DOMAIN=github.com **Adding new configs**: Just create the YAML file and run tests. No spec file needed. +**Config folder convention**: Place configs under the registrable domain folder (e.g., `example.com/` or `bbc.co.uk/`). Legacy subdomain folders (e.g., `news.example.com/`) are allowed but not preferred. + ## Documentation - [Main Documentation](https://html2rss.github.io/html2rss-configs/) diff --git a/spec/helper_spec.rb b/spec/helper_spec.rb new file mode 100644 index 0000000..05527c1 --- /dev/null +++ b/spec/helper_spec.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +RSpec.describe Helper do + describe '.url_to_registrable_domain' do + it 'collapses subdomains to the registrable domain' do + expect(described_class.url_to_registrable_domain('https://blog.example.com/posts')).to eq('example.com') + end + + it 'keeps multi-part TLDs intact for registrable domain' do + expect(described_class.url_to_registrable_domain('https://news.bbc.co.uk/world')).to eq('bbc.co.uk') + end + + it 'preserves single-host domains' do + expect(described_class.url_to_registrable_domain('https://example.com')).to eq('example.com') + end + + it 'returns nil for blank or invalid URLs', :aggregate_failures do + expect(described_class.url_to_registrable_domain(nil)).to be_nil + expect(described_class.url_to_registrable_domain('')).to be_nil + expect(described_class.url_to_registrable_domain('not a url')).to be_nil + end + end + + describe '.url_to_host_name' do + it 'returns the full host' do + expect(described_class.url_to_host_name('https://news.bbc.co.uk/world')).to eq('news.bbc.co.uk') + end + + it 'returns nil for blank or invalid URLs', :aggregate_failures do + expect(described_class.url_to_host_name(nil)).to be_nil + expect(described_class.url_to_host_name('')).to be_nil + expect(described_class.url_to_host_name('not a url')).to be_nil + end + end + + describe 'legacy naming guardrail' do + it 'does not expose url_to_directory_name' do + expect(described_class).not_to respond_to(:url_to_directory_name) + end + end + + describe '.registrable_domain' do + it 'falls back to host when PublicSuffix returns nil' do + allow(PublicSuffix).to receive(:domain).with('example.local').and_return(nil) + + expect(described_class.send(:registrable_domain, 'example.local')).to eq('example.local') + end + + it 'falls back to host when PublicSuffix raises DomainInvalid' do + allow(PublicSuffix).to receive(:domain).with('invalid..host') + .and_raise(PublicSuffix::DomainInvalid) + + expect(described_class.send(:registrable_domain, 'invalid..host')).to eq('invalid..host') + end + end +end diff --git a/spec/support/helper.rb b/spec/support/helper.rb index 76ccc32..e97e88a 100644 --- a/spec/support/helper.rb +++ b/spec/support/helper.rb @@ -3,16 +3,37 @@ require 'json' require 'nokogiri' require 'yaml' -require 'uri' +require 'public_suffix' ## # A collection of helper methods. module Helper ## # @param url [String] + # @return [String, nil] + def self.url_to_registrable_domain(url) + host = url_to_host_name(url) + return host unless host + + registrable_domain(host) + end + + ## + # @param url [String] + # @return [String, nil] + def self.url_to_host_name(url) + Html2rss::Url.for_channel(url)&.host + rescue ArgumentError + nil + end + + ## + # @param host [String] # @return [String] - def self.url_to_directory_name(url) - URI(url.split('/')[0..2].join('/')).host.gsub(/^(api|www|webapp)\./, '') + def self.registrable_domain(host) + PublicSuffix.domain(host) || host + rescue PublicSuffix::DomainInvalid + host end ## diff --git a/spec/support/shared_examples/config.yml_spec.rb b/spec/support/shared_examples/config.yml_spec.rb index fa57427..0b7a788 100644 --- a/spec/support/shared_examples/config.yml_spec.rb +++ b/spec/support/shared_examples/config.yml_spec.rb @@ -41,16 +41,17 @@ config end - context 'with the file' do - let(:host_name) { Helper.url_to_directory_name yaml['channel']['url'] } + context 'with the file' do # rubocop:disable RSpec/MultipleMemoizedHelpers + let(:host_name) { Helper.url_to_host_name yaml['channel']['url'] } + let(:domain_name) { Helper.url_to_registrable_domain yaml['channel']['url'] } let(:dirname) { File.dirname(file_path).split(File::Separator).last } it 'is parseable' do expect { yaml }.not_to raise_error end - it "resides in a folder named after channel.url's host" do - expect(dirname).to eq(host_name) + it "resides in a folder named after channel.url's host or domain" do + expect([domain_name, host_name]).to include(dirname) end end