Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ group :development do
gem 'html2rss-generator', github: 'html2rss/generator', branch: :main

gem 'nokogiri'
gem 'public_suffix'
gem 'rspec', '~> 3.0'
gem 'rubocop'
gem 'rubocop-performance'
Expand Down
5 changes: 5 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,12 @@ GEM
logger
mime-types-data (~> 3.2025, >= 3.2025.0507)
mime-types-data (3.2025.0924)
mini_portile2 (2.8.9)
net-http (0.9.1)
uri (>= 0.11.1)
nokogiri (1.18.8)
mini_portile2 (~> 2.8.2)
racc (~> 1.4)
nokogiri (1.18.8-arm64-darwin)
racc (~> 1.4)
nokogiri (1.18.8-x86_64-darwin)
Expand Down Expand Up @@ -225,6 +229,7 @@ DEPENDENCIES
html2rss-configs!
html2rss-generator!
nokogiri
public_suffix
rspec (~> 3.0)
rubocop
rubocop-performance
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ make test-domain DOMAIN=github.com

**Adding new configs**: Just create the YAML file and run tests. No spec file needed.

**Config folder convention**: Place each config in a folder named after the registrable domain of its `channel.url` (e.g., `example.com/` or `bbc.co.uk/`). Legacy folders named after the full host, including subdomains (e.g., `news.example.com/`), are still accepted but not preferred.

## Documentation

- [Main Documentation](https://html2rss.github.io/html2rss-configs/)
Expand Down
56 changes: 56 additions & 0 deletions spec/helper_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# frozen_string_literal: true

RSpec.describe Helper do
  # URL -> registrable domain (subdomains collapsed, multi-part suffixes kept).
  describe '.url_to_registrable_domain' do
    it 'collapses subdomains to the registrable domain' do
      domain = described_class.url_to_registrable_domain('https://blog.example.com/posts')

      expect(domain).to eq('example.com')
    end

    it 'keeps multi-part TLDs intact for registrable domain' do
      domain = described_class.url_to_registrable_domain('https://news.bbc.co.uk/world')

      expect(domain).to eq('bbc.co.uk')
    end

    it 'preserves single-host domains' do
      domain = described_class.url_to_registrable_domain('https://example.com')

      expect(domain).to eq('example.com')
    end

    it 'returns nil for blank or invalid URLs', :aggregate_failures do
      expect(described_class.url_to_registrable_domain(nil)).to be_nil
      expect(described_class.url_to_registrable_domain('')).to be_nil
      expect(described_class.url_to_registrable_domain('not a url')).to be_nil
    end
  end

  # URL -> full host, subdomains included.
  describe '.url_to_host_name' do
    it 'returns the full host' do
      host = described_class.url_to_host_name('https://news.bbc.co.uk/world')

      expect(host).to eq('news.bbc.co.uk')
    end

    it 'returns nil for blank or invalid URLs', :aggregate_failures do
      expect(described_class.url_to_host_name(nil)).to be_nil
      expect(described_class.url_to_host_name('')).to be_nil
      expect(described_class.url_to_host_name('not a url')).to be_nil
    end
  end

  # Asserts that the old helper name is not available on the module.
  describe 'legacy naming guardrail' do
    it 'does not expose url_to_directory_name' do
      expect(described_class).not_to respond_to(:url_to_directory_name)
    end
  end

  # The helper is invoked through `send` — presumably because it is not part
  # of the module's public interface. PublicSuffix is stubbed so that both
  # fallback paths are exercised deterministically.
  describe '.registrable_domain' do
    it 'falls back to host when PublicSuffix returns nil' do
      host = 'example.local'
      allow(PublicSuffix).to receive(:domain).with(host).and_return(nil)

      expect(described_class.send(:registrable_domain, host)).to eq(host)
    end

    it 'falls back to host when PublicSuffix raises DomainInvalid' do
      host = 'invalid..host'
      allow(PublicSuffix).to receive(:domain).with(host)
                                             .and_raise(PublicSuffix::DomainInvalid)

      expect(described_class.send(:registrable_domain, host)).to eq(host)
    end
  end
end
27 changes: 24 additions & 3 deletions spec/support/helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,37 @@
require 'json'
require 'nokogiri'
require 'yaml'
require 'uri'
require 'public_suffix'

##
# A collection of helper methods.
module Helper
##
# Resolves the registrable domain for the host of the given URL.
#
# @param url [String]
# @return [String, nil] nil when no host could be derived from +url+
def self.url_to_registrable_domain(url)
  host = url_to_host_name(url)

  # Short-circuit: a missing host is handed back unchanged.
  host && registrable_domain(host)
end
Comment thread
gildesmarais marked this conversation as resolved.

##
# Extracts the host of a channel URL.
#
# @param url [String]
# @return [String, nil] the host of the object returned by
#   Html2rss::Url.for_channel, or nil when that call returns nil or an
#   ArgumentError is raised
def self.url_to_host_name(url)
  channel_url = Html2rss::Url.for_channel(url)
  channel_url&.host
rescue ArgumentError
  nil
end

##
# @param host [String]
# @return [String]
def self.url_to_directory_name(url)
URI(url.split('/')[0..2].join('/')).host.gsub(/^(api|www|webapp)\./, '')
def self.registrable_domain(host)
  domain = PublicSuffix.domain(host)

  # Keep the host as-is when no registrable domain was produced.
  domain || host
rescue PublicSuffix::DomainInvalid
  # Same fallback when the lookup rejects the host outright.
  host
end
Comment thread
gildesmarais marked this conversation as resolved.

##
Expand Down
9 changes: 5 additions & 4 deletions spec/support/shared_examples/config.yml_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,17 @@
config
end

context 'with the file' do
let(:host_name) { Helper.url_to_directory_name yaml['channel']['url'] }
context 'with the file' do # rubocop:disable RSpec/MultipleMemoizedHelpers
let(:host_name) { Helper.url_to_host_name yaml['channel']['url'] }
let(:domain_name) { Helper.url_to_registrable_domain yaml['channel']['url'] }
let(:dirname) { File.dirname(file_path).split(File::Separator).last }

# Evaluating `yaml` (defined outside this excerpt) must not raise; any exception fails the example.
it 'is parseable' do
expect { yaml }.not_to raise_error
end

it "resides in a folder named after channel.url's host" do
expect(dirname).to eq(host_name)
it "resides in a folder named after channel.url's host or domain" do
expect([domain_name, host_name]).to include(dirname)
end
end

Expand Down