Skip to content

Commit 19a03cf

Browse files
authored
Merge branch 'master' into feat/add-microsoft.com
2 parents e6f4843 + 7782d60 commit 19a03cf

6 files changed

Lines changed: 106 additions & 20 deletions

File tree

Gemfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ group :development do
1111
gem 'html2rss-generator', github: 'html2rss/generator', branch: :main
1212

1313
gem 'nokogiri'
14+
gem 'public_suffix'
1415
gem 'rspec', '~> 3.0'
1516
gem 'rubocop'
1617
gem 'rubocop-performance'

Gemfile.lock

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ GIT
1212

1313
GIT
1414
remote: https://github.com/html2rss/html2rss
15-
revision: 861cc9cae785510cc7120185b458390c18fd615a
15+
revision: 400e796540e82a69e1f1e014b6f89c626acf32fd
1616
branch: master
1717
specs:
1818
html2rss (0.17.0)
@@ -44,13 +44,13 @@ PATH
4444
GEM
4545
remote: https://rubygems.org/
4646
specs:
47-
addressable (2.8.7)
48-
public_suffix (>= 2.0.2, < 7.0)
47+
addressable (2.8.8)
48+
public_suffix (>= 2.0.2, < 8.0)
4949
ast (2.4.3)
5050
base64 (0.3.0)
5151
bigdecimal (3.3.1)
5252
brotli (0.7.0)
53-
concurrent-ruby (1.3.5)
53+
concurrent-ruby (1.3.6)
5454
crass (1.0.6)
5555
diff-lcs (1.6.2)
5656
dry-configurable (1.3.0)
@@ -92,16 +92,16 @@ GEM
9292
faraday-net_http (>= 2.0, < 3.5)
9393
json
9494
logger
95-
faraday-follow_redirects (0.4.0)
95+
faraday-follow_redirects (0.5.0)
9696
faraday (>= 1, < 3)
9797
faraday-gzip (3.0.4)
9898
faraday (>= 2.0, < 3)
9999
zlib (~> 3.0)
100-
faraday-net_http (3.4.1)
101-
net-http (>= 0.5.0)
100+
faraday-net_http (3.4.2)
101+
net-http (~> 0.5)
102102
hashie (5.0.0)
103103
htmlbeautifier (1.4.3)
104-
json (2.15.2)
104+
json (2.18.0)
105105
kramdown (2.5.1)
106106
rexml (>= 3.3.9)
107107
language_server-protocol (3.17.0.5)
@@ -111,8 +111,12 @@ GEM
111111
logger
112112
mime-types-data (~> 3.2025, >= 3.2025.0507)
113113
mime-types-data (3.2025.0924)
114-
net-http (0.7.0)
115-
uri
114+
mini_portile2 (2.8.9)
115+
net-http (0.9.1)
116+
uri (>= 0.11.1)
117+
nokogiri (1.18.8)
118+
mini_portile2 (~> 2.8.2)
119+
racc (~> 1.4)
116120
nokogiri (1.18.8-arm64-darwin)
117121
racc (~> 1.4)
118122
nokogiri (1.18.8-x86_64-darwin)
@@ -134,7 +138,7 @@ GEM
134138
racc (1.8.1)
135139
rainbow (3.1.1)
136140
regexp_parser (2.11.3)
137-
reverse_markdown (3.0.0)
141+
reverse_markdown (3.0.1)
138142
nokogiri
139143
rexml (3.4.4)
140144
rouge (4.5.2)
@@ -151,7 +155,7 @@ GEM
151155
diff-lcs (>= 1.2.0, < 2.0)
152156
rspec-support (~> 3.13.0)
153157
rspec-support (3.13.4)
154-
rss (0.3.1)
158+
rss (0.3.2)
155159
rexml
156160
rubocop (1.75.8)
157161
json (~> 2.3)
@@ -205,7 +209,7 @@ GEM
205209
concurrent-ruby (~> 1.0)
206210
unicode-display_width (2.6.0)
207211
unicode_utils (1.4.0)
208-
uri (1.1.0)
212+
uri (1.1.1)
209213
websocket-driver (0.8.0)
210214
base64
211215
websocket-extensions (>= 0.1.0)
@@ -225,6 +229,7 @@ DEPENDENCIES
225229
html2rss-configs!
226230
html2rss-generator!
227231
nokogiri
232+
public_suffix
228233
rspec (~> 3.0)
229234
rubocop
230235
rubocop-performance

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ make test-domain DOMAIN=github.com
5757

5858
**Adding new configs**: Just create the YAML file and run tests. No spec file needed.
5959

60+
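**Config folder convention**: Place each config in a folder named after the registrable domain of its `channel.url` (e.g., `example.com/` or `bbc.co.uk/`). Legacy folders named after the full host, including subdomains (e.g., `news.example.com/`), are still accepted but are not preferred for new configs.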
61+
6062
## Documentation
6163

6264
- [Main Documentation](https://html2rss.github.io/html2rss-configs/)

spec/helper_spec.rb

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# frozen_string_literal: true
2+
3+
RSpec.describe Helper do
  describe '.url_to_registrable_domain' do
    it 'collapses subdomains to the registrable domain' do
      domain = described_class.url_to_registrable_domain('https://blog.example.com/posts')

      expect(domain).to eq('example.com')
    end

    it 'keeps multi-part TLDs intact for registrable domain' do
      domain = described_class.url_to_registrable_domain('https://news.bbc.co.uk/world')

      expect(domain).to eq('bbc.co.uk')
    end

    it 'preserves single-host domains' do
      domain = described_class.url_to_registrable_domain('https://example.com')

      expect(domain).to eq('example.com')
    end

    it 'returns nil for blank or invalid URLs', :aggregate_failures do
      # Same three inputs as the host-name examples below: nil, empty, and unparseable.
      [nil, '', 'not a url'].each do |input|
        expect(described_class.url_to_registrable_domain(input)).to be_nil
      end
    end
  end

  describe '.url_to_host_name' do
    it 'returns the full host' do
      host = described_class.url_to_host_name('https://news.bbc.co.uk/world')

      expect(host).to eq('news.bbc.co.uk')
    end

    it 'returns nil for blank or invalid URLs', :aggregate_failures do
      [nil, '', 'not a url'].each do |input|
        expect(described_class.url_to_host_name(input)).to be_nil
      end
    end
  end

  describe 'legacy naming guardrail' do
    # Guards against the old helper name being reintroduced.
    it 'does not expose url_to_directory_name' do
      expect(described_class).not_to respond_to(:url_to_directory_name)
    end
  end

  describe '.registrable_domain' do
    it 'falls back to host when PublicSuffix returns nil' do
      host = 'example.local'
      allow(PublicSuffix).to receive(:domain).with(host).and_return(nil)

      expect(described_class.send(:registrable_domain, host)).to eq(host)
    end

    it 'falls back to host when PublicSuffix raises DomainInvalid' do
      host = 'invalid..host'
      allow(PublicSuffix).to receive(:domain).with(host).and_raise(PublicSuffix::DomainInvalid)

      expect(described_class.send(:registrable_domain, host)).to eq(host)
    end
  end
end

spec/support/helper.rb

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,37 @@
33
require 'json'
44
require 'nokogiri'
55
require 'yaml'
6-
require 'uri'
6+
require 'public_suffix'
77

88
##
99
# A collection of helper methods.
1010
module Helper
1111
##
1212
# @param url [String]
13+
# @return [String, nil]
14+
def self.url_to_registrable_domain(url)
  # Resolve the host first; the result may be nil when no host can be derived.
  host_name = url_to_host_name(url)

  # Short-circuit on a missing host so nil is returned as-is instead of
  # being handed to the registrable-domain lookup.
  host_name && registrable_domain(host_name)
end
20+
21+
##
22+
# @param url [String]
23+
# @return [String, nil]
24+
def self.url_to_host_name(url)
  channel_url = Html2rss::Url.for_channel(url)

  # Safe navigation: a nil result from for_channel yields nil rather than raising.
  channel_url&.host
rescue ArgumentError
  # An ArgumentError raised while resolving the URL is reported as "no host".
  nil
end
29+
30+
##
31+
# @param host [String]
1332
# @return [String]
14-
def self.url_to_directory_name(url)
15-
URI(url.split('/')[0..2].join('/')).host.gsub(/^(api|www|webapp)\./, '')
33+
def self.registrable_domain(host)
  domain = PublicSuffix.domain(host)

  # Fall back to the given host when the lookup produces no domain.
  domain || host
rescue PublicSuffix::DomainInvalid
  # Same fallback when the lookup rejects the host as invalid.
  host
end
1738

1839
##

spec/support/shared_examples/config.yml_spec.rb

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,17 @@
4141
config
4242
end
4343

44-
context 'with the file' do
45-
let(:host_name) { Helper.url_to_directory_name yaml['channel']['url'] }
44+
context 'with the file' do # rubocop:disable RSpec/MultipleMemoizedHelpers
  # Full host of channel.url (subdomains included) and its registrable domain.
  let(:host_name) { Helper.url_to_host_name(yaml['channel']['url']) }
  let(:domain_name) { Helper.url_to_registrable_domain(yaml['channel']['url']) }

  # Name of the folder that directly contains the config file.
  let(:dirname) do
    File.dirname(file_path).split(File::Separator).last
  end

  it 'is parseable' do
    expect { yaml }.not_to raise_error
  end

  it "resides in a folder named after channel.url's host or domain" do
    # Either folder naming scheme is accepted: registrable domain or full host.
    expect(dirname).to eq(domain_name).or eq(host_name)
  end
end
5657

0 commit comments

Comments
 (0)