@@ -281,6 +281,15 @@ y">link</a>
281281 }
282282 })
283283
284+ mux .HandleFunc ("/sitemap.xml.gz" , func (w http.ResponseWriter , r * http.Request ) {
285+ // Return a 404 HTML page for a non-existent .xml.gz URL.
286+ // This simulates the scenario in issue #745 where a server
287+ // returns an HTML error page for a missing gzipped sitemap.
288+ w .Header ().Set ("Content-Type" , "text/html" )
289+ w .WriteHeader (404 )
290+ w .Write ([]byte (`<!DOCTYPE html><html><body><h1>404 Not Found</h1></body></html>` ))
291+ })
292+
284293 return httptest .NewUnstartedServer (mux )
285294}
286295
@@ -1926,3 +1935,36 @@ func TestCheckRequestHeadersFunc(t *testing.T) {
19261935 t .Error ("TestCheckRequestHeadersFunc failed" )
19271936 }
19281937}
1938+
1939+ func TestIssue745GzipURLWith404Response (t * testing.T ) {
1940+ ts := newTestServer ()
1941+ defer ts .Close ()
1942+
1943+ c := NewCollector ()
1944+
1945+ var responseStatusCode int
1946+ c .OnError (func (resp * Response , err error ) {
1947+ responseStatusCode = resp .StatusCode
1948+ // The error should NOT be "gzip: invalid header".
1949+ // A 404 response for a .xml.gz URL should be treated as a
1950+ // normal HTTP error, not a decompression failure.
1951+ if strings .Contains (err .Error (), "gzip" ) {
1952+ t .Errorf ("Expected HTTP error, got gzip decompression error: %v" , err )
1953+ }
1954+ })
1955+
1956+ c .OnResponse (func (resp * Response ) {
1957+ // A 404 should not reach OnResponse as a successful response
1958+ if resp .StatusCode == 404 {
1959+ responseStatusCode = resp .StatusCode
1960+ }
1961+ })
1962+
1963+ c .Visit (ts .URL + "/sitemap.xml.gz" )
1964+
1965+ // The response should have been received (either via OnError or OnResponse)
1966+ // with status 404, not a gzip decompression error
1967+ if responseStatusCode != 404 {
1968+ t .Errorf ("Expected status code 404, got %d" , responseStatusCode )
1969+ }
1970+ }
0 commit comments