44import nl .melp .redis .collections .*;
55import org .apache .http .Header ;
66import org .apache .http .HttpEntity ;
7+ import org .apache .http .client .config .RequestConfig ;
78import org .apache .http .client .methods .CloseableHttpResponse ;
89import org .apache .http .client .methods .HttpGet ;
910import org .apache .http .impl .client .CloseableHttpClient ;
@@ -43,7 +44,7 @@ public class LinkChecker {
4344 private final Map <URI , Set <URI >> reverseLinks ;
4445 private final Map <String , Set <URI >> invalidUrls ;
4546 private final ExecutorService executor ;
46- private final Set <Future > running ;
47+ private final Set <Future <?> > running ;
4748 private final BiPredicate <URI , URI > shouldFollowLinks ;
4849 private final BiPredicate <URI , HttpEntity > shouldExtractLinks ;
4950 private final int msDelay ;
@@ -70,8 +71,14 @@ public LinkChecker(
7071 this .clients = new LinkedBlockingDeque <>(numThreads );
7172 this .msDelay = msDelay ;
7273
74+ RequestConfig config = RequestConfig .custom ()
75+ .setConnectTimeout (timeout * 1000 )
76+ .setConnectionRequestTimeout (timeout * 1000 )
77+ .setSocketTimeout (timeout * 1000 )
78+ .build ();
79+
7380 for (int i = 0 ; i < numThreads ; i ++) {
74- clients .offer (HttpClients .createMinimal ());
81+ clients .offer (HttpClients .custom (). setDefaultRequestConfig ( config ). build ());
7582 }
7683 }
7784
@@ -81,7 +88,7 @@ private void logMonitor() {
8188 int size = statuses .size ();
8289
8390 synchronized (running ) {
84- Set <Future > remove = new HashSet <>();
91+ Set <Future <?> > remove = new HashSet <>();
8592 for (var r : running ) {
8693 if (r .isDone ()) {
8794 remove .add (r );
@@ -120,14 +127,19 @@ public Map<URI, Integer> run() throws InterruptedException {
120127 Set <ExecutorService > executorServices = new HashSet <>();
121128 executorServices .add (executor );
122129
123- ScheduledExecutorService loggerService = Executors .newScheduledThreadPool (1 );
130+ ScheduledExecutorService loggerService = Executors .newScheduledThreadPool (1 , runnable -> {
131+ Thread t = new Thread (runnable );
132+ t .setDaemon (true );
133+ return t ;
134+ });
135+
124136 executorServices .add (loggerService );
125137
126138 startTimeMs = System .currentTimeMillis ();
127139
128140 this .logMonitor ();
129141 loggerService .scheduleAtFixedRate (this ::logMonitor , 1 , 1 , TimeUnit .SECONDS );
130- HashMap <Future , Long > startedAt = new HashMap <>();
142+ HashMap <Future <?> , Long > startedAt = new HashMap <>();
131143 do {
132144 for (URI url : urls ) {
133145 urls .remove (url );
@@ -156,7 +168,7 @@ public Map<URI, Integer> run() throws InterruptedException {
156168 statuses .put (url , status );
157169
158170 if (status >= 400 ) {
159- logger .info ("Got status " + status + " at " + url + "; so far referred to by " + reverseLinks .get (url ));
171+ logger .info ("Got status " + status + " at " + url + "; so far referred to by " + Arrays . toString ( reverseLinks .get (url ). toArray () ));
160172 } else {
161173 logger .trace ("Got status " + status + " at " + url );
162174 }
@@ -377,7 +389,7 @@ public void checkServerTrusted(
377389 HttpsURLConnection .setDefaultSSLSocketFactory (sc .getSocketFactory ());
378390 }
379391
380- try (Socket socket = new Socket (redisHost , Integer .valueOf (redisPort ))) {
392+ try (Socket socket = new Socket (redisHost , Integer .parseInt (redisPort ))) {
381393 Redis redis = new Redis (socket );
382394 Set <URI > urls = new SerializedSet <>(redis , LinkChecker .class .getCanonicalName () + ".urls" );
383395 SerializedHashMap <URI , Integer > results = new SerializedHashMap <>(redis , LinkChecker .class .getCanonicalName () + ".statuses" );
@@ -396,21 +408,26 @@ public void checkServerTrusted(
396408 results .forEach ((k , v ) -> {
397409 if (isErrorStatus (v )) {
398410 if (flags .contains ("recheck-only-errors" ) && v > 0 ) {
411+ logger .trace ("recheck - skip status {} for {}" , v , k );
399412 return ;
400413 }
401414 if (!flags .contains ("recheck" ) && v >= 0 ) {
415+ logger .trace ("recheck - skip status {} for {}" , v , k );
402416 return ;
403417 }
418+ logger .debug ("recheck - add status {} for {}" , v , k );
404419 urls .add (k );
405420 reset .add (k );
421+ } else {
422+ logger .trace ("recheck - no error {} for {}" , v , k );
406423 }
407424 });
408425 if (reset .size () > 0 ) {
409426 logger .info ("{} found, restoring the to the queue" , reset .size ());
410427 urls .addAll (reset );
411428 reset .forEach (results ::remove );
412429 } else {
413- logger .info ("None found." , reset . size () );
430+ logger .info ("None found." );
414431 }
415432 }
416433
@@ -451,8 +468,8 @@ public void checkServerTrusted(
451468 return false ;
452469 },
453470 (context , response ) -> !flags .contains ("no-follow" ) && localHosts .contains (context .getHost ()),
454- opts .containsKey ("threads" ) ? Integer .valueOf (opts .get ("threads" ).stream ().findFirst ().orElse ("40" )) : 40 ,
455- opts .containsKey ("delay-ms" ) ? Integer .valueOf (opts .get ("delay-ms" ).stream ().findFirst ().orElse ("20" )) : 20
471+ opts .containsKey ("threads" ) ? Integer .parseInt (opts .get ("threads" ).stream ().findFirst ().orElse ("40" )) : 40 ,
472+ opts .containsKey ("delay-ms" ) ? Integer .parseInt (opts .get ("delay-ms" ).stream ().findFirst ().orElse ("20" )) : 20
456473 );
457474
458475 if (flags .contains ("resume" ) || flags .contains ("reset" )) {
0 commit comments