Skip to content

Commit f6db612

Browse files
committed
add timeouts, fix some coding-style (cs) issues
1 parent 2ad0677 commit f6db612

1 file changed

Lines changed: 27 additions & 10 deletions

File tree

src/nl/melp/linkchecker/LinkChecker.java

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import nl.melp.redis.collections.*;
55
import org.apache.http.Header;
66
import org.apache.http.HttpEntity;
7+
import org.apache.http.client.config.RequestConfig;
78
import org.apache.http.client.methods.CloseableHttpResponse;
89
import org.apache.http.client.methods.HttpGet;
910
import org.apache.http.impl.client.CloseableHttpClient;
@@ -43,7 +44,7 @@ public class LinkChecker {
4344
private final Map<URI, Set<URI>> reverseLinks;
4445
private final Map<String, Set<URI>> invalidUrls;
4546
private final ExecutorService executor;
46-
private final Set<Future> running;
47+
private final Set<Future<?>> running;
4748
private final BiPredicate<URI, URI> shouldFollowLinks;
4849
private final BiPredicate<URI, HttpEntity> shouldExtractLinks;
4950
private final int msDelay;
@@ -70,8 +71,14 @@ public LinkChecker(
7071
this.clients = new LinkedBlockingDeque<>(numThreads);
7172
this.msDelay = msDelay;
7273

74+
RequestConfig config = RequestConfig.custom()
75+
.setConnectTimeout(timeout * 1000)
76+
.setConnectionRequestTimeout(timeout * 1000)
77+
.setSocketTimeout(timeout * 1000)
78+
.build();
79+
7380
for (int i = 0; i < numThreads; i++) {
74-
clients.offer(HttpClients.createMinimal());
81+
clients.offer(HttpClients.custom().setDefaultRequestConfig(config).build());
7582
}
7683
}
7784

@@ -81,7 +88,7 @@ private void logMonitor() {
8188
int size = statuses.size();
8289

8390
synchronized (running) {
84-
Set<Future> remove = new HashSet<>();
91+
Set<Future<?>> remove = new HashSet<>();
8592
for (var r : running) {
8693
if (r.isDone()) {
8794
remove.add(r);
@@ -120,14 +127,19 @@ public Map<URI, Integer> run() throws InterruptedException {
120127
Set<ExecutorService> executorServices = new HashSet<>();
121128
executorServices.add(executor);
122129

123-
ScheduledExecutorService loggerService = Executors.newScheduledThreadPool(1);
130+
ScheduledExecutorService loggerService = Executors.newScheduledThreadPool(1, runnable -> {
131+
Thread t = new Thread(runnable);
132+
t.setDaemon(true);
133+
return t;
134+
});
135+
124136
executorServices.add(loggerService);
125137

126138
startTimeMs = System.currentTimeMillis();
127139

128140
this.logMonitor();
129141
loggerService.scheduleAtFixedRate(this::logMonitor, 1, 1, TimeUnit.SECONDS);
130-
HashMap<Future, Long> startedAt = new HashMap<>();
142+
HashMap<Future<?>, Long> startedAt = new HashMap<>();
131143
do {
132144
for (URI url : urls) {
133145
urls.remove(url);
@@ -156,7 +168,7 @@ public Map<URI, Integer> run() throws InterruptedException {
156168
statuses.put(url, status);
157169

158170
if (status >= 400) {
159-
logger.info("Got status " + status + " at " + url + "; so far referred to by " + reverseLinks.get(url));
171+
logger.info("Got status " + status + " at " + url + "; so far referred to by " + Arrays.toString(reverseLinks.get(url).toArray()));
160172
} else {
161173
logger.trace("Got status " + status + " at " + url);
162174
}
@@ -377,7 +389,7 @@ public void checkServerTrusted(
377389
HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
378390
}
379391

380-
try (Socket socket = new Socket(redisHost, Integer.valueOf(redisPort))) {
392+
try (Socket socket = new Socket(redisHost, Integer.parseInt(redisPort))) {
381393
Redis redis = new Redis(socket);
382394
Set<URI> urls = new SerializedSet<>(redis, LinkChecker.class.getCanonicalName() + ".urls");
383395
SerializedHashMap<URI, Integer> results = new SerializedHashMap<>(redis, LinkChecker.class.getCanonicalName() + ".statuses");
@@ -396,21 +408,26 @@ public void checkServerTrusted(
396408
results.forEach((k, v) -> {
397409
if (isErrorStatus(v)) {
398410
if (flags.contains("recheck-only-errors") && v > 0) {
411+
logger.trace("recheck - skip status {} for {}", v, k);
399412
return;
400413
}
401414
if (!flags.contains("recheck") && v >= 0) {
415+
logger.trace("recheck - skip status {} for {}", v, k);
402416
return;
403417
}
418+
logger.debug("recheck - add status {} for {}", v, k);
404419
urls.add(k);
405420
reset.add(k);
421+
} else {
422+
logger.trace("recheck - no error {} for {}", v, k);
406423
}
407424
});
408425
if (reset.size() > 0) {
409426
logger.info("{} found, restoring the to the queue", reset.size());
410427
urls.addAll(reset);
411428
reset.forEach(results::remove);
412429
} else {
413-
logger.info("None found.", reset.size());
430+
logger.info("None found.");
414431
}
415432
}
416433

@@ -451,8 +468,8 @@ public void checkServerTrusted(
451468
return false;
452469
},
453470
(context, response) -> !flags.contains("no-follow") && localHosts.contains(context.getHost()),
454-
opts.containsKey("threads") ? Integer.valueOf(opts.get("threads").stream().findFirst().orElse("40")) : 40,
455-
opts.containsKey("delay-ms") ? Integer.valueOf(opts.get("delay-ms").stream().findFirst().orElse("20")) : 20
471+
opts.containsKey("threads") ? Integer.parseInt(opts.get("threads").stream().findFirst().orElse("40")) : 40,
472+
opts.containsKey("delay-ms") ? Integer.parseInt(opts.get("delay-ms").stream().findFirst().orElse("20")) : 20
456473
);
457474

458475
if (flags.contains("resume") || flags.contains("reset")) {

0 commit comments

Comments
 (0)