Skip to content

Commit 7874cec

Browse files
authored
Add support for analysing fastly log files (eclipse-openvsx#1599)
* add support for analysing fastly log files * rename operation to method * refactor log services and support configurable cron schedule * add logging * fix fastly log parser: search for start of json content * update test data
1 parent 6693174 commit 7874cec

11 files changed

Lines changed: 350 additions & 40 deletions

File tree

server/src/main/java/org/eclipse/openvsx/storage/log/AwsDownloadCountService.java renamed to server/src/main/java/org/eclipse/openvsx/storage/log/AwsDownloadCountHandler.java

Lines changed: 54 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -12,21 +12,22 @@
1212
import org.apache.commons.lang3.StringUtils;
1313
import org.eclipse.openvsx.entities.Extension;
1414
import org.eclipse.openvsx.entities.FileResource;
15+
import org.eclipse.openvsx.migration.HandlerJobRequest;
1516
import org.eclipse.openvsx.storage.AwsStorageService;
1617
import org.eclipse.openvsx.util.TempFile;
1718
import org.jobrunr.jobs.annotations.Job;
18-
import org.jobrunr.jobs.annotations.Recurring;
19+
import org.jobrunr.jobs.lambdas.JobRequestHandler;
1920
import org.slf4j.Logger;
2021
import org.slf4j.LoggerFactory;
2122
import org.springframework.beans.factory.annotation.Value;
2223
import org.springframework.stereotype.Component;
2324
import org.springframework.util.StopWatch;
2425
import org.springframework.web.util.UriUtils;
25-
import software.amazon.awssdk.auth.credentials.*;
2626
import software.amazon.awssdk.core.sync.ResponseTransformer;
2727
import software.amazon.awssdk.services.s3.S3Client;
2828
import software.amazon.awssdk.services.s3.model.*;
2929

30+
import javax.annotation.PostConstruct;
3031
import java.io.*;
3132
import java.nio.charset.StandardCharsets;
3233
import java.nio.file.Files;
@@ -41,17 +42,22 @@
4142
/**
4243
* Pulls logs from an Amazon S3 bucket, extracts downloads from the logs and updates download counts in the database.
4344
* <p>
44-
* Currently only log files uploaded by Amazon CloudFront are supported.
45+
* The following log file formats are supported:
46+
* <ul>
47+
* <li>cloudfront</li>
48+
* <li>fastly</li>
49+
* </ul>
4550
* <p>
46-
* Links:
51+
* See
4752
* <ul>
48-
* <li><a href="https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/standard-logging.html">CloudFront standard logging</a></li>
49-
* <li><a href="https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/standard-logs-reference.html">CloudFront log format</a></li>
53+
* <li><a href="https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/standard-logging.html">CloudFront standard logging</a></li>
54+
* <li><a href="https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/standard-logs-reference.html">CloudFront log format</a></li>
55+
* <li><a href="https://www.fastly.com/documentation/guides/integrations/streaming-logs/custom-log-formats/">Fastly custom log format</a></li>
5056
* </ul>
5157
*/
5258
@Component
53-
public class AwsDownloadCountService {
54-
private final Logger logger = LoggerFactory.getLogger(AwsDownloadCountService.class);
59+
public class AwsDownloadCountHandler implements JobRequestHandler<HandlerJobRequest<?>> {
60+
private final Logger logger = LoggerFactory.getLogger(AwsDownloadCountHandler.class);
5561

5662
private static final String LOG_LOCATION_PREFIX = "AWSLogs/";
5763
private static final int MAX_KEYS = 100;
@@ -65,11 +71,36 @@ public class AwsDownloadCountService {
6571
@Value("${ovsx.logs.aws.log-location-prefix:" + LOG_LOCATION_PREFIX + "}")
6672
String logLocationPrefix;
6773

68-
public AwsDownloadCountService(AwsStorageService awsStorageService, DownloadCountProcessor processor) {
74+
@Value("${ovsx.logs.aws.format:cloudfront}")
75+
String logFormat;
76+
77+
@Value("${ovsx.logs.aws.cron:0 10 * * * *}")
78+
String cronSchedule;
79+
80+
LogFileParser logFileParser;
81+
82+
public AwsDownloadCountHandler(AwsStorageService awsStorageService, DownloadCountProcessor processor) {
6983
this.awsStorageService = awsStorageService;
7084
this.processor = processor;
7185
}
7286

87+
@PostConstruct
88+
public void initialize() {
89+
logFileParser = switch (logFormat.toLowerCase()) {
90+
case "cloudfront" -> new CloudFrontLogFileParser();
91+
case "fastly" -> new FastlyLogFileParser();
92+
default -> throw new IllegalArgumentException("unsupported log file format '" + logFormat + "'");
93+
};
94+
}
95+
96+
public String getRecurringJobId() {
97+
return "update-aws-download-counts";
98+
}
99+
100+
public String getCronSchedule() {
101+
return cronSchedule;
102+
}
103+
73104
/**
74105
* Indicates whether the download service is enabled by application config.
75106
*/
@@ -82,11 +113,11 @@ private S3Client getS3Client() {
82113
}
83114

84115
/**
85-
* Task scheduled once per hour to pull logs from AWS S3 Storage and update extension download counts.
116+
* Scheduled task to pull logs from AWS S3 Storage and update extension download counts.
86117
*/
118+
@Override
87119
@Job(name = "Update AWS Download Counts", retries = 0)
88-
@Recurring(id = "update-aws-download-counts", cron = "0 10 * * * *", zoneId = "UTC")
89-
public void updateDownloadCounts() {
120+
public void run(HandlerJobRequest<?> jobRequest) throws Exception {
90121
if (!isEnabled()) {
91122
return;
92123
}
@@ -193,16 +224,14 @@ private Map<String, Integer> processLogFile(String fileName) throws IOException
193224
var lines = reader.lines().iterator();
194225
while (lines.hasNext()) {
195226
var line = lines.next();
196-
if (line.startsWith("#")) {
227+
228+
var record = logFileParser.parse(line);
229+
if (record == null) {
197230
continue;
198231
}
199232

200-
// Format:
201-
// date time x-edge-location sc-bytes c-ip cs-method cs(Host) cs-uri-stem sc-status cs(Referer) cs(User-Agent) cs-uri-query cs(Cookie) x-edge-result-type x-edge-request-id x-host-header cs-protocol cs-bytes time-taken x-forwarded-for ssl-protocol ssl-cipher x-edge-response-result-type cs-protocol-version fle-status fle-encrypted-fields c-port time-to-first-byte x-edge-detailed-result-type sc-content-type sc-content-len sc-range-start sc-range-end
202-
var components = line.split("[ \t]+");
203-
204-
if (isGetOperation(components) && isStatusOk(components) && isExtensionPackageUri(components)) {
205-
var uri = components[7];
233+
if (isGetOperation(record) && isStatusOk(record) && isExtensionPackageUri(record)) {
234+
var uri = record.url();
206235
var uriComponents = uri.split("/");
207236
var vsixFile = UriUtils.decode(uriComponents[uriComponents.length - 1], StandardCharsets.UTF_8).toUpperCase();
208237
fileCounts.merge(vsixFile, 1, Integer::sum);
@@ -212,16 +241,16 @@ private Map<String, Integer> processLogFile(String fileName) throws IOException
212241
}
213242
}
214243

215-
private boolean isGetOperation(String[] components) {
216-
return components[5].equalsIgnoreCase("GET");
244+
private boolean isGetOperation(LogRecord record) {
245+
return record.method().equalsIgnoreCase("GET");
217246
}
218247

219-
private boolean isStatusOk(String[] components) {
220-
return Integer.parseInt(components[8]) == 200;
248+
private boolean isStatusOk(LogRecord record) {
249+
return record.status() == 200;
221250
}
222251

223-
private boolean isExtensionPackageUri(String[] components) {
224-
return components[7].endsWith(".vsix");
252+
private boolean isExtensionPackageUri(LogRecord record) {
253+
return record.url().endsWith(".vsix");
225254
}
226255

227256
private TempFile downloadFile(String objectKey) throws IOException {

server/src/main/java/org/eclipse/openvsx/storage/log/AzureDownloadCountService.java renamed to server/src/main/java/org/eclipse/openvsx/storage/log/AzureDownloadCountHandler.java

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@
2121
import com.fasterxml.jackson.databind.ObjectMapper;
2222
import org.apache.commons.lang3.StringUtils;
2323
import org.eclipse.openvsx.entities.FileResource;
24+
import org.eclipse.openvsx.migration.HandlerJobRequest;
2425
import org.eclipse.openvsx.util.TempFile;
2526
import org.jobrunr.jobs.annotations.Job;
2627
import org.jobrunr.jobs.annotations.Recurring;
28+
import org.jobrunr.jobs.lambdas.JobRequestHandler;
2729
import org.slf4j.Logger;
2830
import org.slf4j.LoggerFactory;
2931
import org.springframework.beans.factory.annotation.Value;
@@ -51,9 +53,9 @@
5153
* and updates download counts in the database.
5254
*/
5355
@Component
54-
public class AzureDownloadCountService {
56+
public class AzureDownloadCountHandler implements JobRequestHandler<HandlerJobRequest<?>> {
5557

56-
protected final Logger logger = LoggerFactory.getLogger(AzureDownloadCountService.class);
58+
protected final Logger logger = LoggerFactory.getLogger(AzureDownloadCountHandler.class);
5759

5860
private final DownloadCountProcessor processor;
5961
private BlobContainerClient containerClient;
@@ -75,10 +77,21 @@ public class AzureDownloadCountService {
7577
@Value("${ovsx.storage.azure.blob-container:openvsx-resources}")
7678
String storageBlobContainer;
7779

78-
public AzureDownloadCountService(DownloadCountProcessor processor) {
80+
@Value("${ovsx.logs.azure.cron:0 5 * * * *}")
81+
String cronSchedule;
82+
83+
public AzureDownloadCountHandler(DownloadCountProcessor processor) {
7984
this.processor = processor;
8085
}
8186

87+
public String getRecurringJobId() {
88+
return "update-azure-download-counts";
89+
}
90+
91+
public String getCronSchedule() {
92+
return cronSchedule;
93+
}
94+
8295
/**
8396
* Indicates whether the download service is enabled by application config.
8497
*/
@@ -96,8 +109,7 @@ public boolean isEnabled() {
96109
* Task scheduled once per hour to pull logs from Azure Blob Storage and update extension download counts.
97110
*/
98111
@Job(name = "Update Azure Download Counts", retries = 0)
99-
@Recurring(id = "update-azure-download-counts", cron = "0 5 * * * *", zoneId = "UTC")
100-
public void updateDownloadCounts() {
112+
public void run(HandlerJobRequest<?> jobRequest) throws Exception {
101113
if (!isEnabled()) {
102114
return;
103115
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/******************************************************************************
2+
* Copyright (c) 2026 Contributors to the Eclipse Foundation.
3+
*
4+
* See the NOTICE file(s) distributed with this work for additional
5+
* information regarding copyright ownership.
6+
*
7+
* This program and the accompanying materials are made available under the
8+
* terms of the Eclipse Public License 2.0 which is available at
9+
* https://www.eclipse.org/legal/epl-2.0.
10+
*
11+
* SPDX-License-Identifier: EPL-2.0
12+
*****************************************************************************/
13+
package org.eclipse.openvsx.storage.log;
14+
15+
class CloudFrontLogFileParser implements LogFileParser {
16+
@Override
17+
public LogRecord parse(String line) {
18+
if (line.startsWith("#")) {
19+
return null;
20+
}
21+
22+
// Format:
23+
// date time x-edge-location sc-bytes c-ip cs-method cs(Host) cs-uri-stem sc-status cs(Referer) cs(User-Agent) cs-uri-query cs(Cookie) x-edge-result-type x-edge-request-id x-host-header cs-protocol cs-bytes time-taken x-forwarded-for ssl-protocol ssl-cipher x-edge-response-result-type cs-protocol-version fle-status fle-encrypted-fields c-port time-to-first-byte x-edge-detailed-result-type sc-content-type sc-content-len sc-range-start sc-range-end
24+
var components = line.split("[ \t]+");
25+
return new LogRecord(components[5], Integer.parseInt(components[8]), components[7]);
26+
}
27+
}

server/src/main/java/org/eclipse/openvsx/storage/log/DownloadCountService.java

Lines changed: 50 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,37 @@
1010
package org.eclipse.openvsx.storage.log;
1111

1212
import org.eclipse.openvsx.entities.FileResource;
13-
import org.springframework.stereotype.Component;
13+
import org.eclipse.openvsx.migration.HandlerJobRequest;
14+
import org.jobrunr.scheduling.JobRequestScheduler;
15+
import org.slf4j.Logger;
16+
import org.slf4j.LoggerFactory;
17+
import org.springframework.boot.context.event.ApplicationStartedEvent;
18+
import org.springframework.context.event.EventListener;
19+
import org.springframework.stereotype.Service;
20+
21+
import java.time.ZoneId;
1422

1523
/**
1624
* A utility service to determine whether an optimized download count service
1725
* is available for a specific {@link FileResource}.
1826
*/
19-
@Component
27+
@Service
2028
public class DownloadCountService {
2129

22-
private final AwsDownloadCountService awsDownloadCountService;
23-
private final AzureDownloadCountService azureDownloadCountService;
30+
protected final Logger logger = LoggerFactory.getLogger(DownloadCountService.class);
31+
32+
private final JobRequestScheduler scheduler;
33+
private final AwsDownloadCountHandler awsDownloadCountHandler;
34+
private final AzureDownloadCountHandler azureDownloadCountHandler;
2435

2536
public DownloadCountService(
26-
AwsDownloadCountService awsDownloadCountService,
27-
AzureDownloadCountService azureDownloadCountService
37+
JobRequestScheduler scheduler,
38+
AwsDownloadCountHandler awsDownloadCountHandler,
39+
AzureDownloadCountHandler azureDownloadCountHandler
2840
) {
29-
this.awsDownloadCountService = awsDownloadCountService;
30-
this.azureDownloadCountService = azureDownloadCountService;
41+
this.scheduler = scheduler;
42+
this.awsDownloadCountHandler = awsDownloadCountHandler;
43+
this.azureDownloadCountHandler = azureDownloadCountHandler;
3144
}
3245

3346
/**
@@ -38,9 +51,36 @@ public DownloadCountService(
3851
*/
3952
public boolean isEnabled(FileResource resource) {
4053
return switch (resource.getStorageType()) {
41-
case FileResource.STORAGE_AWS -> awsDownloadCountService.isEnabled();
42-
case FileResource.STORAGE_AZURE -> azureDownloadCountService.isEnabled();
54+
case FileResource.STORAGE_AWS -> awsDownloadCountHandler.isEnabled();
55+
case FileResource.STORAGE_AZURE -> azureDownloadCountHandler.isEnabled();
4356
default -> false;
4457
};
4558
}
59+
60+
@EventListener
61+
public void applicationStarted(ApplicationStartedEvent event) {
62+
if (awsDownloadCountHandler.isEnabled()) {
63+
logger.info("Scheduling AWS download count handler with cron '{}'", awsDownloadCountHandler.getCronSchedule());
64+
scheduler.scheduleRecurrently(
65+
awsDownloadCountHandler.getRecurringJobId(),
66+
awsDownloadCountHandler.getCronSchedule(),
67+
ZoneId.of("UTC"),
68+
new HandlerJobRequest<>(AwsDownloadCountHandler.class)
69+
);
70+
} else {
71+
scheduler.deleteRecurringJob(awsDownloadCountHandler.getRecurringJobId());
72+
}
73+
74+
if (azureDownloadCountHandler.isEnabled()) {
75+
logger.info("Scheduling Azure download count handler with cron '{}'", azureDownloadCountHandler.getCronSchedule());
76+
scheduler.scheduleRecurrently(
77+
azureDownloadCountHandler.getRecurringJobId(),
78+
azureDownloadCountHandler.getCronSchedule(),
79+
ZoneId.of("UTC"),
80+
new HandlerJobRequest<>(AwsDownloadCountHandler.class)
81+
);
82+
} else {
83+
scheduler.deleteRecurringJob(azureDownloadCountHandler.getRecurringJobId());
84+
}
85+
}
4686
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
/******************************************************************************
2+
* Copyright (c) 2026 Contributors to the Eclipse Foundation.
3+
*
4+
* See the NOTICE file(s) distributed with this work for additional
5+
* information regarding copyright ownership.
6+
*
7+
* This program and the accompanying materials are made available under the
8+
* terms of the Eclipse Public License 2.0 which is available at
9+
* https://www.eclipse.org/legal/epl-2.0.
10+
*
11+
* SPDX-License-Identifier: EPL-2.0
12+
*****************************************************************************/
13+
package org.eclipse.openvsx.storage.log;
14+
15+
import com.fasterxml.jackson.core.JacksonException;
16+
import com.fasterxml.jackson.core.JsonParser;
17+
import com.fasterxml.jackson.databind.DeserializationContext;
18+
import com.fasterxml.jackson.databind.JsonNode;
19+
import com.fasterxml.jackson.databind.ObjectMapper;
20+
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
21+
import com.fasterxml.jackson.databind.module.SimpleModule;
22+
import org.slf4j.Logger;
23+
import org.slf4j.LoggerFactory;
24+
25+
import javax.annotation.Nullable;
26+
import java.io.IOException;
27+
28+
class FastlyLogFileParser implements LogFileParser {
29+
private final Logger logger = LoggerFactory.getLogger(FastlyLogFileParser.class);
30+
31+
private final ObjectMapper mapper;
32+
33+
public FastlyLogFileParser() {
34+
this.mapper = new ObjectMapper();
35+
36+
SimpleModule module = new SimpleModule();
37+
module.addDeserializer(LogRecord.class, new LogRecordDeserializer());
38+
mapper.registerModule(module);
39+
}
40+
41+
@Override
42+
public @Nullable LogRecord parse(String line) {
43+
try {
44+
var jsonStartIndex = line.indexOf("{");
45+
if (jsonStartIndex != -1) {
46+
return mapper.readValue(line.substring(jsonStartIndex), LogRecord.class);
47+
} else {
48+
return null;
49+
}
50+
} catch (JacksonException ex) {
51+
logger.error("could not parse log line '{}'", line, ex);
52+
return null;
53+
}
54+
}
55+
}
56+
57+
class LogRecordDeserializer extends StdDeserializer<LogRecord> {
58+
59+
public LogRecordDeserializer() {
60+
this(null);
61+
}
62+
63+
public LogRecordDeserializer(Class<?> vc) {
64+
super(vc);
65+
}
66+
67+
@Override
68+
public LogRecord deserialize(JsonParser jp, DeserializationContext ctxt)
69+
throws IOException {
70+
JsonNode node = jp.getCodec().readTree(jp);
71+
String operation = node.get("request_method").asText();
72+
int status = (Integer) node.get("response_status").numberValue();
73+
String url = node.get("url").asText();
74+
return new LogRecord(operation, status, url);
75+
}
76+
}

0 commit comments

Comments
 (0)