Skip to content

Commit 1723215

Browse files
committed
Merge branch 'main' into luca/feature/part2
# Conflicts:
#	Makefile
#	README.md
2 parents e059b07 + f0809a2 commit 1723215

3 files changed

Lines changed: 311 additions & 104 deletions

File tree

Makefile

Lines changed: 1 addition & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,6 @@
11
build:
22
mvn clean package
33

4-
iterate: build
5-
@echo iterating over all of the local warcs:
6-
@echo
7-
@echo warc:
8-
mvn -q exec:java -Dexec.mainClass=org.commoncrawl.whirlwind.ReadWARC -Dexec.args="data/whirlwind.warc.gz"
9-
@echo
10-
@echo wet:
11-
mvn -q exec:java -Dexec.mainClass=org.commoncrawl.whirlwind.ReadWARC -Dexec.args="data/whirlwind.warc.wet.gz"
12-
@echo
13-
@echo wat:
14-
mvn -q exec:java -Dexec.mainClass=org.commoncrawl.whirlwind.ReadWARC -Dexec.args="data/whirlwind.warc.wat.gz"
15-
@echo
16-
174
# cdxj:
185
# @echo "creating *.cdxj index files from the local warcs"
196
# cdxj-indexer whirlwind.warc.gz > whirlwind.warc.cdxj
@@ -67,7 +54,7 @@ iterate: build
6754
#
6855
get_jwarc:
6956
@echo "downloading JWarc JAR"
70-
curl -fL -o jwarc-0.33.0.jar https://github.com/iipc/jwarc/releases/download/v0.33.0/jwarc-0.33.0.jar
57+
curl -fL -o jwarc.jar https://github.com/iipc/jwarc/releases/download/v0.33.0/jwarc-0.33.0.jar
7158

7259
wreck_the_warc: build get_jwarc
7360
@echo

0 commit comments

Comments
 (0)