Skip to content

Commit 1bb8b03

Browse files
silentninjawumpus
authored and committed
Expand the home directory path before searching for the warc.parquet files
1 parent da9c01e commit 1bb8b03

1 file changed

Lines changed: 1 addition & 1 deletion

File tree

duck.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ def get_files(algo, crawl):
2424
files = f's3://commoncrawl/cc-index/table/cc-main/warc/crawl={crawl}/subset=warc/*.parquet'
2525
raise NotImplementedError('will cause a 403')
2626
elif algo == 'local_files':
27-
files = f'~/commmoncrawl/cc-index/table/cc-main/warc/crawl={crawl}/subset=warc/*.parquet'
27+
files = os.path.expanduser(f'~/commmoncrawl/cc-index/table/cc-main/warc/crawl={crawl}/subset=warc/*.parquet')
2828
files = glob.glob(files)
2929
# did we already download? we expect 300 files of about a gigabyte
3030
if len(files) < 250:

0 commit comments

Comments
 (0)