We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent da9c01e, commit 1bb8b03 (Copy full SHA for 1bb8b03)
1 file changed
duck.py
@@ -24,7 +24,7 @@ def get_files(algo, crawl):
24
files = f's3://commoncrawl/cc-index/table/cc-main/warc/crawl={crawl}/subset=warc/*.parquet'
25
raise NotImplementedError('will cause a 403')
26
elif algo == 'local_files':
27
- files = f'~/commmoncrawl/cc-index/table/cc-main/warc/crawl={crawl}/subset=warc/*.parquet'
+ files = os.path.expanduser(f'~/commmoncrawl/cc-index/table/cc-main/warc/crawl={crawl}/subset=warc/*.parquet')
28
files = glob.glob(files)
29
# did we already download? we expect 300 files of about a gigabyte
30
if len(files) < 250:
0 commit comments