Skip to content

Commit 2ea4fc2

Browse files
authored
Merge pull request #407 from realpython/python-download-file-from-url
How to Download Files (Materials)
2 parents cd9be54 + 86d7405 commit 2ea4fc2

8 files changed

Lines changed: 243 additions & 0 deletions

File tree

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from urllib.request import urlretrieve
2+
3+
# Download the World Bank "GDP by country" dataset using only the standard
# library, then show the response headers returned by the server.
url = "https://api.worldbank.org/v2/en/indicator/NY.GDP.MKTP.CD?downloadformat=csv"
filename = "gdp_by_country.zip"

# urlretrieve() saves the body to `filename` and returns the local path
# together with the response headers.
path, headers = urlretrieve(url, filename)

# One "name value" line per header.
for header in headers.items():
    print(*header)

print(f"Downloaded file {path}")
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import requests
2+
3+
# Download the World Bank "GDP by country" dataset with requests and store the
# response body as a ZIP file.
filename = "gdp_by_country.zip"
url = "https://api.worldbank.org/v2/en/indicator/NY.GDP.MKTP.CD"
query_parameters = {"downloadformat": "csv"}

# The mapping is sent as the URL's query string.
response = requests.get(url, params=query_parameters)

# Report the final URL and the outcome of the request, one item per line.
print(
    f"{response.url = }",
    f"{response.ok = }",
    f"{response.status_code = }",
    sep="\n",
)

# The body is binary, so write it in "wb" mode.
with open(filename, "wb") as file:
    file.write(response.content)

print(f"Downloaded file {filename}")
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import requests
2+
3+
# Download a large archive in streaming mode so that the whole body never has
# to fit in memory at once.
filename = "WDI_CSV.zip"
url = f"https://databank.worldbank.org/data/download/{filename}"

print("Downloading...")

# With stream=True the connection stays open until the body is consumed or
# the response is closed; the context manager releases it even if writing to
# disk fails part-way through.
with requests.get(url, stream=True) as response:
    # Fail loudly on 4xx/5xx instead of saving an error page as a ZIP file.
    response.raise_for_status()
    with open(filename, mode="wb") as file:
        # Write the body in 10 KiB pieces as they arrive.
        for chunk in response.iter_content(chunk_size=10 * 1024):
            file.write(chunk)

print(f"Downloaded file {filename}")
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from concurrent.futures import ThreadPoolExecutor
2+
3+
import requests
4+
5+
6+
def _filename_for(response, url):
    """Return a safe local file name for *response* downloaded from *url*."""
    header = response.headers.get("content-disposition", "")
    if "filename=" in header:
        # Keep only the value: drop any parameters that follow it and the
        # optional surrounding double quotes.
        name = header.split("filename=")[1].split(";")[0].strip().strip('"')
    else:
        # Fall back to the last path segment, without the query string or
        # fragment, so characters such as "?" never end up in the file name.
        name = url.split("#")[0].split("?")[0].rstrip("/").split("/")[-1]
    # The name is chosen by the server: never let it point outside the
    # current directory.
    name = name.replace("\\", "/").split("/")[-1]
    if name in ("", ".", ".."):
        name = "download"
    return name


def download_file(url):
    """Download *url* and save the body to a file in the current directory.

    The file name is taken from the ``Content-Disposition`` response header
    when it provides one, otherwise from the last path segment of *url*.

    Raises:
        requests.HTTPError: If the server answers with a 4xx or 5xx status.
    """
    response = requests.get(url)
    # Don't save an error page under the name of the expected download.
    response.raise_for_status()
    filename = _filename_for(response, url)
    with open(filename, mode="wb") as file:
        file.write(response.content)
    print(f"Downloaded file {filename}")
16+
17+
18+
# Every dataset is served from the same endpoint; only the indicator code in
# the path changes.
template_url = (
    "https://api.worldbank.org/v2/en/indicator/"
    "{resource}?downloadformat=csv"
)

urls = [
    # Total population by country
    template_url.format(resource="SP.POP.TOTL"),
    # GDP by country
    template_url.format(resource="NY.GDP.MKTP.CD"),
    # Population density by country
    template_url.format(resource="EN.POP.DNST"),
]

with ThreadPoolExecutor() as executor:
    # An exception raised in a worker only surfaces when its result is
    # retrieved from the iterator returned by map(). Consume the iterator so
    # that a failed download is reported instead of being silently dropped.
    list(executor.map(download_file, urls))
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import requests
2+
3+
4+
def _filename_for(response, url):
    """Return a safe local file name for *response* downloaded from *url*."""
    header = response.headers.get("content-disposition", "")
    if "filename=" in header:
        # Keep only the value: drop any parameters that follow it and the
        # optional surrounding double quotes.
        name = header.split("filename=")[1].split(";")[0].strip().strip('"')
    else:
        # Fall back to the last path segment, without the query string or
        # fragment, so characters such as "?" never end up in the file name.
        name = url.split("#")[0].split("?")[0].rstrip("/").split("/")[-1]
    # The name is chosen by the server: never let it point outside the
    # current directory.
    name = name.replace("\\", "/").split("/")[-1]
    if name in ("", ".", ".."):
        name = "download"
    return name


def download_file(url):
    """Download *url* and save the body to a file in the current directory.

    The file name is taken from the ``Content-Disposition`` response header
    when it provides one, otherwise from the last path segment of *url*.

    Raises:
        requests.HTTPError: If the server answers with a 4xx or 5xx status.
    """
    response = requests.get(url)
    # Don't save an error page under the name of the expected download.
    response.raise_for_status()
    filename = _filename_for(response, url)
    with open(filename, mode="wb") as file:
        file.write(response.content)
    print(f"Downloaded file {filename}")
14+
15+
16+
# Every dataset is served from the same endpoint; only the indicator code in
# the path changes.
template_url = (
    "https://api.worldbank.org/v2/en/indicator/{resource}?downloadformat=csv"
)

urls = [
    template_url.format(resource=indicator)
    for indicator in (
        "SP.POP.TOTL",  # Total population by country
        "NY.GDP.MKTP.CD",  # GDP by country
        "EN.POP.DNST",  # Population density by country
    )
]

# Fetch the datasets one after another, in list order.
for url in urls:
    download_file(url)
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import asyncio
2+
3+
import aiohttp
4+
5+
6+
def _filename_for(response, url):
    """Return a safe local file name for *response* downloaded from *url*."""
    header = response.headers.get("content-disposition", "")
    if "filename=" in header:
        # Keep only the value: drop any parameters that follow it and the
        # optional surrounding double quotes.
        name = header.split("filename=")[1].split(";")[0].strip().strip('"')
    else:
        # Fall back to the last path segment, without the query string or
        # fragment, so characters such as "?" never end up in the file name.
        name = url.split("#")[0].split("?")[0].rstrip("/").split("/")[-1]
    # The name is chosen by the server: never let it point outside the
    # current directory.
    name = name.replace("\\", "/").split("/")[-1]
    if name in ("", ".", ".."):
        name = "download"
    return name


async def download_file(url):
    """Download *url* and save the body to a file in the current directory.

    The file name is taken from the ``Content-Disposition`` response header
    when it provides one, otherwise from the last path segment of *url*.

    Raises:
        aiohttp.ClientResponseError: If the server answers with a 4xx or 5xx
            status.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            # Don't save an error page under the name of the expected file.
            response.raise_for_status()
            filename = _filename_for(response, url)
            with open(filename, mode="wb") as file:
                # read() without a size waits for the whole body, so iterate
                # in fixed-size chunks to keep memory use bounded.
                async for chunk in response.content.iter_chunked(64 * 1024):
                    file.write(chunk)
            print(f"Downloaded file {filename}")
21+
22+
23+
# Every dataset is served from the same endpoint; only the indicator code in
# the path changes.
template_url = (
    "https://api.worldbank.org/v2/en/indicator/{resource}?downloadformat=csv"
)

urls = [
    template_url.format(resource=indicator)
    for indicator in (
        "SP.POP.TOTL",  # Total population by country
        "NY.GDP.MKTP.CD",  # GDP by country
        "EN.POP.DNST",  # Population density by country
    )
]
36+
37+
38+
async def main():
    """Start one download per entry in ``urls`` and wait for all of them."""
    await asyncio.gather(*(download_file(url) for url in urls))


# Run the downloads on a fresh event loop.
asyncio.run(main())
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# How to Download Files From URLs With Python
2+
3+
This folder contains sample code for the [How to Download Files From URLs With Python](https://realpython.com/python-download-file-from-url/) tutorial on Real Python.
4+
5+
## Installation
6+
7+
Some of the code requires the following third-party libraries:
8+
9+
- [`aiohttp`](https://pypi.org/project/aiohttp/)
10+
- [`requests`](https://pypi.org/project/requests/)
11+
12+
To install them into a [virtual environment](https://realpython.com/python-virtual-environments-a-primer/), type the following commands:
13+
14+
```shell
15+
$ python3 -m venv venv/
16+
$ source venv/bin/activate
17+
(venv) $ python -m pip install -r requirements.txt
18+
```
19+
20+
## Running
21+
22+
### 01_download_urllib
23+
24+
```shell
25+
$ python 01_download_urllib.py
26+
Date Wed, 28 Jun 2023 19:40:57 GMT
27+
Content-Type application/zip
28+
Content-Length 128310
29+
Connection close
30+
Set-Cookie api_https.cookieCORS=76a6c6567ab12cea5dac4942d8df71cc; Path=/; SameSite=None; Secure
31+
Set-Cookie api_https.cookie=76a6c6567ab12cea5dac4942d8df71cc; Path=/
32+
Cache-Control public, must-revalidate, max-age=1
33+
Expires Wed, 28 Jun 2023 19:40:58 GMT
34+
Last-Modified Wed, 28 Jun 2023 19:40:57 GMT
35+
Content-Disposition attachment; filename=API_NY.GDP.MKTP.CD_DS2_en_csv_v2_5551501.zip
36+
Request-Context appId=cid-v1:da002513-bd8b-4441-9f30-737944134422
37+
Downloaded file gdp_by_country.zip
38+
```
39+
40+
### 02_download_requests
41+
42+
```shell
43+
(venv) $ python 02_download_requests.py
44+
response.url = 'https://api.worldbank.org/v2/en/indicator/NY.GDP.MKTP.CD?downloadformat=csv'
45+
response.ok = True
46+
response.status_code = 200
47+
Downloaded file gdp_by_country.zip
48+
```
49+
50+
### 03_download_streaming
51+
52+
```shell
53+
(venv) $ python 03_download_streaming.py
54+
Downloading...
55+
Downloaded file WDI_CSV.zip
56+
```
57+
58+
### 04_download_threading
59+
60+
```shell
61+
(venv) $ python 04_download_threading.py
62+
Downloaded file API_SP.POP.TOTL_DS2_en_csv_v2_5551506.zip
63+
Downloaded file API_EN.POP.DNST_DS2_en_csv_v2_5552158.zip
64+
Downloaded file API_NY.GDP.MKTP.CD_DS2_en_csv_v2_5551501.zip
65+
```
66+
67+
### 05_download_sequential
68+
69+
```shell
70+
(venv) $ python 05_download_sequential.py
71+
Downloaded file API_SP.POP.TOTL_DS2_en_csv_v2_5551506.zip
72+
Downloaded file API_NY.GDP.MKTP.CD_DS2_en_csv_v2_5551501.zip
73+
Downloaded file API_EN.POP.DNST_DS2_en_csv_v2_5552158.zip
74+
```
75+
76+
### 06_download_async
77+
78+
```shell
79+
(venv) $ python 06_download_async.py
80+
Downloaded file API_SP.POP.TOTL_DS2_en_csv_v2_5551506.zip
81+
Downloaded file API_EN.POP.DNST_DS2_en_csv_v2_5552158.zip
82+
Downloaded file API_NY.GDP.MKTP.CD_DS2_en_csv_v2_5551501.zip
83+
```
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
aiohttp==3.8.4
2+
aiosignal==1.3.1
3+
async-timeout==4.0.2
4+
attrs==23.1.0
5+
certifi==2023.5.7
6+
charset-normalizer==3.1.0
7+
frozenlist==1.3.3
8+
idna==3.4
9+
multidict==6.0.4
10+
requests==2.31.0
11+
urllib3==2.0.3
12+
yarl==1.9.2

0 commit comments

Comments
 (0)