Skip to content

Commit 4261440

Browse files
ENH: Improve connection towards Azure (#121)
* reuses Session object towards Azure Blob Storage
1 parent cf2ee4a commit 4261440

1 file changed

Lines changed: 30 additions & 28 deletions

File tree

datareservoirio/storage/storage.py

Lines changed: 30 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@
1515

1616
log = logging.getLogger(__name__)
1717

18+
_BLOBSTORAGE_SESSION = requests.Session()
19+
_BLOBSTORAGE_SESSION.mount(
20+
"https://",
21+
requests.adapters.HTTPAdapter(
22+
max_retries=requests.adapters.Retry(total=5, backoff_factor=0.4, backoff_max=10)
23+
),
24+
)
25+
1826

1927
def _encode_for_path_safety(value):
2028
return str(base64.urlsafe_b64encode(str(value).encode()).decode())
@@ -276,7 +284,7 @@ def _evict_from_cache(self):
276284
)
277285

278286

279-
def _blob_to_df(blob_url):
287+
def _blob_to_df(blob_url, session=_BLOBSTORAGE_SESSION):
280288
"""
281289
Download blob from remote storage and present as a Pandas DataFrame.
282290
@@ -285,6 +293,8 @@ def _blob_to_df(blob_url):
285293
blob_url : str
286294
Fully formatted URL to the blob. Must contain all the required parameters
287295
in the URL.
296+
session : requests.Session, default _BLOBSTORAGE_SESSION
297+
Session object to make HTTP calls.
288298
289299
Return
290300
------
@@ -293,19 +303,15 @@ def _blob_to_df(blob_url):
293303
(``Int64``) and column ``values`` are ``str`` or ``float64``.
294304
"""
295305

296-
with requests.Session() as session:
297-
retries = requests.adapters.Retry(total=5, backoff_factor=0.4, backoff_max=10)
298-
session.mount("https://", requests.adapters.HTTPAdapter(max_retries=retries))
299-
response = session.request(method="get", url=blob_url, timeout=30, stream=True)
300-
response.raise_for_status()
301-
302-
response.encoding = "utf-8" # enforce encoding
306+
response = session.request(method="get", url=blob_url, timeout=30, stream=True)
307+
response.raise_for_status()
308+
response.encoding = "utf-8" # enforce encoding
303309

304-
content = [
305-
line.split(",", maxsplit=1)
306-
for line in response.iter_lines(decode_unicode=True)
307-
if line
308-
]
310+
content = [
311+
line.split(",", maxsplit=1)
312+
for line in response.iter_lines(decode_unicode=True)
313+
if line
314+
]
309315

310316
df = (
311317
pd.DataFrame(content, columns=("index", "values"), copy=False)
@@ -316,7 +322,7 @@ def _blob_to_df(blob_url):
316322
return df
317323

318324

319-
def _df_to_blob(df, blob_url):
325+
def _df_to_blob(df, blob_url, session=_BLOBSTORAGE_SESSION):
320326
"""
321327
Upload a Pandas Dataframe as blob to a remote storage.
322328
@@ -328,6 +334,9 @@ def _df_to_blob(df, blob_url):
328334
df : pandas.DataFrame
329335
Pandas DataFrame where column ``index`` is nanoseconds since epoch
330336
(``Int64``) and column ``values`` are ``str`` or ``float64``.
337+
session : requests.Session, default _BLOBSTORAGE_SESSION
338+
Session object to make HTTP calls.
339+
331340
"""
332341
if not isinstance(df, pd.DataFrame):
333342
raise ValueError
@@ -340,18 +349,11 @@ def _df_to_blob(df, blob_url):
340349
df.to_csv(fp, line_terminator="\n", **kwargs)
341350
fp.seek(0)
342351

343-
with requests.Session() as session:
344-
retries = requests.adapters.Retry(
345-
total=5, backoff_factor=0.4, backoff_max=10
346-
)
347-
session.mount(
348-
"https://", requests.adapters.HTTPAdapter(max_retries=retries)
349-
)
350-
session.request(
351-
method="put",
352-
url=blob_url,
353-
headers={"x-ms-blob-type": "BlockBlob"},
354-
data=fp,
355-
timeout=(30, None),
356-
).raise_for_status()
352+
session.request(
353+
method="put",
354+
url=blob_url,
355+
headers={"x-ms-blob-type": "BlockBlob"},
356+
data=fp,
357+
timeout=(30, None),
358+
).raise_for_status()
357359
return

0 commit comments

Comments
 (0)