1515
1616log = logging .getLogger (__name__ )
1717
18+ _BLOBSTORAGE_SESSION = requests .Session ()
19+ _BLOBSTORAGE_SESSION .mount (
20+ "https://" ,
21+ requests .adapters .HTTPAdapter (
22+ max_retries = requests .adapters .Retry (total = 5 , backoff_factor = 0.4 , backoff_max = 10 )
23+ ),
24+ )
25+
1826
1927def _encode_for_path_safety (value ):
2028 return str (base64 .urlsafe_b64encode (str (value ).encode ()).decode ())
@@ -276,7 +284,7 @@ def _evict_from_cache(self):
276284 )
277285
278286
279- def _blob_to_df (blob_url ):
287+ def _blob_to_df (blob_url , session = _BLOBSTORAGE_SESSION ):
280288 """
281289 Download blob from remote storage and present it as a Pandas DataFrame.
282290
@@ -285,6 +293,8 @@ def _blob_to_df(blob_url):
285293 blob_url : str
286294 Fully formatted URL to the blob. Must contain all the required parameters
287295 in the URL.
296+ session : requests.Session, default _BLOBSTORAGE_SESSION
297+ Session object to make HTTP calls.
288298
289299 Return
290300 ------
@@ -293,19 +303,15 @@ def _blob_to_df(blob_url):
293303 (``Int64``) and column ``values`` are ``str`` or ``float64``.
294304 """
295305
296- with requests .Session () as session :
297- retries = requests .adapters .Retry (total = 5 , backoff_factor = 0.4 , backoff_max = 10 )
298- session .mount ("https://" , requests .adapters .HTTPAdapter (max_retries = retries ))
299- response = session .request (method = "get" , url = blob_url , timeout = 30 , stream = True )
300- response .raise_for_status ()
301-
302- response .encoding = "utf-8" # enforce encoding
306+ response = session .request (method = "get" , url = blob_url , timeout = 30 , stream = True )
307+ response .raise_for_status ()
308+ response .encoding = "utf-8" # enforce encoding
303309
304- content = [
305- line .split ("," , maxsplit = 1 )
306- for line in response .iter_lines (decode_unicode = True )
307- if line
308- ]
310+ content = [
311+ line .split ("," , maxsplit = 1 )
312+ for line in response .iter_lines (decode_unicode = True )
313+ if line
314+ ]
309315
310316 df = (
311317 pd .DataFrame (content , columns = ("index" , "values" ), copy = False )
@@ -316,7 +322,7 @@ def _blob_to_df(blob_url):
316322 return df
317323
318324
319- def _df_to_blob (df , blob_url ):
325+ def _df_to_blob (df , blob_url , session = _BLOBSTORAGE_SESSION ):
320326 """
321327 Upload a Pandas Dataframe as blob to a remote storage.
322328
@@ -328,6 +334,9 @@ def _df_to_blob(df, blob_url):
328334 df : pandas.DataFrame
329335 Pandas DataFrame where column ``index`` is nano-seconds since epoch
330336 (``Int64``) and column ``values`` are ``str`` or ``float64``.
337+ session : requests.Session, default _BLOBSTORAGE_SESSION
338+ Session object to make HTTP calls.
339+
331340 """
332341 if not isinstance (df , pd .DataFrame ):
333342 raise ValueError
@@ -340,18 +349,11 @@ def _df_to_blob(df, blob_url):
340349 df .to_csv (fp , line_terminator = "\n " , ** kwargs )
341350 fp .seek (0 )
342351
343- with requests .Session () as session :
344- retries = requests .adapters .Retry (
345- total = 5 , backoff_factor = 0.4 , backoff_max = 10
346- )
347- session .mount (
348- "https://" , requests .adapters .HTTPAdapter (max_retries = retries )
349- )
350- session .request (
351- method = "put" ,
352- url = blob_url ,
353- headers = {"x-ms-blob-type" : "BlockBlob" },
354- data = fp ,
355- timeout = (30 , None ),
356- ).raise_for_status ()
352+ session .request (
353+ method = "put" ,
354+ url = blob_url ,
355+ headers = {"x-ms-blob-type" : "BlockBlob" },
356+ data = fp ,
357+ timeout = (30 , None ),
358+ ).raise_for_status ()
357359 return
0 commit comments