"""
This module loads continuous glucose monitoring (CGM) time series from
device-specific export files.
It is inspired by https://github.com/cafoala/diametrics/blob/main/src/diametrics/transform.py
"""
5+
6+ from pathlib import Path
7+ import pandas as pd
8+
9+
def load_libre(file_path: str) -> pd.Series:
    """
    Load Libre timeseries from file.

    The file is read without a header: its third row supplies the column
    names and the readings start on the fourth row. The layout is recognised
    from the name of the historic-glucose column; when none of the known
    names is present, the 'Historic Glucose mg/dL' layout is assumed.

    Parameters
    ----------
    file_path : str
        Path to the Libre device file.

    Returns
    -------
    pd.Series
        Series named 'glc' with a datetime index named 'time', sorted by
        time, holding numeric glucose values in mg/dL. Rows whose timestamp
        or historic glucose value is missing or non-numeric are dropped.

    Raises
    ------
    KeyError
        If the timestamp or glucose column of the selected layout is absent.
    ValueError
        If a timestamp does not match the layout's expected format.

    Examples
    --------
    >>> load_libre("tests/data/libre_amer_01.csv")  # doctest: +SKIP
    """
    raw = _open_file(file_path)

    # Third row carries the column names; the data starts on the next row.
    header = raw.iloc[2]
    data = raw.iloc[3:].reset_index(drop=True)
    data.columns = header

    # (glucose column, timestamp column, timestamp format, values in mmol/L?)
    layouts = (
        ('Historic Glucose(mmol/L)', 'Meter Timestamp', '%d-%m-%Y %H:%M', True),
        ('Historic Glucose(mg/dL)', 'Meter Timestamp', '%m-%d-%Y %H:%M', False),
        ('Historic Glucose mmol/L', 'Device Timestamp', '%d-%m-%Y %I:%M %p', True),
        ('Historic Glucose mg/dL', 'Device Timestamp', '%m-%d-%Y %I:%M %p', False),
    )
    # The last layout doubles as the fall-back when no known name is found.
    glucose_col, time_col, time_format, is_mmol = next(
        (layout for layout in layouts if layout[0] in data.columns),
        layouts[-1],
    )

    # Build a fresh frame instead of assigning into a slice of `data`.
    # Because the header rows share the column, the readings arrive as text
    # and must be made numeric before any arithmetic; unparseable cells
    # become NaN and are dropped below.
    readings = pd.DataFrame({
        'time': pd.to_datetime(data[time_col], format=time_format),
        'glc': pd.to_numeric(data[glucose_col], errors='coerce'),
    })

    # Convert to mg/dL if needed
    if is_mmol:
        readings['glc'] = readings['glc'] * 18.01559

    # Drop incomplete rows and order chronologically
    readings = readings.dropna(subset=['time', 'glc']).sort_values('time')

    return readings.set_index('time')['glc']
68+
69+
70+
71+
def load_dexcom(file_path: str) -> pd.Series:
    """
    Read a Dexcom export and return its glucose readings as a time series.

    The first row of the file supplies the column names. The timestamp and
    glucose columns are the first ones whose names contain 'Timestamp' and
    'Glucose' respectively. A glucose column whose name mentions 'mmol/L'
    is converted to mg/dL.

    Parameters
    ----------
    file_path : str
        Path to the Dexcom device file.

    Returns
    -------
    pd.Series
        Glucose values (in mg/dL) indexed by timestamp and sorted by time.
        Rows whose timestamp or glucose value cannot be parsed are omitted.

    Raises
    ------
    ValueError
        If no column name contains 'Timestamp', or none contains 'Glucose'.

    Examples
    --------
    >>> load_dexcom("tests/data/dexcom_eur_01.xlsx")
    """
    table = _open_file(file_path)

    # Promote the first row to column labels, then remove it from the data.
    table.columns = table.iloc[0]
    table = table.iloc[1:].reset_index(drop=True)

    # First column whose label mentions a timestamp
    for label in table.columns:
        if 'Timestamp' in str(label):
            time_label = label
            break
    else:
        raise ValueError("No timestamp column found in Dexcom data")

    # First column whose label mentions glucose
    for label in table.columns:
        if 'Glucose' in str(label):
            glucose_label = label
            break
    else:
        raise ValueError("No glucose column found in Dexcom data")

    # The unit is taken from the glucose column's label.
    needs_conversion = 'mmol/L' in str(glucose_label)

    # Keep only the two columns of interest under fixed names.
    readings = table.loc[:, [time_label, glucose_label]]
    readings.columns = ['time', 'glc']

    # Unparseable cells become NaT/NaN so they can be dropped below.
    readings = readings.assign(
        time=pd.to_datetime(readings['time'], errors='coerce'),
        glc=pd.to_numeric(readings['glc'], errors='coerce'),
    )

    if needs_conversion:
        readings['glc'] = readings['glc'] * 18.01559

    complete = readings.dropna(subset=['time', 'glc'])
    ordered = complete.sort_values('time').reset_index(drop=True)

    return ordered.set_index('time')['glc']
138+
139+
140+
def _open_file(filepath: str) -> pd.DataFrame:
    """
    Open a file and read its contents into a pandas DataFrame.

    The file is read without a header row and its columns are labelled
    0..19, so callers can locate the real header row themselves. The reader
    is chosen from the file extension, compared case-insensitively.

    Args:
        filepath (str): The path to the file.

    Returns:
        pandas.DataFrame: The DataFrame containing the file data.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
        ValueError: If the file extension is not supported, or if pandas
            fails to read the file (the underlying error is chained as the
            cause).
    """
    # TODO: handle S3 path

    path = Path(filepath)
    if not path.exists():
        raise FileNotFoundError(f"File not found: {filepath}")

    # Reader per supported extension
    readers = {
        '.csv': pd.read_csv,
        '.xls': pd.read_excel,
        '.xlsx': pd.read_excel,
        '.txt': pd.read_table,
        '.tsv': pd.read_table,
    }
    reader = readers.get(path.suffix.lower())
    if reader is None:
        # Raised outside the try block so this message is not re-wrapped as
        # a generic read error.
        raise ValueError(f"Unsupported file extension: {path.suffix}")

    try:
        return reader(filepath, header=None, names=list(range(20)))
    except Exception as e:
        # Boundary handler: report any parser or engine failure uniformly
        # while keeping the original exception as the cause.
        raise ValueError(f"Error reading file: {filepath}") from e
0 commit comments