Skip to content

Commit 96f9c5e

Browse files
committed
more details in Discrepancies notebook
1 parent 5bbe795 commit 96f9c5e

2 files changed

Lines changed: 129 additions & 25 deletions

File tree

iglu_r_discrepancies.ipynb

Lines changed: 128 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
"\n",
2020
"import pandas as pd\n",
2121
"import rpy2.robjects as ro\n",
22+
"import iglu_py\n",
2223
"from iglu_py import bridge"
2324
]
2425
},
@@ -80,6 +81,13 @@
8081
" return result\n"
8182
]
8283
},
84+
{
85+
"cell_type": "markdown",
86+
"metadata": {},
87+
"source": [
88+
"## Simple test "
89+
]
90+
},
8391
{
8492
"cell_type": "markdown",
8593
"metadata": {},
@@ -182,18 +190,19 @@
182190
},
183191
{
184192
"cell_type": "code",
185-
"execution_count": 5,
193+
"execution_count": 6,
186194
"metadata": {},
187195
"outputs": [
188196
{
189197
"name": "stdout",
190198
"output_type": "stream",
191199
"text": [
192-
"(2, 288)\n",
193-
"[Timestamp('2020-01-01 00:00:00'), Timestamp('2020-01-02 00:00:00')]\n",
194-
"5.0\n",
200+
"gd2d.shape=(2, 288) \t/ expected (1,288)\n",
201+
"actual_dates=[Timestamp('2020-01-01 00:00:00'), Timestamp('2020-01-02 00:00:00')] \t/ expected [Timestamp('2020-01-01 00:00:00')]\n",
202+
"dt0=5.0\n",
203+
"gd2d[:,0:5]=\n",
195204
"[[155. 160. 165. nan nan]\n",
196-
" [ nan nan nan nan nan]]\n"
205+
" [ nan nan nan nan nan]] \t/ expected [[150. 155. 160. 165. nan]]\n"
197206
]
198207
}
199208
],
@@ -204,11 +213,10 @@
204213
"actual_dates = r_result['actual_dates']\n",
205214
"dt0 = r_result['dt0']\n",
206215
"\n",
207-
"print(gd2d.shape) # expected (1,288)\n",
208-
"print(actual_dates) # expected [datetime.date(2020, 1, 1)]\n",
209-
"print(dt0) # expected 5\n",
210-
"\n",
211-
"print(gd2d[:,0:5]) # expected [[150. 155. 160. 165. nan]]\n",
216+
"print(f\"gd2d.shape={gd2d.shape} \\t/ expected (1,288)\") # expected (1,288)\n",
217+
"print(f\"actual_dates={actual_dates} \\t/ expected [Timestamp('2020-01-01 00:00:00')]\") # expected [datetime.date(2020, 1, 1)]\n",
218+
"print(f\"dt0={dt0}\") # expected 5\n",
219+
"print(f\"gd2d[:,0:5]=\\n{gd2d[:,0:5]} \\t/ expected [[150. 155. 160. 165. nan]]\") # expected [[150. 155. 160. 165. nan]]\n",
212220
"\n",
213221
"\n",
214222
"\n"
@@ -218,14 +226,99 @@
218226
"cell_type": "markdown",
219227
"metadata": {},
220228
"source": [
221-
"**Note:** gd2d.shape is (2, 288) instead of (1, 288) and gd2d[0,:] has only 3 non-nan values instead of expected 4\n",
229+
"**Note:** gd2d.shape is (2, 288) instead of (1, 288) and gd2d[0,:] has only 3 non-nan values instead of expected 4"
230+
]
231+
},
232+
{
233+
"cell_type": "markdown",
234+
"metadata": {},
235+
"source": [
236+
"### Impact \n",
237+
"\n",
238+
"While these discrepancies may appear minor, they can significantly impact certain metric calculations.\n",
239+
"\n",
240+
"For example, when calculating AUC on synthetic data (shown below), we expect a result of 100, \n",
241+
"but the AUC metric returns 102.2222 due to these interpolation differences."
242+
]
243+
},
244+
{
245+
"cell_type": "code",
246+
"execution_count": 7,
247+
"metadata": {},
248+
"outputs": [
249+
{
250+
"data": {
251+
"text/html": [
252+
"<div>\n",
253+
"<style scoped>\n",
254+
" .dataframe tbody tr th:only-of-type {\n",
255+
" vertical-align: middle;\n",
256+
" }\n",
257+
"\n",
258+
" .dataframe tbody tr th {\n",
259+
" vertical-align: top;\n",
260+
" }\n",
261+
"\n",
262+
" .dataframe thead th {\n",
263+
" text-align: right;\n",
264+
" }\n",
265+
"</style>\n",
266+
"<table border=\"1\" class=\"dataframe\">\n",
267+
" <thead>\n",
268+
" <tr style=\"text-align: right;\">\n",
269+
" <th></th>\n",
270+
" <th>id</th>\n",
271+
" <th>hourly_auc</th>\n",
272+
" </tr>\n",
273+
" </thead>\n",
274+
" <tbody>\n",
275+
" <tr>\n",
276+
" <th>1</th>\n",
277+
" <td>subject1</td>\n",
278+
" <td>102.222222</td>\n",
279+
" </tr>\n",
280+
" </tbody>\n",
281+
"</table>\n",
282+
"</div>"
283+
],
284+
"text/plain": [
285+
" id hourly_auc\n",
286+
"1 subject1 102.222222"
287+
]
288+
},
289+
"execution_count": 7,
290+
"metadata": {},
291+
"output_type": "execute_result"
292+
}
293+
],
294+
"source": [
295+
"hours = 1\n",
296+
"dt0 = 5\n",
297+
"samples = int(hours*60/dt0)\n",
298+
"times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n",
299+
"glucose_values = [80,120]* int(samples/2)\n",
300+
"\n",
301+
"syntheticdata = pd.DataFrame({\n",
302+
" 'id': ['subject1'] * samples,\n",
303+
" 'time': times,\n",
304+
" 'gl': glucose_values\n",
305+
"})\n",
222306
"\n",
307+
"synthetic_iglu_auc_results = iglu_py.auc(syntheticdata)\n",
308+
"synthetic_iglu_auc_results"
309+
]
310+
},
311+
{
312+
"cell_type": "markdown",
313+
"metadata": {},
314+
"source": [
315+
"## UTC timezone \n",
223316
"Now, let's try to localize to the UTC timezone. "
224317
]
225318
},
226319
{
227320
"cell_type": "code",
228-
"execution_count": 6,
321+
"execution_count": 8,
229322
"metadata": {},
230323
"outputs": [
231324
{
@@ -265,12 +358,13 @@
265358
"cell_type": "markdown",
266359
"metadata": {},
267360
"source": [
361+
"## Midday test\n",
268362
"Let's try with 4 measurements at 10am. On a 5-minute grid, the 10am measurement has to be at position 10*(60/5)=120. "
269363
]
270364
},
271365
{
272366
"cell_type": "code",
273-
"execution_count": 7,
367+
"execution_count": 9,
274368
"metadata": {},
275369
"outputs": [
276370
{
@@ -356,7 +450,7 @@
356450
},
357451
{
358452
"cell_type": "code",
359-
"execution_count": 8,
453+
"execution_count": 10,
360454
"metadata": {},
361455
"outputs": [
362456
{
@@ -402,12 +496,14 @@
402496
"cell_type": "markdown",
403497
"metadata": {},
404498
"source": [
499+
"## Midnight test with UTC\n",
500+
"\n",
405501
"Let's now look at data that spans two consecutive days"
406502
]
407503
},
408504
{
409505
"cell_type": "code",
410-
"execution_count": 9,
506+
"execution_count": 19,
411507
"metadata": {},
412508
"outputs": [
413509
{
@@ -525,7 +621,7 @@
525621
},
526622
{
527623
"cell_type": "code",
528-
"execution_count": 10,
624+
"execution_count": 22,
529625
"metadata": {},
530626
"outputs": [
531627
{
@@ -537,6 +633,9 @@
537633
"5.0\n",
538634
"[[155. 160. 165. nan nan]\n",
539635
" [155. 160. 165. nan nan]\n",
636+
" [ nan nan nan nan nan]]\n",
637+
"[[ nan nan nan nan 150.]\n",
638+
" [ nan nan nan nan nan]\n",
540639
" [ nan nan nan nan nan]]\n"
541640
]
542641
}
@@ -552,26 +651,28 @@
552651
"print(actual_dates) # expected [datetime.date(2020, 1, 1)]\n",
553652
"print(dt0) # expected 5\n",
554653
"\n",
555-
"print(gd2d[:,0:5]) # expected [[150. 155. 160. 165. nan]]"
654+
"print(gd2d[:,0:5]) # expected [[150. 155. 160. 165. nan]]\n",
655+
"print(gd2d[:,283:])"
556656
]
557657
},
558658
{
559659
"cell_type": "markdown",
560660
"metadata": {},
561661
"source": [
562-
"**Note:** gd2d.shape is (3,288) instead of expected (2,288) and start date shifted to 2019-12-31"
662+
"**Note:** gd2d.shape is (3,288) instead of the expected (2,288), the second-day sample is shifted to the first day, and the start date is shifted to 2019-12-31"
563663
]
564664
},
565665
{
566666
"cell_type": "markdown",
567667
"metadata": {},
568668
"source": [
669+
"## Cross over midnight with UTC\n",
569670
"Let's test two-day records that cross over midnight "
570671
]
571672
},
572673
{
573674
"cell_type": "code",
574-
"execution_count": 11,
675+
"execution_count": 17,
575676
"metadata": {},
576677
"outputs": [
577678
{
@@ -689,7 +790,7 @@
689790
},
690791
{
691792
"cell_type": "code",
692-
"execution_count": 12,
793+
"execution_count": 18,
693794
"metadata": {},
694795
"outputs": [
695796
{
@@ -699,8 +800,10 @@
699800
"(2, 288)\n",
700801
"[Timestamp('2019-12-31 00:00:00'), Timestamp('2020-01-01 00:00:00')]\n",
701802
"5.0\n",
702-
"[[ nan nan nan nan nan]\n",
703-
" [175. 180. 185. nan nan]]\n"
803+
"[[ nan nan nan 150. 155. 160. 165. 170.]\n",
804+
" [ nan nan nan nan nan nan nan nan]]\n",
805+
"[[ nan nan nan nan nan nan nan nan]\n",
806+
" [175. 180. 185. nan nan nan nan nan]]\n"
704807
]
705808
}
706809
],
@@ -715,14 +818,15 @@
715818
"print(actual_dates) # expected [datetime.date(2020, 1, 1)]\n",
716819
"print(dt0) # expected 5\n",
717820
"\n",
718-
"print(gd2d[:,0:5]) # expected [[150. 155. 160. 165. nan]]"
821+
"print(gd2d[:,280:]) \n",
822+
"print(gd2d[:,:8]) # expected [[150. 155. 160. 165. nan]]"
719823
]
720824
},
721825
{
722826
"cell_type": "markdown",
723827
"metadata": {},
724828
"source": [
725-
"**Note:** Now we have (as expected) gd2d.shape==(2, 288), but midnight measurement shifted to a previous day."
829+
"**Note:** Now we have (as expected) gd2d.shape==(2, 288), but the midnight measurement is shifted to the previous day and 2020-01-02 has disappeared from the actual dates"
726830
]
727831
},
728832
{

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "iglu_python"
7-
version = "0.1.5"
7+
version = "0.1.6"
88
description = "Python implementation of the iglu package for continuous glucose monitoring data analysis"
99
readme = "README.md"
1010
requires-python = ">=3.11"

0 commit comments

Comments
 (0)