|
19 | 19 | "\n", |
20 | 20 | "import pandas as pd\n", |
21 | 21 | "import rpy2.robjects as ro\n", |
| 22 | + "import iglu_py\n", |
22 | 23 | "from iglu_py import bridge" |
23 | 24 | ] |
24 | 25 | }, |
|
80 | 81 | " return result\n" |
81 | 82 | ] |
82 | 83 | }, |
| 84 | + { |
| 85 | + "cell_type": "markdown", |
| 86 | + "metadata": {}, |
| 87 | + "source": [ |
| 88 | + "## Simple test " |
| 89 | + ] |
| 90 | + }, |
83 | 91 | { |
84 | 92 | "cell_type": "markdown", |
85 | 93 | "metadata": {}, |
|
182 | 190 | }, |
183 | 191 | { |
184 | 192 | "cell_type": "code", |
185 | | - "execution_count": 5, |
| 193 | + "execution_count": 6, |
186 | 194 | "metadata": {}, |
187 | 195 | "outputs": [ |
188 | 196 | { |
189 | 197 | "name": "stdout", |
190 | 198 | "output_type": "stream", |
191 | 199 | "text": [ |
192 | | - "(2, 288)\n", |
193 | | - "[Timestamp('2020-01-01 00:00:00'), Timestamp('2020-01-02 00:00:00')]\n", |
194 | | - "5.0\n", |
| 200 | + "gd2d.shape=(2, 288) \t/ expected (1,288)\n", |
| 201 | + "actual_dates=[Timestamp('2020-01-01 00:00:00'), Timestamp('2020-01-02 00:00:00')] \t/ expected [Timestamp('2020-01-01 00:00:00')]\n", |
| 202 | + "dt0=5.0\n", |
| 203 | + "gd2d[:,0:5]=\n", |
195 | 204 | "[[155. 160. 165. nan nan]\n", |
196 | | - " [ nan nan nan nan nan]]\n" |
| 205 | + " [ nan nan nan nan nan]] \t/ expected [[150. 155. 160. 165. nan]]\n" |
197 | 206 | ] |
198 | 207 | } |
199 | 208 | ], |
|
204 | 213 | "actual_dates = r_result['actual_dates']\n", |
205 | 214 | "dt0 = r_result['dt0']\n", |
206 | 215 | "\n", |
207 | | - "print(gd2d.shape) # expected (1,288)\n", |
208 | | - "print(actual_dates) # expected [datetime.date(2020, 1, 1)]\n", |
209 | | - "print(dt0) # expected 5\n", |
210 | | - "\n", |
211 | | - "print(gd2d[:,0:5]) # expected [[150. 155. 160. 165. nan]]\n", |
| 216 | + "print(f\"gd2d.shape={gd2d.shape} \\t/ expected (1,288)\") # expected (1,288)\n", |
| 217 | + "print(f\"actual_dates={actual_dates} \\t/ expected [Timestamp('2020-01-01 00:00:00')]\") # expected [datetime.date(2020, 1, 1)]\n", |
| 218 | + "print(f\"dt0={dt0}\") # expected 5\n", |
| 219 | + "print(f\"gd2d[:,0:5]=\\n{gd2d[:,0:5]} \\t/ expected [[150. 155. 160. 165. nan]]\") # expected [[150. 155. 160. 165. nan]]\n", |
212 | 220 | "\n", |
213 | 221 | "\n", |
214 | 222 | "\n" |
|
218 | 226 | "cell_type": "markdown", |
219 | 227 | "metadata": {}, |
220 | 228 | "source": [ |
221 | | - "**Note:** gd2d.shape is (2, 288) instead of (1, 288) and gd2d[0,:] has only 3 non-nan values instead of expected 4\n", |
| 229 | + "**Note:** gd2d.shape is (2, 288) instead of (1, 288) and gd2d[0,:] has only 3 non-nan values instead of expected 4" |
| 230 | + ] |
| 231 | + }, |
| 232 | + { |
| 233 | + "cell_type": "markdown", |
| 234 | + "metadata": {}, |
| 235 | + "source": [ |
| 236 | + "### Impact \n", |
| 237 | + "\n", |
| 238 | + "While these discrepancies may appear minor, they can significantly impact certain metric calculations.\n", |
| 239 | + "\n", |
| 240 | + "For example, when calculating AUC on synthetic data (shown below), we expect a result of 100, \n", |
| 241 | + "but the AUC metric returns 102.2222 due to these interpolation differences." |
| 242 | + ] |
| 243 | + }, |
| 244 | + { |
| 245 | + "cell_type": "code", |
| 246 | + "execution_count": 7, |
| 247 | + "metadata": {}, |
| 248 | + "outputs": [ |
| 249 | + { |
| 250 | + "data": { |
| 251 | + "text/html": [ |
| 252 | + "<div>\n", |
| 253 | + "<style scoped>\n", |
| 254 | + " .dataframe tbody tr th:only-of-type {\n", |
| 255 | + " vertical-align: middle;\n", |
| 256 | + " }\n", |
| 257 | + "\n", |
| 258 | + " .dataframe tbody tr th {\n", |
| 259 | + " vertical-align: top;\n", |
| 260 | + " }\n", |
| 261 | + "\n", |
| 262 | + " .dataframe thead th {\n", |
| 263 | + " text-align: right;\n", |
| 264 | + " }\n", |
| 265 | + "</style>\n", |
| 266 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 267 | + " <thead>\n", |
| 268 | + " <tr style=\"text-align: right;\">\n", |
| 269 | + " <th></th>\n", |
| 270 | + " <th>id</th>\n", |
| 271 | + " <th>hourly_auc</th>\n", |
| 272 | + " </tr>\n", |
| 273 | + " </thead>\n", |
| 274 | + " <tbody>\n", |
| 275 | + " <tr>\n", |
| 276 | + " <th>1</th>\n", |
| 277 | + " <td>subject1</td>\n", |
| 278 | + " <td>102.222222</td>\n", |
| 279 | + " </tr>\n", |
| 280 | + " </tbody>\n", |
| 281 | + "</table>\n", |
| 282 | + "</div>" |
| 283 | + ], |
| 284 | + "text/plain": [ |
| 285 | + " id hourly_auc\n", |
| 286 | + "1 subject1 102.222222" |
| 287 | + ] |
| 288 | + }, |
| 289 | + "execution_count": 7, |
| 290 | + "metadata": {}, |
| 291 | + "output_type": "execute_result" |
| 292 | + } |
| 293 | + ], |
| 294 | + "source": [ |
| 295 | + "hours = 1\n", |
| 296 | + "dt0 = 5\n", |
| 297 | + "samples = int(hours*60/dt0)\n", |
| 298 | + "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n", |
| 299 | + "glucose_values = [80,120]* int(samples/2)\n", |
| 300 | + "\n", |
| 301 | + "syntheticdata = pd.DataFrame({\n", |
| 302 | + " 'id': ['subject1'] * samples,\n", |
| 303 | + " 'time': times,\n", |
| 304 | + " 'gl': glucose_values\n", |
| 305 | + "})\n", |
222 | 306 | "\n", |
| 307 | + "synthetic_iglu_auc_results = iglu_py.auc(syntheticdata)\n", |
| 308 | + "synthetic_iglu_auc_results" |
| 309 | + ] |
| 310 | + }, |
| 311 | + { |
| 312 | + "cell_type": "markdown", |
| 313 | + "metadata": {}, |
| 314 | + "source": [ |
| 315 | + "## UTC timezone \n", |
223 | 316 | "Now, let's try to localize to UTC timezone. " |
224 | 317 | ] |
225 | 318 | }, |
226 | 319 | { |
227 | 320 | "cell_type": "code", |
228 | | - "execution_count": 6, |
| 321 | + "execution_count": 8, |
229 | 322 | "metadata": {}, |
230 | 323 | "outputs": [ |
231 | 324 | { |
|
265 | 358 | "cell_type": "markdown", |
266 | 359 | "metadata": {}, |
267 | 360 | "source": [ |
| 361 | + "## Midday test\n", |
268 | 362 | "Let's try with a 4th measurement at 10am. On a 5 min grid, the 10am measurement has to be at position 10*(60/5)=120. " |
269 | 363 | ] |
270 | 364 | }, |
271 | 365 | { |
272 | 366 | "cell_type": "code", |
273 | | - "execution_count": 7, |
| 367 | + "execution_count": 9, |
274 | 368 | "metadata": {}, |
275 | 369 | "outputs": [ |
276 | 370 | { |
|
356 | 450 | }, |
357 | 451 | { |
358 | 452 | "cell_type": "code", |
359 | | - "execution_count": 8, |
| 453 | + "execution_count": 10, |
360 | 454 | "metadata": {}, |
361 | 455 | "outputs": [ |
362 | 456 | { |
|
402 | 496 | "cell_type": "markdown", |
403 | 497 | "metadata": {}, |
404 | 498 | "source": [ |
| 499 | + "## Midnight test with UTC\n", |
| 500 | + "\n", |
405 | 501 | "Let's now look at data that spans two consecutive days" |
406 | 502 | ] |
407 | 503 | }, |
408 | 504 | { |
409 | 505 | "cell_type": "code", |
410 | | - "execution_count": 9, |
| 506 | + "execution_count": 19, |
411 | 507 | "metadata": {}, |
412 | 508 | "outputs": [ |
413 | 509 | { |
|
525 | 621 | }, |
526 | 622 | { |
527 | 623 | "cell_type": "code", |
528 | | - "execution_count": 10, |
| 624 | + "execution_count": 22, |
529 | 625 | "metadata": {}, |
530 | 626 | "outputs": [ |
531 | 627 | { |
|
537 | 633 | "5.0\n", |
538 | 634 | "[[155. 160. 165. nan nan]\n", |
539 | 635 | " [155. 160. 165. nan nan]\n", |
| 636 | + " [ nan nan nan nan nan]]\n", |
| 637 | + "[[ nan nan nan nan 150.]\n", |
| 638 | + " [ nan nan nan nan nan]\n", |
540 | 639 | " [ nan nan nan nan nan]]\n" |
541 | 640 | ] |
542 | 641 | } |
|
552 | 651 | "print(actual_dates) # expected [datetime.date(2020, 1, 1)]\n", |
553 | 652 | "print(dt0) # expected 5\n", |
554 | 653 | "\n", |
555 | | - "print(gd2d[:,0:5]) # expected [[150. 155. 160. 165. nan]]" |
| 654 | + "print(gd2d[:,0:5]) # expected [[150. 155. 160. 165. nan]]\n", |
| 655 | + "print(gd2d[:,283:])" |
556 | 656 | ] |
557 | 657 | }, |
558 | 658 | { |
559 | 659 | "cell_type": "markdown", |
560 | 660 | "metadata": {}, |
561 | 661 | "source": [ |
562 | | - "**Note:** gd2d.shape is (3,288) instead of expected (2,288) and start date shifted to 2019-12-31" |
| 662 | + "**Note:** gd2d.shape is (3,288) instead of expected (2,288), the second-day sample shifted to the first day and the start date shifted to 2019-12-31" |
563 | 663 | ] |
564 | 664 | }, |
565 | 665 | { |
566 | 666 | "cell_type": "markdown", |
567 | 667 | "metadata": {}, |
568 | 668 | "source": [ |
| 669 | + "## Cross over midnight with UTC\n", |
569 | 670 | "Let's test two-day records that cross over midnight " |
570 | 671 | ] |
571 | 672 | }, |
572 | 673 | { |
573 | 674 | "cell_type": "code", |
574 | | - "execution_count": 11, |
| 675 | + "execution_count": 17, |
575 | 676 | "metadata": {}, |
576 | 677 | "outputs": [ |
577 | 678 | { |
|
689 | 790 | }, |
690 | 791 | { |
691 | 792 | "cell_type": "code", |
692 | | - "execution_count": 12, |
| 793 | + "execution_count": 18, |
693 | 794 | "metadata": {}, |
694 | 795 | "outputs": [ |
695 | 796 | { |
|
699 | 800 | "(2, 288)\n", |
700 | 801 | "[Timestamp('2019-12-31 00:00:00'), Timestamp('2020-01-01 00:00:00')]\n", |
701 | 802 | "5.0\n", |
702 | | - "[[ nan nan nan nan nan]\n", |
703 | | - " [175. 180. 185. nan nan]]\n" |
| 803 | + "[[ nan nan nan 150. 155. 160. 165. 170.]\n", |
| 804 | + " [ nan nan nan nan nan nan nan nan]]\n", |
| 805 | + "[[ nan nan nan nan nan nan nan nan]\n", |
| 806 | + " [175. 180. 185. nan nan nan nan nan]]\n" |
704 | 807 | ] |
705 | 808 | } |
706 | 809 | ], |
|
715 | 818 | "print(actual_dates) # expected [datetime.date(2020, 1, 1)]\n", |
716 | 819 | "print(dt0) # expected 5\n", |
717 | 820 | "\n", |
718 | | - "print(gd2d[:,0:5]) # expected [[150. 155. 160. 165. nan]]" |
| 821 | + "print(gd2d[:,280:]) \n", |
| 822 | + "print(gd2d[:,:8]) # expected [[150. 155. 160. 165. nan]]" |
719 | 823 | ] |
720 | 824 | }, |
721 | 825 | { |
722 | 826 | "cell_type": "markdown", |
723 | 827 | "metadata": {}, |
724 | 828 | "source": [ |
725 | | - "**Note:** Now we have (as expected) gd2d.shape==(2, 288), but midnight measurement shifted to a previous day." |
| 829 | + "**Note:** Now we have (as expected) gd2d.shape==(2, 288), but the midnight measurement shifted to the previous day and 2020-01-02 disappeared from actual dates" |
726 | 830 | ] |
727 | 831 | }, |
728 | 832 | { |
|
0 commit comments