|
18 | 18 | }, |
19 | 19 | { |
20 | 20 | "cell_type": "code", |
21 | | - "execution_count": 2, |
| 21 | + "execution_count": 1, |
22 | 22 | "metadata": {}, |
23 | 23 | "outputs": [], |
24 | 24 | "source": [ |
|
51 | 51 | }, |
52 | 52 | { |
53 | 53 | "cell_type": "code", |
54 | | - "execution_count": 3, |
| 54 | + "execution_count": 2, |
55 | 55 | "metadata": {}, |
56 | 56 | "outputs": [ |
57 | 57 | { |
|
153 | 153 | }, |
154 | 154 | { |
155 | 155 | "cell_type": "code", |
156 | | - "execution_count": 4, |
| 156 | + "execution_count": null, |
157 | 157 | "metadata": {}, |
158 | 158 | "outputs": [], |
159 | 159 | "source": [ |
|
167 | 167 | }, |
168 | 168 | { |
169 | 169 | "cell_type": "code", |
170 | | - "execution_count": 5, |
| 170 | + "execution_count": 4, |
171 | 171 | "metadata": {}, |
172 | 172 | "outputs": [ |
173 | 173 | { |
|
194 | 194 | "print(f\"rpy2 version: {version('rpy2')}\")" |
195 | 195 | ] |
196 | 196 | }, |
| 197 | + { |
| 198 | + "cell_type": "markdown", |
| 199 | + "metadata": {}, |
| 200 | + "source": [ |
| 201 | + "## Test on synthetic data\n", |
| 202 | + "\n", |
| 203 | + "- Samples - every 5 min\n", |
| 204 | + "- duration - 1h\n", |
| 205 | + "- values [80,120] repeated for sampling duration\n", |
| 206 | + "\n", |
| 207 | + "Expected hourly AUC = 100 mg.h/dL" |
| 208 | + ] |
| 209 | + }, |
| 210 | + { |
| 211 | + "cell_type": "code", |
| 212 | + "execution_count": 5, |
| 213 | + "metadata": {}, |
| 214 | + "outputs": [ |
| 215 | + { |
| 216 | + "data": { |
| 217 | + "text/html": [ |
| 218 | + "<div>\n", |
| 219 | + "<style scoped>\n", |
| 220 | + " .dataframe tbody tr th:only-of-type {\n", |
| 221 | + " vertical-align: middle;\n", |
| 222 | + " }\n", |
| 223 | + "\n", |
| 224 | + " .dataframe tbody tr th {\n", |
| 225 | + " vertical-align: top;\n", |
| 226 | + " }\n", |
| 227 | + "\n", |
| 228 | + " .dataframe thead th {\n", |
| 229 | + " text-align: right;\n", |
| 230 | + " }\n", |
| 231 | + "</style>\n", |
| 232 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 233 | + " <thead>\n", |
| 234 | + " <tr style=\"text-align: right;\">\n", |
| 235 | + " <th></th>\n", |
| 236 | + " <th>id</th>\n", |
| 237 | + " <th>hourly_auc</th>\n", |
| 238 | + " </tr>\n", |
| 239 | + " </thead>\n", |
| 240 | + " <tbody>\n", |
| 241 | + " <tr>\n", |
| 242 | + " <th>1</th>\n", |
| 243 | + " <td>subject1</td>\n", |
| 244 | + " <td>102.222222</td>\n", |
| 245 | + " </tr>\n", |
| 246 | + " </tbody>\n", |
| 247 | + "</table>\n", |
| 248 | + "</div>" |
| 249 | + ], |
| 250 | + "text/plain": [ |
| 251 | + " id hourly_auc\n", |
| 252 | + "1 subject1 102.222222" |
| 253 | + ] |
| 254 | + }, |
| 255 | + "execution_count": 5, |
| 256 | + "metadata": {}, |
| 257 | + "output_type": "execute_result" |
| 258 | + } |
| 259 | + ], |
| 260 | + "source": [ |
| 261 | + "hours = 1\n", |
| 262 | + "dt0 = 5\n", |
| 263 | + "samples = int(hours*60/dt0)\n", |
| 264 | + "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n", |
| 265 | + "glucose_values = [80,120]* int(samples/2)\n", |
| 266 | + "\n", |
| 267 | + "syntheticdata = pd.DataFrame({\n", |
| 268 | + " 'id': ['subject1'] * samples,\n", |
| 269 | + " 'time': times,\n", |
| 270 | + " 'gl': glucose_values\n", |
| 271 | + "})\n", |
| 272 | + "\n", |
| 273 | + "synthetic_iglu_auc_results = iglu_py.auc(syntheticdata)\n", |
| 274 | + "synthetic_iglu_auc_results" |
| 275 | + ] |
| 276 | + }, |
| 277 | + { |
| 278 | + "cell_type": "markdown", |
| 279 | + "metadata": {}, |
| 280 | + "source": [ |
| 281 | + "**Note:** The incorrect AUC value is a result of bugs in the `CGMS2DayByDay` function:\n", |
| 282 | + "- one sample shift in interpolation - results in 11 samples instead of 12\n", |
| 283 | + "- actual_dates returns 2 dates instead of one\n", |
| 284 | + "\n", |
| 285 | + "Additional suspicious code is in AUC itself: `day = rep(data_ip[[2]], 1440/dt0),` - IMHO it resamples sequential gl values to different days, instead of sampling each day sequentially before moving on to the next\n" |
| 286 | + ] |
| 287 | + }, |
| 288 | + { |
| 289 | + "cell_type": "markdown", |
| 290 | + "metadata": {}, |
| 291 | + "source": [ |
| 292 | + "## Test on example data " |
| 293 | + ] |
| 294 | + }, |
197 | 295 | { |
198 | 296 | "cell_type": "code", |
199 | 297 | "execution_count": 6, |
|
280 | 378 | } |
281 | 379 | ], |
282 | 380 | "source": [ |
| 381 | + "test_data = \"../tests/data/example_data_5_subject.csv\"\n", |
283 | 382 | "# load test data into DF\n", |
284 | 383 | "df = pd.read_csv(test_data, index_col=0)\n", |
285 | 384 | "\n", |
|
298 | 397 | "cell_type": "markdown", |
299 | 398 | "metadata": {}, |
300 | 399 | "source": [ |
301 | | - "Lets try to run AUC on simulated data with easily calculatable AUC" |
| 400 | + "## Conclusions \n", |
| 401 | + "IGLU AUC calculations differ substantially from the expected ranges suggested by ChatGPT\n" |
| 402 | + ] |
| 403 | + }, |
| 404 | + { |
| 405 | + "cell_type": "markdown", |
| 406 | + "metadata": {}, |
| 407 | + "source": [ |
| 408 | + "# IGLU_PYTHON results" |
| 409 | + ] |
| 410 | + }, |
| 411 | + { |
| 412 | + "cell_type": "code", |
| 413 | + "execution_count": 7, |
| 414 | + "metadata": {}, |
| 415 | + "outputs": [], |
| 416 | + "source": [ |
| 417 | + "# Add project directory to PYTHONPATH\n", |
| 418 | + "import os\n", |
| 419 | + "import sys\n", |
| 420 | + "import pandas as pd\n", |
| 421 | + "sys.path.append(os.path.abspath('..'))\n", |
| 422 | + "import iglu_python\n" |
| 423 | + ] |
| 424 | + }, |
| 425 | + { |
| 426 | + "cell_type": "markdown", |
| 427 | + "metadata": {}, |
| 428 | + "source": [ |
| 429 | + "## Test on synthetic data" |
302 | 430 | ] |
303 | 431 | }, |
304 | 432 | { |
305 | 433 | "cell_type": "code", |
306 | | - "execution_count": 18, |
| 434 | + "execution_count": 8, |
307 | 435 | "metadata": {}, |
308 | 436 | "outputs": [ |
309 | 437 | { |
|
333 | 461 | " </thead>\n", |
334 | 462 | " <tbody>\n", |
335 | 463 | " <tr>\n", |
336 | | - " <th>1</th>\n", |
| 464 | + " <th>0</th>\n", |
337 | 465 | " <td>subject1</td>\n", |
338 | | - " <td>102.222222</td>\n", |
| 466 | + " <td>100.0</td>\n", |
339 | 467 | " </tr>\n", |
340 | 468 | " </tbody>\n", |
341 | 469 | "</table>\n", |
342 | 470 | "</div>" |
343 | 471 | ], |
344 | 472 | "text/plain": [ |
345 | 473 | " id hourly_auc\n", |
346 | | - "1 subject1 102.222222" |
| 474 | + "0 subject1 100.0" |
347 | 475 | ] |
348 | 476 | }, |
349 | | - "execution_count": 18, |
| 477 | + "execution_count": 8, |
350 | 478 | "metadata": {}, |
351 | 479 | "output_type": "execute_result" |
352 | 480 | } |
353 | 481 | ], |
354 | 482 | "source": [ |
355 | | - "hours = 1\n", |
356 | | - "dt0 = 5\n", |
357 | | - "samples = int(hours*60/dt0)\n", |
358 | | - "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n", |
359 | | - "glucose_values = [80,120]* int(samples/2)\n", |
360 | | - "\n", |
361 | | - "data = pd.DataFrame({\n", |
362 | | - " 'id': ['subject1'] * samples,\n", |
363 | | - " 'time': times,\n", |
364 | | - " 'gl': glucose_values\n", |
365 | | - "})\n", |
366 | | - "\n", |
367 | | - "iglu_auc_results = iglu_py.auc(data)\n", |
368 | | - "iglu_auc_results" |
| 483 | + "synthetic_iglu_auc_results = iglu_python.auc(syntheticdata)\n", |
| 484 | + "synthetic_iglu_auc_results" |
369 | 485 | ] |
370 | 486 | }, |
371 | 487 | { |
372 | 488 | "cell_type": "markdown", |
373 | 489 | "metadata": {}, |
374 | 490 | "source": [ |
375 | | - "## Conclusions \n", |
376 | | - "IGLU AUC calculations are substantially differ from expected ranges suggested by ChatGPT\n" |
| 491 | + "**Note:** The result matches the expected value" |
377 | 492 | ] |
378 | 493 | }, |
379 | 494 | { |
380 | 495 | "cell_type": "markdown", |
381 | 496 | "metadata": {}, |
382 | 497 | "source": [ |
383 | | - "# IGLU_PYTHON results" |
| 498 | + "## Test on example data" |
384 | 499 | ] |
385 | 500 | }, |
386 | 501 | { |
387 | 502 | "cell_type": "code", |
388 | | - "execution_count": 7, |
389 | | - "metadata": {}, |
390 | | - "outputs": [], |
391 | | - "source": [ |
392 | | - "# Add project directory to PYTHONPATH\n", |
393 | | - "import os\n", |
394 | | - "import sys\n", |
395 | | - "\n", |
396 | | - "sys.path.append(os.path.abspath('..'))" |
397 | | - ] |
398 | | - }, |
399 | | - { |
400 | | - "cell_type": "code", |
401 | | - "execution_count": 12, |
| 503 | + "execution_count": 9, |
402 | 504 | "metadata": {}, |
403 | 505 | "outputs": [ |
404 | 506 | { |
|
501 | 603 | } |
502 | 604 | ], |
503 | 605 | "source": [ |
504 | | - "import pandas as pd\n", |
505 | | - "\n", |
506 | | - "import iglu_python\n", |
507 | | - "\n", |
508 | 606 | "# load test data into DF\n", |
509 | 607 | "df = pd.read_csv(test_data, index_col=0)\n", |
510 | 608 | "\n", |
511 | | - "iglu_python.IGLU_R_COMPATIBLE = False\n", |
512 | 609 | "iglu_python_auc_results = iglu_python.auc(df)\n", |
513 | 610 | "iglu_python_auc_results = iglu_python_auc_results.round(0)\n", |
514 | 611 | "\n", |
|
518 | 615 | "iglu_python_auc_results['Difference to IGLU(%)'] = ((iglu_python_auc_results['IGLU PYTHON AUC (mg*h/dL)'] - iglu_python_auc_results['IGLU AUC (mg*h/dL)']) / iglu_python_auc_results['IGLU AUC (mg*h/dL)'] * 100).round(1)\n", |
519 | 616 | "iglu_python_auc_results['Difference to ChatGPt(%)'] = ((iglu_python_auc_results['IGLU PYTHON AUC (mg*h/dL)'] - iglu_python_auc_results['ChatGPT AUC (mg*h/dL)']) / iglu_python_auc_results['ChatGPT AUC (mg*h/dL)'] * 100).round(1)\n", |
520 | 617 | "\n", |
521 | | - "\n", |
522 | | - "\n", |
523 | | - "display(iglu_python_auc_results)\n", |
524 | | - "\n", |
525 | | - "\n", |
526 | | - "\n" |
527 | | - ] |
528 | | - }, |
529 | | - { |
530 | | - "cell_type": "code", |
531 | | - "execution_count": 21, |
532 | | - "metadata": {}, |
533 | | - "outputs": [ |
534 | | - { |
535 | | - "data": { |
536 | | - "text/html": [ |
537 | | - "<div>\n", |
538 | | - "<style scoped>\n", |
539 | | - " .dataframe tbody tr th:only-of-type {\n", |
540 | | - " vertical-align: middle;\n", |
541 | | - " }\n", |
542 | | - "\n", |
543 | | - " .dataframe tbody tr th {\n", |
544 | | - " vertical-align: top;\n", |
545 | | - " }\n", |
546 | | - "\n", |
547 | | - " .dataframe thead th {\n", |
548 | | - " text-align: right;\n", |
549 | | - " }\n", |
550 | | - "</style>\n", |
551 | | - "<table border=\"1\" class=\"dataframe\">\n", |
552 | | - " <thead>\n", |
553 | | - " <tr style=\"text-align: right;\">\n", |
554 | | - " <th></th>\n", |
555 | | - " <th>id</th>\n", |
556 | | - " <th>hourly_auc</th>\n", |
557 | | - " </tr>\n", |
558 | | - " </thead>\n", |
559 | | - " <tbody>\n", |
560 | | - " <tr>\n", |
561 | | - " <th>0</th>\n", |
562 | | - " <td>subject1</td>\n", |
563 | | - " <td>100.0</td>\n", |
564 | | - " </tr>\n", |
565 | | - " </tbody>\n", |
566 | | - "</table>\n", |
567 | | - "</div>" |
568 | | - ], |
569 | | - "text/plain": [ |
570 | | - " id hourly_auc\n", |
571 | | - "0 subject1 100.0" |
572 | | - ] |
573 | | - }, |
574 | | - "execution_count": 21, |
575 | | - "metadata": {}, |
576 | | - "output_type": "execute_result" |
577 | | - } |
578 | | - ], |
579 | | - "source": [ |
580 | | - "hours = 1\n", |
581 | | - "dt0 = 5\n", |
582 | | - "samples = int(hours*60/dt0)\n", |
583 | | - "times = pd.date_range('2020-01-01', periods=samples, freq=f\"{dt0}min\")\n", |
584 | | - "glucose_values = [80,120]* int(samples/2)\n", |
585 | | - "\n", |
586 | | - "data = pd.DataFrame({\n", |
587 | | - " 'id': ['subject1'] * samples,\n", |
588 | | - " 'time': times,\n", |
589 | | - " 'gl': glucose_values\n", |
590 | | - "})\n", |
591 | | - "\n", |
592 | | - "iglu_python.IGLU_R_COMPATIBLE = True\n", |
593 | | - "iglu_python_auc_results = iglu_python.auc(data)\n", |
594 | | - "iglu_python_auc_results" |
| 618 | + "display(iglu_python_auc_results)\n" |
595 | 619 | ] |
596 | 620 | }, |
597 | 621 | { |
598 | 622 | "cell_type": "markdown", |
599 | 623 | "metadata": {}, |
600 | 624 | "source": [ |
601 | 625 | "## Conclusions \n", |
602 | | - "IGLU_PYTHON AUC calculations are close to IGLU calculations (-5%), and closer to suggested by ChatGPT\n", |
| 626 | + "IGLU_PYTHON AUC calculations are close to IGLU calculations (-0.5%)\n", |
603 | 627 | "\n" |
604 | 628 | ] |
605 | 629 | } |
|
0 commit comments