Skip to content

Commit 0e8411c

Browse files
authored
Merge pull request #1 from staskh/test_daily
Full IGLU-R test compatibility
2 parents 65502b6 + 050258b commit 0e8411c

26 files changed

Lines changed: 110124 additions & 1813 deletions

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ on:
55
branches: [ main, develop ]
66
pull_request:
77
branches: [ main, develop ]
8+
workflow_dispatch:
89

910
jobs:
1011
test:

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,6 @@ cython_debug/
176176
.vscode
177177
iglu_python.code-workspace
178178
sandbox.ipynb
179+
tests/data/day1.csv
180+
tests/data/day2.csv
181+
tests/data/day5.csv

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@ A significant focus of this project has been ensuring compatibility with the ori
1919
This approach ensures that the Python implementation produces results consistent with the original R package.
2020

2121
## Unit Test Status
22-
Unless noted, iglu-r test is considered successful if it achives precision of 1e-3
22+
Unless noted, iglu-r test is considered successful if it achieves precision of 0.001
2323

2424
| Function | IGLU-R test compatibility | array/list/Series | TZ | Comments |
2525
|----------|---------------------------|-------------------|----|----------|
2626
| above_percent || |||
2727
| active_percent ||
2828
| adrr ||
29-
| auc| 🟡 (0.1 precision) | || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)|
29+
| auc| 🟡 (0.01 precision) | || see [auc_evaluation.ipynb](https://github.com/staskh/iglu_python/blob/main/notebooks/auc_evaluation.ipynb)|
3030
| below_percent||
3131
| cogi ||
3232
| conga ||
@@ -48,8 +48,8 @@ Unless noted, iglu-r test is considered successful if it achives precision of 1e
4848
| j_index ||
4949
| lbgi ||
5050
| mad_glu ||
51-
| mag | 🟡 (0.1 precision)|
52-
| mage | 🟡 (0.2 precision) | || See algorithm at [MAGE](https://github.com/irinagain/iglu/blob/master/vignettes/MAGE.Rmd) |
51+
| mag | | || IMHO, the original R implementation has an error |
52+
| mage | | || See algorithm at [MAGE](https://github.com/irinagain/iglu/blob/master/vignettes/MAGE.Rmd) |
5353
| mean_glu ||
5454
| median_glu ||
5555
| modd ||
@@ -60,6 +60,8 @@ Unless noted, iglu-r test is considered successful if it achives precision of 1e
6060
| sd_glu ||
6161
| sd_measures ||
6262
| sd_roc || |||
63+
| process_data ||
64+
| summary_glu ||
6365
| CGMS2DayByDay ||
6466

6567
# Installation

R_REVIEW.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
## MAG
2+
3+
[(length(na.omit(idx_gl))*n/60)](https://github.com/irinagain/iglu/blob/82e4d1a39901847881d5402d1ac61b3e678d2a5e/R/mag.R#L60) has to be
4+
```
5+
diffs = abs(diff(idx_gl))
6+
mag = sum(diffs, na.rm = TRUE)/
7+
(length(na.omit(diffs))*n/60)
8+
```
9+
10+
## CGMS2DayByDay
11+
12+
[ndays = ceiling(as.double(difftime(max(tr), min(tr), units = "days")) + 1)](https://github.com/irinagain/iglu/blob/82e4d1a39901847881d5402d1ac61b3e678d2a5e/R/utils.R#L208) has to be `ndays = ceiling(as.double(difftime(max(tr), min(tr), units = "days")))`
13+
14+
15+
The grid omits the first measurement of the input data and shifts the timeline by -dt0.
16+
[dti_cum = cumsum(dti)](https://github.com/irinagain/iglu/blob/82e4d1a39901847881d5402d1ac61b3e678d2a5e/R/utils.R#L210C13-L210C19) has to be `dti_cum = c(0,cumsum(dti))`
17+

iglu_python/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,14 @@
2929
from .median_glu import median_glu
3030
from .modd import modd
3131
from .pgs import pgs
32+
from .process_data import process_data
3233
from .quantile_glu import quantile_glu
3334
from .range_glu import range_glu
3435
from .roc import roc
3536
from .sd_glu import sd_glu
37+
from .sd_measures import sd_measures
3638
from .sd_roc import sd_roc
39+
from .summary_glu import summary_glu
3740
from .utils import IGLU_R_COMPATIBLE, CGMS2DayByDay, check_data_columns, gd2d_to_df
3841

3942
__all__ = [
@@ -72,9 +75,12 @@
7275
"median_glu",
7376
"modd",
7477
"pgs",
78+
"process_data",
7579
"quantile_glu",
7680
"range_glu",
7781
"roc",
7882
"sd_glu",
83+
"sd_measures",
7984
"sd_roc",
85+
"summary_glu",
8086
]

iglu_python/active_percent.py

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ def active_percent(
3737
Number of days to consider in the calculation.
3838
consistent_end_date : Optional[Union[str, datetime]], default=None
3939
End date to be used for every subject. If None, each subject will have their own end date.
40+
Used only in range_type=='manual' mode
4041
4142
Returns
4243
-------
@@ -118,31 +119,35 @@ def active_percent(
118119
active_percent = (
119120
(theoretical_gl_vals - missing_gl_vals) / theoretical_gl_vals
120121
) * 100
121-
122+
elif range_type == "manual":
122123
# Handle consistent end date if provided
123124
if consistent_end_date is not None:
124125
end_date = localize_naive_timestamp(pd.to_datetime(consistent_end_date))
125-
start_date = end_date - pd.Timedelta(days=int(ndays))
126-
127-
# Filter data to the specified date range
128-
mask = (sub_data["time"] >= start_date) & (sub_data["time"] <= end_date)
129-
sub_data = sub_data[mask]
130-
131-
# Recalculate active percentage for the specified range
132-
active_percent = (len(sub_data) / (ndays * (24 * (60 / dt0)))) * 100
133-
min_time = start_date
134-
max_time = end_date
135-
ndays = (end_date - start_date).total_seconds() / (24 * 3600)
136-
137-
active_perc_data.append(
138-
{
139-
"id": subject,
140-
"active_percent": active_percent,
141-
"ndays": round(ndays, 1),
142-
"start_date": min_time,
143-
"end_date": max_time,
144-
}
145-
)
126+
else:
127+
end_date = sub_data["time"].max()
128+
start_date = end_date - pd.Timedelta(days=int(ndays))
129+
130+
# Filter data to the specified date range
131+
mask = (sub_data["time"] >= start_date) & (sub_data["time"] <= end_date)
132+
sub_data = sub_data[mask]
133+
134+
# Recalculate active percentage for the specified range
135+
active_percent = (len(sub_data) / (ndays * (24 * (60 / dt0)))) * 100
136+
min_time = start_date
137+
max_time = end_date
138+
ndays = (end_date - start_date).total_seconds() / (24 * 3600)
139+
else:
140+
raise ValueError(f"Invalid range_type: {range_type}")
141+
142+
active_perc_data.append(
143+
{
144+
"id": subject,
145+
"active_percent": active_percent,
146+
"ndays": round(ndays, 1),
147+
"start_date": min_time,
148+
"end_date": max_time,
149+
}
150+
)
146151

147152
# Convert to DataFrame
148153
result = pd.DataFrame(active_perc_data)

iglu_python/auc.py

Lines changed: 36 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import numpy as np
33
import pandas as pd
44

5-
from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df
5+
from .utils import CGMS2DayByDay, check_data_columns, gd2d_to_df, IGLU_R_COMPATIBLE
66

77

88
def auc(data: pd.DataFrame, tz: str = "") -> pd.DataFrame:
@@ -64,25 +64,41 @@ def auc_single(subject_data: pd.DataFrame) -> float:
6464
gd2d, actual_dates, dt0 = CGMS2DayByDay(subject_data, tz=tz)
6565

6666
# Convert gd2d to DataFrame
67-
hourly_data = gd2d_to_df(gd2d, actual_dates, dt0)
68-
# Add hour column by rounding time to nearest hour
69-
hourly_data['hour'] = hourly_data['time'].dt.floor('h')
70-
71-
hourly_data['gl_next'] = hourly_data['gl'].shift(-1)
72-
73-
# Calculate AUC for each hour using trapezoidal rule (mg*min/dL)
74-
hourly_auc = hourly_data.groupby("hour").apply(
75-
lambda x: np.nansum(
76-
(dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
77-
),
78-
include_groups=False
79-
)
80-
# 0 mean no data in this hour, replace with nan
81-
hourly_auc = hourly_auc.replace(0, np.nan)
82-
83-
hourly_avg = hourly_auc.mean(skipna=True)
84-
# Return mean of daily hourly averages
85-
return hourly_avg
67+
input_data = gd2d_to_df(gd2d, actual_dates, dt0)
68+
if IGLU_R_COMPATIBLE:
69+
input_data['day'] = input_data['time'].dt.floor('d')
70+
input_data['gl_next'] = input_data['gl'].shift(-1)
71+
each_day_area = input_data.groupby("day").apply(
72+
lambda x: np.nansum(
73+
(dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
74+
),
75+
include_groups=False
76+
)
77+
# calculate number of not nan trapezoids in total (number of not nan gl and gl_next)
78+
n_trapezoids = (~np.isnan(input_data["gl"]) & ~np.isnan(input_data["gl_next"])).sum()
79+
hours = dt0/60 * n_trapezoids
80+
daily_area = each_day_area.sum()
81+
hourly_avg = daily_area/hours
82+
return hourly_avg
83+
else:
84+
# Add hour column by rounding time to nearest hour
85+
input_data['hour'] = input_data['time'].dt.floor('h')
86+
87+
input_data['gl_next'] = input_data['gl'].shift(-1)
88+
89+
# Calculate AUC for each hour using trapezoidal rule (mg*min/dL)
90+
hourly_auc = input_data.groupby("hour").apply(
91+
lambda x: np.nansum(
92+
(dt0/60)*(x["gl"].values + x["gl_next"].values) / 2
93+
),
94+
include_groups=False
95+
)
96+
# 0 mean no data in this hour, replace with nan
97+
hourly_auc = hourly_auc.replace(0, np.nan)
98+
99+
hourly_avg = hourly_auc.mean(skipna=True)
100+
# Return mean of daily hourly averages
101+
return hourly_avg
86102

87103
# Process each subject
88104
result = []

iglu_python/mag.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44
import pandas as pd
55

6-
from .utils import CGMS2DayByDay, check_data_columns
6+
from .utils import CGMS2DayByDay, check_data_columns, IGLU_R_COMPATIBLE
77

88

99
def mag(
@@ -92,16 +92,31 @@ def mag_single(data: pd.DataFrame, n: int) -> float:
9292

9393
# Calculate absolute differences between readings n minutes apart
9494
lag = readings_per_interval
95-
diffs = gl_values[lag:] - gl_values[:-lag]
96-
diffs = np.abs(diffs)
97-
diffs = diffs[~np.isnan(diffs)]
9895

99-
# Calculate MAG: sum of absolute differences divided by total time in hours
100-
total_time_hours = ((len(diffs)) * n) / 60
101-
if total_time_hours == 0:
102-
return 0.0
103-
104-
return float(np.sum(diffs) / total_time_hours)
96+
if IGLU_R_COMPATIBLE:
97+
idx = np.arange(0,len(gl_values),lag)
98+
gl_values_idx = gl_values[idx]
99+
diffs = gl_values_idx[1:] - gl_values_idx[:-1]
100+
diffs = np.abs(diffs)
101+
diffs = diffs[~np.isnan(diffs)]
102+
# to be IGLU-R test compatible, imho they made error.
103+
# has to be total_time_hours = ((len(diffs)) * n) / 60
104+
total_time_hours = ((len(gl_values_idx[~np.isnan(gl_values_idx)])) * n) / 60
105+
if total_time_hours == 0:
106+
return 0.0
107+
mag = float(np.sum(diffs) / total_time_hours)
108+
else:
109+
diffs = gl_values[lag:] - gl_values[:-lag]
110+
diffs = np.abs(diffs)
111+
diffs = diffs[~np.isnan(diffs)]
112+
113+
# Calculate MAG: sum of absolute differences divided by total time in hours
114+
total_time_hours = ((len(diffs)) * n) / 60
115+
if total_time_hours == 0:
116+
return 0.0
117+
mag = float(np.sum(diffs) / total_time_hours)
118+
119+
return mag
105120

106121
# Handle Series input
107122
if isinstance(data, pd.Series):

0 commit comments

Comments
 (0)