Skip to content

Commit dfe07ef

Browse files
committed
MAGE fully compatible with IGLU-R
1 parent e030fff commit dfe07ef

3 files changed

Lines changed: 129 additions & 70 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ Unless noted, iglu-r test is considered successful if it achieves precision of 0
4949
| lbgi ||
5050
| mad_glu ||
5151
| mag || || IMHO, Original R implementation has an error |
52-
| mage | 🟡 (0.2 precision) | || See algorithm at [MAGE](https://github.com/irinagain/iglu/blob/master/vignettes/MAGE.Rmd) |
52+
| mage | | || See algorithm at [MAGE](https://github.com/irinagain/iglu/blob/master/vignettes/MAGE.Rmd) |
5353
| mean_glu ||
5454
| median_glu ||
5555
| modd ||

iglu_python/mage.py

Lines changed: 110 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -169,14 +169,12 @@ def mage_ma_single(data: pd.DataFrame, short_ma: int, long_ma: int,
169169
# Drop NA rows after last glucose reading
170170
last_valid_idx = interpolated_data['gl'].last_valid_index()
171171
if last_valid_idx is not None:
172-
interpolated_data = interpolated_data.iloc[:last_valid_idx]
172+
interpolated_data = interpolated_data.iloc[:last_valid_idx+1]
173173
# Add gap column to mark NA values as 1
174174
interpolated_data['gap'] = interpolated_data['gl'].isna().astype(int)
175175

176176
# 4. Time Series Segmentation: split gaps > max_gap into separate segments
177-
# ToDo - for now we assume all data is in a single segment ( no gaps over 12h)
178-
dfs = []
179-
dfs.append(interpolated_data)
177+
dfs = segment_time_series(interpolated_data,max_gap) # note: max_gap is in minutes
180178

181179
# 5. Calculate MAGE on each identified segment
182180
return_val = pd.DataFrame(columns=["start", "end", "mage", "plus_or_minus", "first_excursion"])
@@ -221,22 +219,41 @@ def mage_atomic(data, short_ma,long_ma):
221219
data["MA_Short"] = data["gl"].rolling(window=short_ma, min_periods=1).mean()
222220
data["MA_Long"] = data["gl"].rolling(window=long_ma, min_periods=1).mean()
223221
# Fill leading NAs (forward fill first valid value)
224-
data['MA_Short'].iloc[:short_ma] = data['MA_Short'].iloc[short_ma-1]
225-
data['MA_Long'].iloc[:long_ma] = data['MA_Long'].iloc[long_ma-1]
222+
if short_ma > len(data):
223+
data['MA_Short'].iloc[:short_ma] = data['MA_Short'].iloc[-1]
224+
else:
225+
data['MA_Short'].iloc[:short_ma] = data['MA_Short'].iloc[short_ma-1]
226+
if long_ma > len(data):
227+
data['MA_Long'].iloc[:long_ma] = data['MA_Long'].iloc[-1]
228+
else:
229+
data['MA_Long'].iloc[:long_ma] = data['MA_Long'].iloc[long_ma-1]
226230
# Calculate difference
227231
data['DELTA_SHORT_LONG'] = data['MA_Short'] - data['MA_Long']
228232
data = data.reset_index(drop=True)
233+
nmeasurements = len(data)
229234

235+
# Sanity check
236+
if (
237+
data['gl'].isnull().all() or
238+
nmeasurements < 7 or
239+
nmeasurements < short_ma or
240+
np.std(data['gl'], ddof=1) < 1
241+
):
242+
return pd.DataFrame({
243+
'start': [data['time'].iloc[0]],
244+
'end': [data['time'].iloc[-1]],
245+
'MAGE': [np.nan],
246+
'plus_or_minus': [np.nan],
247+
'first_excursion': [np.nan]
248+
})
230249

231250
# 2d. Create a preallocated list of crossing point ids & type
232251
# Find crossing points
233252
# Detect trend reversal points in glucose data using DELTA signal.
234253
# Initialize variables
235254
idx = list(data.index) # R: idx = as.numeric(rownames(.data))
236255
types = {'REL_MIN': 0, 'REL_MAX': 1} # R: types = list2env(list(REL_MIN=0, REL_MAX=1))
237-
238-
nmeasurements = len(data)
239-
256+
240257
# Create storage lists - R: list_cross <- list("id" = rep.int(NA, nmeasurements), "type" = rep.int(NA, nmeasurements))
241258
list_cross = {
242259
'id': [np.nan] * nmeasurements,
@@ -248,6 +265,9 @@ def mage_atomic(data, short_ma,long_ma):
248265
list_cross['type'][0] = types['REL_MAX'] if data['DELTA_SHORT_LONG'].iloc[0] > 0 else types['REL_MIN']
249266
count = 1 # Python uses 0-based indexing, so count starts at 1
250267

268+
# treat DELTA_SHORT_LONG==0 as NaN ( so we can skip its multiplication)
269+
data.loc[data['DELTA_SHORT_LONG'] == 0, 'DELTA_SHORT_LONG'] = np.nan
270+
251271
# Main loop - R: for(i in 2:length(.data$DELTA_SHORT_LONG))
252272
for i in range(1, len(data['DELTA_SHORT_LONG'])):
253273
# Check data validity
@@ -265,25 +285,25 @@ def mage_atomic(data, short_ma,long_ma):
265285
list_cross['type'][count] = types['REL_MAX']
266286
count += 1
267287

268-
# Gap handling: needed for gaps, where DELTA_SHORT_LONG(i-1 | i-2) = NaN
269-
elif (not pd.isna(data['DELTA_SHORT_LONG'].iloc[i]) and
270-
count > 1): # Make sure we have a previous crossover
288+
# Gap handling: needed for gaps, where DELTA_SHORT_LONG(i-1 | i-2) = NaN
289+
elif (not pd.isna(data['DELTA_SHORT_LONG'].iloc[i]) and
290+
count >= 1): # Make sure we have a previous crossover
291+
292+
# R: match(list_cross$id[count-1], idx) - find index of previous crossover
293+
try:
294+
prev_cross_idx = idx.index(list_cross['id'][count-1])
295+
prev_delta = data['DELTA_SHORT_LONG'].iloc[prev_cross_idx]
271296

272-
# R: match(list_cross$id[count-1], idx) - find index of previous crossover
273-
try:
274-
prev_cross_idx = idx.index(list_cross['id'][count-1])
275-
prev_delta = data['DELTA_SHORT_LONG'].iloc[prev_cross_idx]
276-
277-
if (data['DELTA_SHORT_LONG'].iloc[i] * prev_delta < 0):
278-
list_cross['id'][count] = idx[i]
279-
if data['DELTA_SHORT_LONG'].iloc[i] < prev_delta:
280-
list_cross['type'][count] = types['REL_MIN']
281-
else:
282-
list_cross['type'][count] = types['REL_MAX']
283-
count += 1
284-
except ValueError:
285-
# Handle case where previous crossover id not found in idx
286-
pass
297+
if (data['DELTA_SHORT_LONG'].iloc[i] * prev_delta < 0):
298+
list_cross['id'][count] = idx[i]
299+
if data['DELTA_SHORT_LONG'].iloc[i] < prev_delta:
300+
list_cross['type'][count] = types['REL_MIN']
301+
else:
302+
list_cross['type'][count] = types['REL_MAX']
303+
count += 1
304+
except ValueError:
305+
# Handle case where previous crossover id not found in idx
306+
pass
287307

288308
# Add last point to capture excursion at end
289309
# R: utils::tail(idx, 1)
@@ -318,21 +338,20 @@ def mage_atomic(data, short_ma,long_ma):
318338
# Define search boundaries
319339
# R: s1 <- ifelse(i == 1, crosses[i, 1], indexes[i-1])
320340
if i == 0: # First extrema
321-
s1 = int(crosses.iloc[i, 0]) # crosses[i, 1] in R (1-indexed)
341+
s1 = int(crosses.iloc[i]['id']) # crosses[i, 1] in R (1-indexed)
322342
else:
323-
s1 = int(indexes[i-1])
343+
s1 = int(indexes[i-1]) # last minmax index
324344

325345
# R: s2 <- crosses[i+1,1]
326-
s2 = int(crosses.iloc[i+1, 0]) # crosses[i+1, 1] in R
346+
s2 = int(crosses.iloc[i+1]['id']) # crosses[i+1, 1] in R
327347

328348
# Extract glucose segment - R: .data[as.character(s1:s2), ]$gl
329-
# Convert to 0-based indexing for Python
330-
segment_start = s1 - 1 if s1 > 0 else 0
349+
segment_start = s1
331350
segment_end = s2
332-
glucose_segment = data['gl'].iloc[segment_start:segment_end]
351+
glucose_segment = data['gl'].iloc[segment_start:segment_end+1] # including next cross point
333352

334353
# Find min or max based on crossover type
335-
if crosses.iloc[i, 1] == types['REL_MIN']: # crosses[i, "type"] in R
354+
if crosses.iloc[i]['type'] == types['REL_MIN']: # crosses[i, "type"] in R
336355
# R: min(.data[as.character(s1:s2), ]$gl, na.rm = TRUE)
337356
minmax[i] = glucose_segment.min()
338357
# R: which.min(.data[as.character(s1:s2), ]$gl)+s1-1
@@ -358,13 +377,13 @@ def mage_atomic(data, short_ma,long_ma):
358377
mage_minus_heights, mage_minus_tp_pairs = calculate_mage_minus(differences, minmax, standardD)
359378

360379
if len(mage_minus_heights) == 0 and len(mage_plus_heights) == 0:
361-
return {
362-
'start': data['time'][0],
363-
'end': data['time'][-1],
364-
'MAGE': np.nan,
365-
'plus_or_minus': np.nan,
366-
'first_excursion': np.nan
367-
}
380+
return pd.DataFrame({
381+
'start': [data['time'].iloc[0]],
382+
'end': [data['time'].iloc[-1]],
383+
'MAGE': [np.nan],
384+
'plus_or_minus': [np.nan],
385+
'first_excursion': [np.nan]
386+
}, index=[0])
368387

369388
# Determine which excursion type occurs first
370389
if (len(mage_plus_heights) > 0 and
@@ -417,7 +436,8 @@ def mage_atomic(data, short_ma,long_ma):
417436
}
418437
)
419438
if version == "ma":
420-
result = mage_ma_single(data_df, short_ma, long_ma)
439+
mage_val = mage_ma_single(data_df, short_ma, long_ma, direction, return_type='num')
440+
result = pd.DataFrame({"MAGE": [mage_val]})
421441
else:
422442
result = pd.DataFrame({"MAGE": [mage_naive(data_df)]})
423443
return result
@@ -433,8 +453,11 @@ def mage_atomic(data, short_ma,long_ma):
433453
continue
434454

435455
if version == "ma":
436-
mage_val = mage_ma_single(subject_data, short_ma, long_ma, direction, return_type = "num")
437-
subject_result_dict = {"MAGE": mage_val}
456+
mage_val = mage_ma_single(subject_data, short_ma, long_ma, direction, return_type)
457+
if return_type == "df" :
458+
subject_result_dict = mage_val.to_dict()
459+
else:
460+
subject_result_dict = {"MAGE": mage_val}
438461
else:
439462
mage_val = mage_naive(subject_data)
440463
subject_result_dict = {"MAGE": mage_val}
@@ -469,13 +492,13 @@ def calculate_mage_plus(differences, minmax, standardD):
469492
continue
470493

471494
max_v = np.max(delta) # Find maximum upward movement
472-
i = np.argmax(delta) + prev_j # Index of extrema creating maximum
495+
i = int(np.argmax(delta) + prev_j) # Index of extrema creating maximum
473496

474-
if max_v >= standardD:
497+
if max_v > standardD:
475498
# Found significant upward excursion (nadir to peak > SD)
476499
k = j
477500
while k < N:
478-
if minmax[k] > minmax[j]:
501+
if minmax[k] >= minmax[j]:
479502
j = k # Continue riding the peak upward
480503

481504
# Check if excursion ends (significant drop or end of data)
@@ -522,10 +545,10 @@ def calculate_mage_minus(differences, minmax, standardD):
522545
min_v = np.min(delta) # Find maximum downward movement (most negative)
523546
i = np.argmin(delta) + prev_j # Index of extrema creating minimum
524547

525-
if min_v <= -standardD: # Found significant downward excursion
548+
if min_v < -standardD: # Found significant downward excursion
526549
k = j
527550
while k < N:
528-
if minmax[k] < minmax[j]:
551+
if minmax[k] <= minmax[j]:
529552
j = k # Continue riding the nadir downward
530553

531554
# Check if excursion ends (significant rise or end of data)
@@ -543,3 +566,41 @@ def calculate_mage_minus(differences, minmax, standardD):
543566
j += 1
544567

545568
return mage_minus_heights, mage_minus_tp_pairs
569+
570+
def segment_time_series(data, max_gap_minutes):
571+
"""
572+
Split glucose time series into segments based on large gaps
573+
Simpler approach using time differences
574+
"""
575+
# Calculate time differences
576+
577+
# Calculate time differences between consecutive non-NA glucose readings
578+
data['time_diff'] = np.nan
579+
valid_indices = data['gl'].notna()
580+
if valid_indices.any():
581+
# Get timestamps of valid readings
582+
valid_times = data.loc[valid_indices, 'time']
583+
# Calculate differences between consecutive valid readings
584+
time_diffs = valid_times.diff().dt.total_seconds() / 60 # Convert to minutes
585+
# Assign differences back to original dataframe at valid indices
586+
data.loc[valid_indices, 'time_diff'] = time_diffs
587+
588+
# Identify where gaps exceed threshold
589+
large_gaps = data['time_diff'] > max_gap_minutes
590+
591+
# Create segment labels by cumulatively summing large gaps
592+
# This creates a new segment ID each time we encounter a large gap
593+
data['segment_id'] = large_gaps.cumsum()
594+
595+
# Group by segment and return list of DataFrames
596+
segments = []
597+
for segment_id, group in data.groupby('segment_id'):
598+
# Drop the temporary columns we added
599+
group = group.drop(['time_diff', 'segment_id'], axis=1)
600+
# Drop rows with NA glucose values at the end of the segment
601+
while len(group) > 0 and pd.isna(group['gl'].iloc[-1]):
602+
group = group.iloc[:-1]
603+
segments.append(group.reset_index(drop=True))
604+
605+
return segments
606+
# Identify where gaps exceed threshold

tests/test_mage.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -78,26 +78,24 @@ def base_data():
7878
"""Fixture providing base test data for MAGE calculations"""
7979
return pd.DataFrame(
8080
{
81-
"id": [
82-
"subject1",
83-
"subject1",
84-
"subject1",
85-
"subject1",
86-
"subject2",
87-
"subject2"
88-
],
81+
"id": ["subject1"] * 8 + ["subject2"]*2,
8982
"time": pd.to_datetime(
9083
[
84+
"2020-01-01 00:00:00",
85+
"2020-01-01 00:20:00",
86+
"2020-01-01 00:40:00",
87+
"2020-01-01 01:00:00",
88+
"2020-01-01 01:20:00",
89+
"2020-01-01 01:40:00",
90+
"2020-01-01 02:00:00",
91+
"2020-01-01 02:20:00",
92+
9193
"2020-01-01 00:00:00", # 0 min
9294
"2020-01-01 00:05:00", # 5 min
93-
"2020-01-01 00:10:00", # 10 min
94-
"2020-01-01 00:15:00", # 15 min
95-
"2020-01-01 00:00:00", # subject2
96-
"2020-01-01 00:05:00", # subject2
9795

9896
]
9997
),
100-
"gl": [150, 200, 180, 160, 140, 190],
98+
"gl": [150, 200, 180, 160, 140, 190, 170, 210] + [150, 200],
10199
}
102100
)
103101

@@ -111,13 +109,13 @@ def multi_point_data():
111109
"time": pd.to_datetime(
112110
[
113111
"2020-01-01 00:00:00",
114-
"2020-01-01 00:05:00",
115-
"2020-01-01 00:10:00",
116-
"2020-01-01 00:15:00",
117112
"2020-01-01 00:20:00",
118-
"2020-01-01 00:25:00",
119-
"2020-01-01 00:30:00",
120-
"2020-01-01 00:35:00",
113+
"2020-01-01 00:40:00",
114+
"2020-01-01 01:00:00",
115+
"2020-01-01 01:20:00",
116+
"2020-01-01 01:40:00",
117+
"2020-01-01 02:00:00",
118+
"2020-01-01 02:20:00",
121119
]
122120
),
123121
"gl": [150, 200, 180, 160, 140, 190, 170, 210],
@@ -170,7 +168,7 @@ def test_mage_constant_glucose():
170168
)
171169
result = iglu.mage(single_subject)
172170
assert len(result) == 1
173-
assert result["MAGE"].iloc[0] == 0
171+
assert np.isnan(result["MAGE"].iloc[0])
174172

175173

176174
def test_mage_missing_values():

0 commit comments

Comments
 (0)