@@ -1002,13 +1002,17 @@ def reflect(
10021002 what_worked : Optional [str ] = None ,
10031003 what_failed : Optional [str ] = None ,
10041004 key_decision : Optional [str ] = None ,
1005+ outcome_score : Optional [float ] = None ,
10051006 ) -> List [Insight ]:
10061007 """Agent-triggered reflection. Synthesizes insights from experience.
10071008
10081009 Called when an agent completes a task or wants to record learnings.
10091010 This is the explicit version of DGM-H's persistent memory —
10101011 the agent tells Dhee what it learned, and Dhee stores it as
10111012 transferable insight.
1013+
1014+ If outcome_score is provided, policy utility is updated using the
1015+ performance delta between the moving-average baseline and actual score.
10121016 """
10131017 new_insights = []
10141018
@@ -1076,16 +1080,31 @@ def reflect(
10761080 except Exception :
10771081 pass
10781082
1079- # Phase 3: Extract policy from task outcomes
1083+ # Phase 3: Extract policy from task outcomes, with utility deltas
1084+ # Baseline for utility scoring (D2Skill): mean score of the last up-to-10 performance records; stays None when fewer than 2 exist
1085+ baseline_score = None
1086+ if outcome_score is not None :
1087+ try :
1088+ key = f"{ user_id } :{ task_type } "
1089+ records = self ._performance .get (key , [])
1090+ if len (records ) >= 2 :
1091+ recent = records [- min (10 , len (records )):]
1092+ baseline_score = sum (r ["score" ] for r in recent ) / len (recent )
1093+ except Exception :
1094+ pass
1095+
10801096 if what_worked :
10811097 try :
10821098 p_store = self ._get_policy_store ()
1083- # Record success for any matching active policies
10841099 matched = p_store .match_policies (user_id , task_type , f"{ task_type } task" )
10851100 for policy in matched :
1086- p_store .record_outcome (policy .id , success = True )
1101+ p_store .record_outcome (
1102+ policy .id ,
1103+ success = True ,
1104+ baseline_score = baseline_score ,
1105+ actual_score = outcome_score ,
1106+ )
10871107
1088- # If we have enough task history, try to extract a new policy
10891108 ts_store = self ._get_task_state_store ()
10901109 completed = ts_store .get_tasks_by_type (user_id , task_type , limit = 10 )
10911110 if len (completed ) >= 3 :
@@ -1099,7 +1118,12 @@ def reflect(
10991118 p_store = self ._get_policy_store ()
11001119 matched = p_store .match_policies (user_id , task_type , f"{ task_type } task" )
11011120 for policy in matched :
1102- p_store .record_outcome (policy .id , success = False )
1121+ p_store .record_outcome (
1122+ policy .id ,
1123+ success = False ,
1124+ baseline_score = baseline_score ,
1125+ actual_score = outcome_score ,
1126+ )
11031127 except Exception :
11041128 pass
11051129
0 commit comments