Skip to content

Commit e7e948b

Browse files
authored
Merge pull request #15 from Sankhya-AI/alpha
V2.2.2
2 parents 5e430ad + fac351d commit e7e948b

5 files changed

Lines changed: 520 additions & 27 deletions

File tree

dhee/adapters/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ def checkpoint(
264264
user_id=uid, task_type=task_type or "general",
265265
what_worked=what_worked, what_failed=what_failed,
266266
key_decision=key_decision,
267+
outcome_score=score if score is not None else None,
267268
)
268269
result["insights_created"] = len(insights)
269270

dhee/core/buddhi.py

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,13 +1002,17 @@ def reflect(
10021002
what_worked: Optional[str] = None,
10031003
what_failed: Optional[str] = None,
10041004
key_decision: Optional[str] = None,
1005+
outcome_score: Optional[float] = None,
10051006
) -> List[Insight]:
10061007
"""Agent-triggered reflection. Synthesizes insights from experience.
10071008
10081009
Called when an agent completes a task or wants to record learnings.
10091010
This is the explicit version of DGM-H's persistent memory —
10101011
the agent tells Dhee what it learned, and Dhee stores it as
10111012
transferable insight.
1013+
1014+
If outcome_score is provided, policy utility is updated using the
1015+
performance delta between the moving-average baseline and actual score.
10121016
"""
10131017
new_insights = []
10141018

@@ -1076,16 +1080,31 @@ def reflect(
10761080
except Exception:
10771081
pass
10781082

1079-
# Phase 3: Extract policy from task outcomes
1083+
# Phase 3: Extract policy from task outcomes, with utility deltas
1084+
# Compute baseline from moving average for utility scoring (D2Skill)
1085+
baseline_score = None
1086+
if outcome_score is not None:
1087+
try:
1088+
key = f"{user_id}:{task_type}"
1089+
records = self._performance.get(key, [])
1090+
if len(records) >= 2:
1091+
recent = records[-min(10, len(records)):]
1092+
baseline_score = sum(r["score"] for r in recent) / len(recent)
1093+
except Exception:
1094+
pass
1095+
10801096
if what_worked:
10811097
try:
10821098
p_store = self._get_policy_store()
1083-
# Record success for any matching active policies
10841099
matched = p_store.match_policies(user_id, task_type, f"{task_type} task")
10851100
for policy in matched:
1086-
p_store.record_outcome(policy.id, success=True)
1101+
p_store.record_outcome(
1102+
policy.id,
1103+
success=True,
1104+
baseline_score=baseline_score,
1105+
actual_score=outcome_score,
1106+
)
10871107

1088-
# If we have enough task history, try to extract a new policy
10891108
ts_store = self._get_task_state_store()
10901109
completed = ts_store.get_tasks_by_type(user_id, task_type, limit=10)
10911110
if len(completed) >= 3:
@@ -1099,7 +1118,12 @@ def reflect(
10991118
p_store = self._get_policy_store()
11001119
matched = p_store.match_policies(user_id, task_type, f"{task_type} task")
11011120
for policy in matched:
1102-
p_store.record_outcome(policy.id, success=False)
1121+
p_store.record_outcome(
1122+
policy.id,
1123+
success=False,
1124+
baseline_score=baseline_score,
1125+
actual_score=outcome_score,
1126+
)
11031127
except Exception:
11041128
pass
11051129

0 commit comments

Comments (0)