Skip to content

Commit 9d4f120

Browse files
committed
Improved efficiency and introduced a smart ratio - Version 1.4
1 parent d8666b9 commit 9d4f120

23 files changed

Lines changed: 532 additions & 135 deletions

diffrays/__init__.py

100644100755
File mode changed.

diffrays/analyzer.py

100644100755
Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
#!/usr/bin/env python3
22

33
import re
4+
import time
45
import ida_domain
56
from ida_domain.database import IdaCommandOptions
67
from ida_domain.names import DemangleFlags, SetNameFlags
7-
from diffrays.database import insert_function, insert_function_with_meta, compress_pseudo, init_db, upsert_binary_metadata
8+
from diffrays.database import insert_function, insert_function_with_meta, compress_pseudo, init_db, upsert_binary_metadata, compute_and_store_diffs
89
from diffrays.explorer import explore_database
910
from diffrays.log import log
1011

@@ -64,6 +65,7 @@ def analyze_binary(db_path: str, version: str, debug: bool = False):
6465

6566
def run_diff(old_path, new_path, db_path):
6667
print("[+] Analyzing the binaries using IDA PRO!")
68+
start_ts = time.perf_counter()
6769
conn = init_db(db_path)
6870
try:
6971
# Explore and save OLD metadata
@@ -146,6 +148,14 @@ def run_diff(old_path, new_path, db_path):
146148
log.info(f"Decompiled {new_count} functions from new binary")
147149
log.info(f"Total functions processed: {old_count + new_count}")
148150

151+
# After exporting functions, compute and store diffs to a dedicated table
152+
try:
153+
log.info("Computing diffs and populating diff_results table ...")
154+
compute_and_store_diffs(conn)
155+
log.info("Diff computation completed")
156+
except Exception as e:
157+
log.error(f"Failed to compute/store diffs: {e}")
158+
149159
except Exception as e:
150160
log.error(f"Critical error: {e}")
151161
import traceback
@@ -154,4 +164,14 @@ def run_diff(old_path, new_path, db_path):
154164
finally:
155165
conn.close()
156166
print()
157-
print(f"[+] Database written to {db_path}")
167+
print(f"[+] Database written to {db_path}")
168+
elapsed = time.perf_counter() - start_ts
169+
hours, remainder = divmod(int(elapsed), 3600)
170+
minutes, seconds = divmod(remainder, 60)
171+
172+
if hours > 0:
173+
print(f"[+] Time taken: {hours}h {minutes}m {seconds}s")
174+
elif minutes > 0:
175+
print(f"[+] Time taken: {minutes}m {seconds}s")
176+
else:
177+
print(f"[+] Time taken: {seconds}s")

diffrays/cli.py

100644100755
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
| |/ /| | | | | | |\ \ (_| | |_| \__ \
1616
|___/ |_|_| |_| \_| \_\__,_|\__, |___/
1717
__/ |
18-
|___/ v1.1 Lambda
18+
|___/ v1.4 Xi
1919
"""
2020

2121
def generate_db_name(old_path: str, new_path: str) -> str:

diffrays/database.py

100644100755
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,23 @@
2727
metadata_blob BLOB NOT NULL,
2828
UNIQUE(binary_version)
2929
);
30+
31+
CREATE TABLE IF NOT EXISTS diff_results (
32+
id INTEGER PRIMARY KEY AUTOINCREMENT,
33+
function_name TEXT NOT NULL,
34+
old_pseudocode BLOB NOT NULL,
35+
new_pseudocode BLOB NOT NULL,
36+
old_address INTEGER,
37+
new_address INTEGER,
38+
old_blocks INTEGER,
39+
new_blocks INTEGER,
40+
old_signature TEXT,
41+
new_signature TEXT,
42+
ratio REAL,
43+
smart_ratio REAL,
44+
modification_level TEXT,
45+
UNIQUE(function_name)
46+
);
3047
"""
3148

3249
def compress_pseudo(pseudo_lines: list[str]) -> bytes:
@@ -55,6 +72,17 @@ def init_db(db_path: str):
5572
conn.execute(stmt)
5673
except Exception as e:
5774
log.warning(f"Migration step failed: {stmt}: {e}")
75+
# Ensure diff_results table exists (older DBs won't have it)
76+
conn.execute("CREATE TABLE IF NOT EXISTS diff_results (\n id INTEGER PRIMARY KEY AUTOINCREMENT,\n function_name TEXT NOT NULL,\n old_pseudocode BLOB NOT NULL,\n new_pseudocode BLOB NOT NULL,\n old_address INTEGER,\n new_address INTEGER,\n old_blocks INTEGER,\n new_blocks INTEGER,\n old_signature TEXT,\n new_signature TEXT,\n ratio REAL,\n smart_ratio REAL,\n modification_level TEXT,\n UNIQUE(function_name)\n)")
77+
# Add modification_level column if it doesn't exist (migration)
78+
try:
79+
# Check if modification_level column exists
80+
cols = {r[1] for r in conn.execute("PRAGMA table_info(diff_results)").fetchall()}
81+
if "modification_level" not in cols:
82+
conn.execute("ALTER TABLE diff_results ADD COLUMN modification_level TEXT")
83+
log.info("Added modification_level column to diff_results table")
84+
except Exception as e:
85+
log.warning(f"Could not add modification_level column: {e}")
5886
except Exception as e:
5987
log.warning(f"Could not run PRAGMA table_info migration checks: {e}")
6088
conn.commit()
@@ -104,3 +132,141 @@ def upsert_binary_metadata(conn, version: str, address_min: int, address_max: in
104132
(version, address_min, address_max, function_count, metadata_blob),
105133
)
106134
conn.commit()
135+
136+
137+
def _safe_ratio(a: str | None, b: str | None) -> float:
138+
try:
139+
import difflib
140+
if not a or not b:
141+
return 0.0
142+
if a == b:
143+
return 1.0
144+
return difflib.SequenceMatcher(None, a, b).ratio()
145+
except Exception:
146+
return 0.0
147+
148+
149+
def _compute_smart_ratio(text_old: str | None, text_new: str | None, blocks_old: int | None, blocks_new: int | None) -> float:
    """
    Score how much a function changed from its text and its block counts.

    Despite the name, the result is a change score (higher means more
    change), not a similarity, and it is not capped at 1.0.

    Args:
        text_old: Pseudocode text of the old version.
        text_new: Pseudocode text of the new version.
        blocks_old: Block count of the old version, if known.
        blocks_new: Block count of the new version, if known.

    Returns:
        - ``1 - similarity`` when either block count is None or 0;
        - ``(1 - similarity) * 0.05`` when both block counts are equal;
        - ``abs(block delta) / 50 + (1 - similarity) * 0.2`` otherwise;
        - 0.0 if anything raises while computing the score.
    """
    try:
        text_change = 1.0 - _safe_ratio(text_old, text_new)

        # Without a usable block count on both sides, only the text can speak.
        if blocks_old is None or blocks_new is None or blocks_old == 0 or blocks_new == 0:
            return text_change

        block_gap = abs(blocks_old - blocks_new)
        if block_gap == 0:
            # Same block count: text-only edits are weighted very low.
            return text_change * 0.05

        # The block delta is the primary signal (scaled down for readability);
        # the text difference contributes a smaller share.
        return block_gap / 50.0 + text_change * 0.2
    except Exception:
        return 0.0
170+
171+
172+
def _determine_modification_level(score: float) -> str:
173+
"""Categorize the modification level based on score"""
174+
if score == 0.0:
175+
return "unchanged"
176+
elif score < 0.1:
177+
return "minor"
178+
elif score < 0.3:
179+
return "moderate"
180+
elif score < 0.6:
181+
return "significant"
182+
else:
183+
return "major"
184+
185+
186+
def compute_and_store_diffs(conn: sqlite3.Connection):
    """
    Populate diff_results with functions that exist in both binaries and differ.

    A function is "matched" when the functions table holds a row with the
    same function_name for binary_version 'old' and for 'new'. Every matched
    pair whose decompressed pseudocode differs is inserted into diff_results
    (compressed pseudocode, addresses, block counts, signatures, ratio,
    smart_ratio, modification_level). The rows of those functions are then
    deleted from the functions table; unmatched and unchanged entries stay
    where they are.

    Both versions of every pair are read through one streamed self-join
    rather than two extra lookups per matched name, and each function name
    is stored and counted at most once.

    Args:
        conn: Open SQLite connection holding the functions and diff_results
            tables. Inserts and the subsequent pruning are committed here.
    """
    # Constant SQL fragment (no external input) shared by both queries below.
    matched_pairs = """
        FROM functions AS f_old
        INNER JOIN functions AS f_new
            ON f_new.function_name = f_old.function_name
            AND f_new.binary_version = 'new'
        WHERE f_old.binary_version = 'old'
    """

    total_matched = conn.execute("SELECT COUNT(*)" + matched_pairs).fetchone()[0]
    print("\n[+] Preparing diff computation …")
    print(f"[+] Total matched functions: {total_matched}")
    if not total_matched:
        print("[+] No matched functions found. Skipping diff computation.")
        return

    # Stream the pairs so the pseudocode blobs are not all held in memory.
    # Writing to diff_results while reading functions is safe: the tables differ.
    pairs = conn.execute(
        """
        SELECT f_old.function_name,
               f_old.pseudocode, f_old.address, f_old.blocks, f_old.signature,
               f_new.pseudocode, f_new.address, f_new.blocks, f_new.signature
        """
        + matched_pairs
    )

    changed_names: set[str] = set()
    for (
        name,
        old_blob, old_address, old_blocks, old_signature,
        new_blob, new_address, new_blocks, new_signature,
    ) in pairs:
        # diff_results is UNIQUE(function_name): one entry per name is enough.
        if name in changed_names:
            continue
        try:
            # NOTE(review): decompress_pseudo is expected to be defined elsewhere
            # in this module as the inverse of compress_pseudo - confirm.
            text_old = decompress_pseudo(old_blob) if old_blob is not None else None
            text_new = decompress_pseudo(new_blob) if new_blob is not None else None
        except Exception:
            # Skip corrupt entries
            continue
        # Only store when both sides have content and that content differs.
        if not text_old or not text_new or text_old == text_new:
            continue

        ratio = _safe_ratio(text_old, text_new)
        smart = _compute_smart_ratio(text_old, text_new, old_blocks, new_blocks)
        # The level is derived from the plain text ratio, not the smart ratio.
        level = _determine_modification_level(1.0 - ratio)
        try:
            conn.execute(
                """
                INSERT OR IGNORE INTO diff_results (
                    function_name,
                    old_pseudocode, new_pseudocode,
                    old_address, new_address,
                    old_blocks, new_blocks,
                    old_signature, new_signature,
                    ratio, smart_ratio, modification_level
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    name,
                    old_blob, new_blob,
                    old_address, new_address,
                    old_blocks, new_blocks,
                    old_signature, new_signature,
                    ratio, smart, level,
                ),
            )
        except Exception as e:
            log.warning(f"Failed inserting diff_results for {name}: {e}")
        else:
            changed_names.add(name)
    conn.commit()

    print(f"[+] Diff computation completed, found {len(changed_names)} functions as changed")

    if changed_names:
        # Delete matched rows from functions (leave unmatched and unchanged).
        # Pruning is best effort: a failure leaves the stored diffs intact.
        try:
            conn.executemany(
                "DELETE FROM functions WHERE function_name = ?",
                [(n,) for n in changed_names],
            )
            conn.commit()
        except Exception as e:
            log.warning(f"Failed to prune matched rows from functions: {e}")

diffrays/explorer.py

100644100755
File mode changed.

diffrays/log.py

100644100755
File mode changed.

0 commit comments

Comments
 (0)