66from git import Repo
77
88from vulnerabilities .importer import AdvisoryData
9- from vulnerabilities .importer import ReferenceV2
9+ from vulnerabilities .importer import AffectedPackageV2
10+ from vulnerabilities .importer import PackageCommitPatchData
1011from vulnerabilities .pipelines import VulnerableCodeBaseImporterPipelineV2
1112
1213SECURITY_PATTERNS = [
@@ -22,7 +23,7 @@ class CollectRepoFixCommitPipeline(VulnerableCodeBaseImporterPipelineV2):
2223 Pipeline to collect fix commits from any git repository.
2324 """
2425
25- pipeline_id = "repo_fix_commit "
26+ pipeline_id = "collect_fix_commit "
2627
2728 @classmethod
2829 def steps (cls ):
@@ -34,23 +35,26 @@ def steps(cls):
3435
3536 def clone (self ):
3637 """Clone the repository."""
37- self .repo_url = "https://github.com/torvalds/linux"
38- repo_path = tempfile .mkdtemp ()
38+ self .repo_url = self .inputs ["repo_url" ]
39+ if not self .repo_url :
40+ raise ValueError ("Repo is required for CollectRepoFixCommitPipeline" )
41+
42+ self .purl = self .inputs ["purl" ]
3943 self .repo = Repo .clone_from (
4044 url = self .repo_url ,
41- to_path = repo_path ,
45+ to_path = tempfile . mkdtemp () ,
4246 bare = True ,
4347 no_checkout = True ,
4448 multi_options = ["--filter=blob:none" ],
4549 )
4650
4751 def advisories_count (self ) -> int :
48- return int ( self . repo . git . rev_list ( "--count" , "HEAD" ))
52+ return 0
4953
50- def classify_commit_type (self , commit ) -> list [str ]:
54+ def extract_vulnerability_id (self , commit ) -> list [str ]:
5155 """
52- Extract vulnerability identifiers from a commit message.
53- Returns a list of matched vulnerability IDs (normalized to uppercase).
56+ Extract vulnerability id from a commit message.
57+ Returns a list of matched vulnerability IDs
5458 """
5559 matches = []
5660 for pattern in SECURITY_PATTERNS :
@@ -67,7 +71,7 @@ def collect_fix_commits(self):
6771
6872 grouped_commits = defaultdict (list )
6973 for commit in self .repo .iter_commits ("--all" ):
70- matched_ids = self .classify_commit_type (commit )
74+ matched_ids = self .extract_vulnerability_id (commit )
7175 if not matched_ids :
7276 continue
7377
@@ -87,16 +91,30 @@ def collect_advisories(self):
8791 """
8892 self .log ("Generating AdvisoryData objects from grouped commits." )
8993 grouped_commits = self .collect_fix_commits ()
90- for vuln_id , commits in grouped_commits .items ():
91- references = [ReferenceV2 (url = f"{ self .repo_url } /commit/{ cid } " ) for cid , _ in commits ]
94+ for vuln_id , commits_data in grouped_commits .items ():
95+ if not commits_data or not vuln_id :
96+ continue
9297
93- summary_lines = [f"- { cid } : { msg } " for cid , msg in commits ]
98+ summary_lines = []
99+ for c_hash , msg in commits_data :
100+ summary_lines .append (f"{ c_hash } : { msg } " )
94101 summary = f"Commits fixing { vuln_id } :\n " + "\n " .join (summary_lines )
102+
103+ commit_hash_set = {commit_hash for commit_hash , _ in commits_data }
104+ affected_packages = [
105+ AffectedPackageV2 (
106+ package = self .purl ,
107+ fixed_by_commit_patches = [
108+ PackageCommitPatchData (vcs_url = self .repo_url , commit_hash = commit_hash )
109+ for commit_hash in commit_hash_set
110+ ],
111+ )
112+ ]
113+
95114 yield AdvisoryData (
96115 advisory_id = vuln_id ,
97- aliases = [vuln_id ],
98116 summary = summary ,
99- references_v2 = references ,
117+ affected_packages = affected_packages ,
100118 url = self .repo_url ,
101119 )
102120
0 commit comments