Skip to content

Commit 539b7f6

Browse files
committed
Flag ghost packages
Signed-off-by: Keshav Priyadarshi <git@keshav.space>
1 parent d870b4f commit 539b7f6

5 files changed

Lines changed: 123 additions & 101 deletions

File tree

vulnerabilities/improvers/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from vulnerabilities.improvers import valid_versions
1111
from vulnerabilities.improvers import vulnerability_kev
1212
from vulnerabilities.improvers import vulnerability_status
13-
from vulnerabilities.pipelines import remove_ghost_packages
13+
from vulnerabilities.pipelines import flag_ghost_packages
1414

1515
IMPROVERS_REGISTRY = [
1616
valid_versions.GitHubBasicImprover,
@@ -30,7 +30,7 @@
3030
valid_versions.GithubOSVImprover,
3131
vulnerability_status.VulnerabilityStatusImprover,
3232
vulnerability_kev.VulnerabilityKevImprover,
33-
remove_ghost_packages.RemoveGhostPackagePipeline,
33+
flag_ghost_packages.FlagGhostPackagePipeline,
3434
]
3535

3636
IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
from traceback import format_exc as traceback_format_exc
12+
13+
from aboutcode.pipeline import LoopProgress
14+
from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS
15+
from fetchcode.package_versions import versions
16+
from packageurl import PackageURL
17+
from univers.version_range import RANGE_CLASS_BY_SCHEMES
18+
19+
from vulnerabilities.models import Package
20+
from vulnerabilities.pipelines import VulnerableCodePipeline
21+
22+
23+
class FlagGhostPackagePipeline(VulnerableCodePipeline):
    """
    Pipeline that detects packages which do not exist upstream and flags them.

    It consists of a single step, ``flag_ghost_packages``.
    """

    @classmethod
    def steps(cls):
        """Return the tuple of step callables that make up this pipeline."""
        pipeline_steps = (cls.flag_ghost_packages,)
        return pipeline_steps

    def flag_ghost_packages(self):
        """Run the ghost package detection, using ``self.log`` as the logger."""
        detect_and_flag_ghost_packages(logger=self.log)
32+
33+
34+
def detect_and_flag_ghost_packages(logger=None):
    """
    Detect stored packages that are not available upstream and flag them as ghost.

    Only packages whose type is in fetchcode's ``SUPPORTED_ECOSYSTEMS`` and whose
    qualifiers and subpath are empty are considered. These are reduced to their
    distinct (type, namespace, name) combinations and each combination is handed to
    ``flag_ghost_package``, which does the actual upstream check.

    ``logger`` is an optional callable taking a message. It is passed on to
    ``LoopProgress`` and ``flag_ghost_package`` and, when truthy, receives a final
    summary with the number of packages flagged as ghost.
    """
    interesting_packages_qs = (
        Package.objects.filter(type__in=SUPPORTED_ECOSYSTEMS)
        .filter(qualifiers="")
        .filter(subpath="")
    )

    # distinct(*fields) requires an order_by() that starts with the same fields, in
    # the same order. Set it explicitly here rather than depending on whatever
    # default ordering the model declares.
    package_fields = ("type", "namespace", "name")
    distinct_packages = (
        interesting_packages_qs.values(*package_fields)
        .order_by(*package_fields)
        .distinct(*package_fields)
    )

    distinct_packages_count = distinct_packages.count()
    # Stream the rows in chunks instead of loading every combination at once.
    package_iterator = distinct_packages.iterator(chunk_size=2000)
    progress = LoopProgress(total_iterations=distinct_packages_count, logger=logger)

    ghost_package_count = 0

    for package in progress.iter(package_iterator):
        # ``package`` is a dict with the "type", "namespace" and "name" keys.
        ghost_package_count += flag_ghost_package(
            package_dict=package,
            interesting_packages_qs=interesting_packages_qs,
            logger=logger,
        )

    if logger:
        logger(f"Successfully flagged {ghost_package_count:,d} ghost Packages")
61+
62+
63+
def flag_ghost_package(package_dict, interesting_packages_qs, logger=None):
    """
    Flag the versions of a single package that are not available upstream.

    `package_dict` holds the `type`, `namespace` and `name` of the package. Every
    matching package in `interesting_packages_qs` whose status is 'unknown' gets the
    status 'ghost' when its version is not among the versions known upstream, and
    'valid' otherwise.

    Return the number of packages flagged as 'ghost'. Return 0 without modifying any
    package when the package type has no entry in RANGE_CLASS_BY_SCHEMES, or when no
    known versions were obtained (``get_versions`` returned None or an empty set).
    """
    if package_dict["type"] not in RANGE_CLASS_BY_SCHEMES:
        return 0

    known_versions = get_versions(**package_dict, logger=logger)
    if not known_versions:
        return 0

    version_class = RANGE_CLASS_BY_SCHEMES[package_dict["type"]].version_class
    package_versions = interesting_packages_qs.filter(**package_dict).filter(status="unknown")

    ghost_packages = 0
    for pkg in package_versions:
        # NOTE(review): version_class(...) presumably raises on a version string it
        # cannot parse, which would abort the whole run -- confirm whether such
        # packages should be skipped instead.
        if version_class(pkg.version) not in known_versions:
            pkg.status = "ghost"
            pkg.save()
            ghost_packages += 1

    # The queryset is lazy, so it is evaluated again here: packages saved as 'ghost'
    # above no longer match, and only the remaining 'unknown' ones become 'valid'.
    valid_package_versions = package_versions.exclude(status="ghost")
    valid_package_versions.update(status="valid")

    return ghost_packages
90+
91+
92+
def get_versions(type, namespace, name, logger=None):
    """
    Return the set of upstream versions known for the package identified by
    ``type``, ``namespace`` and ``name``.

    Each version value reported by fetchcode is wrapped in the version class
    registered for ``type`` in RANGE_CLASS_BY_SCHEMES. If anything fails while
    fetching or converting the versions, the error is reported through ``logger``
    (when one is given) at ERROR level and None is returned.
    """
    versionless_purl = PackageURL(type=type, namespace=namespace, name=name)
    parse_version = RANGE_CLASS_BY_SCHEMES[type].version_class

    try:
        known_versions = set()
        for upstream_version in versions(str(versionless_purl)):
            known_versions.add(parse_version(upstream_version.value))
    except Exception as e:
        if logger:
            logger(
                f"Error while fetching known versions for {versionless_purl!r}: {e!r} \n {traceback_format_exc()}",
                level=logging.ERROR,
            )
        return None
    else:
        return known_versions

vulnerabilities/pipelines/remove_ghost_packages.py

Lines changed: 0 additions & 84 deletions
This file was deleted.

vulnerabilities/templates/package_details.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
6666
<td class="two-col-left">
6767
<span
6868
class="has-tooltip-multiline has-tooltip-black has-tooltip-arrow has-tooltip-text-left"
69-
data-tooltip="The package url or purl is a URL string used to identify and locate a software package.">
69+
data-tooltip="The status of the package can be Malicious, Ghost, Yanked, Valid, or Unknown.">
7070
status
7171
</span>
7272
</td>

vulnerabilities/tests/pipelines/test_remove_ghost_packages.py renamed to vulnerabilities/tests/pipelines/test_flag_ghost_packages.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@
1515
from fetchcode.package_versions import PackageVersion
1616

1717
from vulnerabilities.models import Package
18-
from vulnerabilities.pipelines import remove_ghost_packages
18+
from vulnerabilities.pipelines import flag_ghost_packages
1919

2020

21-
class RemoveGhostPackagePipelineTest(TestCase):
21+
class FlagGhostPackagePipelineTest(TestCase):
2222
data = Path(__file__).parent.parent / "test_data"
2323

24-
@mock.patch("vulnerabilities.pipelines.remove_ghost_packages.versions")
25-
def test_remove_ghost_package(self, mock_fetchcode_versions):
24+
@mock.patch("vulnerabilities.pipelines.flag_ghost_packages.versions")
25+
def test_flag_ghost_package(self, mock_fetchcode_versions):
2626
Package.objects.create(type="pypi", name="foo", version="2.3.0")
2727
Package.objects.create(type="pypi", name="foo", version="3.0.0")
2828

@@ -36,17 +36,17 @@ def test_remove_ghost_package(self, mock_fetchcode_versions):
3636
"name": "foo",
3737
}
3838

39-
self.assertEqual(2, Package.objects.count())
39+
self.assertEqual(0, Package.objects.filter(status="ghost").count())
4040

41-
removed_package_count = remove_ghost_packages.remove_ghost_package(
42-
package=target_package,
41+
flagged_package_count = flag_ghost_packages.flag_ghost_package(
42+
package_dict=target_package,
4343
interesting_packages_qs=interesting_packages_qs,
4444
)
45-
self.assertEqual(1, removed_package_count)
46-
self.assertEqual(1, Package.objects.count())
45+
self.assertEqual(1, flagged_package_count)
46+
self.assertEqual(1, Package.objects.filter(status="ghost").count())
4747

48-
@mock.patch("vulnerabilities.pipelines.remove_ghost_packages.versions")
49-
def test_remove_ghost_package(self, mock_fetchcode_versions):
48+
@mock.patch("vulnerabilities.pipelines.flag_ghost_packages.versions")
49+
def test_detect_and_flag_ghost_packages(self, mock_fetchcode_versions):
5050
Package.objects.create(type="pypi", name="foo", version="2.3.0")
5151
Package.objects.create(type="pypi", name="foo", version="3.0.0")
5252
Package.objects.create(
@@ -62,10 +62,11 @@ def test_remove_ghost_package(self, mock_fetchcode_versions):
6262
]
6363

6464
self.assertEqual(3, Package.objects.count())
65+
self.assertEqual(0, Package.objects.filter(status="ghost").count())
6566

6667
buffer = io.StringIO()
67-
remove_ghost_packages.detect_and_remove_ghost_packages(logger=buffer.write)
68-
expected = "Successfully removed 1 ghost Packages"
68+
flag_ghost_packages.detect_and_flag_ghost_packages(logger=buffer.write)
69+
expected = "Successfully flagged 1 ghost Packages"
6970

7071
self.assertIn(expected, buffer.getvalue())
71-
self.assertEqual(2, Package.objects.count())
72+
self.assertEqual(1, Package.objects.filter(status="ghost").count())

0 commit comments

Comments
 (0)