|
| 1 | +# |
| 2 | +# Copyright (c) nexB Inc. and others. All rights reserved. |
| 3 | +# VulnerableCode is a trademark of nexB Inc. |
| 4 | +# SPDX-License-Identifier: Apache-2.0 |
| 5 | +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. |
| 6 | +# See https://github.com/nexB/vulnerablecode for support or download. |
| 7 | +# See https://aboutcode.org for more information about nexB OSS projects. |
| 8 | +# |
| 9 | + |
| 10 | +import logging |
| 11 | +from traceback import format_exc as traceback_format_exc |
| 12 | + |
| 13 | +from aboutcode.pipeline import LoopProgress |
| 14 | +from fetchcode.package_versions import SUPPORTED_ECOSYSTEMS |
| 15 | +from fetchcode.package_versions import versions |
| 16 | +from packageurl import PackageURL |
| 17 | +from univers.version_range import RANGE_CLASS_BY_SCHEMES |
| 18 | + |
| 19 | +from vulnerabilities.models import Package |
| 20 | +from vulnerabilities.pipelines import VulnerableCodePipeline |
| 21 | + |
| 22 | + |
class FlagGhostPackagePipeline(VulnerableCodePipeline):
    """Detect and flag packages that do not exist upstream."""

    @classmethod
    def steps(cls):
        """Return the steps of this pipeline: the single ghost package flagging step."""
        pipeline_steps = (cls.flag_ghost_packages,)
        return pipeline_steps

    def flag_ghost_packages(self):
        """Run the ghost package detection, reporting through this pipeline's `log`."""
        detect_and_flag_ghost_packages(logger=self.log)
| 32 | + |
| 33 | + |
def detect_and_flag_ghost_packages(logger=None):
    """
    Check the stored packages against upstream and flag the missing versions.

    Only packages of a type listed in fetchcode `SUPPORTED_ECOSYSTEMS`, with
    empty qualifiers and subpath, are considered. Each distinct
    (type, namespace, name) is handed to `flag_ghost_package`, and the total
    number of flagged packages is reported through `logger` when one is given.
    """
    interesting_packages_qs = Package.objects.filter(
        type__in=SUPPORTED_ECOSYSTEMS,
        qualifiers="",
        subpath="",
    )

    # One row per package, whatever the number of stored versions.
    package_fields = ("type", "namespace", "name")
    distinct_packages = interesting_packages_qs.values(*package_fields).distinct(*package_fields)

    progress = LoopProgress(
        total_iterations=distinct_packages.count(),
        logger=logger,
    )
    packages = distinct_packages.iterator(chunk_size=2000)

    ghost_package_count = sum(
        flag_ghost_package(
            package_dict=package,
            interesting_packages_qs=interesting_packages_qs,
            logger=logger,
        )
        for package in progress.iter(packages)
    )

    if logger:
        logger(f"Successfully flagged {ghost_package_count:,d} ghost Packages")
| 61 | + |
| 62 | + |
def flag_ghost_package(package_dict, interesting_packages_qs, logger=None):
    """
    Flag the stored versions of one package that are not available upstream.

    `package_dict` holds the "type", "namespace" and "name" of the package and
    `interesting_packages_qs` is the Package queryset its versions are looked
    up in. Only packages whose status is "unknown" are considered: a version
    missing from the upstream versions is saved with the status "ghost" and
    every other one is updated to "valid".

    Nothing is changed when the package type has no entry in
    `RANGE_CLASS_BY_SCHEMES` or when `get_versions` returns no version.
    A stored version that the version class cannot parse is logged and keeps
    its "unknown" status instead of aborting the whole run.

    Return the number of packages flagged as "ghost".
    """
    package_type = package_dict["type"]
    if package_type not in RANGE_CLASS_BY_SCHEMES:
        return 0

    known_versions = get_versions(**package_dict, logger=logger)
    # None (the fetch failed) or an empty set leaves nothing to compare against.
    if not known_versions:
        return 0

    version_class = RANGE_CLASS_BY_SCHEMES[package_type].version_class
    package_versions = interesting_packages_qs.filter(**package_dict).filter(status="unknown")

    ghost_packages = 0
    unparsable_pks = []
    for pkg in package_versions:
        try:
            is_known = version_class(pkg.version) in known_versions
        except Exception as e:
            # One malformed stored version must not stop the other packages
            # from being checked: report it and leave its status untouched.
            unparsable_pks.append(pkg.pk)
            if logger:
                logger(
                    f"Error while parsing version {pkg.version!r} of {pkg!r}: {e!r} \n {traceback_format_exc()}",
                    level=logging.ERROR,
                )
            continue

        if not is_known:
            pkg.status = "ghost"
            pkg.save()
            ghost_packages += 1

    valid_package_versions = package_versions.exclude(status="ghost")
    if unparsable_pks:
        # These were never compared with upstream, so they are not "valid".
        valid_package_versions = valid_package_versions.exclude(pk__in=unparsable_pks)
    valid_package_versions.update(status="valid")

    return ghost_packages
| 90 | + |
| 91 | + |
def get_versions(type, namespace, name, logger=None):
    """
    Return the set of versions known upstream for the package `type`, `namespace`, `name`.

    The versions are fetched with fetchcode and converted with the version class
    registered for `type` in `RANGE_CLASS_BY_SCHEMES`. When fetching or converting
    fails, the error is reported through `logger` (if given) and None is returned.
    """
    versionless_purl = PackageURL(type=type, namespace=namespace, name=name)
    version_class = RANGE_CLASS_BY_SCHEMES[type].version_class

    try:
        upstream_versions = versions(str(versionless_purl))
        return {version_class(upstream.value) for upstream in upstream_versions}
    except Exception as e:
        if logger:
            logger(
                f"Error while fetching known versions for {versionless_purl!r}: {e!r} \n {traceback_format_exc()}",
                level=logging.ERROR,
            )
        return None
0 commit comments