Skip to content

Commit 4c168a8

Browse files
committed
Extend purl2url and url2purl support for parsing more Git host providers
Signed-off-by: ziad hany <ziadhany2016@gmail.com>
1 parent a13141e commit 4c168a8

4 files changed

Lines changed: 322 additions & 1 deletion

File tree

src/packageurl/contrib/purl2url.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
# Visit https://github.com/package-url/packageurl-python for support and
2525
# download.
2626

27+
import re
28+
2729
from packageurl import PackageURL
2830
from packageurl.contrib.route import NoRouteAvailable
2931
from packageurl.contrib.route import Router
@@ -172,6 +174,60 @@ def build_gitlab_repo_url(purl):
172174
return f"https://gitlab.com/{namespace}/{name}"
173175

174176

177+
# Commit URL templates for generic Git web front ends.
#
# Each key is a tuple of host regexes that are matched against the start of a
# purl namespace; each value is the commit URL template shared by those hosts.
# Templates are filled with ``namespace`` (host plus repository path),
# ``name`` (repository name) and ``version`` (commit id).
GIT_REPO_GENERIC = {
    # cgit
    (
        r"git\.kernel\.org",
        r"gitweb\.gentoo\.org",
        # Listed here so purls built from savannah cgit commit URLs can be
        # turned back into the same kind of URL.
        r"cgit\.git\.savannah\.gnu\.org",
    ): "https://{namespace}/{name}.git/commit/?id={version}",
    # gitiles
    (
        r"android\.googlesource\.com",
        r"aomedia\.googlesource\.com",
        r"chromium\.googlesource\.com",
    ): "https://{namespace}/{name}/+/{version}",
    # allura
    (
        r"sourceforge\.net",
        r"forge-allura\.apache\.org",
    ): "https://{namespace}/{name}/ci/{version}",
    # gitweb
    (
        r"gcc\.gnu\.org",
        r"git\.postgresql\.org",
    ): "https://{namespace}/?p={name}.git;a=commit;h={version}",
    # gitea / forgejo
    (
        r"codeberg\.org",
        r"gitea\.com",
    ): "https://{namespace}/{name}/commit/{version}",
    # sub gitlab ( excludes gitlab.com )
    (
        r"git\.codelinaro\.org",
        r"gitlab\.(?!com\b)[^/]+",
    ): "https://{namespace}/{name}/-/commit/{version}",
}
207+
208+
@repo_router.route("pkg:generic/.*")
def build_generic_repo_url(purl):
    """
    Return a commit URL from the `purl` string.

    The purl namespace must start with one of the hosts listed in
    GIT_REPO_GENERIC. Return None when the purl has no namespace, name or
    version, or when no known host matches.
    """
    purl_data = PackageURL.from_string(purl)
    name = purl_data.name
    namespace = purl_data.namespace
    version = purl_data.version

    if not (namespace and name and version):
        return None

    for patterns, template_url in GIT_REPO_GENERIC.items():
        for pattern in patterns:
            # The host must be the complete first segment of the namespace.
            # A bare prefix match would let "gitea.com" also claim a
            # namespace such as "gitea.community/...".
            if not re.match(rf"(?:{pattern})(?:/|$)", namespace):
                continue

            return template_url.format(namespace=namespace, name=name, version=version)

    return None
229+
230+
175231
@repo_router.route("pkg:(gem|rubygems)/.*")
176232
def build_rubygems_repo_url(purl):
177233
"""

src/packageurl/contrib/url2purl.py

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,233 @@ def build_bitbucket_purl(url):
667667
)
668668

669669

670+
def build_route_regex(domain_patterns, path_suffix="/.*"):
    """
    Build a route regex from a list of domains

    `domain_patterns` is an iterable of regex fragments, one per host, which
    are combined as alternatives. A single string is accepted and treated as
    one pattern. `path_suffix` is the regex appended after the host group.
    """
    # Joining a plain string would put "|" between its characters, so wrap a
    # lone pattern in a list first.
    if isinstance(domain_patterns, str):
        domain_patterns = [domain_patterns]

    domain_pattern = "|".join(domain_patterns)
    return rf"https?://({domain_pattern}){path_suffix}"
676+
677+
678+
# Hosts served by GitLab other than gitlab.com itself.
SUB_GITLAB_DOMAINS = [r"git\.codelinaro\.org", r"gitlab\.(?!com\b)[^/]+"]
SUB_GITLAB_ROUTE_REGEX = build_route_regex(SUB_GITLAB_DOMAINS)


@purl_router.route(SUB_GITLAB_ROUTE_REGEX)
def build_gitlab_sub_purl(url):
    """
    Return a generic PackageURL built from a commit URL on a GitLab host
    other than gitlab.com, or None if `url` is not such a commit URL.
    For example:
    https://git.codelinaro.org/linaro/qcom/project/-/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
    """
    commit_url_regex = (
        r"^https?://(?P<domain>[^/]+)/"
        r"(?P<namespace>.+?)/"
        r"(?P<name>(?!-/)[^/]+)/"
        r"(?:-/)?commit/"
        r"(?P<version>[0-9a-fA-F]{7,64})/?$"
    )

    found = re.search(commit_url_regex, url)
    if not found:
        return None

    host = found.group("domain")
    group_path = found.group("namespace").strip("/")

    # The host becomes the first namespace segment.
    return PackageURL(
        type="generic",
        namespace=f"{host}/{group_path}",
        name=found.group("name"),
        version=found.group("version"),
    )
710+
711+
712+
# Raw strings keep "\." as a regex escape; in a normal string literal it is an
# invalid escape sequence that Python warns about.
GITEA_DOMAINS = [r"codeberg\.org", r"gitea\.com"]
GITEA_ROUTE_REGEX = build_route_regex(GITEA_DOMAINS)


@purl_router.route(GITEA_ROUTE_REGEX)
def build_gitea_purl(url):
    """
    Return a PackageURL object from a gitea/forgejo url
    Return None if `url` is not a commit URL of the form
    <host>/<namespace>/<name>/commit/<hex id>.
    For example:
    https://codeberg.org/alpinelinux/aports/commit/a40a9732c840e5a324fba78b0ff7980b497c3831
    https://gitea.com/htc47/entur/commit/271b852cfb761a1fe257aa0f0a12ff38bd8bfd1c
    """

    gitea_commit_pattern = (
        r"^https?://"
        r"(?P<domain>[^/]+)/"
        r"(?P<namespace>[^/]+)/(?P<name>[^/]+)/commit/(?P<version>[0-9a-fA-F]{7,64})/?$"
    )

    commit_match = re.search(gitea_commit_pattern, url)
    if commit_match:
        # The host becomes the first namespace segment.
        domain = commit_match.group("domain")
        namespace = f"{domain}/{commit_match.group('namespace')}"

        return PackageURL(
            type="generic",
            namespace=namespace,
            name=commit_match.group("name"),
            version=commit_match.group("version"),
        )

    return None
744+
745+
746+
# All entries are raw strings: "\." in a normal string literal is an invalid
# escape sequence that Python warns about.
CGIT_DOMAINS = [
    r"git\.kernel\.org",
    r"gitweb\.gentoo\.org",
    r"cgit\.git\.savannah\.gnu\.org",
]
CGIT_ROUTE_REGEX = build_route_regex(CGIT_DOMAINS)


@purl_router.route(CGIT_ROUTE_REGEX)
def build_cgit_purl(url):
    """
    Return a PackageURL object from a cgit url
    Return None if `url` is not a cgit commit URL.
    For example:
    https://git.kernel.org/pub/scm/bluetooth/bluez.git/commit/?id=74770b1fd2be612f9c2cf807db81fcdcc35e6560
    https://cgit.git.savannah.gnu.org/cgit/uddf.git/commit/?id=98c41e131dc952aee43d4ec392b80ca4c426be8d
    https://gitweb.gentoo.org/dev/darkside.git/commit/?id=8d4b0836f3b6ab7075212926d9aad0b50246d825
    """

    cgit_project_pattern = (
        r"^https?://"
        r"(?P<domain>[^/]+)/"
        r"(?P<namespace>.+)/"
        # Lazy name so that an optional ".git" suffix is left out of it.
        r"(?P<name>[^/]+?)"
        r"(?:\.git)?"
        r"/commit/\?id="
        r"(?P<version>[0-9a-fA-F]{7,64})/?$"
    )

    commit_match = re.search(cgit_project_pattern, url)
    if commit_match:
        # The host becomes the first namespace segment.
        domain = commit_match.group("domain")
        namespace = f"{domain}/{commit_match.group('namespace')}"
        return PackageURL(
            type="generic",
            namespace=namespace,
            name=commit_match.group("name"),
            version=commit_match.group("version"),
            qualifiers={},
            subpath="",
        )

    return None
782+
783+
784+
GITILES_DOMAINS = [
    r"android\.googlesource\.com",
    r"aomedia\.googlesource\.com",
    r"chromium\.googlesource\.com",
]
GITILES_ROUTE_REGEX = build_route_regex(GITILES_DOMAINS)


@purl_router.route(GITILES_ROUTE_REGEX)
def build_gitiles_purl(url):
    """
    Return a generic PackageURL built from a Gitiles commit url, or None if
    `url` is not of the form <host>/[<path>/]<name>/+/<hex id>.
    For example:
    https://android.googlesource.com/platform/packages/apps/Settings/+/2968ccc911956fa5813a9a6a5e5c8970e383a60f
    https://aomedia.googlesource.com/libavifinfo/+/43716e9c34d3389b4882fbd1a81c04543ed04fe3
    """
    commit_url_regex = (
        r"^https?://(?P<domain>[^/]+)/"
        r"(?:(?P<namespace>.+)/)?(?P<name>[^/]+?)"
        r"/\+/(?P<version>[0-9a-fA-F]{7,64})/?$"
    )

    found = re.search(commit_url_regex, url)
    if not found:
        return None

    host = found.group("domain")
    repo_path = found.group("namespace")
    # A repository directly under the host has no path; the namespace is then
    # the host alone.
    if repo_path:
        full_namespace = f"{host}/{repo_path}"
    else:
        full_namespace = host

    return PackageURL(
        type="generic",
        namespace=full_namespace,
        name=found.group("name"),
        version=found.group("version"),
        qualifiers={},
        subpath="",
    )
823+
824+
825+
ALLURA_DOMAINS = [r"sourceforge\.net", r"forge-allura\.apache\.org"]
# Only project URLs under "/p/" are routed to the Allura builder.
ALLURA_ROUTE_REGEX = build_route_regex(ALLURA_DOMAINS, "/p/.*")


@purl_router.route(ALLURA_ROUTE_REGEX)
def build_allura_purl(url):
    """
    Return a PackageURL object from an Apache Allura url (e.g., SourceForge).
    Return None if `url` is not an Allura commit URL.
    For example:
    https://sourceforge.net/p/djvu/djvulibre-git/ci/e15d51510048927f172f1bf1f27ede65907d940d
    https://sourceforge.net/p/infrarecorder/code/ci/9361b6f267e7b1c1576c48f6dac6dec18d8a93e0/
    https://forge-allura.apache.org/p/allura/git/ci/674e070e5ca7db7c75cf61d8efd2a3e3e49bd946/
    """

    allura_pattern = (
        r"^https?://"
        # The "/" after the host is consumed here so that the namespace group
        # does not start with a slash and the host group cannot backtrack
        # into a truncated host name.
        r"(?P<domain>[^/]+)/"
        r"(?P<namespace>.+)/"
        r"(?P<name>[^/]+?)"
        r"/ci/"
        r"(?P<version>[0-9a-fA-F]{7,64})/?$"
    )

    commit_match = re.search(allura_pattern, url)
    if commit_match:
        # The host becomes the first namespace segment.
        domain = commit_match.group("domain")
        namespace = f"{domain}/{commit_match.group('namespace')}"
        return PackageURL(
            type="generic",
            namespace=namespace,
            name=commit_match.group("name"),
            version=commit_match.group("version"),
            qualifiers={},
            subpath="",
        )

    return None
860+
861+
862+
# Each entry is a host followed by the path at which gitweb is served.
GITWEB_DOMAINS = [r"gcc\.gnu\.org/git", r"git\.postgresql\.org/gitweb"]
GITWEB_ROUTE_REGEX = build_route_regex(GITWEB_DOMAINS)


@purl_router.route(GITWEB_ROUTE_REGEX)
def build_gitweb_purl(url):
    """
    Return a generic PackageURL built from a Gitweb url, or None if `url`
    lacks a query string with both a `p=` project and an `h=` hex commit id.
    For example:
    https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=82cc94e5fb69d1c45a386f83798251de5bff9339
    https://git.postgresql.org/gitweb/?p=hamn.git;a=commit;h=a796b71a5b3fe7f751f1086a08cb114b9877dea2
    """
    # Everything before "?" is the namespace. The two lookaheads then pick the
    # "p" and "h" query parameters in whatever order they appear.
    query_regex = (
        r"^https?://(?P<namespace>[^?]+?)/?(?=\?)"
        r"(?=.*[?;&]p=(?P<name>[^;&]+?)(?:\.git)?(?:[;&]|$))"
        r"(?=.*[?;&]h=(?P<version>[0-9a-fA-F]{7,64}))"
    )

    found = re.search(query_regex, url)
    if not found:
        return None

    return PackageURL(
        type="generic",
        namespace=found.group("namespace"),
        name=found.group("name"),
        version=found.group("version"),
        qualifiers={},
        subpath="",
    )
895+
896+
670897
@purl_router.route("https?://gitlab\\.com/(?!.*/archive/).*")
671898
def build_gitlab_purl(url):
672899
"""

tests/contrib/data/url2purl.json

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -277,5 +277,27 @@
277277
"https://packagemanager.rstudio.com/cran/2022-06-23/src/contrib/curl_4.3.2.tar.gz": "pkg:cran/curl@4.3.2?download_url=https://packagemanager.rstudio.com/cran/2022-06-23/src/contrib/curl_4.3.2.tar.gz",
278278
"https://github.com/TG1999/first_repo/commit/98e516011d6e096e25247b82fc5f196bbeecff10": "pkg:github/tg1999/first_repo@98e516011d6e096e25247b82fc5f196bbeecff10",
279279
"https://gitlab.com/TG1999/first_repo/-/commit/bf04e5f289885cf2f20a92b387bcc6df33e30809": "pkg:gitlab/tg1999/first_repo@bf04e5f289885cf2f20a92b387bcc6df33e30809",
280-
"https://bitbucket.org/TG1999/first_repo/commits/16a60c4a74ef477cd8c16ca82442eaab2fbe8c86": "pkg:bitbucket/tg1999/first_repo@16a60c4a74ef477cd8c16ca82442eaab2fbe8c86"
280+
281+
"https://git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm/-/commit/a5f07894058c4198f61e533d727b343c5be879b0": "pkg:generic/git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm@a5f07894058c4198f61e533d727b343c5be879b0",
282+
"https://gitlab.gnome.org/GNOME/gimp/-/commit/112a5e038f0646eae5ae314988ec074433d2b365": "pkg:generic/gitlab.gnome.org/GNOME/gimp@112a5e038f0646eae5ae314988ec074433d2b365",
283+
"https://gitlab.freedesktop.org/poppler/poppler/-/commit/8677500399fc2548fa816b619580c2c07915a98c": "pkg:generic/gitlab.freedesktop.org/poppler/poppler@8677500399fc2548fa816b619580c2c07915a98c",
284+
"https://gitea.com/htc47/entur/commit/271b852cfb761a1fe257aa0f0a12ff38bd8bfd1c": "pkg:generic/gitea.com/htc47/entur@271b852cfb761a1fe257aa0f0a12ff38bd8bfd1c",
285+
"https://codeberg.org/alpinelinux/aports/commit/a40a9732c840e5a324fba78b0ff7980b497c3831": "pkg:generic/codeberg.org/alpinelinux/aports@a40a9732c840e5a324fba78b0ff7980b497c3831",
286+
287+
"https://git.kernel.org/pub/scm/utils/b4/b4.git/commit/?id=477734000555ffc24bf873952e40367deee26f17": "pkg:generic/git.kernel.org/pub/scm/utils/b4/b4@477734000555ffc24bf873952e40367deee26f17",
288+
"https://cgit.git.savannah.gnu.org/cgit/uddf.git/commit/?id=98c41e131dc952aee43d4ec392b80ca4c426be8d": "pkg:generic/cgit.git.savannah.gnu.org/cgit/uddf@98c41e131dc952aee43d4ec392b80ca4c426be8d",
289+
"https://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git/commit/?id=7457fe9541b5162f285454947448d553a5d5a531": "pkg:generic/git.kernel.org/pub/scm/virt/kvm/mst/qemu@7457fe9541b5162f285454947448d553a5d5a531",
290+
291+
"https://gitweb.gentoo.org/dev/darkside.git/commit/?id=8d4b0836f3b6ab7075212926d9aad0b50246d825": "pkg:generic/gitweb.gentoo.org/dev/darkside@8d4b0836f3b6ab7075212926d9aad0b50246d825",
292+
"https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=f73ae47c5e48010f504f3f55567152258f3013ae": "pkg:generic/gitweb.gentoo.org/repo/gentoo@f73ae47c5e48010f504f3f55567152258f3013ae",
293+
"https://android.googlesource.com/platform/frameworks/base/+/b4da73a935a8c906ff5df562155824d63ac849ab": "pkg:generic/android.googlesource.com/platform/frameworks/base@b4da73a935a8c906ff5df562155824d63ac849ab",
294+
"https://android.googlesource.com/device/generic/vulkan-cereal/+/240dedcb0fa917b3d2dcc4a9d4c332697c5e48a0": "pkg:generic/android.googlesource.com/device/generic/vulkan-cereal@240dedcb0fa917b3d2dcc4a9d4c332697c5e48a0",
295+
"https://chromium.googlesource.com/aosp/platform/external/dbus-binding-generator/+/7574c671c7c64aab957dc507fffff3c8c38dc7cb": "pkg:generic/chromium.googlesource.com/aosp/platform/external/dbus-binding-generator@7574c671c7c64aab957dc507fffff3c8c38dc7cb",
296+
"https://aomedia.googlesource.com/libavifinfo/+/43716e9c34d3389b4882fbd1a81c04543ed04fe3": "pkg:generic/aomedia.googlesource.com/libavifinfo@43716e9c34d3389b4882fbd1a81c04543ed04fe3",
297+
298+
"https://sourceforge.net/p/djvu/djvulibre-git/ci/e15d51510048927f172f1bf1f27ede65907d940d": "pkg:generic/sourceforge.net/p/djvu/djvulibre-git@e15d51510048927f172f1bf1f27ede65907d940d",
299+
"https://sourceforge.net/p/expat/code_git/ci/f0bec73b018caa07d3e75ec8dd967f3785d71bde": "pkg:generic/sourceforge.net/p/expat/code_git@f0bec73b018caa07d3e75ec8dd967f3785d71bde",
300+
"https://forge-allura.apache.org/p/allura/git/ci/674e070e5ca7db7c75cf61d8efd2a3e3e49bd946": "pkg:generic/forge-allura.apache.org/p/allura/git@674e070e5ca7db7c75cf61d8efd2a3e3e49bd946",
301+
"https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=82cc94e5fb69d1c45a386f83798251de5bff9339": "pkg:generic/gcc.gnu.org/git/gcc@82cc94e5fb69d1c45a386f83798251de5bff9339",
302+
"https://git.postgresql.org/gitweb/?p=hamn.git;a=commit;h=a796b71a5b3fe7f751f1086a08cb114b9877dea2": "pkg:generic/git.postgresql.org/gitweb/hamn@a796b71a5b3fe7f751f1086a08cb114b9877dea2"
281303
}

tests/contrib/test_purl2url.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,22 @@ def test_purl2url_get_repo_url():
7070
"pkg:cocoapods/MapsIndoors@3.24.0": "https://cocoapods.org/pods/MapsIndoors",
7171
"pkg:maven/org.apache.commons/commons-io@1.3.2": "https://repo.maven.apache.org/maven2/org/apache/commons/commons-io/1.3.2",
7272
"pkg:maven/org.apache.commons/commons-io@1.3.2?repository_url=https://repo1.maven.org/maven2": "https://repo1.maven.org/maven2/org/apache/commons/commons-io/1.3.2",
73+
"pkg:generic/git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm@a5f07894058c4198f61e533d727b343c5be879b0": "https://git.codelinaro.org/clo/qsdk/oss/kernel/linux-msm/-/commit/a5f07894058c4198f61e533d727b343c5be879b0",
74+
"pkg:generic/gitlab.gnome.org/GNOME/gimp@112a5e038f0646eae5ae314988ec074433d2b365": "https://gitlab.gnome.org/GNOME/gimp/-/commit/112a5e038f0646eae5ae314988ec074433d2b365",
75+
"pkg:generic/gitlab.freedesktop.org/poppler/poppler@8677500399fc2548fa816b619580c2c07915a98c": "https://gitlab.freedesktop.org/poppler/poppler/-/commit/8677500399fc2548fa816b619580c2c07915a98c",
76+
"pkg:generic/git.kernel.org/pub/scm/libs/liba2i/liba2i@4fc8196d7811c26abefaf3a3ae6b5c67c4c9cbc9": "https://git.kernel.org/pub/scm/libs/liba2i/liba2i.git/commit/?id=4fc8196d7811c26abefaf3a3ae6b5c67c4c9cbc9",
77+
"pkg:generic/git.kernel.org/pub/scm/linux/kernel/git/a.hindborg/configfs@bc3372351d0c8b2726b7d4229b878342e3e6b0e8": "https://git.kernel.org/pub/scm/linux/kernel/git/a.hindborg/configfs.git/commit/?id=bc3372351d0c8b2726b7d4229b878342e3e6b0e8",
78+
"pkg:generic/gitweb.gentoo.org/dev/darkside@8d4b0836f3b6ab7075212926d9aad0b50246d825": "https://gitweb.gentoo.org/dev/darkside.git/commit/?id=8d4b0836f3b6ab7075212926d9aad0b50246d825",
79+
"pkg:generic/gitweb.gentoo.org/repo/gentoo@f73ae47c5e48010f504f3f55567152258f3013ae": "https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=f73ae47c5e48010f504f3f55567152258f3013ae",
80+
"pkg:generic/android.googlesource.com/accessories/manifest@9ad7ef740dc39834a88bf95c69f35f18b8f45543": "https://android.googlesource.com/accessories/manifest/+/9ad7ef740dc39834a88bf95c69f35f18b8f45543",
81+
"pkg:generic/aomedia.googlesource.com/libavifinfo@43716e9c34d3389b4882fbd1a81c04543ed04fe3": "https://aomedia.googlesource.com/libavifinfo/+/43716e9c34d3389b4882fbd1a81c04543ed04fe3",
82+
"pkg:generic/chromium.googlesource.com/infra/infra@9ad7ef740dc39834a88bf95c69f35f18b8f45543": "https://chromium.googlesource.com/infra/infra/+/9ad7ef740dc39834a88bf95c69f35f18b8f45543",
83+
"pkg:generic/android.googlesource.com/device/generic/vulkan-cereal@240dedcb0fa917b3d2dcc4a9d4c332697c5e48a0": "https://android.googlesource.com/device/generic/vulkan-cereal/+/240dedcb0fa917b3d2dcc4a9d4c332697c5e48a0",
84+
"pkg:generic/sourceforge.net/p/infrarecorder/code@8fab704119ff23691f075f6a281521b6c7d7e55f": "https://sourceforge.net/p/infrarecorder/code/ci/8fab704119ff23691f075f6a281521b6c7d7e55f",
85+
"pkg:generic/sourceforge.net/p/expat/code_git@f0bec73b018caa07d3e75ec8dd967f3785d71bde": "https://sourceforge.net/p/expat/code_git/ci/f0bec73b018caa07d3e75ec8dd967f3785d71bde",
86+
"pkg:generic/forge-allura.apache.org/p/allura/git@674e070e5ca7db7c75cf61d8efd2a3e3e49bd946": "https://forge-allura.apache.org/p/allura/git/ci/674e070e5ca7db7c75cf61d8efd2a3e3e49bd946",
87+
"pkg:generic/gcc.gnu.org/git/gcc@82cc94e5fb69d1c45a386f83798251de5bff9339": "https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=82cc94e5fb69d1c45a386f83798251de5bff9339",
88+
"pkg:generic/git.postgresql.org/gitweb/p/infrarecorder/hamn@4f4fed18770ff15da3c7ab1e81854b75181ab5d0": "https://git.postgresql.org/gitweb/p/infrarecorder/?p=hamn.git;a=commit;h=4f4fed18770ff15da3c7ab1e81854b75181ab5d0"
7389
}
7490

7591
for purl, url in purls_url.items():

0 commit comments

Comments
 (0)