Skip to content

Commit 0a165e2

Browse files
committed
Add validation generator script and validate function
Signed-off-by: Tushar Goel <tushar.goel.dav@gmail.com>
1 parent 162e356 commit 0a165e2

4 files changed

Lines changed: 962 additions & 32 deletions

File tree

etc/scripts/generate_validators.py

Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
# Generate a simple script based on provided list for package types
2+
3+
"""
4+
{
5+
"$schema": "https://packageurl.org/schemas/purl-type-definition.schema-1.0.json",
6+
"$id": "https://packageurl.org/types/pypi-definition.json",
7+
"type": "pypi",
8+
"type_name": "PyPI",
9+
"description": "Python packages",
10+
"repository": {
11+
"use_repository": true,
12+
"default_repository_url": "https://pypi.org",
13+
"note": "Previously https://pypi.python.org"
14+
},
15+
"namespace_definition": {
16+
"requirement": "prohibited",
17+
"note": "there is no namespace"
18+
},
19+
"name_definition": {
20+
"native_name": "name",
21+
"case_sensitive": false,
22+
"normalization_rules": [
23+
"Replace underscore _ with dash -",
24+
"Replace dot . with underscore _ when used in distribution (sdist, wheel) names"
25+
],
26+
"note": "PyPI treats - and _ as the same character and is not case sensitive. Therefore a PyPI package name must be lowercased and underscore _ replaced with a dash -. Note that PyPI itself is preserving the case of package names. When used in distribution and wheel names, the dot . is replaced with an underscore _"
27+
},
28+
"version_definition": {
29+
"case_sensitive": false,
30+
"native_name": "version"
31+
},
32+
"qualifiers_definition": [
33+
{
34+
"key": "file_name",
35+
"requirement": "optional",
36+
"description": "The file_name qualifier selects a particular distribution file (case-sensitive). For naming convention, see the Python Packaging User Guide on source distributions https://packaging.python.org/en/latest/specifications/source-distribution-format/#source-distribution-file-name and on binary distributions https://packaging.python.org/en/latest/specifications/binary-distribution-format/#file-name-convention and the rules for platform compatibility tags https://packaging.python.org/en/latest/specifications/platform-compatibility-tags/"
37+
}
38+
],
39+
"examples": [
40+
"pkg:pypi/django@1.11.1",
41+
"pkg:pypi/django@1.11.1?filename=Django-1.11.1.tar.gz",
42+
"pkg:pypi/django@1.11.1?filename=Django-1.11.1-py2.py3-none-any.whl",
43+
"pkg:pypi/django-allauth@12.23"
44+
]
45+
}
46+
"""
47+
from packageurl import PackageURL
48+
from pathlib import Path
49+
import json
50+
51+
# Preamble of the generated ``src/packageurl/validate.py`` module: license
# header, imports and the ``TypeValidator`` base class that every generated
# per-type validator subclasses. The text is written out verbatim (it is never
# passed through ``str.format``), so braces inside it are literal.
HEADER = '''# Copyright (c) the purl authors
# SPDX-License-Identifier: MIT
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# Visit https://github.com/package-url/packageurl-python for support and
# download.

from typing import TYPE_CHECKING

from packageurl.contrib.route import Router

if TYPE_CHECKING:
    # Only needed for annotations. packageurl/__init__.py imports this module,
    # so importing PackageURL here at runtime would be a circular import.
    from packageurl import PackageURL

"""
Validate each type according to the PURL spec type definitions
"""


class TypeValidator:
    @classmethod
    def validate(cls, purl: "PackageURL", strict=False):
        """
        Yield one error message for each rule of this type that ``purl`` breaks.
        Unless ``strict`` is true, ``purl`` is normalized before it is checked.
        """
        if not strict:
            purl = cls.normalize(purl)

        if cls.namespace_requirement == "prohibited" and purl.namespace:
            yield f"Namespace is prohibited for purl type: {cls.type!r}"

        if (
            not cls.namespace_case_sensitive
            and purl.namespace
            and purl.namespace.lower() != purl.namespace
        ):
            yield f"Namespace is not lowercased for purl type: {cls.type!r}"

        if not cls.name_case_sensitive and purl.name and purl.name.lower() != purl.name:
            yield f"Name is not lowercased for purl type: {cls.type!r}"

        if (
            not cls.version_case_sensitive
            and purl.version
            and purl.version.lower() != purl.version
        ):
            yield f"Version is not lowercased for purl type: {cls.type!r}"

        yield from cls.validate_type(purl)

    @classmethod
    def normalize_type(cls, type: str):
        return type

    @classmethod
    def normalize_namespace(cls, namespace: str):
        return namespace

    @classmethod
    def normalize_name(cls, name: str):
        return name

    @classmethod
    def normalize_version(cls, version: str):
        return version

    @classmethod
    def normalize_qualifiers(cls, qualifiers: dict):
        return qualifiers

    @classmethod
    def normalize_subpath(cls, subpath: str):
        return subpath

    @classmethod
    def normalize(cls, purl: "PackageURL"):
        """
        Return a new PackageURL built from the normalized components of ``purl``.
        """
        # Imported lazily to avoid a circular import with packageurl/__init__.py.
        from packageurl import PackageURL
        from packageurl import normalize as normalize_components

        (
            type_norm,
            namespace_norm,
            name_norm,
            version_norm,
            qualifiers_norm,
            subpath_norm,
        ) = normalize_components(
            purl.type,
            purl.namespace,
            purl.name,
            purl.version,
            purl.qualifiers,
            purl.subpath,
            encode=False,
        )

        return PackageURL(
            type=type_norm,
            namespace=namespace_norm,
            name=name_norm,
            version=version_norm,
            qualifiers=qualifiers_norm,
            subpath=subpath_norm,
        )

    @classmethod
    def validate_type(cls, purl: "PackageURL"):
        """
        Yield the type-specific error messages for ``purl``.
        """
        yield from cls.validate_qualifiers(purl=purl)

    @classmethod
    def validate_qualifiers(cls, purl: "PackageURL"):
        """
        Yield an error message when ``purl`` has qualifier keys that are not in
        ``cls.allowed_qualifiers``.
        """
        if not purl.qualifiers:
            return

        purl_qualifiers_keys = set(purl.qualifiers.keys())
        allowed_qualifiers_set = set(cls.allowed_qualifiers)

        disallowed = purl_qualifiers_keys - allowed_qualifiers_set

        if disallowed:
            # Sorted so that the message text is stable from run to run.
            yield (
                f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. "
                f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}"
            )
'''
166+
167+
def validate_qualifiers(allowed_qualifiers, purl: "PackageURL"):
    """
    Yield an error message when ``purl`` uses qualifier keys that are not
    listed in ``allowed_qualifiers``.

    This is a generator: it yields nothing when ``purl`` has no qualifiers or
    when every qualifier key is allowed.
    """
    if not purl.qualifiers:
        # A bare return ends the generator; a return value would be discarded
        # by callers that iterate over the messages.
        return

    allowed_qualifiers_set = set(allowed_qualifiers)
    disallowed = set(purl.qualifiers) - allowed_qualifiers_set

    if disallowed:
        # Sorted so that the message text is stable from run to run.
        yield (
            f"Invalid qualifiers found: {', '.join(sorted(disallowed))}. "
            f"Allowed qualifiers are: {', '.join(sorted(allowed_qualifiers_set))}"
        )
180+
181+
182+
183+
# Source template for one generated validator class. Text fields are rendered
# with ``!r`` so that they are emitted as Python string literals: quotes,
# backslashes or newlines in the spec data cannot break the generated module.
# ``allowed_qualifiers`` and the boolean fields are inserted as-is.
TEMPLATE = """
class {class_name}({validator_class}):
    type = {type!r}
    type_name = {type_name!r}
    description = {description!r}
    use_repository = {use_repository}
    default_repository_url = {default_repository_url!r}
    namespace_requirement = {namespace_requirement!r}
    allowed_qualifiers = {allowed_qualifiers}
    namespace_case_sensitive = {namespace_case_sensitive}
    name_case_sensitive = {name_case_sensitive}
    version_case_sensitive = {version_case_sensitive}
    purl_pattern = {purl_pattern!r}
"""
197+
198+
# Snippet meant to be appended to a generated validator class body: it
# overrides ``normalize_name`` with a stub whose docstring lists the type's
# name normalization rules. No ``@override`` decorator is emitted because the
# generated module does not import one.
TEMPLATE_NAME_RULES = '''
    @classmethod
    def normalize_name(cls, name: str):
        """
        Normalize name according to type rules
        {rules}
        """
        raise NotImplementedError()
'''
208+
209+
def generate_validators():
    """
    Generate validators for all package types defined in the packageurl specification.

    Each ``spec/types/*.json`` type definition is rendered through ``TEMPLATE``
    into one validator class. The classes are written after ``HEADER`` to
    ``src/packageurl/validate.py``, followed by the type-to-class mapping and
    the router registration code.
    """
    base_dir = Path(__file__).parent.parent.parent
    types_dir = base_dir / "spec" / "types"

    standard_validator_class = "TypeValidator"
    script_parts = [HEADER]
    validators_by_type = {}

    # Sorted so that the generated module is stable from run to run.
    for type_path in sorted(types_dir.glob("*.json")):
        type_def = json.loads(type_path.read_text(encoding="utf-8"))

        _type = type_def["type"]
        class_name = f"{_type.capitalize()}{standard_validator_class}"
        validators_by_type[_type] = class_name

        name_definition = type_def["name_definition"]
        namespace_definition = type_def["namespace_definition"]
        version_definition = type_def.get("version_definition") or {}
        # Tolerate a missing or null "repository" section.
        repository = type_def.get("repository") or {}

        name_normalization_rules = name_definition.get("normalization_rules") or []
        use_repository = bool(repository.get("use_repository"))

        qualifier_keys = {
            definition["key"]
            for definition in type_def.get("qualifiers_definition") or []
            if definition.get("key")
        }
        if use_repository:
            qualifier_keys.add("repository_url")

        # Render the set as source text ourselves: the repr of a set has no
        # stable order, and an empty collection must still be emitted as a set
        # because the generated validator subtracts it from another set.
        if qualifier_keys:
            allowed_qualifiers = "{" + ", ".join(repr(key) for key in sorted(qualifier_keys)) + "}"
        else:
            allowed_qualifiers = "set()"

        type_validator = TEMPLATE.format(
            class_name=class_name,
            validator_class=standard_validator_class,
            type=_type,
            type_name=type_def["type_name"],
            description=type_def["description"],
            use_repository=use_repository,
            default_repository_url=repository.get("default_repository_url") or "",
            namespace_requirement=namespace_definition["requirement"],
            name_normalization_rules=name_normalization_rules,
            allowed_qualifiers=allowed_qualifiers,
            namespace_case_sensitive=bool(namespace_definition.get("case_sensitive")),
            name_case_sensitive=bool(name_definition.get("case_sensitive")),
            version_case_sensitive=bool(version_definition.get("case_sensitive")),
            purl_pattern=f"pkg:{_type}/.*",
        )

        script_parts.append(type_validator)

        # TODO: append get_name_norm_rules(name_normalization_rules) here once
        # per-type name normalization overrides are implemented.

    script_parts.append(generate_validators_by_type(validators_by_type=validators_by_type))
    script_parts.append(attach_router(validators_by_type.values()))

    validate_script = base_dir / "src" / "packageurl" / "validate.py"
    validate_script.write_text("\n".join(script_parts), encoding="utf-8")
274+
275+
276+
def get_name_norm_rules(name_normalization_rules):
    """
    Return the ``TEMPLATE_NAME_RULES`` snippet filled in with the given rules,
    one rule per line.
    """
    return TEMPLATE_NAME_RULES.format(rules="\n".join(name_normalization_rules))
279+
280+
281+
def generate_validators_by_type(validators_by_type):
    """
    Return a Python snippet defining ``VALIDATORS_BY_TYPE``, a dict that maps
    each type to its TypeValidator class.
    """
    entries = "\n".join(
        f" {purl_type!r} : {validator_name},"
        for purl_type, validator_name in validators_by_type.items()
    )
    return "VALIDATORS_BY_TYPE = {" + f"\n{entries}\n" + "}"
294+
295+
def attach_router(classes):
    """
    Return a Python snippet that lists the given validator class names in
    ``PACKAGE_REGISTRY`` and registers each class' ``validate`` with
    ``validate_router`` under the class' ``purl_pattern``.
    """
    registry_entries = "\n".join(f" {name}," for name in classes)
    registry = "PACKAGE_REGISTRY = [ \n" + registry_entries + "\n ]"
    routing = (
        "\n"
        "validate_router = Router()\n"
        "\n"
        "for pkg_class in PACKAGE_REGISTRY:\n"
        "    validate_router.append(pattern=pkg_class.purl_pattern, endpoint=pkg_class.validate)\n"
    )
    return registry + routing
311+
312+
313+
# Script entry point: regenerate the validators module when run directly.
if __name__ == "__main__":
    generate_validators()

src/packageurl/__init__.py

Lines changed: 11 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@
3737
from urllib.parse import unquote as _percent_unquote
3838
from urllib.parse import urlsplit as _urlsplit
3939

40+
from packageurl.contrib.route import NoRouteAvailable
41+
from packageurl.validate import validate_router
42+
4043
if TYPE_CHECKING:
4144
from collections.abc import Callable
4245
from collections.abc import Iterable
@@ -470,39 +473,16 @@ def to_string(self, encode: bool | None = True) -> str:
470473

471474
return "".join(purl)
472475

473-
474-
@classmethod
475-
def validate_alpm(cls):
476-
"""Type-specific validation for ALPM PURLs."""
477-
if cls.namespace.lower() != cls.namespace:
478-
return False, "Namespace must be lowercase."
479-
if cls.name.lower() != cls.name:
480-
return False, "Package name must be lowercase."
481-
if not re.match(r"^[0-9]*:?[\w\.\+\-]+$", cls.version):
482-
return False, f"Invalid version format '{cls.version}'."
483-
484-
if cls.qualifiers:
485-
for key in cls.qualifiers:
486-
if key != "arch":
487-
return False, f"Unknown qualifier '{key}', only 'arch' is allowed."
488-
if not cls.qualifiers[key]:
489-
return False, "Qualifier 'arch' cannot be empty."
490-
491-
492-
@classmethod
493-
def validate(cls):
476+
def validate(self) -> list[str]:
    """
    Validate this PackageURL object and return a list of validation error messages.

    The list is empty when no error is reported. When no validator is
    registered for this purl's type, the list holds a single message saying
    that the type can not be validated.
    """
    if not self:
        # Always return a list, as annotated, rather than an implicit None.
        return []

    try:
        return list(validate_router.process(self))
    except NoRouteAvailable:
        return [f"Given type: {self.type} can not be validated"]
506486

507487
@classmethod
508488
def from_string(cls, purl: str) -> Self:

0 commit comments

Comments
 (0)