@@ -116,15 +116,18 @@ def normalize_namespace(
116116
117117 namespace_str = namespace if isinstance (namespace , str ) else namespace .decode ("utf-8" )
118118 namespace_str = namespace_str .strip ().strip ("/" )
119- if ptype in ("bitbucket" , "github" , "pypi" , "gitlab" ):
119+ if ptype in ("bitbucket" , "github" , "pypi" , "gitlab" , "composer" ):
120120 namespace_str = namespace_str .lower ()
121121 segments = [seg for seg in namespace_str .split ("/" ) if seg .strip ()]
122122 segments_quoted = map (get_quoter (encode ), segments )
123123 return "/" .join (segments_quoted ) or None
124124
125125
126126def normalize_name (
127- name : AnyStr | None , ptype : str | None , encode : bool | None = True
127+ name : AnyStr | None ,
128+ qualifiers : str | dict | None ,
129+ ptype : str | None ,
130+ encode : bool | None = True ,
128131) -> str | None :
129132 if not name :
130133 return None
@@ -133,20 +136,38 @@ def normalize_name(
133136 quoter = get_quoter (encode )
134137 name_str = quoter (name_str )
135138 name_str = name_str .strip ().strip ("/" )
136- if ptype in ("bitbucket" , "github" , "pypi" , "gitlab" , "huggingface" ):
139+ if ptype and ptype in ("mlflow" ):
140+ # MLflow name casing depends on the registry identified from the qualifiers.
141+ # Azure ML treats names as case-sensitive, so the name is kept as-is in the package URL; Databricks treats names as case-insensitive, so the name is lowercased.
142+ if isinstance (qualifiers , dict ):
143+ repo_url = qualifiers .get ("repository_url" )
144+ if repo_url and "azureml" in repo_url .lower ():
145+ return name_str
146+ if repo_url and "databricks" in repo_url .lower ():
147+ return name_str .lower ()
148+ if isinstance (qualifiers , str ):
149+ if "azureml" in qualifiers .lower ():
150+ return name_str
151+ if "databricks" in qualifiers .lower ():
152+ return name_str .lower ()
153+ if ptype in ("bitbucket" , "github" , "pypi" , "gitlab" , "composer" ):
137154 name_str = name_str .lower ()
138155 if ptype == "pypi" :
139156 name_str = name_str .replace ("_" , "-" )
140157 return name_str or None
141158
142159
143- def normalize_version (version : AnyStr | None , encode : bool | None = True ) -> str | None :
160+ def normalize_version (
161+ version : AnyStr | None , ptype : str | None , encode : bool | None = True
162+ ) -> str | None :
144163 if not version :
145164 return None
146165
147166 version_str = version if isinstance (version , str ) else version .decode ("utf-8" )
148167 quoter = get_quoter (encode )
149168 version_str = quoter (version_str .strip ())
169+ if ptype and ptype in ("huggingface" ):
170+ return version_str .lower ()
150171 return version_str or None
151172
152173
@@ -304,8 +325,8 @@ def normalize(
304325 """
305326 type_norm = normalize_type (type , encode )
306327 namespace_norm = normalize_namespace (namespace , type_norm , encode )
307- name_norm = normalize_name (name , type_norm , encode )
308- version_norm = normalize_version (version , encode )
328+ name_norm = normalize_name (name , qualifiers , type_norm , encode )
329+ version_norm = normalize_version (version , type , encode )
309330 qualifiers_norm = normalize_qualifiers (qualifiers , encode )
310331 subpath_norm = normalize_subpath (subpath , encode )
311332 return type_norm , namespace_norm , name_norm , version_norm , qualifiers_norm , subpath_norm
0 commit comments