Skip to content

Commit 59e2089

Browse files
committed
gh-117829 : addressed comments, purely CLI flags of glob patterns passed into the create_archive interface
1 parent d5ffee3 commit 59e2089

5 files changed

Lines changed: 89 additions & 157 deletions

File tree

Doc/library/zipapp.rst

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,15 @@ The following options are understood:
9494
this case, any other options are ignored and SOURCE must be an archive, not a
9595
directory.
9696

97-
.. option:: --include-pattern
97+
.. option:: --include
9898

99-
Accept a regex filtering for files to be allowed in output archive. This will run
100-
first if :option:`--exclude-pattern` is also used.
99+
Accept glob-style patterns selecting the files to be allowed in the output archive. This will run
100+
first if :option:`--exclude` is also used.
101101

102-
.. option:: --exclude-pattern
102+
.. option:: --exclude
103103

104-
Accept a regex filtering files to be denied inclusion in output archive. This will
105-
run second if :option:`--include-pattern` is also used.
104+
Accept glob-style patterns selecting the files to be excluded from the output archive. This will
105+
run second if :option:`--include` is also used.
106106

107107

108108
.. option:: -h, --help
@@ -241,21 +241,20 @@ fits in memory::
241241
>>> f.write(temp.getvalue())
242242

243243
To apply an allow-list or deny-list to the files in the directory being zipped, make use
244-
of :option:`--exclude-pattern` and/or :option:`--include-pattern`.
244+
of :option:`--exclude` and/or :option:`--include` with glob-style patterns.
245245

246246
.. code-block:: shell-session
247247
248248
$ ls myapp
249249
__main__.py helper.py notthis.py
250250
251-
$ python -m zipapp myapp -o myapp.pyz --exclude-pattern='.*notthis.*'
251+
$ python -m zipapp myapp -o myapp.pyz --include "help*" --include "not*" --exclude "n*"
252252
$ unzip myapp.pyz -d extracted_myapp
253253
Archive: myapp.pyz
254-
extracting: extracted_myapp/__main__.py
255254
extracting: extracted_myapp/helper.py
256255
257256
$ ls extracted_myapp
258-
__main__.py helper.py
257+
helper.py
259258
260259
261260
.. _zipapp-specifying-the-interpreter:

Lib/test/test_zipapp.py

Lines changed: 0 additions & 125 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import io
44
import pathlib
5-
import re
65
import stat
76
import sys
87
import tempfile
@@ -354,73 +353,6 @@ def test_content_of_copied_archive(self):
354353
with zipfile.ZipFile(new_target, 'r') as z:
355354
self.assertEqual(set(z.namelist()), {'__main__.py'})
356355

357-
def test_create_archive_with_include_pattern(self):
358-
source = self.tmpdir / 'source'
359-
source.mkdir()
360-
(source / '.DS_Store').touch()
361-
(source / 'zed.py').touch()
362-
(source / 'bin').mkdir()
363-
(source / 'bin' / 'qux').touch()
364-
(source / 'bin' / 'baz').touch()
365-
(source / '__main__.py').touch()
366-
367-
target = io.BytesIO()
368-
zipapp.create_archive(
369-
source=str(source),
370-
target=target,
371-
include_pattern=re.compile(r".*\.py")
372-
)
373-
374-
target.seek(0)
375-
with zipfile.ZipFile(target, 'r') as zf:
376-
self.assertEqual(zf.namelist(),
377-
["__main__.py", "zed.py"])
378-
379-
def test_create_archive_with_exclude_pattern(self):
380-
source = self.tmpdir / 'source'
381-
source.mkdir()
382-
(source / '.DS_Store').touch()
383-
(source / 'zed.py').touch()
384-
(source / 'bin').mkdir()
385-
(source / 'bin' / 'qux').touch()
386-
(source / 'bin' / 'baz').touch()
387-
(source / '__main__.py').touch()
388-
389-
target = io.BytesIO()
390-
zipapp.create_archive(
391-
source=str(source),
392-
target=target,
393-
exclude_pattern=re.compile(r".*\.py")
394-
)
395-
396-
target.seek(0)
397-
with zipfile.ZipFile(target, 'r') as zf:
398-
self.assertEqual(zf.namelist(),
399-
[".DS_Store", "bin/", "bin/baz", "bin/qux"])
400-
401-
def test_create_archive_with_include_and_exclude_pattern(self):
402-
source = self.tmpdir / 'source'
403-
source.mkdir()
404-
(source / '.DS_Store').touch()
405-
(source / 'zed.py').touch()
406-
(source / 'bin').mkdir()
407-
(source / 'bin' / 'qux').touch()
408-
(source / 'bin' / 'baz').touch()
409-
(source / '__main__.py').touch()
410-
411-
target = io.BytesIO()
412-
zipapp.create_archive(
413-
source=str(source),
414-
target=target,
415-
include_pattern=re.compile(r".*\.py"),
416-
exclude_pattern=re.compile(r".*zed\.py")
417-
)
418-
419-
target.seek(0)
420-
with zipfile.ZipFile(target, 'r') as zf:
421-
self.assertEqual(zf.namelist(),
422-
["__main__.py"])
423-
424356
# (Unix only) tests that archives with shebang lines are made executable
425357
@unittest.skipIf(sys.platform == 'win32',
426358
'Windows does not support an executable bit')
@@ -522,63 +454,6 @@ def test_info_error(self):
522454
# Program should exit with a non-zero return code.
523455
self.assertTrue(cm.exception.code)
524456

525-
def test_cmdline_create_with_include_pattern(self):
526-
source = self.tmpdir / 'source'
527-
source.mkdir()
528-
(source / '.DS_Store').touch()
529-
(source / 'zed.py').touch()
530-
(source / 'bin').mkdir()
531-
(source / 'bin' / 'qux').touch()
532-
(source / 'bin' / 'baz').touch()
533-
(source / '__main__.py').touch()
534-
535-
args = [str(source), '--include-pattern', r'.*\.py']
536-
zipapp.main(args)
537-
target = source.with_suffix('.pyz')
538-
self.assertTrue(target.is_file())
539-
540-
with zipfile.ZipFile(target, 'r') as zf:
541-
self.assertEqual(zf.namelist(),
542-
["__main__.py", "zed.py"])
543-
544-
def test_cmdline_create_with_exclude_pattern(self):
545-
source = self.tmpdir / 'source'
546-
source.mkdir()
547-
(source / '.DS_Store').touch()
548-
(source / 'zed.py').touch()
549-
(source / 'bin').mkdir()
550-
(source / 'bin' / 'qux').touch()
551-
(source / 'bin' / 'baz').touch()
552-
(source / '__main__.py').touch()
553-
554-
args = [str(source), '--exclude-pattern', r'.*\.py']
555-
zipapp.main(args)
556-
target = source.with_suffix('.pyz')
557-
self.assertTrue(target.is_file())
558-
559-
with zipfile.ZipFile(target, 'r') as zf:
560-
self.assertEqual(zf.namelist(),
561-
[".DS_Store", "bin/", "bin/baz", "bin/qux"])
562-
563-
def test_cmdline_create_with_include_and_exclude_pattern(self):
564-
source = self.tmpdir / 'source'
565-
source.mkdir()
566-
(source / '.DS_Store').touch()
567-
(source / 'zed.py').touch()
568-
(source / 'bin').mkdir()
569-
(source / 'bin' / 'qux').touch()
570-
(source / 'bin' / 'baz').touch()
571-
(source / '__main__.py').touch()
572-
573-
args = [str(source), '--include-pattern', r'.*\.py', '--exclude-pattern', r'.*zed\.py']
574-
zipapp.main(args)
575-
target = source.with_suffix('.pyz')
576-
self.assertTrue(target.is_file())
577-
578-
with zipfile.ZipFile(target, 'r') as zf:
579-
self.assertEqual(zf.namelist(),
580-
["__main__.py"])
581-
582457

583458
if __name__ == "__main__":
584459
unittest.main()

Lib/zipapp.py

Lines changed: 78 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import contextlib
22
import os
33
import pathlib
4-
import re
54
import shutil
65
import stat
76
import sys
87
import zipfile
98

9+
from collections.abc import Iterable, Callable
10+
1011
__all__ = ['ZipAppError', 'create_archive', 'get_interpreter']
1112

1213

@@ -75,8 +76,7 @@ def _copy_archive(archive, new_archive, interpreter=None):
7576

7677

7778
def create_archive(source, target=None, interpreter=None, main=None,
78-
filter=None, compressed=False, include_pattern=None,
79-
exclude_pattern=None):
79+
filter=None, compressed=False):
8080
"""Create an application archive from SOURCE.
8181
8282
The SOURCE can be the name of a directory, or a filename or a file-like
@@ -178,7 +178,67 @@ def get_interpreter(archive):
178178
with _maybe_open(archive, 'rb') as f:
179179
if f.read(2) == b'#!':
180180
return f.readline().strip().decode(shebang_encoding)
181-
181+
182+
def _normalize_patterns(values: Iterable[str] | None) -> list[str]:
183+
"""
184+
Split comma-separated items, strip whitespace, drop empties.
185+
If a token has no glob metacharacters, treat it as a directory prefix:
186+
expand 'foo' into ['foo', 'foo/**'] (after normalizing slashes).
187+
"""
188+
if not values:
189+
return []
190+
191+
def has_glob(s: str) -> bool:
192+
return any(ch in s for ch in "*?[]")
193+
194+
out: list[str] = []
195+
for v in values:
196+
for raw in (p.strip() for p in v.split(',')):
197+
if not raw:
198+
continue
199+
# normalize user input to POSIX-like form (match against rel.as_posix())
200+
tok = raw.replace('\\', '/').lstrip('./').rstrip('/')
201+
if not tok:
202+
continue
203+
if has_glob(tok):
204+
out.append(tok)
205+
else:
206+
# directory name implies subtree
207+
out.append(tok)
208+
out.append(f"{tok}/**")
209+
return out
210+
211+
def _make_glob_filter(
212+
includes: Iterable[str] | None,
213+
excludes: Iterable[str] | None
214+
) -> Callable[[pathlib.Path], bool]:
215+
"""
216+
Build a filter(relative_path: Path) -> bool applying include first, then exclude.
217+
- Path argument is relative to source_root
218+
- Patterns are matched against POSIX-style relative paths
219+
- If includes is empty, defaults to ["**"] (include all)
220+
"""
221+
inc = _normalize_patterns(includes)
222+
exc = _normalize_patterns(excludes)
223+
if not inc:
224+
inc = ["**"]
225+
226+
def matches_any(patterns: list[str], rel: pathlib.Path) -> bool:
227+
posix = rel.as_posix()
228+
# pathlib.Path.match uses glob semantics with ** (recursive)
229+
return any(rel.match(pat) or pathlib.PurePosixPath(posix).match(pat)
230+
for pat in patterns)
231+
232+
def _filter(rel: pathlib.Path) -> bool:
233+
# Always work on files and directories; we'll add both. If a directory
234+
# is excluded, its children still get visited by rglob('*') but will fail here.
235+
if not matches_any(inc, rel):
236+
return False
237+
if exc and matches_any(exc, rel):
238+
return False
239+
return True
240+
241+
return _filter
182242

183243
def main(args=None):
184244
"""Run the zipapp command line interface.
@@ -204,19 +264,14 @@ def main(args=None):
204264
"Files are stored uncompressed by default.")
205265
parser.add_argument('--info', default=False, action='store_true',
206266
help="Display the interpreter from the archive.")
207-
parser.add_argument('--include-pattern', default=None,
208-
help=(
209-
"Accept a regex filtering for files to be allowed in output"
210-
" archive. This will run first if `--exclude-pattern` is also used."
211-
))
212-
parser.add_argument('--exclude-pattern', default=None,
213-
help=(
214-
"Accept a regex filtering files to be denied inclusion in output"
215-
" archive. This will run second if `--include-pattern` is also used."
216-
" Usage example: `python -m zipapp myapp -o myapp.pyz --exclude-pattern='.*notthis.*'`"
217-
))
218267
parser.add_argument('source',
219268
help="Source directory (or existing archive).")
269+
parser.add_argument('--include', action='extend', nargs='+', default=None,
270+
help=("Glob pattern(s) of files/dirs to include (relative to SOURCE). "
271+
"Repeat or use commas. Defaults to '**' (everything)."))
272+
parser.add_argument('--exclude', action='extend', nargs='+', default=None,
273+
help=("Glob pattern(s) of files/dirs to exclude (relative to SOURCE). "
274+
"Repeat or use commas. Applied after --include."))
220275

221276
args = parser.parse_args(args)
222277

@@ -234,14 +289,17 @@ def main(args=None):
234289
raise SystemExit("In-place editing of archives is not supported")
235290
if args.main:
236291
raise SystemExit("Cannot change the main function when copying")
237-
238-
include_pattern = re.compile(args.include_pattern) if args.include_pattern else None
239-
exclude_pattern = re.compile(args.exclude_pattern) if args.exclude_pattern else None
292+
293+
# build a filter from include and exclude flags
294+
filter_fn = None
295+
src_path = pathlib.Path(args.source)
296+
if src_path.exists() and src_path.is_dir():
297+
filter_fn = _make_glob_filter(args.include, args.exclude)
240298

241299
create_archive(args.source, args.output,
242300
interpreter=args.python, main=args.main,
243-
compressed=args.compress, include_pattern=include_pattern,
244-
exclude_pattern=exclude_pattern)
301+
compressed=args.compress,
302+
filter=filter_fn)
245303

246304

247305
if __name__ == '__main__':

Misc/NEWS.d/next/Library/2024-06-26-02-14-43.gh-issue-117829.Fpf5i1.rst

Lines changed: 0 additions & 2 deletions
This file was deleted.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add flags ``--include`` and ``--exclude`` to the CLI of the :mod:`zipapp` module. These flags accept glob patterns to
2+
define an allow-list and/or a deny-list controlling which files are added to the zipapp archive.

0 commit comments

Comments
 (0)