Skip to content

Commit 408f833

Browse files
committed
Avoid unnecessary file checking operations
Combines _can_bag and _can_read into one function. Instead of compiling a list of every single file and directory that cannot be read/written to, this new function throws an Exception as soon as a problematic file or directory comes up. At most, one walk() operation is completed now, instead of two.
1 parent 861ddac commit 408f833

1 file changed

Lines changed: 101 additions & 130 deletions

File tree

bagit.py

Lines changed: 101 additions & 130 deletions
Original file line numberDiff line numberDiff line change
@@ -177,91 +177,64 @@ def make_bag(
177177
old_dir = os.path.abspath(os.path.curdir)
178178

179179
try:
180-
# TODO: These two checks are currently redundant since an unreadable directory will also
181-
# often be unwritable, and this code will require review when we add the option to
182-
# bag to a destination other than the source. It would be nice if we could avoid
183-
# walking the directory tree more than once even if most filesystems will cache it
180+
# An exception is raised if any permissions will prevent the bagging of this path
181+
_check_can_bag(bag_dir)
184182

185-
unbaggable = _can_bag(bag_dir)
183+
LOGGER.info(_("Creating data directory"))
186184

187-
if unbaggable:
188-
LOGGER.error(
189-
_("Unable to write to the following directories and files:\n%s"),
190-
unbaggable,
191-
)
192-
raise BagError(_("Missing permissions to move all files and directories"))
193-
194-
unreadable_dirs, unreadable_files = _can_read(bag_dir)
195-
196-
if unreadable_dirs or unreadable_files:
197-
if unreadable_dirs:
198-
LOGGER.error(
199-
_("The following directories do not have read permissions:\n%s"),
200-
unreadable_dirs,
201-
)
202-
if unreadable_files:
203-
LOGGER.error(
204-
_("The following files do not have read permissions:\n%s"),
205-
unreadable_files,
206-
)
207-
raise BagError(
208-
_("Read permissions are required to calculate file fixities")
209-
)
210-
else:
211-
LOGGER.info(_("Creating data directory"))
212-
213-
# FIXME: if we calculate full paths we won't need to deal with changing directories
214-
os.chdir(bag_dir)
215-
cwd = os.getcwd()
216-
temp_data = tempfile.mkdtemp(dir=cwd)
217-
218-
for f in os.listdir("."):
219-
if os.path.abspath(f) == temp_data:
220-
continue
221-
new_f = os.path.join(temp_data, f)
222-
LOGGER.info(
223-
_("Moving %(source)s to %(destination)s"),
224-
{"source": f, "destination": new_f},
225-
)
226-
os.rename(f, new_f)
185+
# FIXME: if we calculate full paths we won't need to deal with changing directories
186+
os.chdir(bag_dir)
187+
cwd = os.getcwd()
188+
temp_data = tempfile.mkdtemp(dir=cwd)
227189

190+
for f in os.listdir("."):
191+
if os.path.abspath(f) == temp_data:
192+
continue
193+
new_f = os.path.join(temp_data, f)
228194
LOGGER.info(
229195
_("Moving %(source)s to %(destination)s"),
230-
{"source": temp_data, "destination": "data"},
196+
{"source": f, "destination": new_f},
231197
)
232-
os.rename(temp_data, "data")
198+
os.rename(f, new_f)
233199

234-
# permissions for the payload directory should match those of the
235-
# original directory
236-
os.chmod("data", os.stat(cwd).st_mode)
200+
LOGGER.info(
201+
_("Moving %(source)s to %(destination)s"),
202+
{"source": temp_data, "destination": "data"},
203+
)
204+
os.rename(temp_data, "data")
237205

238-
total_bytes, total_files = make_manifests(
239-
"data", processes, algorithms=checksums, encoding=encoding
206+
# permissions for the payload directory should match those of the
207+
# original directory
208+
os.chmod("data", os.stat(cwd).st_mode)
209+
210+
total_bytes, total_files = make_manifests(
211+
"data", processes, algorithms=checksums, encoding=encoding
212+
)
213+
214+
LOGGER.info(_("Creating bagit.txt"))
215+
txt = """BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8\n"""
216+
with open_text_file("bagit.txt", "w") as bagit_file:
217+
bagit_file.write(txt)
218+
219+
LOGGER.info(_("Creating bag-info.txt"))
220+
if bag_info is None:
221+
bag_info = {}
222+
223+
# allow 'Bagging-Date' and 'Bag-Software-Agent' to be overidden
224+
if "Bagging-Date" not in bag_info:
225+
bag_info["Bagging-Date"] = date.strftime(date.today(), "%Y-%m-%d")
226+
if "Bag-Software-Agent" not in bag_info:
227+
bag_info["Bag-Software-Agent"] = "bagit.py v%s <%s>" % (
228+
VERSION,
229+
PROJECT_URL,
240230
)
241231

242-
LOGGER.info(_("Creating bagit.txt"))
243-
txt = """BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8\n"""
244-
with open_text_file("bagit.txt", "w") as bagit_file:
245-
bagit_file.write(txt)
246-
247-
LOGGER.info(_("Creating bag-info.txt"))
248-
if bag_info is None:
249-
bag_info = {}
250-
251-
# allow 'Bagging-Date' and 'Bag-Software-Agent' to be overidden
252-
if "Bagging-Date" not in bag_info:
253-
bag_info["Bagging-Date"] = date.strftime(date.today(), "%Y-%m-%d")
254-
if "Bag-Software-Agent" not in bag_info:
255-
bag_info["Bag-Software-Agent"] = "bagit.py v%s <%s>" % (
256-
VERSION,
257-
PROJECT_URL,
258-
)
232+
bag_info["Payload-Oxum"] = "%s.%s" % (total_bytes, total_files)
233+
_make_tag_file("bag-info.txt", bag_info)
259234

260-
bag_info["Payload-Oxum"] = "%s.%s" % (total_bytes, total_files)
261-
_make_tag_file("bag-info.txt", bag_info)
235+
for c in checksums:
236+
_make_tagmanifest_file(c, bag_dir, encoding="utf-8")
262237

263-
for c in checksums:
264-
_make_tagmanifest_file(c, bag_dir, encoding="utf-8")
265238
except Exception:
266239
LOGGER.exception(_("An error occurred creating a bag in %s"), bag_dir)
267240
raise
@@ -472,31 +445,7 @@ def save(self, processes=1, manifests=False):
472445
% self.path
473446
)
474447

475-
unbaggable = _can_bag(self.path)
476-
if unbaggable:
477-
LOGGER.error(
478-
_(
479-
"Missing write permissions for the following directories and files:\n%s"
480-
),
481-
unbaggable,
482-
)
483-
raise BagError(_("Missing permissions to move all files and directories"))
484-
485-
unreadable_dirs, unreadable_files = _can_read(self.path)
486-
if unreadable_dirs or unreadable_files:
487-
if unreadable_dirs:
488-
LOGGER.error(
489-
_("The following directories do not have read permissions:\n%s"),
490-
unreadable_dirs,
491-
)
492-
if unreadable_files:
493-
LOGGER.error(
494-
_("The following files do not have read permissions:\n%s"),
495-
unreadable_files,
496-
)
497-
raise BagError(
498-
_("Read permissions are required to calculate file fixities")
499-
)
448+
_check_can_bag(self.path)
500449

501450
# Change working directory to bag directory so helper functions work
502451
old_dir = os.path.abspath(os.path.curdir)
@@ -1328,47 +1277,69 @@ def _walk(data_dir):
13281277
yield path
13291278

13301279

1331-
def _can_bag(test_dir):
1332-
"""Scan the provided directory for files which cannot be bagged due to insufficient permissions"""
1333-
unbaggable = []
1280+
def _check_can_bag(test_dir):
1281+
"""
1282+
Scan the provided directory to ensure all files and directories can be read
1283+
and alldirectories can be written.
13341284
1285+
If there are any permission issues, a BagError is raised.
1286+
"""
13351287
if not os.access(test_dir, os.R_OK):
1336-
# We cannot continue without permission to read the source directory
1337-
unbaggable.append(test_dir)
1338-
return unbaggable
1288+
LOGGER.error(_("Cannot read the source directory %s"), test_dir)
1289+
raise BagError(
1290+
_(
1291+
"Cannot read the source directory %s. Read permissions are "
1292+
"required to find files"
1293+
)
1294+
% test_dir
1295+
)
13391296

13401297
if not os.access(test_dir, os.W_OK):
1341-
unbaggable.append(test_dir)
1298+
LOGGER.error(_("Cannot write to the source directory %s"), test_dir)
1299+
raise BagError(
1300+
_(
1301+
"Cannot write to the source directory %s. Write permissions "
1302+
"are required to move files within the bag"
1303+
)
1304+
% test_dir
1305+
)
13421306

1343-
for dirpath, dirnames, filenames in os.walk(test_dir):
1344-
for directory in dirnames:
1345-
full_path = os.path.join(dirpath, directory)
1346-
if not os.access(full_path, os.W_OK):
1347-
unbaggable.append(full_path)
1307+
for dirpath, directories, filenames in os.walk(test_dir, topdown=True):
1308+
for filename in filenames:
1309+
full_path = os.path.join(dirpath, filename)
13481310

1349-
return unbaggable
1311+
if not os.access(full_path, os.R_OK):
1312+
LOGGER.error(_("Cannot read the file %s"), full_path)
1313+
raise BagError(
1314+
_(
1315+
"Cannot read the file %s. Read permissions are "
1316+
"required to calculate file fixities"
1317+
)
1318+
% full_path
1319+
)
13501320

1321+
for directory in directories:
1322+
full_path = os.path.join(dirpath, directory)
13511323

1352-
def _can_read(test_dir):
1353-
"""
1354-
returns ((unreadable_dirs), (unreadable_files))
1355-
"""
1356-
unreadable_dirs = []
1357-
unreadable_files = []
1324+
if not os.access(full_path, os.R_OK):
1325+
LOGGER.error(_("Cannot read the sub-directory %s"), full_path)
1326+
raise BagError(
1327+
_(
1328+
"Cannot read the sub-directory %s. Read permissions "
1329+
"are required to find files"
1330+
)
1331+
% full_path
1332+
)
13581333

1359-
if not os.access(test_dir, os.R_OK):
1360-
unreadable_dirs.append(test_dir)
1361-
else:
1362-
for dirpath, dirnames, filenames in os.walk(test_dir):
1363-
for dn in dirnames:
1364-
full_path = os.path.join(dirpath, dn)
1365-
if not os.access(full_path, os.R_OK):
1366-
unreadable_dirs.append(full_path)
1367-
for fn in filenames:
1368-
full_path = os.path.join(dirpath, fn)
1369-
if not os.access(full_path, os.R_OK):
1370-
unreadable_files.append(full_path)
1371-
return (tuple(unreadable_dirs), tuple(unreadable_files))
1334+
elif not os.access(full_path, os.W_OK):
1335+
LOGGER.error(_("Cannot write to the sub-directory %s"), full_path)
1336+
raise BagError(
1337+
_(
1338+
"Cannot write to the sub-directory %s. Write permissions "
1339+
"are required to move files within the bag"
1340+
)
1341+
% full_path
1342+
)
13721343

13731344

13741345
def generate_manifest_lines(filename, algorithms=DEFAULT_CHECKSUMS):

0 commit comments

Comments
 (0)