Skip to content

Commit f02daaa

Browse files
djwonggregkh
authored andcommitted
xfs: fix deadlock and streamline xfs_getfsmap performance
[ Upstream commit 8ffa90e ] Refactor xfs_getfsmap to improve its performance: instead of indirectly calling a function that copies one record to userspace at a time, create a shadow buffer in the kernel and copy the whole array once at the end. On the author's computer, this reduces the runtime on his /home by ~20%. This also eliminates a deadlock when running GETFSMAP against the realtime device. The current code locks the rtbitmap to create fsmappings and copies them into userspace, having not released the rtbitmap lock. If the userspace buffer is an mmap of a sparse file that itself resides on the realtime device, the write page fault will recurse into the fs for allocation, which will deadlock on the rtbitmap lock. Fixes: 4c934c7 ("xfs: report realtime space information via the rtbitmap") Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent afd3729 commit f02daaa

3 files changed

Lines changed: 124 additions & 71 deletions

File tree

fs/xfs/xfs_fsmap.c

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#include "xfs_rtalloc.h"
2727

2828
/* Convert an xfs_fsmap to an fsmap. */
29-
void
29+
static void
3030
xfs_fsmap_from_internal(
3131
struct fsmap *dest,
3232
struct xfs_fsmap *src)
@@ -155,8 +155,7 @@ xfs_fsmap_owner_from_rmap(
155155
/* getfsmap query state */
156156
struct xfs_getfsmap_info {
157157
struct xfs_fsmap_head *head;
158-
xfs_fsmap_format_t formatter; /* formatting fn */
159-
void *format_arg; /* format buffer */
158+
struct fsmap *fsmap_recs; /* mapping records */
160159
struct xfs_buf *agf_bp; /* AGF, for refcount queries */
161160
xfs_daddr_t next_daddr; /* next daddr we expect */
162161
u64 missing_owner; /* owner of holes */
@@ -224,6 +223,20 @@ xfs_getfsmap_is_shared(
224223
return 0;
225224
}
226225

226+
static inline void
227+
xfs_getfsmap_format(
228+
struct xfs_mount *mp,
229+
struct xfs_fsmap *xfm,
230+
struct xfs_getfsmap_info *info)
231+
{
232+
struct fsmap *rec;
233+
234+
trace_xfs_getfsmap_mapping(mp, xfm);
235+
236+
rec = &info->fsmap_recs[info->head->fmh_entries++];
237+
xfs_fsmap_from_internal(rec, xfm);
238+
}
239+
227240
/*
228241
* Format a reverse mapping for getfsmap, having translated rm_startblock
229242
* into the appropriate daddr units.
@@ -288,10 +301,7 @@ xfs_getfsmap_helper(
288301
fmr.fmr_offset = 0;
289302
fmr.fmr_length = rec_daddr - info->next_daddr;
290303
fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
291-
error = info->formatter(&fmr, info->format_arg);
292-
if (error)
293-
return error;
294-
info->head->fmh_entries++;
304+
xfs_getfsmap_format(mp, &fmr, info);
295305
}
296306

297307
if (info->last)
@@ -323,11 +333,8 @@ xfs_getfsmap_helper(
323333
if (shared)
324334
fmr.fmr_flags |= FMR_OF_SHARED;
325335
}
326-
error = info->formatter(&fmr, info->format_arg);
327-
if (error)
328-
return error;
329-
info->head->fmh_entries++;
330336

337+
xfs_getfsmap_format(mp, &fmr, info);
331338
out:
332339
rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
333340
if (info->next_daddr < rec_daddr)
@@ -795,11 +802,11 @@ xfs_getfsmap_check_keys(
795802
#endif /* CONFIG_XFS_RT */
796803

797804
/*
798-
* Get filesystem's extents as described in head, and format for
799-
* output. Calls formatter to fill the user's buffer until all
800-
* extents are mapped, until the passed-in head->fmh_count slots have
801-
* been filled, or until the formatter short-circuits the loop, if it
802-
* is tracking filled-in extents on its own.
805+
* Get filesystem's extents as described in head, and format for output. Fills
806+
* in the supplied records array until there are no more reverse mappings to
807+
* return or head.fmh_entries == head.fmh_count. In the second case, this
808+
* function returns -ECANCELED to indicate that more records would have been
809+
* returned.
803810
*
804811
* Key to Confusion
805812
* ----------------
@@ -819,8 +826,7 @@ int
819826
xfs_getfsmap(
820827
struct xfs_mount *mp,
821828
struct xfs_fsmap_head *head,
822-
xfs_fsmap_format_t formatter,
823-
void *arg)
829+
struct fsmap *fsmap_recs)
824830
{
825831
struct xfs_trans *tp = NULL;
826832
struct xfs_fsmap dkeys[2]; /* per-dev keys */
@@ -895,8 +901,7 @@ xfs_getfsmap(
895901

896902
info.next_daddr = head->fmh_keys[0].fmr_physical +
897903
head->fmh_keys[0].fmr_length;
898-
info.formatter = formatter;
899-
info.format_arg = arg;
904+
info.fsmap_recs = fsmap_recs;
900905
info.head = head;
901906

902907
/*

fs/xfs/xfs_fsmap.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,9 @@ struct xfs_fsmap_head {
2727
struct xfs_fsmap fmh_keys[2]; /* low and high keys */
2828
};
2929

30-
void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src);
3130
void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src);
3231

33-
/* fsmap to userspace formatter - copy to user & advance pointer */
34-
typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *);
35-
3632
int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head,
37-
xfs_fsmap_format_t formatter, void *arg);
33+
struct fsmap *out_recs);
3834

3935
#endif /* __XFS_FSMAP_H__ */

fs/xfs/xfs_ioctl.c

Lines changed: 98 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1715,39 +1715,17 @@ xfs_ioc_getbmap(
17151715
return error;
17161716
}
17171717

1718-
struct getfsmap_info {
1719-
struct xfs_mount *mp;
1720-
struct fsmap_head __user *data;
1721-
unsigned int idx;
1722-
__u32 last_flags;
1723-
};
1724-
1725-
STATIC int
1726-
xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv)
1727-
{
1728-
struct getfsmap_info *info = priv;
1729-
struct fsmap fm;
1730-
1731-
trace_xfs_getfsmap_mapping(info->mp, xfm);
1732-
1733-
info->last_flags = xfm->fmr_flags;
1734-
xfs_fsmap_from_internal(&fm, xfm);
1735-
if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm,
1736-
sizeof(struct fsmap)))
1737-
return -EFAULT;
1738-
1739-
return 0;
1740-
}
1741-
17421718
STATIC int
17431719
xfs_ioc_getfsmap(
17441720
struct xfs_inode *ip,
17451721
struct fsmap_head __user *arg)
17461722
{
1747-
struct getfsmap_info info = { NULL };
17481723
struct xfs_fsmap_head xhead = {0};
17491724
struct fsmap_head head;
1750-
bool aborted = false;
1725+
struct fsmap *recs;
1726+
unsigned int count;
1727+
__u32 last_flags = 0;
1728+
bool done = false;
17511729
int error;
17521730

17531731
if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
@@ -1759,38 +1737,112 @@ xfs_ioc_getfsmap(
17591737
sizeof(head.fmh_keys[1].fmr_reserved)))
17601738
return -EINVAL;
17611739

1740+
/*
1741+
* Use an internal memory buffer so that we don't have to copy fsmap
1742+
* data to userspace while holding locks. Start by trying to allocate
1743+
* up to 128k for the buffer, but fall back to a single page if needed.
1744+
*/
1745+
count = min_t(unsigned int, head.fmh_count,
1746+
131072 / sizeof(struct fsmap));
1747+
recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
1748+
if (!recs) {
1749+
count = min_t(unsigned int, head.fmh_count,
1750+
PAGE_SIZE / sizeof(struct fsmap));
1751+
recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
1752+
if (!recs)
1753+
return -ENOMEM;
1754+
}
1755+
17621756
xhead.fmh_iflags = head.fmh_iflags;
1763-
xhead.fmh_count = head.fmh_count;
17641757
xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
17651758
xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
17661759

17671760
trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
17681761
trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);
17691762

1770-
info.mp = ip->i_mount;
1771-
info.data = arg;
1772-
error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info);
1773-
if (error == -ECANCELED) {
1774-
error = 0;
1775-
aborted = true;
1776-
} else if (error)
1777-
return error;
1763+
head.fmh_entries = 0;
1764+
do {
1765+
struct fsmap __user *user_recs;
1766+
struct fsmap *last_rec;
1767+
1768+
user_recs = &arg->fmh_recs[head.fmh_entries];
1769+
xhead.fmh_entries = 0;
1770+
xhead.fmh_count = min_t(unsigned int, count,
1771+
head.fmh_count - head.fmh_entries);
1772+
1773+
/* Run query, record how many entries we got. */
1774+
error = xfs_getfsmap(ip->i_mount, &xhead, recs);
1775+
switch (error) {
1776+
case 0:
1777+
/*
1778+
* There are no more records in the result set. Copy
1779+
* whatever we got to userspace and break out.
1780+
*/
1781+
done = true;
1782+
break;
1783+
case -ECANCELED:
1784+
/*
1785+
* The internal memory buffer is full. Copy whatever
1786+
* records we got to userspace and go again if we have
1787+
* not yet filled the userspace buffer.
1788+
*/
1789+
error = 0;
1790+
break;
1791+
default:
1792+
goto out_free;
1793+
}
1794+
head.fmh_entries += xhead.fmh_entries;
1795+
head.fmh_oflags = xhead.fmh_oflags;
17781796

1779-
/* If we didn't abort, set the "last" flag in the last fmx */
1780-
if (!aborted && info.idx) {
1781-
info.last_flags |= FMR_OF_LAST;
1782-
if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags,
1783-
&info.last_flags, sizeof(info.last_flags)))
1784-
return -EFAULT;
1797+
/*
1798+
* If the caller wanted a record count or there aren't any
1799+
* new records to return, we're done.
1800+
*/
1801+
if (head.fmh_count == 0 || xhead.fmh_entries == 0)
1802+
break;
1803+
1804+
/* Copy all the records we got out to userspace. */
1805+
if (copy_to_user(user_recs, recs,
1806+
xhead.fmh_entries * sizeof(struct fsmap))) {
1807+
error = -EFAULT;
1808+
goto out_free;
1809+
}
1810+
1811+
/* Remember the last record flags we copied to userspace. */
1812+
last_rec = &recs[xhead.fmh_entries - 1];
1813+
last_flags = last_rec->fmr_flags;
1814+
1815+
/* Set up the low key for the next iteration. */
1816+
xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
1817+
trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
1818+
} while (!done && head.fmh_entries < head.fmh_count);
1819+
1820+
/*
1821+
* If there are no more records in the query result set and we're not
1822+
* in counting mode, mark the last record returned with the LAST flag.
1823+
*/
1824+
if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
1825+
struct fsmap __user *user_rec;
1826+
1827+
last_flags |= FMR_OF_LAST;
1828+
user_rec = &arg->fmh_recs[head.fmh_entries - 1];
1829+
1830+
if (copy_to_user(&user_rec->fmr_flags, &last_flags,
1831+
sizeof(last_flags))) {
1832+
error = -EFAULT;
1833+
goto out_free;
1834+
}
17851835
}
17861836

17871837
/* copy back header */
1788-
head.fmh_entries = xhead.fmh_entries;
1789-
head.fmh_oflags = xhead.fmh_oflags;
1790-
if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
1791-
return -EFAULT;
1838+
if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {
1839+
error = -EFAULT;
1840+
goto out_free;
1841+
}
17921842

1793-
return 0;
1843+
out_free:
1844+
kmem_free(recs);
1845+
return error;
17941846
}
17951847

17961848
STATIC int

0 commit comments

Comments
 (0)