Skip to content

Commit f07288c

Browse files
ebiederm authored and gregkh committed
mnt: Make propagate_umount less slow for overlapping mount propagation trees
commit 296990deb389c7da21c78030376ba244dc1badf5 upstream. Andrei Vagin pointed out that time to executue propagate_umount can go non-linear (and take a ludicrious amount of time) when the mount propogation trees of the mounts to be unmunted by a lazy unmount overlap. Make the walk of the mount propagation trees nearly linear by remembering which mounts have already been visited, allowing subsequent walks to detect when walking a mount propgation tree or a subtree of a mount propgation tree would be duplicate work and to skip them entirely. Walk the list of mounts whose propgatation trees need to be traversed from the mount highest in the mount tree to mounts lower in the mount tree so that odds are higher that the code will walk the largest trees first, allowing later tree walks to be skipped entirely. Add cleanup_umount_visitation to remover the code's memory of which mounts have been visited. Add the functions last_slave and skip_propagation_subtree to allow skipping appropriate parts of the mount propagation tree without needing to change the logic of the rest of the code. 
A script to generate overlapping mount propagation trees: $ cat runs.h set -e mount -t tmpfs zdtm /mnt mkdir -p /mnt/1 /mnt/2 mount -t tmpfs zdtm /mnt/1 mount --make-shared /mnt/1 mkdir /mnt/1/1 iteration=10 if [ -n "$1" ] ; then iteration=$1 fi for i in $(seq $iteration); do mount --bind /mnt/1/1 /mnt/1/1 done mount --rbind /mnt/1 /mnt/2 TIMEFORMAT='%Rs' nr=$(( ( 2 ** ( $iteration + 1 ) ) + 1 )) echo -n "umount -l /mnt/1 -> $nr " time umount -l /mnt/1 nr=$(cat /proc/self/mountinfo | grep zdtm | wc -l ) time umount -l /mnt/2 $ for i in $(seq 9 19); do echo $i; unshare -Urm bash ./run.sh $i; done Here are the performance numbers with and without the patch: mhash | 8192 | 8192 | 1048576 | 1048576 mounts | before | after | before | after ------------------------------------------------ 1025 | 0.040s | 0.016s | 0.038s | 0.019s 2049 | 0.094s | 0.017s | 0.080s | 0.018s 4097 | 0.243s | 0.019s | 0.206s | 0.023s 8193 | 1.202s | 0.028s | 1.562s | 0.032s 16385 | 9.635s | 0.036s | 9.952s | 0.041s 32769 | 60.928s | 0.063s | 44.321s | 0.064s 65537 | | 0.097s | | 0.097s 131073 | | 0.233s | | 0.176s 262145 | | 0.653s | | 0.344s 524289 | | 2.305s | | 0.735s 1048577 | | 7.107s | | 2.603s Andrei Vagin reports fixing the performance problem is part of the work to fix CVE-2016-6213. Fixes: a05964f ("[PATCH] shared mounts handling: umount") Reported-by: Andrei Vagin <avagin@openvz.org> Reviewed-by: Andrei Vagin <avagin@virtuozzo.com> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent fdb8f10 commit f07288c

1 file changed

Lines changed: 62 additions & 1 deletion

File tree

fs/pnode.c

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ static inline struct mount *first_slave(struct mount *p)
2424
return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
2525
}
2626

27+
/*
 * Return the last mount on @p's list of slave mounts.
 * NOTE(review): like first_slave() above, this assumes the caller has
 * already checked that p->mnt_slave_list is non-empty — confirm at
 * call sites (skip_propagation_subtree() does check).
 */
static inline struct mount *last_slave(struct mount *p)
{
	return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
}
31+
2732
static inline struct mount *next_slave(struct mount *p)
2833
{
2934
return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
@@ -164,6 +169,19 @@ static struct mount *propagation_next(struct mount *m,
164169
}
165170
}
166171

172+
/*
 * Prune @m's propagation subtree from a walk driven by propagation_next():
 * returns a mount from which propagation_next() will continue without
 * descending into @m's slaves.
 *
 * @origin is unused in the body; presumably kept to mirror
 * propagation_next()'s signature.
 */
static struct mount *skip_propagation_subtree(struct mount *m,
						struct mount *origin)
{
	/*
	 * Advance m such that propagation_next will not return
	 * the slaves of m.
	 */
	if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
		m = last_slave(m);

	return m;
}
184+
167185
static struct mount *next_group(struct mount *m, struct mount *origin)
168186
{
169187
while (1) {
@@ -507,6 +525,15 @@ static void restore_mounts(struct list_head *to_restore)
507525
}
508526
}
509527

528+
/*
 * Drain the @visited list built up by propagate_umount(), returning each
 * mount's mnt_umounting entry to the empty (self-linked) state via
 * list_del_init() so the "already visited" test (!list_empty()) reads
 * false on the next umount propagation walk.
 */
static void cleanup_umount_visitations(struct list_head *visited)
{
	while (!list_empty(visited)) {
		struct mount *mnt =
			list_first_entry(visited, struct mount, mnt_umounting);
		list_del_init(&mnt->mnt_umounting);
	}
}
536+
510537
/*
511538
* collect all mounts that receive propagation from the mount in @list,
512539
* and return these additional mounts in the same list.
@@ -519,18 +546,51 @@ int propagate_umount(struct list_head *list)
519546
struct mount *mnt;
520547
LIST_HEAD(to_restore);
521548
LIST_HEAD(to_umount);
549+
LIST_HEAD(visited);
522550

523-
list_for_each_entry(mnt, list, mnt_list) {
551+
/* Find candidates for unmounting */
552+
list_for_each_entry_reverse(mnt, list, mnt_list) {
524553
struct mount *parent = mnt->mnt_parent;
525554
struct mount *m;
526555

556+
/*
557+
* If this mount has already been visited it is known that it's
558+
* entire peer group and all of their slaves in the propagation
559+
* tree for the mountpoint has already been visited and there is
560+
* no need to visit them again.
561+
*/
562+
if (!list_empty(&mnt->mnt_umounting))
563+
continue;
564+
565+
list_add_tail(&mnt->mnt_umounting, &visited);
527566
for (m = propagation_next(parent, parent); m;
528567
m = propagation_next(m, parent)) {
529568
struct mount *child = __lookup_mnt(&m->mnt,
530569
mnt->mnt_mountpoint);
531570
if (!child)
532571
continue;
533572

573+
if (!list_empty(&child->mnt_umounting)) {
574+
/*
575+
* If the child has already been visited it is
576+
* know that it's entire peer group and all of
577+
* their slaves in the propgation tree for the
578+
* mountpoint has already been visited and there
579+
* is no need to visit this subtree again.
580+
*/
581+
m = skip_propagation_subtree(m, parent);
582+
continue;
583+
} else if (child->mnt.mnt_flags & MNT_UMOUNT) {
584+
/*
585+
* We have come across a partially unmounted
586+
* mount in list that has not been visited yet.
587+
* Remember it has been visited and continue
588+
* about our merry way.
589+
*/
590+
list_add_tail(&child->mnt_umounting, &visited);
591+
continue;
592+
}
593+
534594
/* Check the child and parents while progress is made */
535595
while (__propagate_umount(child,
536596
&to_umount, &to_restore)) {
@@ -544,6 +604,7 @@ int propagate_umount(struct list_head *list)
544604

545605
umount_list(&to_umount, &to_restore);
546606
restore_mounts(&to_restore);
607+
cleanup_umount_visitations(&visited);
547608
list_splice_tail(&to_umount, list);
548609

549610
return 0;

0 commit comments

Comments
 (0)