sync mm-stable with mm-hotfixes-stable to pick up depended-upon upstream changes

This commit is contained in:
Andrew Morton 2023-04-16 12:31:58 -07:00
commit e492cd61b9
26 changed files with 474 additions and 255 deletions

View File

@ -232,6 +232,8 @@ Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
John Crispin <john@phrozen.org> <blogic@openwrt.org>
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
John Stultz <johnstul@us.ibm.com>
<jon.toppins+linux@gmail.com> <jtoppins@cumulusnetworks.com>
<jon.toppins+linux@gmail.com> <jtoppins@redhat.com>
Jordan Crouse <jordan@cosmicpenguin.net> <jcrouse@codeaurora.org>
<josh@joshtriplett.org> <josh@freedesktop.org>
<josh@joshtriplett.org> <josh@kernel.org>

View File

@ -39,13 +39,12 @@ With CONFIG_ZSMALLOC_STAT, we could see zsmalloc internal information via
# cat /sys/kernel/debug/zsmalloc/zram0/classes
class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage
class size 10% 20% 30% 40% 50% 60% 70% 80% 90% 99% 100% obj_allocated obj_used pages_used pages_per_zspage freeable
...
...
9 176 0 1 186 129 8 4
10 192 1 0 2880 2872 135 3
11 208 0 1 819 795 42 2
12 224 0 1 219 159 12 4
30 512 0 12 4 1 0 1 0 0 1 0 414 3464 3346 433 1 14
31 528 2 7 2 2 1 0 1 0 0 2 117 4154 3793 536 4 44
32 544 6 3 4 1 2 1 0 0 0 1 260 4170 3965 556 2 26
...
...
@ -54,10 +53,28 @@ class
index
size
object size zspage stores
almost_empty
the number of ZS_ALMOST_EMPTY zspages(see below)
almost_full
the number of ZS_ALMOST_FULL zspages(see below)
10%
the number of zspages with usage ratio less than 10% (see below)
20%
the number of zspages with usage ratio between 10% and 20%
30%
the number of zspages with usage ratio between 20% and 30%
40%
the number of zspages with usage ratio between 30% and 40%
50%
the number of zspages with usage ratio between 40% and 50%
60%
the number of zspages with usage ratio between 50% and 60%
70%
the number of zspages with usage ratio between 60% and 70%
80%
the number of zspages with usage ratio between 70% and 80%
90%
the number of zspages with usage ratio between 80% and 90%
99%
the number of zspages with usage ratio between 90% and 99%
100%
the number of zspages with usage ratio 100%
obj_allocated
the number of objects allocated
obj_used
@ -66,19 +83,14 @@ pages_used
the number of pages allocated for the class
pages_per_zspage
the number of 0-order pages to make a zspage
freeable
the approximate number of pages class compaction can free
We assign a zspage to ZS_ALMOST_EMPTY fullness group when n <= N / f, where
* n = number of allocated objects
* N = total number of objects zspage can store
* f = fullness_threshold_frac(ie, 4 at the moment)
Similarly, we assign zspage to:
* ZS_ALMOST_FULL when n > N / f
* ZS_EMPTY when n == 0
* ZS_FULL when n == N
Each zspage maintains inuse counter which keeps track of the number of
objects stored in the zspage. The inuse counter determines the zspage's
"fullness group" which is calculated as the ratio of the "inuse" objects to
the total number of objects the zspage can hold (objs_per_zspage). The
closer the inuse counter is to objs_per_zspage, the better.
Internals
=========
@ -94,10 +106,10 @@ of objects that each zspage can store.
For instance, consider the following size classes:::
class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable
class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable
...
94 1536 0 0 0 0 0 3 0
100 1632 0 0 0 0 0 2 0
94 1536 0 .... 0 0 0 0 3 0
100 1632 0 .... 0 0 0 0 2 0
...
@ -134,10 +146,11 @@ reduces memory wastage.
Let's take a closer look at the bottom of `/sys/kernel/debug/zsmalloc/zramX/classes`:::
class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable
class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable
...
202 3264 0 0 0 0 0 4 0
254 4096 0 0 0 0 0 1 0
202 3264 0 .. 0 0 0 0 4 0
254 4096 0 .. 0 0 0 0 1 0
...
Size class #202 stores objects of size 3264 bytes and has a maximum of 4 pages
@ -151,40 +164,42 @@ efficient storage of large objects.
For zspage chain size of 8, huge class watermark becomes 3632 bytes:::
class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable
class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable
...
202 3264 0 0 0 0 0 4 0
211 3408 0 0 0 0 0 5 0
217 3504 0 0 0 0 0 6 0
222 3584 0 0 0 0 0 7 0
225 3632 0 0 0 0 0 8 0
254 4096 0 0 0 0 0 1 0
202 3264 0 .. 0 0 0 0 4 0
211 3408 0 .. 0 0 0 0 5 0
217 3504 0 .. 0 0 0 0 6 0
222 3584 0 .. 0 0 0 0 7 0
225 3632 0 .. 0 0 0 0 8 0
254 4096 0 .. 0 0 0 0 1 0
...
For zspage chain size of 16, huge class watermark becomes 3840 bytes:::
class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable
class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable
...
202 3264 0 0 0 0 0 4 0
206 3328 0 0 0 0 0 13 0
207 3344 0 0 0 0 0 9 0
208 3360 0 0 0 0 0 14 0
211 3408 0 0 0 0 0 5 0
212 3424 0 0 0 0 0 16 0
214 3456 0 0 0 0 0 11 0
217 3504 0 0 0 0 0 6 0
219 3536 0 0 0 0 0 13 0
222 3584 0 0 0 0 0 7 0
223 3600 0 0 0 0 0 15 0
225 3632 0 0 0 0 0 8 0
228 3680 0 0 0 0 0 9 0
230 3712 0 0 0 0 0 10 0
232 3744 0 0 0 0 0 11 0
234 3776 0 0 0 0 0 12 0
235 3792 0 0 0 0 0 13 0
236 3808 0 0 0 0 0 14 0
238 3840 0 0 0 0 0 15 0
254 4096 0 0 0 0 0 1 0
202 3264 0 .. 0 0 0 0 4 0
206 3328 0 .. 0 0 0 0 13 0
207 3344 0 .. 0 0 0 0 9 0
208 3360 0 .. 0 0 0 0 14 0
211 3408 0 .. 0 0 0 0 5 0
212 3424 0 .. 0 0 0 0 16 0
214 3456 0 .. 0 0 0 0 11 0
217 3504 0 .. 0 0 0 0 6 0
219 3536 0 .. 0 0 0 0 13 0
222 3584 0 .. 0 0 0 0 7 0
223 3600 0 .. 0 0 0 0 15 0
225 3632 0 .. 0 0 0 0 8 0
228 3680 0 .. 0 0 0 0 9 0
230 3712 0 .. 0 0 0 0 10 0
232 3744 0 .. 0 0 0 0 11 0
234 3776 0 .. 0 0 0 0 12 0
235 3792 0 .. 0 0 0 0 13 0
236 3808 0 .. 0 0 0 0 14 0
238 3840 0 .. 0 0 0 0 15 0
254 4096 0 .. 0 0 0 0 1 0
...
Overall the combined zspage chain size effect on zsmalloc pool configuration:::
@ -214,9 +229,10 @@ zram as a build artifacts storage (Linux kernel compilation).
zsmalloc classes stats:::
class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable
class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable
...
Total 13 51 413836 412973 159955 3
Total 13 .. 51 413836 412973 159955 3
zram mm_stat:::
@ -227,9 +243,10 @@ zram as a build artifacts storage (Linux kernel compilation).
zsmalloc classes stats:::
class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage freeable
class size 10% .... 100% obj_allocated obj_used pages_used pages_per_zspage freeable
...
Total 18 87 414852 412978 156666 0
Total 18 .. 87 414852 412978 156666 0
zram mm_stat:::

View File

@ -781,6 +781,33 @@ out:
return ret;
}
static int __dax_clear_dirty_range(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
XA_STATE(xas, &mapping->i_pages, start);
unsigned int scanned = 0;
void *entry;
xas_lock_irq(&xas);
xas_for_each(&xas, entry, end) {
entry = get_unlocked_entry(&xas, 0);
xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
put_unlocked_entry(&xas, entry, WAKE_NEXT);
if (++scanned % XA_CHECK_SCHED)
continue;
xas_pause(&xas);
xas_unlock_irq(&xas);
cond_resched();
xas_lock_irq(&xas);
}
xas_unlock_irq(&xas);
return 0;
}
/*
* Delete DAX entry at @index from @mapping. Wait for it
* to be unlocked before deleting it.
@ -1440,6 +1467,16 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
* written by write(2) is visible in mmap.
*/
if (iomap->flags & IOMAP_F_NEW || cow) {
/*
* Filesystem allows CoW on non-shared extents. The src extents
* may have been mmapped with dirty mark before. To be able to
* invalidate its dax entries, we need to clear the dirty mark
* in advance.
*/
if (cow)
__dax_clear_dirty_range(iomi->inode->i_mapping,
pos >> PAGE_SHIFT,
(end - 1) >> PAGE_SHIFT);
invalidate_inode_pages2_range(iomi->inode->i_mapping,
pos >> PAGE_SHIFT,
(end - 1) >> PAGE_SHIFT);

View File

@ -978,6 +978,16 @@ restart:
continue;
}
/*
* If wb_tryget fails, the wb has been shutdown, skip it.
*
* Pin @wb so that it stays on @bdi->wb_list. This allows
* continuing iteration from @wb after dropping and
* regrabbing rcu read lock.
*/
if (!wb_tryget(wb))
continue;
/* alloc failed, execute synchronously using on-stack fallback */
work = &fallback_work;
*work = *base_work;
@ -986,13 +996,6 @@ restart:
work->done = &fallback_work_done;
wb_queue_work(wb, work);
/*
* Pin @wb so that it stays on @bdi->wb_list. This allows
* continuing iteration from @wb after dropping and
* regrabbing rcu read lock.
*/
wb_get(wb);
last_wb = wb;
rcu_read_unlock();

View File

@ -2219,6 +2219,7 @@ static int nilfs_btree_assign_p(struct nilfs_bmap *btree,
/* on-disk format */
binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
binfo->bi_dat.bi_level = level;
memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
return 0;
}

View File

@ -314,6 +314,7 @@ static int nilfs_direct_assign_p(struct nilfs_bmap *direct,
binfo->bi_dat.bi_blkoff = cpu_to_le64(key);
binfo->bi_dat.bi_level = 0;
memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad));
return 0;
}

View File

@ -2609,11 +2609,10 @@ static int nilfs_segctor_thread(void *arg)
goto loop;
end_thread:
spin_unlock(&sci->sc_state_lock);
/* end sync. */
sci->sc_task = NULL;
wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
spin_unlock(&sci->sc_state_lock);
return 0;
}

View File

@ -482,6 +482,7 @@ static void nilfs_put_super(struct super_block *sb)
up_write(&nilfs->ns_sem);
}
nilfs_sysfs_delete_device_group(nilfs);
iput(nilfs->ns_sufile);
iput(nilfs->ns_cpfile);
iput(nilfs->ns_dat);
@ -1105,6 +1106,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent)
nilfs_put_root(fsroot);
failed_unload:
nilfs_sysfs_delete_device_group(nilfs);
iput(nilfs->ns_sufile);
iput(nilfs->ns_cpfile);
iput(nilfs->ns_dat);

View File

@ -87,7 +87,6 @@ void destroy_nilfs(struct the_nilfs *nilfs)
{
might_sleep();
if (nilfs_init(nilfs)) {
nilfs_sysfs_delete_device_group(nilfs);
brelse(nilfs->ns_sbh[0]);
brelse(nilfs->ns_sbh[1]);
}
@ -305,6 +304,10 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
goto failed;
}
err = nilfs_sysfs_create_device_group(sb);
if (unlikely(err))
goto sysfs_error;
if (valid_fs)
goto skip_recovery;
@ -366,6 +369,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb)
goto failed;
failed_unload:
nilfs_sysfs_delete_device_group(nilfs);
sysfs_error:
iput(nilfs->ns_cpfile);
iput(nilfs->ns_sufile);
iput(nilfs->ns_dat);
@ -697,10 +703,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data)
if (err)
goto failed_sbh;
err = nilfs_sysfs_create_device_group(sb);
if (err)
goto failed_sbh;
set_nilfs_init(nilfs);
err = 0;
out:

View File

@ -1977,8 +1977,10 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
ret = -EFAULT;
if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api)))
goto out;
/* Ignore unsupported features (userspace built against newer kernel) */
features = uffdio_api.features & UFFD_API_FEATURES;
features = uffdio_api.features;
ret = -EINVAL;
if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES))
goto err_out;
ret = -EPERM;
if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))
goto err_out;

View File

@ -800,7 +800,8 @@ struct mm_struct {
unsigned long cpu_bitmap[];
};
#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN)
#define MM_MT_FLAGS (MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | \
MT_FLAGS_USE_RCU)
extern struct mm_struct init_mm;
/* Pointer magic because the dynamic array size confuses some compilers. */

View File

@ -683,6 +683,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
if (retval)
goto out;
mt_clear_in_rcu(vmi.mas.tree);
for_each_vma(old_vmi, mpnt) {
struct file *file;
@ -766,6 +767,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
retval = arch_dup_mmap(oldmm, mm);
loop_out:
vma_iter_free(&vmi);
if (!retval)
mt_set_in_rcu(vmi.mas.tree);
out:
mmap_write_unlock(mm);
flush_tlb_mm(oldmm);

View File

@ -185,7 +185,7 @@ static void mt_free_rcu(struct rcu_head *head)
*/
static void ma_free_rcu(struct maple_node *node)
{
node->parent = ma_parent_ptr(node);
WARN_ON(node->parent != ma_parent_ptr(node));
call_rcu(&node->rcu, mt_free_rcu);
}
@ -539,11 +539,14 @@ static inline struct maple_node *mte_parent(const struct maple_enode *enode)
*/
static inline bool ma_dead_node(const struct maple_node *node)
{
struct maple_node *parent = (void *)((unsigned long)
node->parent & ~MAPLE_NODE_MASK);
struct maple_node *parent;
/* Do not reorder reads from the node prior to the parent check */
smp_rmb();
parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK);
return (parent == node);
}
/*
* mte_dead_node() - check if the @enode is dead.
* @enode: The encoded maple node
@ -555,6 +558,8 @@ static inline bool mte_dead_node(const struct maple_enode *enode)
struct maple_node *parent, *node;
node = mte_to_node(enode);
/* Do not reorder reads from the node prior to the parent check */
smp_rmb();
parent = mte_parent(enode);
return (parent == node);
}
@ -625,6 +630,8 @@ static inline unsigned int mas_alloc_req(const struct ma_state *mas)
* @node - the maple node
* @type - the node type
*
* In the event of a dead node, this array may be %NULL
*
* Return: A pointer to the maple node pivots
*/
static inline unsigned long *ma_pivots(struct maple_node *node,
@ -817,6 +824,11 @@ static inline void *mt_slot(const struct maple_tree *mt,
return rcu_dereference_check(slots[offset], mt_locked(mt));
}
static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots,
unsigned char offset)
{
return rcu_dereference_protected(slots[offset], mt_locked(mt));
}
/*
* mas_slot_locked() - Get the slot value when holding the maple tree lock.
* @mas: The maple state
@ -828,7 +840,7 @@ static inline void *mt_slot(const struct maple_tree *mt,
static inline void *mas_slot_locked(struct ma_state *mas, void __rcu **slots,
unsigned char offset)
{
return rcu_dereference_protected(slots[offset], mt_locked(mas->tree));
return mt_slot_locked(mas->tree, slots, offset);
}
/*
@ -899,6 +911,45 @@ static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
meta->end = end;
}
/*
* mt_clear_meta() - clear the metadata information of a node, if it exists
* @mt: The maple tree
* @mn: The maple node
* @type: The maple node type
* @offset: The offset of the highest sub-gap in this node.
* @end: The end of the data in this node.
*/
static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
enum maple_type type)
{
struct maple_metadata *meta;
unsigned long *pivots;
void __rcu **slots;
void *next;
switch (type) {
case maple_range_64:
pivots = mn->mr64.pivot;
if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
slots = mn->mr64.slot;
next = mt_slot_locked(mt, slots,
MAPLE_RANGE64_SLOTS - 1);
if (unlikely((mte_to_node(next) &&
mte_node_type(next))))
return; /* no metadata, could be node */
}
fallthrough;
case maple_arange_64:
meta = ma_meta(mn, type);
break;
default:
return;
}
meta->gap = 0;
meta->end = 0;
}
/*
* ma_meta_end() - Get the data end of a node from the metadata
* @mn: The maple node
@ -1096,8 +1147,11 @@ static int mas_ascend(struct ma_state *mas)
a_type = mas_parent_enum(mas, p_enode);
a_node = mte_parent(p_enode);
a_slot = mte_parent_slot(p_enode);
pivots = ma_pivots(a_node, a_type);
a_enode = mt_mk_node(a_node, a_type);
pivots = ma_pivots(a_node, a_type);
if (unlikely(ma_dead_node(a_node)))
return 1;
if (!set_min && a_slot) {
set_min = true;
@ -1249,26 +1303,21 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
node = mas->alloc;
node->request_count = 0;
while (requested) {
max_req = MAPLE_ALLOC_SLOTS;
if (node->node_count) {
unsigned int offset = node->node_count;
slots = (void **)&node->slot[offset];
max_req -= offset;
} else {
slots = (void **)&node->slot;
}
max_req = MAPLE_ALLOC_SLOTS - node->node_count;
slots = (void **)&node->slot[node->node_count];
max_req = min(requested, max_req);
count = mt_alloc_bulk(gfp, max_req, slots);
if (!count)
goto nomem_bulk;
if (node->node_count == 0) {
node->slot[0]->node_count = 0;
node->slot[0]->request_count = 0;
}
node->node_count += count;
allocated += count;
node = node->slot[0];
node->node_count = 0;
node->request_count = 0;
requested -= count;
}
mas->alloc->total = allocated;
@ -1354,12 +1403,16 @@ static inline struct maple_enode *mas_start(struct ma_state *mas)
mas->max = ULONG_MAX;
mas->depth = 0;
retry:
root = mas_root(mas);
/* Tree with nodes */
if (likely(xa_is_node(root))) {
mas->depth = 1;
mas->node = mte_safe_root(root);
mas->offset = 0;
if (mte_dead_node(mas->node))
goto retry;
return NULL;
}
@ -1401,6 +1454,9 @@ static inline unsigned char ma_data_end(struct maple_node *node,
{
unsigned char offset;
if (!pivots)
return 0;
if (type == maple_arange_64)
return ma_meta_end(node, type);
@ -1436,6 +1492,9 @@ static inline unsigned char mas_data_end(struct ma_state *mas)
return ma_meta_end(node, type);
pivots = ma_pivots(node, type);
if (unlikely(ma_dead_node(node)))
return 0;
offset = mt_pivots[type] - 1;
if (likely(!pivots[offset]))
return ma_meta_end(node, type);
@ -1724,9 +1783,11 @@ static inline void mas_replace(struct ma_state *mas, bool advanced)
rcu_assign_pointer(slots[offset], mas->node);
}
if (!advanced)
if (!advanced) {
mte_set_node_dead(old_enode);
mas_free(mas, old_enode);
}
}
/*
* mas_new_child() - Find the new child of a node.
@ -3659,10 +3720,9 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry)
slot++;
mas->depth = 1;
mas_set_height(mas);
ma_set_meta(node, maple_leaf_64, 0, slot);
/* swap the new root into the tree */
rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
ma_set_meta(node, maple_leaf_64, 0, slot);
return slot;
}
@ -3875,18 +3935,13 @@ static inline void *mtree_lookup_walk(struct ma_state *mas)
end = ma_data_end(node, type, pivots, max);
if (unlikely(ma_dead_node(node)))
goto dead_node;
if (pivots[offset] >= mas->index)
goto next;
do {
offset++;
} while ((offset < end) && (pivots[offset] < mas->index));
if (likely(offset > end))
if (pivots[offset] >= mas->index) {
max = pivots[offset];
break;
}
} while (++offset < end);
next:
slots = ma_slots(node, type);
next = mt_slot(mas->tree, slots, offset);
if (unlikely(ma_dead_node(node)))
@ -4164,6 +4219,7 @@ static inline bool mas_wr_node_store(struct ma_wr_state *wr_mas)
done:
mas_leaf_set_meta(mas, newnode, dst_pivots, maple_leaf_64, new_end);
if (in_rcu) {
mte_set_node_dead(mas->node);
mas->node = mt_mk_node(newnode, wr_mas->type);
mas_replace(mas, false);
} else {
@ -4505,6 +4561,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
node = mas_mn(mas);
slots = ma_slots(node, mt);
pivots = ma_pivots(node, mt);
if (unlikely(ma_dead_node(node)))
return 1;
mas->max = pivots[offset];
if (offset)
mas->min = pivots[offset - 1] + 1;
@ -4526,6 +4585,9 @@ static inline int mas_prev_node(struct ma_state *mas, unsigned long min)
slots = ma_slots(node, mt);
pivots = ma_pivots(node, mt);
offset = ma_data_end(node, mt, pivots, mas->max);
if (unlikely(ma_dead_node(node)))
return 1;
if (offset)
mas->min = pivots[offset - 1] + 1;
@ -4574,6 +4636,7 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
struct maple_enode *enode;
int level = 0;
unsigned char offset;
unsigned char node_end;
enum maple_type mt;
void __rcu **slots;
@ -4597,7 +4660,11 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
node = mas_mn(mas);
mt = mte_node_type(mas->node);
pivots = ma_pivots(node, mt);
} while (unlikely(offset == ma_data_end(node, mt, pivots, mas->max)));
node_end = ma_data_end(node, mt, pivots, mas->max);
if (unlikely(ma_dead_node(node)))
return 1;
} while (unlikely(offset == node_end));
slots = ma_slots(node, mt);
pivot = mas_safe_pivot(mas, pivots, ++offset, mt);
@ -4613,6 +4680,9 @@ static inline int mas_next_node(struct ma_state *mas, struct maple_node *node,
mt = mte_node_type(mas->node);
slots = ma_slots(node, mt);
pivots = ma_pivots(node, mt);
if (unlikely(ma_dead_node(node)))
return 1;
offset = 0;
pivot = pivots[0];
}
@ -4659,11 +4729,14 @@ static inline void *mas_next_nentry(struct ma_state *mas,
return NULL;
}
pivots = ma_pivots(node, type);
slots = ma_slots(node, type);
mas->index = mas_safe_min(mas, pivots, mas->offset);
pivots = ma_pivots(node, type);
count = ma_data_end(node, type, pivots, mas->max);
if (ma_dead_node(node))
if (unlikely(ma_dead_node(node)))
return NULL;
mas->index = mas_safe_min(mas, pivots, mas->offset);
if (unlikely(ma_dead_node(node)))
return NULL;
if (mas->index > max)
@ -4817,6 +4890,11 @@ retry:
slots = ma_slots(mn, mt);
pivots = ma_pivots(mn, mt);
if (unlikely(ma_dead_node(mn))) {
mas_rewalk(mas, index);
goto retry;
}
if (offset == mt_pivots[mt])
pivot = mas->max;
else
@ -5400,24 +5478,26 @@ no_gap:
}
/*
* mas_dead_leaves() - Mark all leaves of a node as dead.
* mte_dead_leaves() - Mark all leaves of a node as dead.
* @mas: The maple state
* @slots: Pointer to the slot array
* @type: The maple node type
*
* Must hold the write lock.
*
* Return: The number of leaves marked as dead.
*/
static inline
unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
unsigned char mte_dead_leaves(struct maple_enode *enode, struct maple_tree *mt,
void __rcu **slots)
{
struct maple_node *node;
enum maple_type type;
void *entry;
int offset;
for (offset = 0; offset < mt_slot_count(mas->node); offset++) {
entry = mas_slot_locked(mas, slots, offset);
for (offset = 0; offset < mt_slot_count(enode); offset++) {
entry = mt_slot(mt, slots, offset);
type = mte_node_type(entry);
node = mte_to_node(entry);
/* Use both node and type to catch LE & BE metadata */
@ -5425,7 +5505,6 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
break;
mte_set_node_dead(entry);
smp_wmb(); /* Needed for RCU */
node->type = type;
rcu_assign_pointer(slots[offset], node);
}
@ -5433,151 +5512,160 @@ unsigned char mas_dead_leaves(struct ma_state *mas, void __rcu **slots)
return offset;
}
static void __rcu **mas_dead_walk(struct ma_state *mas, unsigned char offset)
/**
* mte_dead_walk() - Walk down a dead tree to just before the leaves
* @enode: The maple encoded node
* @offset: The starting offset
*
* Note: This can only be used from the RCU callback context.
*/
static void __rcu **mte_dead_walk(struct maple_enode **enode, unsigned char offset)
{
struct maple_node *node, *next;
void __rcu **slots = NULL;
next = mas_mn(mas);
next = mte_to_node(*enode);
do {
mas->node = ma_enode_ptr(next);
node = mas_mn(mas);
*enode = ma_enode_ptr(next);
node = mte_to_node(*enode);
slots = ma_slots(node, node->type);
next = mas_slot_locked(mas, slots, offset);
next = rcu_dereference_protected(slots[offset],
lock_is_held(&rcu_callback_map));
offset = 0;
} while (!ma_is_leaf(next->type));
return slots;
}
/**
* mt_free_walk() - Walk & free a tree in the RCU callback context
* @head: The RCU head that's within the node.
*
* Note: This can only be used from the RCU callback context.
*/
static void mt_free_walk(struct rcu_head *head)
{
void __rcu **slots;
struct maple_node *node, *start;
struct maple_tree mt;
struct maple_enode *enode;
unsigned char offset;
enum maple_type type;
MA_STATE(mas, &mt, 0, 0);
node = container_of(head, struct maple_node, rcu);
if (ma_is_leaf(node->type))
goto free_leaf;
mt_init_flags(&mt, node->ma_flags);
mas_lock(&mas);
start = node;
mas.node = mt_mk_node(node, node->type);
slots = mas_dead_walk(&mas, 0);
node = mas_mn(&mas);
enode = mt_mk_node(node, node->type);
slots = mte_dead_walk(&enode, 0);
node = mte_to_node(enode);
do {
mt_free_bulk(node->slot_len, slots);
offset = node->parent_slot + 1;
mas.node = node->piv_parent;
if (mas_mn(&mas) == node)
goto start_slots_free;
enode = node->piv_parent;
if (mte_to_node(enode) == node)
goto free_leaf;
type = mte_node_type(mas.node);
slots = ma_slots(mte_to_node(mas.node), type);
if ((offset < mt_slots[type]) && (slots[offset]))
slots = mas_dead_walk(&mas, offset);
node = mas_mn(&mas);
type = mte_node_type(enode);
slots = ma_slots(mte_to_node(enode), type);
if ((offset < mt_slots[type]) &&
rcu_dereference_protected(slots[offset],
lock_is_held(&rcu_callback_map)))
slots = mte_dead_walk(&enode, offset);
node = mte_to_node(enode);
} while ((node != start) || (node->slot_len < offset));
slots = ma_slots(node, node->type);
mt_free_bulk(node->slot_len, slots);
start_slots_free:
mas_unlock(&mas);
free_leaf:
mt_free_rcu(&node->rcu);
}
static inline void __rcu **mas_destroy_descend(struct ma_state *mas,
struct maple_enode *prev, unsigned char offset)
static inline void __rcu **mte_destroy_descend(struct maple_enode **enode,
struct maple_tree *mt, struct maple_enode *prev, unsigned char offset)
{
struct maple_node *node;
struct maple_enode *next = mas->node;
struct maple_enode *next = *enode;
void __rcu **slots = NULL;
enum maple_type type;
unsigned char next_offset = 0;
do {
mas->node = next;
node = mas_mn(mas);
slots = ma_slots(node, mte_node_type(mas->node));
next = mas_slot_locked(mas, slots, 0);
*enode = next;
node = mte_to_node(*enode);
type = mte_node_type(*enode);
slots = ma_slots(node, type);
next = mt_slot_locked(mt, slots, next_offset);
if ((mte_dead_node(next)))
next = mas_slot_locked(mas, slots, 1);
next = mt_slot_locked(mt, slots, ++next_offset);
mte_set_node_dead(mas->node);
node->type = mte_node_type(mas->node);
mte_set_node_dead(*enode);
node->type = type;
node->piv_parent = prev;
node->parent_slot = offset;
offset = 0;
prev = mas->node;
offset = next_offset;
next_offset = 0;
prev = *enode;
} while (!mte_is_leaf(next));
return slots;
}
static void mt_destroy_walk(struct maple_enode *enode, unsigned char ma_flags,
static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
bool free)
{
void __rcu **slots;
struct maple_node *node = mte_to_node(enode);
struct maple_enode *start;
struct maple_tree mt;
MA_STATE(mas, &mt, 0, 0);
if (mte_is_leaf(enode))
if (mte_is_leaf(enode)) {
node->type = mte_node_type(enode);
goto free_leaf;
}
mt_init_flags(&mt, ma_flags);
mas_lock(&mas);
mas.node = start = enode;
slots = mas_destroy_descend(&mas, start, 0);
node = mas_mn(&mas);
start = enode;
slots = mte_destroy_descend(&enode, mt, start, 0);
node = mte_to_node(enode); // Updated in the above call.
do {
enum maple_type type;
unsigned char offset;
struct maple_enode *parent, *tmp;
node->slot_len = mas_dead_leaves(&mas, slots);
node->slot_len = mte_dead_leaves(enode, mt, slots);
if (free)
mt_free_bulk(node->slot_len, slots);
offset = node->parent_slot + 1;
mas.node = node->piv_parent;
if (mas_mn(&mas) == node)
goto start_slots_free;
enode = node->piv_parent;
if (mte_to_node(enode) == node)
goto free_leaf;
type = mte_node_type(mas.node);
slots = ma_slots(mte_to_node(mas.node), type);
type = mte_node_type(enode);
slots = ma_slots(mte_to_node(enode), type);
if (offset >= mt_slots[type])
goto next;
tmp = mas_slot_locked(&mas, slots, offset);
tmp = mt_slot_locked(mt, slots, offset);
if (mte_node_type(tmp) && mte_to_node(tmp)) {
parent = mas.node;
mas.node = tmp;
slots = mas_destroy_descend(&mas, parent, offset);
parent = enode;
enode = tmp;
slots = mte_destroy_descend(&enode, mt, parent, offset);
}
next:
node = mas_mn(&mas);
} while (start != mas.node);
node = mte_to_node(enode);
} while (start != enode);
node = mas_mn(&mas);
node->slot_len = mas_dead_leaves(&mas, slots);
node = mte_to_node(enode);
node->slot_len = mte_dead_leaves(enode, mt, slots);
if (free)
mt_free_bulk(node->slot_len, slots);
start_slots_free:
mas_unlock(&mas);
free_leaf:
if (free)
mt_free_rcu(&node->rcu);
else
mt_clear_meta(mt, node, node->type);
}
/*
@ -5593,10 +5681,10 @@ static inline void mte_destroy_walk(struct maple_enode *enode,
struct maple_node *node = mte_to_node(enode);
if (mt_in_rcu(mt)) {
mt_destroy_walk(enode, mt->ma_flags, false);
mt_destroy_walk(enode, mt, false);
call_rcu(&node->rcu, mt_free_walk);
} else {
mt_destroy_walk(enode, mt->ma_flags, true);
mt_destroy_walk(enode, mt, true);
}
}
@ -6618,11 +6706,11 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
while (likely(!ma_is_leaf(mt))) {
MT_BUG_ON(mas->tree, mte_dead_node(mas->node));
slots = ma_slots(mn, mt);
pivots = ma_pivots(mn, mt);
max = pivots[0];
entry = mas_slot(mas, slots, 0);
pivots = ma_pivots(mn, mt);
if (unlikely(ma_dead_node(mn)))
return NULL;
max = pivots[0];
mas->node = entry;
mn = mas_mn(mas);
mt = mte_node_type(mas->node);
@ -6642,13 +6730,13 @@ static inline void *mas_first_entry(struct ma_state *mas, struct maple_node *mn,
if (likely(entry))
return entry;
pivots = ma_pivots(mn, mt);
mas->index = pivots[0] + 1;
mas->offset = 1;
entry = mas_slot(mas, slots, 1);
pivots = ma_pivots(mn, mt);
if (unlikely(ma_dead_node(mn)))
return NULL;
mas->index = pivots[0] + 1;
if (mas->index > limit)
goto none;

View File

@ -507,6 +507,15 @@ static LIST_HEAD(offline_cgwbs);
static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
static void cgwb_free_rcu(struct rcu_head *rcu_head)
{
struct bdi_writeback *wb = container_of(rcu_head,
struct bdi_writeback, rcu);
percpu_ref_exit(&wb->refcnt);
kfree(wb);
}
static void cgwb_release_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
@ -529,11 +538,10 @@ static void cgwb_release_workfn(struct work_struct *work)
list_del(&wb->offline_node);
spin_unlock_irq(&cgwb_lock);
percpu_ref_exit(&wb->refcnt);
wb_exit(wb);
bdi_put(bdi);
WARN_ON_ONCE(!list_empty(&wb->b_attached));
kfree_rcu(wb, rcu);
call_rcu(&wb->rcu, cgwb_free_rcu);
}
static void cgwb_release(struct percpu_ref *refcnt)

View File

@ -1830,10 +1830,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
if (is_swap_pmd(*pmd)) {
swp_entry_t entry = pmd_to_swp_entry(*pmd);
struct page *page = pfn_swap_entry_to_page(entry);
pmd_t newpmd;
VM_BUG_ON(!is_pmd_migration_entry(*pmd));
if (is_writable_migration_entry(entry)) {
pmd_t newpmd;
/*
* A protection check is difficult so
* just be safe and disable write
@ -1847,8 +1847,16 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
newpmd = pmd_swp_mksoft_dirty(newpmd);
if (pmd_swp_uffd_wp(*pmd))
newpmd = pmd_swp_mkuffd_wp(newpmd);
set_pmd_at(mm, addr, pmd, newpmd);
} else {
newpmd = *pmd;
}
if (uffd_wp)
newpmd = pmd_swp_mkuffd_wp(newpmd);
else if (uffd_wp_resolve)
newpmd = pmd_swp_clear_uffd_wp(newpmd);
if (!pmd_same(*pmd, newpmd))
set_pmd_at(mm, addr, pmd, newpmd);
goto unlock;
}
#endif
@ -2649,9 +2657,10 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
is_hzp = is_huge_zero_page(&folio->page);
VM_WARN_ON_ONCE_FOLIO(is_hzp, folio);
if (is_hzp)
if (is_hzp) {
pr_warn_ratelimited("Called split_huge_page for huge zero page\n");
return -EBUSY;
}
if (folio_test_writeback(folio))
return -EBUSY;
@ -3242,6 +3251,8 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
pmdswp = swp_entry_to_pmd(entry);
if (pmd_soft_dirty(pmdval))
pmdswp = pmd_swp_mksoft_dirty(pmdswp);
if (pmd_uffd_wp(pmdval))
pmdswp = pmd_swp_mkuffd_wp(pmdswp);
set_pmd_at(mm, address, pvmw->pmd, pmdswp);
page_remove_rmap(page, vma, true);
put_page(page);

View File

@ -5478,7 +5478,7 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
struct folio *pagecache_folio, spinlock_t *ptl)
{
const bool unshare = flags & FAULT_FLAG_UNSHARE;
pte_t pte;
pte_t pte = huge_ptep_get(ptep);
struct hstate *h = hstate_vma(vma);
struct page *old_page;
struct folio *new_folio;
@ -5487,6 +5487,17 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long haddr = address & huge_page_mask(h);
struct mmu_notifier_range range;
/*
* Never handle CoW for uffd-wp protected pages. It should be only
* handled when the uffd-wp protection is removed.
*
* Note that only the CoW optimization path (in hugetlb_no_page())
* can trigger this, because hugetlb_fault() will always resolve
* uffd-wp bit first.
*/
if (!unshare && huge_pte_uffd_wp(pte))
return 0;
/*
* hugetlb does not support FOLL_FORCE-style write faults that keep the
* PTE mapped R/O such as maybe_mkwrite() would do.
@ -5500,7 +5511,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
return 0;
}
pte = huge_ptep_get(ptep);
old_page = pte_page(pte);
delayacct_wpcopy_start();

View File

@ -572,6 +572,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
result = SCAN_PTE_NON_PRESENT;
goto out;
}
if (pte_uffd_wp(pteval)) {
result = SCAN_PTE_UFFD_WP;
goto out;
}
page = vm_normal_page(vma, address, pteval);
if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
result = SCAN_PAGE_NULL;

View File

@ -3569,8 +3569,21 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
struct mmu_notifier_range range;
if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags))
/*
* We need a reference to lock the folio because we don't hold
* the PTL so a racing thread can remove the device-exclusive
* entry and unmap it. If the folio is free the entry must
* have been removed already. If it happens to have already
* been re-allocated after being freed all we do is lock and
* unlock it.
*/
if (!folio_try_get(folio))
return 0;
if (!folio_lock_or_retry(folio, vma->vm_mm, vmf->flags)) {
folio_put(folio);
return VM_FAULT_RETRY;
}
mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0,
vma->vm_mm, vmf->address & PAGE_MASK,
(vmf->address & PAGE_MASK) + PAGE_SIZE, NULL);
@ -3583,6 +3596,7 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
folio_unlock(folio);
folio_put(folio);
mmu_notifier_invalidate_range_end(&range);
return 0;

View File

@ -790,63 +790,52 @@ static int vma_replace_policy(struct vm_area_struct *vma,
return err;
}
/* Step 2: apply policy to a range and do splits. */
static int mbind_range(struct mm_struct *mm, unsigned long start,
/* Split or merge the VMA (if required) and apply the new policy */
static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct vm_area_struct **prev, unsigned long start,
unsigned long end, struct mempolicy *new_pol)
{
VMA_ITERATOR(vmi, mm, start);
struct vm_area_struct *prev;
struct vm_area_struct *vma;
int err = 0;
struct vm_area_struct *merged;
unsigned long vmstart, vmend;
pgoff_t pgoff;
int err;
prev = vma_prev(&vmi);
vma = vma_find(&vmi, end);
if (WARN_ON(!vma))
return 0;
if (start > vma->vm_start)
prev = vma;
do {
unsigned long vmstart = max(start, vma->vm_start);
unsigned long vmend = min(end, vma->vm_end);
vmend = min(end, vma->vm_end);
if (start > vma->vm_start) {
*prev = vma;
vmstart = start;
} else {
vmstart = vma->vm_start;
}
if (mpol_equal(vma_policy(vma), new_pol))
goto next;
return 0;
pgoff = vma->vm_pgoff +
((vmstart - vma->vm_start) >> PAGE_SHIFT);
prev = vma_merge(&vmi, mm, prev, vmstart, vmend, vma->vm_flags,
vma->anon_vma, vma->vm_file, pgoff,
new_pol, vma->vm_userfaultfd_ctx,
anon_vma_name(vma));
if (prev) {
vma = prev;
goto replace;
pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT);
merged = vma_merge(vmi, vma->vm_mm, *prev, vmstart, vmend, vma->vm_flags,
vma->anon_vma, vma->vm_file, pgoff, new_pol,
vma->vm_userfaultfd_ctx, anon_vma_name(vma));
if (merged) {
*prev = merged;
return vma_replace_policy(merged, new_pol);
}
if (vma->vm_start != vmstart) {
err = split_vma(&vmi, vma, vmstart, 1);
err = split_vma(vmi, vma, vmstart, 1);
if (err)
goto out;
}
if (vma->vm_end != vmend) {
err = split_vma(&vmi, vma, vmend, 0);
if (err)
goto out;
}
replace:
err = vma_replace_policy(vma, new_pol);
if (err)
goto out;
next:
prev = vma;
} for_each_vma_range(vmi, vma, end);
out:
return err;
}
if (vma->vm_end != vmend) {
err = split_vma(vmi, vma, vmend, 0);
if (err)
return err;
}
*prev = vma;
return vma_replace_policy(vma, new_pol);
}
/* Set the process memory policy */
static long do_set_mempolicy(unsigned short mode, unsigned short flags,
nodemask_t *nodes)
@ -1259,6 +1248,8 @@ static long do_mbind(unsigned long start, unsigned long len,
nodemask_t *nmask, unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
struct vma_iterator vmi;
struct mempolicy *new;
unsigned long end;
int err;
@ -1328,7 +1319,13 @@ static long do_mbind(unsigned long start, unsigned long len,
goto up_out;
}
err = mbind_range(mm, start, end, new);
vma_iter_init(&vmi, mm, start);
prev = vma_prev(&vmi);
for_each_vma_range(vmi, vma, end) {
err = mbind_range(&vmi, vma, &prev, start, end, new);
if (err)
break;
}
if (!err) {
int nr_failed = 0;
@ -1489,10 +1486,8 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
unsigned long, home_node, unsigned long, flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_area_struct *vma, *prev;
struct mempolicy *new, *old;
unsigned long vmstart;
unsigned long vmend;
unsigned long end;
int err = -ENOENT;
VMA_ITERATOR(vmi, mm, start);
@ -1521,6 +1516,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
if (end == start)
return 0;
mmap_write_lock(mm);
prev = vma_prev(&vmi);
for_each_vma_range(vmi, vma, end) {
/*
* If any vma in the range got policy other than MPOL_BIND
@ -1541,9 +1537,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
}
new->home_node = home_node;
vmstart = max(start, vma->vm_start);
vmend = min(end, vma->vm_end);
err = mbind_range(mm, vmstart, vmend, new);
err = mbind_range(&vmi, vma, &prev, start, end, new);
mpol_put(new);
if (err)
break;

View File

@ -2309,7 +2309,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
int count = 0;
int error = -ENOMEM;
MA_STATE(mas_detach, &mt_detach, 0, 0);
mt_init_flags(&mt_detach, MT_FLAGS_LOCK_EXTERN);
mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
mt_set_external_lock(&mt_detach, &mm->mmap_lock);
/*
@ -3069,6 +3069,7 @@ void exit_mmap(struct mm_struct *mm)
*/
set_bit(MMF_OOM_SKIP, &mm->flags);
mmap_write_lock(mm);
mt_clear_in_rcu(&mm->mm_mt);
free_pgtables(&tlb, &mm->mm_mt, vma, FIRST_USER_ADDRESS,
USER_PGTABLES_CEILING, true);
tlb_finish_mmu(&tlb);

View File

@ -869,7 +869,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
}
tlb_finish_mmu(&tlb);
if (vma_iter_end(&vmi) < end)
if (!error && vma_iter_end(&vmi) < end)
error = -ENOMEM;
out:

View File

@ -222,7 +222,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
if (lruvec)
unlock_page_lruvec_irqrestore(lruvec, flags);
folios_put(fbatch->folios, folio_batch_count(fbatch));
folio_batch_init(fbatch);
folio_batch_reinit(fbatch);
}
static void folio_batch_add_and_move(struct folio_batch *fbatch,

View File

@ -679,6 +679,7 @@ static void __del_from_avail_list(struct swap_info_struct *p)
{
int nid;
assert_spin_locked(&p->lock);
for_each_node(nid)
plist_del(&p->avail_lists[nid], &swap_avail_heads[nid]);
}
@ -2434,8 +2435,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
spin_unlock(&swap_lock);
goto out_dput;
}
del_from_avail_list(p);
spin_lock(&p->lock);
del_from_avail_list(p);
if (p->prio < 0) {
struct swap_info_struct *si = p;
int nid;

View File

@ -3042,6 +3042,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
* allocation request, free them via vfree() if any.
*/
if (area->nr_pages != nr_small_pages) {
/* vm_area_alloc_pages() can also fail due to a fatal signal */
if (!fatal_signal_pending(current))
warn_alloc(gfp_mask, NULL,
"vmalloc error: size %lu, page order %u, failed to allocate pages",
area->nr_pages * PAGE_SIZE, page_order);

View File

@ -857,7 +857,7 @@ int main(int argc, char **argv)
if (cull & CULL_PID || filter & FILTER_PID)
fprintf(fout, ", PID %d", list[i].pid);
if (cull & CULL_TGID || filter & FILTER_TGID)
fprintf(fout, ", TGID %d", list[i].pid);
fprintf(fout, ", TGID %d", list[i].tgid);
if (cull & CULL_COMM || filter & FILTER_COMM)
fprintf(fout, ", task_comm_name: %s", list[i].comm);
if (cull & CULL_ALLOCATOR) {

View File

@ -108,6 +108,7 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, mn->slot[1] != NULL);
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mas.node = MAS_START;
mas_nomem(&mas, GFP_KERNEL);
@ -160,6 +161,7 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, mas_allocated(&mas) != i);
MT_BUG_ON(mt, !mn);
MT_BUG_ON(mt, not_empty(mn));
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
@ -192,6 +194,7 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != i - 1);
MT_BUG_ON(mt, !mn);
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
@ -210,6 +213,7 @@ static noinline void check_new_node(struct maple_tree *mt)
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
MT_BUG_ON(mt, mas_allocated(&mas) != j - 1);
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@ -233,6 +237,7 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, mas_allocated(&mas) != i - j);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
MT_BUG_ON(mt, mas_allocated(&mas) != i - j - 1);
}
@ -269,6 +274,7 @@ static noinline void check_new_node(struct maple_tree *mt)
mn = mas_pop_node(&mas); /* get the next node. */
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, not_empty(mn));
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@ -294,6 +300,7 @@ static noinline void check_new_node(struct maple_tree *mt)
mn = mas_pop_node(&mas2); /* get the next node. */
MT_BUG_ON(mt, mn == NULL);
MT_BUG_ON(mt, not_empty(mn));
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
MT_BUG_ON(mt, mas_allocated(&mas2) != 0);
@ -334,10 +341,12 @@ static noinline void check_new_node(struct maple_tree *mt)
MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 2);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
for (i = 1; i <= MAPLE_ALLOC_SLOTS + 1; i++) {
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, not_empty(mn));
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
}
MT_BUG_ON(mt, mas_allocated(&mas) != 0);
@ -375,6 +384,7 @@ static noinline void check_new_node(struct maple_tree *mt)
mas_node_count(&mas, i); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mn = mas_pop_node(&mas); /* get the next node. */
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mas_destroy(&mas);
@ -382,10 +392,13 @@ static noinline void check_new_node(struct maple_tree *mt)
mas_node_count(&mas, i); /* Request */
mas_nomem(&mas, GFP_KERNEL); /* Fill request */
mn = mas_pop_node(&mas); /* get the next node. */
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mn = mas_pop_node(&mas); /* get the next node. */
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mn = mas_pop_node(&mas); /* get the next node. */
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
mas_destroy(&mas);
}
@ -35369,6 +35382,7 @@ static noinline void check_prealloc(struct maple_tree *mt)
MT_BUG_ON(mt, allocated != 1 + height * 3);
mn = mas_pop_node(&mas);
MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1);
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
mas_destroy(&mas);
@ -35386,6 +35400,7 @@ static noinline void check_prealloc(struct maple_tree *mt)
mas_destroy(&mas);
allocated = mas_allocated(&mas);
MT_BUG_ON(mt, allocated != 0);
mn->parent = ma_parent_ptr(mn);
ma_free_rcu(mn);
MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0);
@ -35756,6 +35771,7 @@ void farmer_tests(void)
tree.ma_root = mt_mk_node(node, maple_leaf_64);
mt_dump(&tree);
node->parent = ma_parent_ptr(node);
ma_free_rcu(node);
/* Check things that will make lockdep angry */