21 hotfixes. 12 are cc:stable and the remainder pertain to post-6.7
issues or aren't considered to be needed in earlier kernel versions.

-----BEGIN PGP SIGNATURE-----

iHUEABYIAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCZcfLvgAKCRDdBJ7gKXxA
joCTAP4/XdBXA7Sj3GyjSAkYjg2U0quwX9oRhsx2Qy9duPDaLAD+NRl9XG14YSOB
f/7OiTQoDfnwVgHAOVBHY/ylrcgZRQg=
=2wdS
-----END PGP SIGNATURE-----

Merge tag 'mm-hotfixes-stable-2024-02-10-11-16' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "21 hotfixes. 12 are cc:stable and the remainder pertain to post-6.7
  issues or aren't considered to be needed in earlier kernel versions"

* tag 'mm-hotfixes-stable-2024-02-10-11-16' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (21 commits)
  nilfs2: fix potential bug in end_buffer_async_write
  mm/damon/sysfs-schemes: fix wrong DAMOS tried regions update timeout setup
  nilfs2: fix hang in nilfs_lookup_dirty_data_buffers()
  MAINTAINERS: Leo Yan has moved
  mm/zswap: don't return LRU_SKIP if we have dropped lru lock
  fs,hugetlb: fix NULL pointer dereference in hugetlbs_fill_super
  mailmap: switch email address for John Moon
  mm: zswap: fix objcg use-after-free in entry destruction
  mm/madvise: don't forget to leave lazy MMU mode in madvise_cold_or_pageout_pte_range()
  arch/arm/mm: fix major fault accounting when retrying under per-VMA lock
  selftests: core: include linux/close_range.h for CLOSE_RANGE_* macros
  mm/memory-failure: fix crash in split_huge_page_to_list from soft_offline_page
  mm: memcg: optimize parent iteration in memcg_rstat_updated()
  nilfs2: fix data corruption in dsync block recovery for small block sizes
  mm/userfaultfd: UFFDIO_MOVE implementation should use ptep_get()
  exit: wait_task_zombie: kill the no longer necessary spin_lock_irq(siglock)
  fs/proc: do_task_stat: use sig->stats_lock to gather the threads/children stats
  fs/proc: do_task_stat: move thread_group_cputime_adjusted() outside of lock_task_sighand()
  getrusage: use sig->stats_lock rather than lock_task_sighand()
  getrusage: move thread_group_cputime_adjusted() outside of lock_task_sighand()
  ...
commit 7521f258ea

--- a/.mailmap
+++ b/.mailmap
@@ -289,6 +289,7 @@ Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
 John Crispin <john@phrozen.org> <blogic@openwrt.org>
 John Fastabend <john.fastabend@gmail.com> <john.r.fastabend@intel.com>
 John Keeping <john@keeping.me.uk> <john@metanate.com>
+John Moon <john@jmoon.dev> <quic_johmoo@quicinc.com>
 John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
 John Stultz <johnstul@us.ibm.com>
 <jon.toppins+linux@gmail.com> <jtoppins@cumulusnetworks.com>
@@ -344,6 +345,7 @@ Leonid I Ananiev <leonid.i.ananiev@intel.com>
 Leon Romanovsky <leon@kernel.org> <leon@leon.nu>
 Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com>
 Leon Romanovsky <leon@kernel.org> <leonro@nvidia.com>
+Leo Yan <leo.yan@linux.dev> <leo.yan@linaro.org>
 Liam Mark <quic_lmark@quicinc.com> <lmark@codeaurora.org>
 Linas Vepstas <linas@austin.ibm.com>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17182,7 +17182,7 @@ R:	John Garry <john.g.garry@oracle.com>
 R:	Will Deacon <will@kernel.org>
 R:	James Clark <james.clark@arm.com>
 R:	Mike Leach <mike.leach@linaro.org>
-R:	Leo Yan <leo.yan@linaro.org>
+R:	Leo Yan <leo.yan@linux.dev>
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:	Supported
 F:	tools/build/feature/test-libopencsd.c
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -298,6 +298,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 			goto done;
 		}
 		count_vm_vma_lock_event(VMA_LOCK_RETRY);
+		if (fault & VM_FAULT_MAJOR)
+			flags |= FAULT_FLAG_TRIED;
 
 		/* Quick path to respond to signals */
 		if (fault_signal_pending(fault, regs)) {
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -100,6 +100,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	loff_t len, vma_len;
 	int ret;
 	struct hstate *h = hstate_file(file);
+	vm_flags_t vm_flags;
 
 	/*
 	 * vma address alignment (but not the pgoff alignment) has
@@ -141,10 +142,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	file_accessed(file);
 
 	ret = -ENOMEM;
+
+	vm_flags = vma->vm_flags;
+	/*
+	 * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
+	 * reserving here. Note: only for SHM hugetlbfs file, the inode
+	 * flag S_PRIVATE is set.
+	 */
+	if (inode->i_flags & S_PRIVATE)
+		vm_flags |= VM_NORESERVE;
+
 	if (!hugetlb_reserve_pages(inode,
 				vma->vm_pgoff >> huge_page_order(h),
 				len >> huge_page_shift(h), vma,
-				vma->vm_flags))
+				vm_flags))
 		goto out;
 
 	ret = 0;
@@ -1354,6 +1365,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
 {
 	struct hugetlbfs_fs_context *ctx = fc->fs_private;
 	struct fs_parse_result result;
+	struct hstate *h;
 	char *rest;
 	unsigned long ps;
 	int opt;
@@ -1398,11 +1410,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
 
 	case Opt_pagesize:
 		ps = memparse(param->string, &rest);
-		ctx->hstate = size_to_hstate(ps);
-		if (!ctx->hstate) {
+		h = size_to_hstate(ps);
+		if (!h) {
 			pr_err("Unsupported page size %lu MB\n", ps / SZ_1M);
 			return -EINVAL;
 		}
+		ctx->hstate = h;
 		return 0;
 
 	case Opt_min_size:
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -107,7 +107,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
 		nilfs_transaction_commit(inode->i_sb);
 
  mapped:
-	folio_wait_stable(folio);
+	/*
+	 * Since checksumming including data blocks is performed to determine
+	 * the validity of the log to be written and used for recovery, it is
+	 * necessary to wait for writeback to finish here, regardless of the
+	 * stable write requirement of the backing device.
+	 */
+	folio_wait_writeback(folio);
  out:
 	sb_end_pagefault(inode->i_sb);
 	return vmf_fs_error(ret);
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
 
 static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
 				     struct nilfs_recovery_block *rb,
-				     struct page *page)
+				     loff_t pos, struct page *page)
 {
 	struct buffer_head *bh_org;
+	size_t from = pos & ~PAGE_MASK;
 	void *kaddr;
 
 	bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
@@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
 		return -EIO;
 
 	kaddr = kmap_atomic(page);
-	memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
+	memcpy(kaddr + from, bh_org->b_data, bh_org->b_size);
 	kunmap_atomic(kaddr);
 	brelse(bh_org);
 	return 0;
@@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
 			goto failed_inode;
 		}
 
-		err = nilfs_recovery_copy_block(nilfs, rb, page);
+		err = nilfs_recovery_copy_block(nilfs, rb, pos, page);
 		if (unlikely(err))
 			goto failed_page;
 
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1703,7 +1703,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
 
 		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
 				    b_assoc_buffers) {
-			set_buffer_async_write(bh);
 			if (bh == segbuf->sb_super_root) {
 				if (bh->b_folio != bd_folio) {
 					folio_lock(bd_folio);
@@ -1714,6 +1713,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
 				}
 				break;
 			}
+			set_buffer_async_write(bh);
 			if (bh->b_folio != fs_folio) {
 				nilfs_begin_folio_io(fs_folio);
 				fs_folio = bh->b_folio;
@@ -1800,7 +1800,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
 
 		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
 				    b_assoc_buffers) {
-			clear_buffer_async_write(bh);
 			if (bh == segbuf->sb_super_root) {
 				clear_buffer_uptodate(bh);
 				if (bh->b_folio != bd_folio) {
@@ -1809,6 +1808,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
 				}
 				break;
 			}
+			clear_buffer_async_write(bh);
 			if (bh->b_folio != fs_folio) {
 				nilfs_end_folio_io(fs_folio, err);
 				fs_folio = bh->b_folio;
@@ -1896,8 +1896,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
 				 BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
 				 BIT(BH_NILFS_Redirected));
 
-			set_mask_bits(&bh->b_state, clear_bits, set_bits);
 			if (bh == segbuf->sb_super_root) {
+				set_buffer_uptodate(bh);
+				clear_buffer_dirty(bh);
 				if (bh->b_folio != bd_folio) {
 					folio_end_writeback(bd_folio);
 					bd_folio = bh->b_folio;
@@ -1905,6 +1906,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
 				update_sr = true;
 				break;
 			}
+			set_mask_bits(&bh->b_state, clear_bits, set_bits);
 			if (bh->b_folio != fs_folio) {
 				nilfs_end_folio_io(fs_folio, 0);
 				fs_folio = bh->b_folio;
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -477,13 +477,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	int permitted;
 	struct mm_struct *mm;
 	unsigned long long start_time;
-	unsigned long cmin_flt = 0, cmaj_flt = 0;
-	unsigned long min_flt = 0, maj_flt = 0;
-	u64 cutime, cstime, utime, stime;
-	u64 cgtime, gtime;
+	unsigned long cmin_flt, cmaj_flt, min_flt, maj_flt;
+	u64 cutime, cstime, cgtime, utime, stime, gtime;
 	unsigned long rsslim = 0;
 	unsigned long flags;
 	int exit_code = task->exit_code;
+	struct signal_struct *sig = task->signal;
+	unsigned int seq = 1;
 
 	state = *get_task_state(task);
 	vsize = eip = esp = 0;
@@ -511,12 +511,8 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 	sigemptyset(&sigign);
 	sigemptyset(&sigcatch);
-	cutime = cstime = utime = stime = 0;
-	cgtime = gtime = 0;
 
 	if (lock_task_sighand(task, &flags)) {
-		struct signal_struct *sig = task->signal;
-
 		if (sig->tty) {
 			struct pid *pgrp = tty_get_pgrp(sig->tty);
 			tty_pgrp = pid_nr_ns(pgrp, ns);
@@ -527,28 +523,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		num_threads = get_nr_threads(task);
 		collect_sigign_sigcatch(task, &sigign, &sigcatch);
 
-		cmin_flt = sig->cmin_flt;
-		cmaj_flt = sig->cmaj_flt;
-		cutime = sig->cutime;
-		cstime = sig->cstime;
-		cgtime = sig->cgtime;
 		rsslim = READ_ONCE(sig->rlim[RLIMIT_RSS].rlim_cur);
 
-		/* add up live thread stats at the group level */
 		if (whole) {
-			struct task_struct *t;
-
-			__for_each_thread(sig, t) {
-				min_flt += t->min_flt;
-				maj_flt += t->maj_flt;
-				gtime += task_gtime(t);
-			}
-
-			min_flt += sig->min_flt;
-			maj_flt += sig->maj_flt;
-			thread_group_cputime_adjusted(task, &utime, &stime);
-			gtime += sig->gtime;
-
 			if (sig->flags & (SIGNAL_GROUP_EXIT | SIGNAL_STOP_STOPPED))
 				exit_code = sig->group_exit_code;
 		}
@@ -562,10 +539,41 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 
 	if (permitted && (!whole || num_threads < 2))
 		wchan = !task_is_running(task);
-	if (!whole) {
+
+	do {
+		seq++; /* 2 on the 1st/lockless path, otherwise odd */
+		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
+
+		cmin_flt = sig->cmin_flt;
+		cmaj_flt = sig->cmaj_flt;
+		cutime = sig->cutime;
+		cstime = sig->cstime;
+		cgtime = sig->cgtime;
+
+		if (whole) {
+			struct task_struct *t;
+
+			min_flt = sig->min_flt;
+			maj_flt = sig->maj_flt;
+			gtime = sig->gtime;
+
+			rcu_read_lock();
+			__for_each_thread(sig, t) {
+				min_flt += t->min_flt;
+				maj_flt += t->maj_flt;
+				gtime += task_gtime(t);
+			}
+			rcu_read_unlock();
+		}
+	} while (need_seqretry(&sig->stats_lock, seq));
+	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+
+	if (whole) {
+		thread_group_cputime_adjusted(task, &utime, &stime);
+	} else {
+		task_cputime_adjusted(task, &utime, &stime);
 		min_flt = task->min_flt;
 		maj_flt = task->maj_flt;
-		task_cputime_adjusted(task, &utime, &stime);
 		gtime = task_gtime(task);
 	}
 
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1127,17 +1127,14 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 		 * and nobody can change them.
 		 *
 		 * psig->stats_lock also protects us from our sub-threads
-		 * which can reap other children at the same time. Until
-		 * we change k_getrusage()-like users to rely on this lock
-		 * we have to take ->siglock as well.
+		 * which can reap other children at the same time.
 		 *
 		 * We use thread_group_cputime_adjusted() to get times for
 		 * the thread group, which consolidates times for all threads
 		 * in the group including the group leader.
 		 */
 		thread_group_cputime_adjusted(p, &tgutime, &tgstime);
-		spin_lock_irq(&current->sighand->siglock);
-		write_seqlock(&psig->stats_lock);
+		write_seqlock_irq(&psig->stats_lock);
 		psig->cutime += tgutime + sig->cutime;
 		psig->cstime += tgstime + sig->cstime;
 		psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
@@ -1160,8 +1157,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 			psig->cmaxrss = maxrss;
 		task_io_accounting_add(&psig->ioac, &p->ioac);
 		task_io_accounting_add(&psig->ioac, &sig->ioac);
-		write_sequnlock(&psig->stats_lock);
-		spin_unlock_irq(&current->sighand->siglock);
+		write_sequnlock_irq(&psig->stats_lock);
 	}
 
 	if (wo->wo_rusage)
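The wait_task_zombie() change above drops the extra siglock and relies on psig->stats_lock alone. A minimal kernel-style sketch of the writer side of that seqlock, with a hypothetical helper name that is not part of the patch:

/* Hypothetical illustration only: a writer publishing child times under
 * the stats seqlock, so lockless readers of sig->stats_lock either see
 * a consistent snapshot or retry.
 */
static void publish_child_times(struct signal_struct *psig,
				u64 cutime, u64 cstime)
{
	write_seqlock_irq(&psig->stats_lock);	/* sequence goes odd, IRQs off */
	psig->cutime += cutime;
	psig->cstime += cstime;
	write_sequnlock_irq(&psig->stats_lock);	/* sequence even again */
}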
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1785,21 +1785,24 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
 	struct task_struct *t;
 	unsigned long flags;
 	u64 tgutime, tgstime, utime, stime;
-	unsigned long maxrss = 0;
+	unsigned long maxrss;
+	struct mm_struct *mm;
 	struct signal_struct *sig = p->signal;
+	unsigned int seq = 0;
 
-	memset((char *)r, 0, sizeof (*r));
+retry:
+	memset(r, 0, sizeof(*r));
 	utime = stime = 0;
+	maxrss = 0;
 
 	if (who == RUSAGE_THREAD) {
 		task_cputime_adjusted(current, &utime, &stime);
 		accumulate_thread_rusage(p, r);
 		maxrss = sig->maxrss;
-		goto out;
+		goto out_thread;
 	}
 
-	if (!lock_task_sighand(p, &flags))
-		return;
+	flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
 
 	switch (who) {
 	case RUSAGE_BOTH:
@@ -1819,9 +1822,6 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
 		fallthrough;
 
 	case RUSAGE_SELF:
-		thread_group_cputime_adjusted(p, &tgutime, &tgstime);
-		utime += tgutime;
-		stime += tgstime;
 		r->ru_nvcsw += sig->nvcsw;
 		r->ru_nivcsw += sig->nivcsw;
 		r->ru_minflt += sig->min_flt;
@@ -1830,28 +1830,42 @@ void getrusage(struct task_struct *p, int who, struct rusage *r)
 		r->ru_oublock += sig->oublock;
 		if (maxrss < sig->maxrss)
 			maxrss = sig->maxrss;
+
+		rcu_read_lock();
 		__for_each_thread(sig, t)
 			accumulate_thread_rusage(t, r);
+		rcu_read_unlock();
+
 		break;
 
 	default:
 		BUG();
 	}
-	unlock_task_sighand(p, &flags);
 
-out:
+	if (need_seqretry(&sig->stats_lock, seq)) {
+		seq = 1;
+		goto retry;
+	}
+	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
+
+	if (who == RUSAGE_CHILDREN)
+		goto out_children;
+
+	thread_group_cputime_adjusted(p, &tgutime, &tgstime);
+	utime += tgutime;
+	stime += tgstime;
+
+out_thread:
+	mm = get_task_mm(p);
+	if (mm) {
+		setmax_mm_hiwater_rss(&maxrss, mm);
+		mmput(mm);
+	}
+
+out_children:
+	r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
 	r->ru_utime = ns_to_kernel_old_timeval(utime);
 	r->ru_stime = ns_to_kernel_old_timeval(stime);
-
-	if (who != RUSAGE_CHILDREN) {
-		struct mm_struct *mm = get_task_mm(p);
-
-		if (mm) {
-			setmax_mm_hiwater_rss(&maxrss, mm);
-			mmput(mm);
-		}
-	}
-	r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
 }
 
 SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
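Both do_task_stat() and getrusage() above switch to the same lockless-first reader pattern on sig->stats_lock. A minimal sketch of that pattern, using a hypothetical read_stats() helper (not part of the patches) and only the seqlock API shown in the diffs:

/* Hypothetical reader: the first pass is lockless (seq becomes even via
 * read_seqbegin); if need_seqretry() detects a concurrent writer, the
 * next pass uses an odd seq and takes the lock exclusively, so the
 * retry loop is bounded.
 */
static void read_stats(struct signal_struct *sig, u64 *cutime, u64 *cstime)
{
	unsigned int seq = 1;
	unsigned long flags;

	do {
		seq++; /* 2 on the 1st/lockless pass, otherwise odd (locked) */
		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);

		*cutime = sig->cutime;
		*cstime = sig->cstime;
	} while (need_seqretry(&sig->stats_lock, seq));
	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
}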
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -2194,7 +2194,7 @@ static void damos_tried_regions_init_upd_status(
 		sysfs_regions->upd_timeout_jiffies = jiffies +
 			2 * usecs_to_jiffies(scheme->apply_interval_us ?
 					scheme->apply_interval_us :
-					ctx->attrs.sample_interval);
+					ctx->attrs.aggr_interval);
 	}
 }
 
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -429,6 +429,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 			if (++batch_count == SWAP_CLUSTER_MAX) {
 				batch_count = 0;
 				if (need_resched()) {
+					arch_leave_lazy_mmu_mode();
 					pte_unmap_unlock(start_pte, ptl);
 					cond_resched();
 					goto restart;
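For context, the invariant this one-line fix restores is that lazy MMU mode must be left before the PTE lock is dropped. A hedged sketch of that pairing, with illustrative function and variable names (this is not the madvise function itself):

/* Illustrative PTE scan: arch_enter/arch_leave_lazy_mmu_mode() bracket
 * the section that holds the PTE lock, and any path that releases the
 * lock to reschedule must leave lazy MMU mode first.
 */
static void scan_ptes(struct mm_struct *mm, pmd_t *pmd,
		      unsigned long addr, unsigned long end)
{
	spinlock_t *ptl;
	pte_t *start_pte, *pte;

restart:
	start_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	if (!start_pte)
		return;
	arch_enter_lazy_mmu_mode();

	for (; addr < end; pte++, addr += PAGE_SIZE) {
		/* ... inspect ptep_get(pte) here ... */
		if (need_resched()) {
			arch_leave_lazy_mmu_mode();	/* the step the fix adds */
			pte_unmap_unlock(start_pte, ptl);
			cond_resched();
			goto restart;
		}
	}

	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(start_pte, ptl);
}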
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -621,6 +621,15 @@ static inline int memcg_events_index(enum vm_event_item idx)
 }
 
 struct memcg_vmstats_percpu {
+	/* Stats updates since the last flush */
+	unsigned int			stats_updates;
+
+	/* Cached pointers for fast iteration in memcg_rstat_updated() */
+	struct memcg_vmstats_percpu	*parent;
+	struct memcg_vmstats		*vmstats;
+
+	/* The above should fit a single cacheline for memcg_rstat_updated() */
+
 	/* Local (CPU and cgroup) page state & events */
 	long			state[MEMCG_NR_STAT];
 	unsigned long		events[NR_MEMCG_EVENTS];
@@ -632,10 +641,7 @@ struct memcg_vmstats_percpu {
 	/* Cgroup1: threshold notifications & softlimit tree updates */
 	unsigned long		nr_page_events;
 	unsigned long		targets[MEM_CGROUP_NTARGETS];
-
-	/* Stats updates since the last flush */
-	unsigned int		stats_updates;
-};
+} ____cacheline_aligned;
 
 struct memcg_vmstats {
 	/* Aggregated (CPU and subtree) page state & events */
@@ -698,36 +704,35 @@ static void memcg_stats_unlock(void)
 }
 
 
-static bool memcg_should_flush_stats(struct mem_cgroup *memcg)
+static bool memcg_vmstats_needs_flush(struct memcg_vmstats *vmstats)
 {
-	return atomic64_read(&memcg->vmstats->stats_updates) >
+	return atomic64_read(&vmstats->stats_updates) >
 		MEMCG_CHARGE_BATCH * num_online_cpus();
 }
 
 static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
 {
+	struct memcg_vmstats_percpu *statc;
 	int cpu = smp_processor_id();
-	unsigned int x;
 
 	if (!val)
 		return;
 
 	cgroup_rstat_updated(memcg->css.cgroup, cpu);
-
-	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-		x = __this_cpu_add_return(memcg->vmstats_percpu->stats_updates,
-					  abs(val));
-
-		if (x < MEMCG_CHARGE_BATCH)
+	statc = this_cpu_ptr(memcg->vmstats_percpu);
+	for (; statc; statc = statc->parent) {
+		statc->stats_updates += abs(val);
+		if (statc->stats_updates < MEMCG_CHARGE_BATCH)
 			continue;
 
 		/*
 		 * If @memcg is already flush-able, increasing stats_updates is
 		 * redundant. Avoid the overhead of the atomic update.
 		 */
-		if (!memcg_should_flush_stats(memcg))
-			atomic64_add(x, &memcg->vmstats->stats_updates);
-		__this_cpu_write(memcg->vmstats_percpu->stats_updates, 0);
+		if (!memcg_vmstats_needs_flush(statc->vmstats))
+			atomic64_add(statc->stats_updates,
+				     &statc->vmstats->stats_updates);
+		statc->stats_updates = 0;
 	}
 }
 
@@ -756,7 +761,7 @@ void mem_cgroup_flush_stats(struct mem_cgroup *memcg)
 	if (!memcg)
 		memcg = root_mem_cgroup;
 
-	if (memcg_should_flush_stats(memcg))
+	if (memcg_vmstats_needs_flush(memcg->vmstats))
 		do_flush_stats(memcg);
 }
 
@@ -770,7 +775,7 @@ void mem_cgroup_flush_stats_ratelimited(struct mem_cgroup *memcg)
 static void flush_memcg_stats_dwork(struct work_struct *w)
 {
 	/*
-	 * Deliberately ignore memcg_should_flush_stats() here so that flushing
+	 * Deliberately ignore memcg_vmstats_needs_flush() here so that flushing
 	 * in latency-sensitive paths is as cheap as possible.
 	 */
 	do_flush_stats(root_mem_cgroup);
@@ -5477,10 +5482,11 @@ static void mem_cgroup_free(struct mem_cgroup *memcg)
 	__mem_cgroup_free(memcg);
 }
 
-static struct mem_cgroup *mem_cgroup_alloc(void)
+static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
 {
+	struct memcg_vmstats_percpu *statc, *pstatc;
 	struct mem_cgroup *memcg;
-	int node;
+	int node, cpu;
 	int __maybe_unused i;
 	long error = -ENOMEM;
 
@@ -5504,6 +5510,14 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	if (!memcg->vmstats_percpu)
 		goto fail;
 
+	for_each_possible_cpu(cpu) {
+		if (parent)
+			pstatc = per_cpu_ptr(parent->vmstats_percpu, cpu);
+		statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
+		statc->parent = parent ? pstatc : NULL;
+		statc->vmstats = memcg->vmstats;
+	}
+
 	for_each_node(node)
 		if (alloc_mem_cgroup_per_node_info(memcg, node))
 			goto fail;
@@ -5549,7 +5563,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	struct mem_cgroup *memcg, *old_memcg;
 
 	old_memcg = set_active_memcg(parent);
-	memcg = mem_cgroup_alloc();
+	memcg = mem_cgroup_alloc(parent);
 	set_active_memcg(old_memcg);
 	if (IS_ERR(memcg))
 		return ERR_CAST(memcg);
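The new layout comment says the three fields read by memcg_rstat_updated() should share one cacheline. A hedged, hypothetical compile-time check of that assumption (not part of the patch; it would have to sit next to the struct definition in mm/memcontrol.c):

/* Hypothetical assertion: stats_updates, parent and vmstats are placed
 * at the top of struct memcg_vmstats_percpu, so the hot update path
 * touches only the first cacheline of each per-CPU element.
 */
static_assert(offsetofend(struct memcg_vmstats_percpu, vmstats) <=
	      SMP_CACHE_BYTES);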
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1377,6 +1377,9 @@ void ClearPageHWPoisonTakenOff(struct page *page)
  */
 static inline bool HWPoisonHandlable(struct page *page, unsigned long flags)
 {
+	if (PageSlab(page))
+		return false;
+
 	/* Soft offline could migrate non-LRU movable pages */
 	if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page))
 		return true;
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -902,8 +902,8 @@ static int move_present_pte(struct mm_struct *mm,
 
 	double_pt_lock(dst_ptl, src_ptl);
 
-	if (!pte_same(*src_pte, orig_src_pte) ||
-	    !pte_same(*dst_pte, orig_dst_pte)) {
+	if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
+	    !pte_same(ptep_get(dst_pte), orig_dst_pte)) {
 		err = -EAGAIN;
 		goto out;
 	}
@@ -946,8 +946,8 @@ static int move_swap_pte(struct mm_struct *mm,
 
 	double_pt_lock(dst_ptl, src_ptl);
 
-	if (!pte_same(*src_pte, orig_src_pte) ||
-	    !pte_same(*dst_pte, orig_dst_pte)) {
+	if (!pte_same(ptep_get(src_pte), orig_src_pte) ||
+	    !pte_same(ptep_get(dst_pte), orig_dst_pte)) {
 		double_pt_unlock(dst_ptl, src_ptl);
 		return -EAGAIN;
 	}
@@ -1016,7 +1016,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
 	}
 
 	spin_lock(dst_ptl);
-	orig_dst_pte = *dst_pte;
+	orig_dst_pte = ptep_get(dst_pte);
 	spin_unlock(dst_ptl);
 	if (!pte_none(orig_dst_pte)) {
 		err = -EEXIST;
@@ -1024,7 +1024,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
 	}
 
 	spin_lock(src_ptl);
-	orig_src_pte = *src_pte;
+	orig_src_pte = ptep_get(src_pte);
 	spin_unlock(src_ptl);
 	if (pte_none(orig_src_pte)) {
 		if (!(mode & UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES))
@@ -1054,7 +1054,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
 			 * page isn't freed under us
 			 */
 			spin_lock(src_ptl);
-			if (!pte_same(orig_src_pte, *src_pte)) {
+			if (!pte_same(orig_src_pte, ptep_get(src_pte))) {
 				spin_unlock(src_ptl);
 				err = -EAGAIN;
 				goto out;
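The UFFDIO_MOVE hunks above replace direct PTE dereferences with ptep_get(). A minimal sketch of the access rule they enforce, using a hypothetical helper name:

/* Hypothetical helper: PTE contents must be read through ptep_get(), a
 * READ_ONCE-style accessor, rather than by dereferencing the pointer,
 * so the compiler cannot tear or re-load the value while it is being
 * compared against a previously sampled entry.
 */
static bool pte_still_matches(pte_t *ptep, pte_t orig)
{
	return pte_same(ptep_get(ptep), orig);
}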
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -536,10 +536,6 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
  */
 static void zswap_free_entry(struct zswap_entry *entry)
 {
-	if (entry->objcg) {
-		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
-		obj_cgroup_put(entry->objcg);
-	}
 	if (!entry->length)
 		atomic_dec(&zswap_same_filled_pages);
 	else {
@@ -548,6 +544,10 @@ static void zswap_free_entry(struct zswap_entry *entry)
 		atomic_dec(&entry->pool->nr_stored);
 		zswap_pool_put(entry->pool);
 	}
+	if (entry->objcg) {
+		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
+		obj_cgroup_put(entry->objcg);
+	}
 	zswap_entry_cache_free(entry);
 	atomic_dec(&zswap_stored_pages);
 	zswap_update_total_size();
@@ -895,10 +895,8 @@ static enum lru_status shrink_memcg_cb(struct list_head *item, struct list_lru_o
 		 * into the warmer region. We should terminate shrinking (if we're in the dynamic
 		 * shrinker context).
 		 */
-		if (writeback_result == -EEXIST && encountered_page_in_swapcache) {
-			ret = LRU_SKIP;
+		if (writeback_result == -EEXIST && encountered_page_in_swapcache)
 			*encountered_page_in_swapcache = true;
-		}
 
 		goto put_unlock;
 	}
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -12,6 +12,7 @@
 #include <syscall.h>
 #include <unistd.h>
 #include <sys/resource.h>
+#include <linux/close_range.h>
 
 #include "../kselftest_harness.h"
 #include "../clone3/clone3_selftests.h"