mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-13 09:20:17 +00:00
Merge branch 'akpm' (patches from Andrew)
Merge fixes from Andrew Morton: "27 fixes. There are three patches that aren't actually fixes. They're simple function renamings which are nice-to-have in mainline as ongoing net development depends on them." * akpm: (27 commits) timerfd: export defines to userspace mm/hugetlb.c: fix reservation race when freeing surplus pages mm/slab.c: fix SLAB freelist randomization duplicate entries zram: support BDI_CAP_STABLE_WRITES zram: revalidate disk under init_lock mm: support anonymous stable page mm: add documentation for page fragment APIs mm: rename __page_frag functions to __page_frag_cache, drop order from drain mm: rename __alloc_page_frag to page_frag_alloc and __free_page_frag to page_frag_free mm, memcg: fix the active list aging for lowmem requests when memcg is enabled mm: don't dereference struct page fields of invalid pages mailmap: add codeaurora.org names for nameless email commits signal: protect SIGNAL_UNKILLABLE from unintentional clearing. mm: pmd dirty emulation in page fault handler ipc/sem.c: fix incorrect sem_lock pairing lib/Kconfig.debug: fix frv build failure mm: get rid of __GFP_OTHER_NODE mm: fix remote numa hits statistics mm: fix devm_memremap_pages crash, use mem_hotplug_{begin, done} ocfs2: fix crash caused by stale lvb with fsdlm plugin ...
This commit is contained in:
commit
ba836a6f5a
4
.mailmap
4
.mailmap
@ -137,6 +137,7 @@ Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
|
||||
Rudolf Marek <R.Marek@sh.cvut.cz>
|
||||
Rui Saraiva <rmps@joel.ist.utl.pt>
|
||||
Sachin P Sant <ssant@in.ibm.com>
|
||||
Sarangdhar Joshi <spjoshi@codeaurora.org>
|
||||
Sam Ravnborg <sam@mars.ravnborg.org>
|
||||
Santosh Shilimkar <ssantosh@kernel.org>
|
||||
Santosh Shilimkar <santosh.shilimkar@oracle.org>
|
||||
@ -150,10 +151,13 @@ Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
|
||||
Simon Kelley <simon@thekelleys.org.uk>
|
||||
Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
|
||||
Stephen Hemminger <shemminger@osdl.org>
|
||||
Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
|
||||
Subhash Jadavani <subhashj@codeaurora.org>
|
||||
Sudeep Holla <sudeep.holla@arm.com> Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
|
||||
Sumit Semwal <sumit.semwal@ti.com>
|
||||
Tejun Heo <htejun@gmail.com>
|
||||
Thomas Graf <tgraf@suug.ch>
|
||||
Thomas Pedersen <twp@codeaurora.org>
|
||||
Tony Luck <tony.luck@intel.com>
|
||||
Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com>
|
||||
Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>
|
||||
|
42
Documentation/vm/page_frags
Normal file
42
Documentation/vm/page_frags
Normal file
@ -0,0 +1,42 @@
|
||||
Page fragments
|
||||
--------------
|
||||
|
||||
A page fragment is an arbitrary-length arbitrary-offset area of memory
|
||||
which resides within a 0 or higher order compound page. Multiple
|
||||
fragments within that page are individually refcounted, in the page's
|
||||
reference counter.
|
||||
|
||||
The page_frag functions, page_frag_alloc and page_frag_free, provide a
|
||||
simple allocation framework for page fragments. This is used by the
|
||||
network stack and network device drivers to provide a backing region of
|
||||
memory for use as either an sk_buff->head, or to be used in the "frags"
|
||||
portion of skb_shared_info.
|
||||
|
||||
In order to make use of the page fragment APIs a backing page fragment
|
||||
cache is needed. This provides a central point for the fragment allocation
|
||||
and allows multiple calls to make use of a cached page. The
|
||||
advantage to doing this is that multiple calls to get_page can be avoided
|
||||
which can be expensive at allocation time. However due to the nature of
|
||||
this caching it is required that any calls to the cache be protected by
|
||||
either a per-cpu limitation, or a per-cpu limitation and forcing interrupts
|
||||
to be disabled when executing the fragment allocation.
|
||||
|
||||
The network stack uses two separate caches per CPU to handle fragment
|
||||
allocation. The netdev_alloc_cache is used by callers making use of the
|
||||
__netdev_alloc_frag and __netdev_alloc_skb calls. The napi_alloc_cache is
|
||||
used by callers of the __napi_alloc_frag and __napi_alloc_skb calls. The
|
||||
main difference between these two calls is the context in which they may be
|
||||
called. The "netdev" prefixed functions are usable in any context as these
|
||||
functions will disable interrupts, while the "napi" prefixed functions are
|
||||
only usable within the softirq context.
|
||||
|
||||
Many network device drivers use a similar methodology for allocating page
|
||||
fragments, but the page fragments are cached at the ring or descriptor
|
||||
level. In order to enable these cases it is necessary to provide a generic
|
||||
way of tearing down a page cache. For this reason __page_frag_cache_drain
|
||||
was implemented. It allows for freeing multiple references from a single
|
||||
page via a single call. The advantage to doing this is that it allows for
|
||||
cleaning up the multiple references that were added to a page in order to
|
||||
avoid calling get_page per allocation.
|
||||
|
||||
Alexander Duyck, Nov 29, 2016.
|
@ -81,7 +81,6 @@ Descriptions of section entries:
|
||||
Q: Patchwork web based patch tracking system site
|
||||
T: SCM tree type and location.
|
||||
Type is one of: git, hg, quilt, stgit, topgit
|
||||
B: Bug tracking system location.
|
||||
S: Status, one of the following:
|
||||
Supported: Someone is actually paid to look after this.
|
||||
Maintained: Someone actually looks after it.
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/err.h>
|
||||
@ -112,6 +113,14 @@ static inline bool is_partial_io(struct bio_vec *bvec)
|
||||
return bvec->bv_len != PAGE_SIZE;
|
||||
}
|
||||
|
||||
static void zram_revalidate_disk(struct zram *zram)
|
||||
{
|
||||
revalidate_disk(zram->disk);
|
||||
/* revalidate_disk resets BDI_CAP_STABLE_WRITES, so set it again */
|
||||
zram->disk->queue->backing_dev_info.capabilities |=
|
||||
BDI_CAP_STABLE_WRITES;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if request is within bounds and aligned on zram logical blocks.
|
||||
*/
|
||||
@ -1095,15 +1104,9 @@ static ssize_t disksize_store(struct device *dev,
|
||||
zram->comp = comp;
|
||||
zram->disksize = disksize;
|
||||
set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
|
||||
zram_revalidate_disk(zram);
|
||||
up_write(&zram->init_lock);
|
||||
|
||||
/*
|
||||
* Revalidate disk out of the init_lock to avoid lockdep splat.
|
||||
* It's okay because disk's capacity is protected by init_lock
|
||||
* so that revalidate_disk always sees up-to-date capacity.
|
||||
*/
|
||||
revalidate_disk(zram->disk);
|
||||
|
||||
return len;
|
||||
|
||||
out_destroy_comp:
|
||||
@ -1149,7 +1152,7 @@ static ssize_t reset_store(struct device *dev,
|
||||
/* Make sure all the pending I/O are finished */
|
||||
fsync_bdev(bdev);
|
||||
zram_reset_device(zram);
|
||||
revalidate_disk(zram->disk);
|
||||
zram_revalidate_disk(zram);
|
||||
bdput(bdev);
|
||||
|
||||
mutex_lock(&bdev->bd_mutex);
|
||||
|
@ -3962,8 +3962,8 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring)
|
||||
PAGE_SIZE,
|
||||
DMA_FROM_DEVICE,
|
||||
DMA_ATTR_SKIP_CPU_SYNC);
|
||||
__page_frag_drain(buffer_info->page, 0,
|
||||
buffer_info->pagecnt_bias);
|
||||
__page_frag_cache_drain(buffer_info->page,
|
||||
buffer_info->pagecnt_bias);
|
||||
|
||||
buffer_info->page = NULL;
|
||||
}
|
||||
@ -6991,7 +6991,7 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring,
|
||||
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
|
||||
PAGE_SIZE, DMA_FROM_DEVICE,
|
||||
DMA_ATTR_SKIP_CPU_SYNC);
|
||||
__page_frag_drain(page, 0, rx_buffer->pagecnt_bias);
|
||||
__page_frag_cache_drain(page, rx_buffer->pagecnt_bias);
|
||||
}
|
||||
|
||||
/* clear contents of rx_buffer */
|
||||
|
51
fs/dax.c
51
fs/dax.c
@ -691,8 +691,8 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
|
||||
pgoff_t index, unsigned long pfn)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
pte_t *ptep;
|
||||
pte_t pte;
|
||||
pte_t pte, *ptep = NULL;
|
||||
pmd_t *pmdp = NULL;
|
||||
spinlock_t *ptl;
|
||||
bool changed;
|
||||
|
||||
@ -707,21 +707,42 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
|
||||
|
||||
address = pgoff_address(index, vma);
|
||||
changed = false;
|
||||
if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
|
||||
if (follow_pte_pmd(vma->vm_mm, address, &ptep, &pmdp, &ptl))
|
||||
continue;
|
||||
if (pfn != pte_pfn(*ptep))
|
||||
goto unlock;
|
||||
if (!pte_dirty(*ptep) && !pte_write(*ptep))
|
||||
goto unlock;
|
||||
|
||||
flush_cache_page(vma, address, pfn);
|
||||
pte = ptep_clear_flush(vma, address, ptep);
|
||||
pte = pte_wrprotect(pte);
|
||||
pte = pte_mkclean(pte);
|
||||
set_pte_at(vma->vm_mm, address, ptep, pte);
|
||||
changed = true;
|
||||
unlock:
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
if (pmdp) {
|
||||
#ifdef CONFIG_FS_DAX_PMD
|
||||
pmd_t pmd;
|
||||
|
||||
if (pfn != pmd_pfn(*pmdp))
|
||||
goto unlock_pmd;
|
||||
if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
|
||||
goto unlock_pmd;
|
||||
|
||||
flush_cache_page(vma, address, pfn);
|
||||
pmd = pmdp_huge_clear_flush(vma, address, pmdp);
|
||||
pmd = pmd_wrprotect(pmd);
|
||||
pmd = pmd_mkclean(pmd);
|
||||
set_pmd_at(vma->vm_mm, address, pmdp, pmd);
|
||||
changed = true;
|
||||
unlock_pmd:
|
||||
spin_unlock(ptl);
|
||||
#endif
|
||||
} else {
|
||||
if (pfn != pte_pfn(*ptep))
|
||||
goto unlock_pte;
|
||||
if (!pte_dirty(*ptep) && !pte_write(*ptep))
|
||||
goto unlock_pte;
|
||||
|
||||
flush_cache_page(vma, address, pfn);
|
||||
pte = ptep_clear_flush(vma, address, ptep);
|
||||
pte = pte_wrprotect(pte);
|
||||
pte = pte_mkclean(pte);
|
||||
set_pte_at(vma->vm_mm, address, ptep, pte);
|
||||
changed = true;
|
||||
unlock_pte:
|
||||
pte_unmap_unlock(ptep, ptl);
|
||||
}
|
||||
|
||||
if (changed)
|
||||
mmu_notifier_invalidate_page(vma->vm_mm, address);
|
||||
|
@ -3303,6 +3303,16 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
|
||||
mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
|
||||
lockres->l_level, new_level);
|
||||
|
||||
/*
|
||||
* On DLM_LKF_VALBLK, fsdlm behaves differently from o2cb. It always
|
||||
* expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
|
||||
* we can recover correctly from node failure. Otherwise, we may get
|
||||
* invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
|
||||
*/
|
||||
if (!ocfs2_is_o2cb_active() &&
|
||||
lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
|
||||
lvb = 1;
|
||||
|
||||
if (lvb)
|
||||
dlm_flags |= DLM_LKF_VALBLK;
|
||||
|
||||
|
@ -48,6 +48,12 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
|
||||
*/
|
||||
static struct ocfs2_stack_plugin *active_stack;
|
||||
|
||||
inline int ocfs2_is_o2cb_active(void)
|
||||
{
|
||||
return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
|
||||
|
||||
static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
|
||||
{
|
||||
struct ocfs2_stack_plugin *p;
|
||||
|
@ -298,6 +298,9 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
|
||||
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
|
||||
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
|
||||
|
||||
/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
|
||||
int ocfs2_is_o2cb_active(void);
|
||||
|
||||
extern struct kset *ocfs2_kset;
|
||||
|
||||
#endif /* STACKGLUE_H */
|
||||
|
@ -38,9 +38,8 @@ struct vm_area_struct;
|
||||
#define ___GFP_ACCOUNT 0x100000u
|
||||
#define ___GFP_NOTRACK 0x200000u
|
||||
#define ___GFP_DIRECT_RECLAIM 0x400000u
|
||||
#define ___GFP_OTHER_NODE 0x800000u
|
||||
#define ___GFP_WRITE 0x1000000u
|
||||
#define ___GFP_KSWAPD_RECLAIM 0x2000000u
|
||||
#define ___GFP_WRITE 0x800000u
|
||||
#define ___GFP_KSWAPD_RECLAIM 0x1000000u
|
||||
/* If the above are modified, __GFP_BITS_SHIFT may need updating */
|
||||
|
||||
/*
|
||||
@ -172,11 +171,6 @@ struct vm_area_struct;
|
||||
* __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of
|
||||
* distinguishing in the source between false positives and allocations that
|
||||
* cannot be supported (e.g. page tables).
|
||||
*
|
||||
* __GFP_OTHER_NODE is for allocations that are on a remote node but that
|
||||
* should not be accounted for as a remote allocation in vmstat. A
|
||||
* typical user would be khugepaged collapsing a huge page on a remote
|
||||
* node.
|
||||
*/
|
||||
#define __GFP_COLD ((__force gfp_t)___GFP_COLD)
|
||||
#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
|
||||
@ -184,10 +178,9 @@ struct vm_area_struct;
|
||||
#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
|
||||
#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK)
|
||||
#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
|
||||
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE)
|
||||
|
||||
/* Room for N __GFP_FOO bits */
|
||||
#define __GFP_BITS_SHIFT 26
|
||||
#define __GFP_BITS_SHIFT 25
|
||||
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
|
||||
|
||||
/*
|
||||
@ -506,11 +499,10 @@ extern void free_hot_cold_page(struct page *page, bool cold);
|
||||
extern void free_hot_cold_page_list(struct list_head *list, bool cold);
|
||||
|
||||
struct page_frag_cache;
|
||||
extern void __page_frag_drain(struct page *page, unsigned int order,
|
||||
unsigned int count);
|
||||
extern void *__alloc_page_frag(struct page_frag_cache *nc,
|
||||
unsigned int fragsz, gfp_t gfp_mask);
|
||||
extern void __free_page_frag(void *addr);
|
||||
extern void __page_frag_cache_drain(struct page *page, unsigned int count);
|
||||
extern void *page_frag_alloc(struct page_frag_cache *nc,
|
||||
unsigned int fragsz, gfp_t gfp_mask);
|
||||
extern void page_frag_free(void *addr);
|
||||
|
||||
#define __free_page(page) __free_pages((page), 0)
|
||||
#define free_page(addr) free_pages((addr), 0)
|
||||
|
@ -120,7 +120,7 @@ struct mem_cgroup_reclaim_iter {
|
||||
*/
|
||||
struct mem_cgroup_per_node {
|
||||
struct lruvec lruvec;
|
||||
unsigned long lru_size[NR_LRU_LISTS];
|
||||
unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
|
||||
|
||||
struct mem_cgroup_reclaim_iter iter[DEF_PRIORITY + 1];
|
||||
|
||||
@ -432,7 +432,7 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
|
||||
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
|
||||
|
||||
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
|
||||
int nr_pages);
|
||||
int zid, int nr_pages);
|
||||
|
||||
unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
||||
int nid, unsigned int lru_mask);
|
||||
@ -441,9 +441,23 @@ static inline
|
||||
unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
|
||||
{
|
||||
struct mem_cgroup_per_node *mz;
|
||||
unsigned long nr_pages = 0;
|
||||
int zid;
|
||||
|
||||
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
||||
return mz->lru_size[lru];
|
||||
for (zid = 0; zid < MAX_NR_ZONES; zid++)
|
||||
nr_pages += mz->lru_zone_size[zid][lru];
|
||||
return nr_pages;
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
|
||||
enum lru_list lru, int zone_idx)
|
||||
{
|
||||
struct mem_cgroup_per_node *mz;
|
||||
|
||||
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
||||
return mz->lru_zone_size[zone_idx][lru];
|
||||
}
|
||||
|
||||
void mem_cgroup_handle_over_high(void);
|
||||
@ -671,6 +685,12 @@ mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static inline
|
||||
unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec,
|
||||
enum lru_list lru, int zone_idx)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline unsigned long
|
||||
mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
||||
|
@ -1210,8 +1210,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
|
||||
struct vm_area_struct *vma);
|
||||
void unmap_mapping_range(struct address_space *mapping,
|
||||
loff_t const holebegin, loff_t const holelen, int even_cows);
|
||||
int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
|
||||
spinlock_t **ptlp);
|
||||
int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
|
||||
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp);
|
||||
int follow_pfn(struct vm_area_struct *vma, unsigned long address,
|
||||
unsigned long *pfn);
|
||||
int follow_phys(struct vm_area_struct *vma, unsigned long address,
|
||||
|
@ -39,7 +39,7 @@ static __always_inline void update_lru_size(struct lruvec *lruvec,
|
||||
{
|
||||
__update_lru_size(lruvec, lru, zid, nr_pages);
|
||||
#ifdef CONFIG_MEMCG
|
||||
mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
|
||||
mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -854,6 +854,16 @@ struct signal_struct {
|
||||
|
||||
#define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */
|
||||
|
||||
#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \
|
||||
SIGNAL_STOP_CONTINUED)
|
||||
|
||||
static inline void signal_set_stop_flags(struct signal_struct *sig,
|
||||
unsigned int flags)
|
||||
{
|
||||
WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP));
|
||||
sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags;
|
||||
}
|
||||
|
||||
/* If true, all threads except ->group_exit_task have pending SIGKILL */
|
||||
static inline int signal_group_exit(const struct signal_struct *sig)
|
||||
{
|
||||
|
@ -2480,7 +2480,7 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
|
||||
|
||||
static inline void skb_free_frag(void *addr)
|
||||
{
|
||||
__free_page_frag(addr);
|
||||
page_frag_free(addr);
|
||||
}
|
||||
|
||||
void *napi_alloc_frag(unsigned int fragsz);
|
||||
|
@ -226,7 +226,7 @@ static inline const char *__check_heap_object(const void *ptr,
|
||||
* (PAGE_SIZE*2). Larger requests are passed to the page allocator.
|
||||
*/
|
||||
#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
|
||||
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
|
||||
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
|
||||
#ifndef KMALLOC_SHIFT_LOW
|
||||
#define KMALLOC_SHIFT_LOW 3
|
||||
#endif
|
||||
@ -239,7 +239,7 @@ static inline const char *__check_heap_object(const void *ptr,
|
||||
* be allocated from the same page.
|
||||
*/
|
||||
#define KMALLOC_SHIFT_HIGH PAGE_SHIFT
|
||||
#define KMALLOC_SHIFT_MAX 30
|
||||
#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT - 1)
|
||||
#ifndef KMALLOC_SHIFT_LOW
|
||||
#define KMALLOC_SHIFT_LOW 3
|
||||
#endif
|
||||
|
@ -150,8 +150,9 @@ enum {
|
||||
SWP_FILE = (1 << 7), /* set after swap_activate success */
|
||||
SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */
|
||||
SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */
|
||||
SWP_STABLE_WRITES = (1 << 10), /* no overwrite PG_writeback pages */
|
||||
/* add others here before... */
|
||||
SWP_SCANNING = (1 << 10), /* refcount in scan_swap_map */
|
||||
SWP_SCANNING = (1 << 11), /* refcount in scan_swap_map */
|
||||
};
|
||||
|
||||
#define SWAP_CLUSTER_MAX 32UL
|
||||
|
@ -8,23 +8,7 @@
|
||||
#ifndef _LINUX_TIMERFD_H
|
||||
#define _LINUX_TIMERFD_H
|
||||
|
||||
/* For O_CLOEXEC and O_NONBLOCK */
|
||||
#include <linux/fcntl.h>
|
||||
|
||||
/* For _IO helpers */
|
||||
#include <linux/ioctl.h>
|
||||
|
||||
/*
|
||||
* CAREFUL: Check include/asm-generic/fcntl.h when defining
|
||||
* new flags, since they might collide with O_* ones. We want
|
||||
* to re-use O_* flags that couldn't possibly have a meaning
|
||||
* from eventfd, in order to leave a free define-space for
|
||||
* shared O_* flags.
|
||||
*/
|
||||
#define TFD_TIMER_ABSTIME (1 << 0)
|
||||
#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
|
||||
#define TFD_CLOEXEC O_CLOEXEC
|
||||
#define TFD_NONBLOCK O_NONBLOCK
|
||||
#include <uapi/linux/timerfd.h>
|
||||
|
||||
#define TFD_SHARED_FCNTL_FLAGS (TFD_CLOEXEC | TFD_NONBLOCK)
|
||||
/* Flags for timerfd_create. */
|
||||
@ -32,6 +16,4 @@
|
||||
/* Flags for timerfd_settime. */
|
||||
#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)
|
||||
|
||||
#define TFD_IOC_SET_TICKS _IOW('T', 0, u64)
|
||||
|
||||
#endif /* _LINUX_TIMERFD_H */
|
||||
|
@ -47,8 +47,7 @@
|
||||
{(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \
|
||||
{(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \
|
||||
{(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\
|
||||
{(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\
|
||||
{(unsigned long)__GFP_OTHER_NODE, "__GFP_OTHER_NODE"} \
|
||||
{(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"}\
|
||||
|
||||
#define show_gfp_flags(flags) \
|
||||
(flags) ? __print_flags(flags, "|", \
|
||||
|
@ -414,6 +414,7 @@ header-y += telephony.h
|
||||
header-y += termios.h
|
||||
header-y += thermal.h
|
||||
header-y += time.h
|
||||
header-y += timerfd.h
|
||||
header-y += times.h
|
||||
header-y += timex.h
|
||||
header-y += tiocl.h
|
||||
|
36
include/uapi/linux/timerfd.h
Normal file
36
include/uapi/linux/timerfd.h
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* include/uapi/linux/timerfd.h
|
||||
*
|
||||
* Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _UAPI_LINUX_TIMERFD_H
|
||||
#define _UAPI_LINUX_TIMERFD_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/* For O_CLOEXEC and O_NONBLOCK */
|
||||
#include <linux/fcntl.h>
|
||||
|
||||
/* For _IO helpers */
|
||||
#include <linux/ioctl.h>
|
||||
|
||||
/*
|
||||
* CAREFUL: Check include/asm-generic/fcntl.h when defining
|
||||
* new flags, since they might collide with O_* ones. We want
|
||||
* to re-use O_* flags that couldn't possibly have a meaning
|
||||
* from eventfd, in order to leave a free define-space for
|
||||
* shared O_* flags.
|
||||
*
|
||||
* Also make sure to update the masks in include/linux/timerfd.h
|
||||
* when adding new flags.
|
||||
*/
|
||||
#define TFD_TIMER_ABSTIME (1 << 0)
|
||||
#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
|
||||
#define TFD_CLOEXEC O_CLOEXEC
|
||||
#define TFD_NONBLOCK O_NONBLOCK
|
||||
|
||||
#define TFD_IOC_SET_TICKS _IOW('T', 0, __u64)
|
||||
|
||||
#endif /* _UAPI_LINUX_TIMERFD_H */
|
@ -1977,7 +1977,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
sem_lock(sma, sops, nsops);
|
||||
locknum = sem_lock(sma, sops, nsops);
|
||||
|
||||
if (!ipc_valid_object(&sma->sem_perm))
|
||||
goto out_unlock_free;
|
||||
|
@ -56,7 +56,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
|
||||
attr->value_size == 0 || attr->map_flags)
|
||||
return ERR_PTR(-EINVAL);
|
||||
|
||||
if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
|
||||
if (attr->value_size > KMALLOC_MAX_SIZE)
|
||||
/* if value_size is bigger, the user space won't be able to
|
||||
* access the elements.
|
||||
*/
|
||||
|
@ -274,7 +274,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
|
||||
*/
|
||||
goto free_htab;
|
||||
|
||||
if (htab->map.value_size >= (1 << (KMALLOC_SHIFT_MAX - 1)) -
|
||||
if (htab->map.value_size >= KMALLOC_MAX_SIZE -
|
||||
MAX_BPF_STACK - sizeof(struct htab_elem))
|
||||
/* if value_size is bigger, the user space won't be able to
|
||||
* access the elements via bpf syscall. This check also makes
|
||||
|
@ -246,7 +246,9 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
|
||||
/* pages are dead and unused, undo the arch mapping */
|
||||
align_start = res->start & ~(SECTION_SIZE - 1);
|
||||
align_size = ALIGN(resource_size(res), SECTION_SIZE);
|
||||
mem_hotplug_begin();
|
||||
arch_remove_memory(align_start, align_size);
|
||||
mem_hotplug_done();
|
||||
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
|
||||
pgmap_radix_release(res);
|
||||
dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
|
||||
@ -358,7 +360,9 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
|
||||
if (error)
|
||||
goto err_pfn_remap;
|
||||
|
||||
mem_hotplug_begin();
|
||||
error = arch_add_memory(nid, align_start, align_size, true);
|
||||
mem_hotplug_done();
|
||||
if (error)
|
||||
goto err_add_memory;
|
||||
|
||||
|
@ -346,7 +346,7 @@ static bool task_participate_group_stop(struct task_struct *task)
|
||||
* fresh group stop. Read comment in do_signal_stop() for details.
|
||||
*/
|
||||
if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) {
|
||||
sig->flags = SIGNAL_STOP_STOPPED;
|
||||
signal_set_stop_flags(sig, SIGNAL_STOP_STOPPED);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -843,7 +843,7 @@ static bool prepare_signal(int sig, struct task_struct *p, bool force)
|
||||
* will take ->siglock, notice SIGNAL_CLD_MASK, and
|
||||
* notify its parent. See get_signal_to_deliver().
|
||||
*/
|
||||
signal->flags = why | SIGNAL_STOP_CONTINUED;
|
||||
signal_set_stop_flags(signal, why | SIGNAL_STOP_CONTINUED);
|
||||
signal->group_stop_count = 0;
|
||||
signal->group_exit_code = 0;
|
||||
}
|
||||
|
@ -164,7 +164,7 @@ config DEBUG_INFO_REDUCED
|
||||
|
||||
config DEBUG_INFO_SPLIT
|
||||
bool "Produce split debuginfo in .dwo files"
|
||||
depends on DEBUG_INFO
|
||||
depends on DEBUG_INFO && !FRV
|
||||
help
|
||||
Generate debug info into separate .dwo files. This significantly
|
||||
reduces the build directory size for builds with DEBUG_INFO,
|
||||
|
@ -138,7 +138,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
|
||||
dax_radix_locked_entry(0, RADIX_DAX_EMPTY));
|
||||
/* Wakeup waiters for exceptional entry lock */
|
||||
dax_wake_mapping_entry_waiter(mapping, page->index, p,
|
||||
false);
|
||||
true);
|
||||
}
|
||||
}
|
||||
__radix_tree_replace(&mapping->page_tree, node, slot, page,
|
||||
|
@ -883,15 +883,17 @@ void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd)
|
||||
{
|
||||
pmd_t entry;
|
||||
unsigned long haddr;
|
||||
bool write = vmf->flags & FAULT_FLAG_WRITE;
|
||||
|
||||
vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
|
||||
if (unlikely(!pmd_same(*vmf->pmd, orig_pmd)))
|
||||
goto unlock;
|
||||
|
||||
entry = pmd_mkyoung(orig_pmd);
|
||||
if (write)
|
||||
entry = pmd_mkdirty(entry);
|
||||
haddr = vmf->address & HPAGE_PMD_MASK;
|
||||
if (pmdp_set_access_flags(vmf->vma, haddr, vmf->pmd, entry,
|
||||
vmf->flags & FAULT_FLAG_WRITE))
|
||||
if (pmdp_set_access_flags(vmf->vma, haddr, vmf->pmd, entry, write))
|
||||
update_mmu_cache_pmd(vmf->vma, vmf->address, vmf->pmd);
|
||||
|
||||
unlock:
|
||||
@ -919,8 +921,7 @@ static int do_huge_pmd_wp_page_fallback(struct vm_fault *vmf, pmd_t orig_pmd,
|
||||
}
|
||||
|
||||
for (i = 0; i < HPAGE_PMD_NR; i++) {
|
||||
pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE |
|
||||
__GFP_OTHER_NODE, vma,
|
||||
pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, vma,
|
||||
vmf->address, page_to_nid(page));
|
||||
if (unlikely(!pages[i] ||
|
||||
mem_cgroup_try_charge(pages[i], vma->vm_mm,
|
||||
|
37
mm/hugetlb.c
37
mm/hugetlb.c
@ -1773,23 +1773,32 @@ free:
|
||||
}
|
||||
|
||||
/*
|
||||
* When releasing a hugetlb pool reservation, any surplus pages that were
|
||||
* allocated to satisfy the reservation must be explicitly freed if they were
|
||||
* never used.
|
||||
* Called with hugetlb_lock held.
|
||||
* This routine has two main purposes:
|
||||
* 1) Decrement the reservation count (resv_huge_pages) by the value passed
|
||||
* in unused_resv_pages. This corresponds to the prior adjustments made
|
||||
* to the associated reservation map.
|
||||
* 2) Free any unused surplus pages that may have been allocated to satisfy
|
||||
* the reservation. As many as unused_resv_pages may be freed.
|
||||
*
|
||||
* Called with hugetlb_lock held. However, the lock could be dropped (and
|
||||
* reacquired) during calls to cond_resched_lock. Whenever dropping the lock,
|
||||
* we must make sure nobody else can claim pages we are in the process of
|
||||
* freeing. Do this by ensuring resv_huge_pages is always greater than the
|
||||
* number of huge pages we plan to free when dropping the lock.
|
||||
*/
|
||||
static void return_unused_surplus_pages(struct hstate *h,
|
||||
unsigned long unused_resv_pages)
|
||||
{
|
||||
unsigned long nr_pages;
|
||||
|
||||
/* Uncommit the reservation */
|
||||
h->resv_huge_pages -= unused_resv_pages;
|
||||
|
||||
/* Cannot return gigantic pages currently */
|
||||
if (hstate_is_gigantic(h))
|
||||
return;
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Part (or even all) of the reservation could have been backed
|
||||
* by pre-allocated pages. Only free surplus pages.
|
||||
*/
|
||||
nr_pages = min(unused_resv_pages, h->surplus_huge_pages);
|
||||
|
||||
/*
|
||||
@ -1799,12 +1808,22 @@ static void return_unused_surplus_pages(struct hstate *h,
|
||||
* when the nodes with surplus pages have no free pages.
|
||||
* free_pool_huge_page() will balance the freed pages across the
|
||||
* on-line nodes with memory and will handle the hstate accounting.
|
||||
*
|
||||
* Note that we decrement resv_huge_pages as we free the pages. If
|
||||
* we drop the lock, resv_huge_pages will still be sufficiently large
|
||||
* to cover subsequent pages we may free.
|
||||
*/
|
||||
while (nr_pages--) {
|
||||
h->resv_huge_pages--;
|
||||
unused_resv_pages--;
|
||||
if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
|
||||
break;
|
||||
goto out;
|
||||
cond_resched_lock(&hugetlb_lock);
|
||||
}
|
||||
|
||||
out:
|
||||
/* Fully uncommit the reservation */
|
||||
h->resv_huge_pages -= unused_resv_pages;
|
||||
}
|
||||
|
||||
|
||||
|
@ -943,7 +943,7 @@ static void collapse_huge_page(struct mm_struct *mm,
|
||||
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
|
||||
|
||||
/* Only allocate from the target node */
|
||||
gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_OTHER_NODE | __GFP_THISNODE;
|
||||
gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
|
||||
|
||||
/*
|
||||
* Before allocating the hugepage, release the mmap_sem read lock.
|
||||
@ -1242,7 +1242,6 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long addr;
|
||||
pmd_t *pmd, _pmd;
|
||||
bool deposited = false;
|
||||
|
||||
i_mmap_lock_write(mapping);
|
||||
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
|
||||
@ -1267,26 +1266,10 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
|
||||
spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd);
|
||||
/* assume page table is clear */
|
||||
_pmd = pmdp_collapse_flush(vma, addr, pmd);
|
||||
/*
|
||||
* now deposit the pgtable for arch that need it
|
||||
* otherwise free it.
|
||||
*/
|
||||
if (arch_needs_pgtable_deposit()) {
|
||||
/*
|
||||
* The deposit should be visibile only after
|
||||
* collapse is seen by others.
|
||||
*/
|
||||
smp_wmb();
|
||||
pgtable_trans_huge_deposit(vma->vm_mm, pmd,
|
||||
pmd_pgtable(_pmd));
|
||||
deposited = true;
|
||||
}
|
||||
spin_unlock(ptl);
|
||||
up_write(&vma->vm_mm->mmap_sem);
|
||||
if (!deposited) {
|
||||
atomic_long_dec(&vma->vm_mm->nr_ptes);
|
||||
pte_free(vma->vm_mm, pmd_pgtable(_pmd));
|
||||
}
|
||||
atomic_long_dec(&vma->vm_mm->nr_ptes);
|
||||
pte_free(vma->vm_mm, pmd_pgtable(_pmd));
|
||||
}
|
||||
}
|
||||
i_mmap_unlock_write(mapping);
|
||||
@ -1326,8 +1309,7 @@ static void collapse_shmem(struct mm_struct *mm,
|
||||
VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
|
||||
|
||||
/* Only allocate from the target node */
|
||||
gfp = alloc_hugepage_khugepaged_gfpmask() |
|
||||
__GFP_OTHER_NODE | __GFP_THISNODE;
|
||||
gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
|
||||
|
||||
new_page = khugepaged_alloc_page(hpage, gfp, node);
|
||||
if (!new_page) {
|
||||
|
@ -625,8 +625,8 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
|
||||
unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
||||
int nid, unsigned int lru_mask)
|
||||
{
|
||||
struct lruvec *lruvec = mem_cgroup_lruvec(NODE_DATA(nid), memcg);
|
||||
unsigned long nr = 0;
|
||||
struct mem_cgroup_per_node *mz;
|
||||
enum lru_list lru;
|
||||
|
||||
VM_BUG_ON((unsigned)nid >= nr_node_ids);
|
||||
@ -634,8 +634,7 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
|
||||
for_each_lru(lru) {
|
||||
if (!(BIT(lru) & lru_mask))
|
||||
continue;
|
||||
mz = mem_cgroup_nodeinfo(memcg, nid);
|
||||
nr += mz->lru_size[lru];
|
||||
nr += mem_cgroup_get_lru_size(lruvec, lru);
|
||||
}
|
||||
return nr;
|
||||
}
|
||||
@ -1002,6 +1001,7 @@ out:
|
||||
* mem_cgroup_update_lru_size - account for adding or removing an lru page
|
||||
* @lruvec: mem_cgroup per zone lru vector
|
||||
* @lru: index of lru list the page is sitting on
|
||||
* @zid: zone id of the accounted pages
|
||||
* @nr_pages: positive when adding or negative when removing
|
||||
*
|
||||
* This function must be called under lru_lock, just before a page is added
|
||||
@ -1009,27 +1009,25 @@ out:
|
||||
* so as to allow it to check that lru_size 0 is consistent with list_empty).
|
||||
*/
|
||||
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
|
||||
int nr_pages)
|
||||
int zid, int nr_pages)
|
||||
{
|
||||
struct mem_cgroup_per_node *mz;
|
||||
unsigned long *lru_size;
|
||||
long size;
|
||||
bool empty;
|
||||
|
||||
if (mem_cgroup_disabled())
|
||||
return;
|
||||
|
||||
mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
|
||||
lru_size = mz->lru_size + lru;
|
||||
empty = list_empty(lruvec->lists + lru);
|
||||
lru_size = &mz->lru_zone_size[zid][lru];
|
||||
|
||||
if (nr_pages < 0)
|
||||
*lru_size += nr_pages;
|
||||
|
||||
size = *lru_size;
|
||||
if (WARN_ONCE(size < 0 || empty != !size,
|
||||
"%s(%p, %d, %d): lru_size %ld but %sempty\n",
|
||||
__func__, lruvec, lru, nr_pages, size, empty ? "" : "not ")) {
|
||||
if (WARN_ONCE(size < 0,
|
||||
"%s(%p, %d, %d): lru_size %ld\n",
|
||||
__func__, lruvec, lru, nr_pages, size)) {
|
||||
VM_BUG_ON(1);
|
||||
*lru_size = 0;
|
||||
}
|
||||
|
41
mm/memory.c
41
mm/memory.c
@ -3772,8 +3772,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
|
||||
}
|
||||
#endif /* __PAGETABLE_PMD_FOLDED */
|
||||
|
||||
static int __follow_pte(struct mm_struct *mm, unsigned long address,
|
||||
pte_t **ptepp, spinlock_t **ptlp)
|
||||
static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
|
||||
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
@ -3790,11 +3790,20 @@ static int __follow_pte(struct mm_struct *mm, unsigned long address,
|
||||
|
||||
pmd = pmd_offset(pud, address);
|
||||
VM_BUG_ON(pmd_trans_huge(*pmd));
|
||||
if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
|
||||
goto out;
|
||||
|
||||
/* We cannot handle huge page PFN maps. Luckily they don't exist. */
|
||||
if (pmd_huge(*pmd))
|
||||
if (pmd_huge(*pmd)) {
|
||||
if (!pmdpp)
|
||||
goto out;
|
||||
|
||||
*ptlp = pmd_lock(mm, pmd);
|
||||
if (pmd_huge(*pmd)) {
|
||||
*pmdpp = pmd;
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(*ptlp);
|
||||
}
|
||||
|
||||
if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
|
||||
goto out;
|
||||
|
||||
ptep = pte_offset_map_lock(mm, pmd, address, ptlp);
|
||||
@ -3810,17 +3819,31 @@ out:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
int follow_pte(struct mm_struct *mm, unsigned long address, pte_t **ptepp,
|
||||
spinlock_t **ptlp)
|
||||
static inline int follow_pte(struct mm_struct *mm, unsigned long address,
|
||||
pte_t **ptepp, spinlock_t **ptlp)
|
||||
{
|
||||
int res;
|
||||
|
||||
/* (void) is needed to make gcc happy */
|
||||
(void) __cond_lock(*ptlp,
|
||||
!(res = __follow_pte(mm, address, ptepp, ptlp)));
|
||||
!(res = __follow_pte_pmd(mm, address, ptepp, NULL,
|
||||
ptlp)));
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
 * follow_pte_pmd - exported wrapper around __follow_pte_pmd()
 *
 * Forwards @mm, @address, @ptepp, @pmdpp and @ptlp unchanged to
 * __follow_pte_pmd() and returns that helper's result as-is.
 * Unlike follow_pte(), which passes NULL for the pmd out-parameter,
 * this variant hands the caller's @pmdpp through to the helper.
 *
 * NOTE(review): the __cond_lock() wrapper looks like a static-analysis
 * annotation saying *ptlp is held when the call succeeds (res == 0);
 * confirm against the macro's definition.
 */
int follow_pte_pmd(struct mm_struct *mm, unsigned long address,
|
||||
pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp)
|
||||
{
|
||||
int res;
|
||||
|
||||
/* (void) is needed to make gcc happy */
|
||||
(void) __cond_lock(*ptlp,
|
||||
!(res = __follow_pte_pmd(mm, address, ptepp, pmdpp,
|
||||
ptlp)));
|
||||
return res;
|
||||
}
|
||||
EXPORT_SYMBOL(follow_pte_pmd);
|
||||
|
||||
/**
|
||||
* follow_pfn - look up PFN at a user virtual address
|
||||
* @vma: memory mapping
|
||||
|
@ -1864,14 +1864,14 @@ int move_freepages(struct zone *zone,
|
||||
#endif
|
||||
|
||||
for (page = start_page; page <= end_page;) {
|
||||
/* Make sure we are not inadvertently changing nodes */
|
||||
VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
|
||||
|
||||
if (!pfn_valid_within(page_to_pfn(page))) {
|
||||
page++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Make sure we are not inadvertently changing nodes */
|
||||
VM_BUG_ON_PAGE(page_to_nid(page) != zone_to_nid(zone), page);
|
||||
|
||||
if (!PageBuddy(page)) {
|
||||
page++;
|
||||
continue;
|
||||
@ -2583,30 +2583,22 @@ int __isolate_free_page(struct page *page, unsigned int order)
|
||||
* Update NUMA hit/miss statistics
|
||||
*
|
||||
* Must be called with interrupts disabled.
|
||||
*
|
||||
* When __GFP_OTHER_NODE is set assume the node of the preferred
|
||||
* zone is the local node. This is useful for daemons who allocate
|
||||
* memory on behalf of other processes.
|
||||
*/
|
||||
static inline void zone_statistics(struct zone *preferred_zone, struct zone *z,
|
||||
gfp_t flags)
|
||||
static inline void zone_statistics(struct zone *preferred_zone, struct zone *z)
|
||||
{
|
||||
#ifdef CONFIG_NUMA
|
||||
int local_nid = numa_node_id();
|
||||
enum zone_stat_item local_stat = NUMA_LOCAL;
|
||||
|
||||
if (unlikely(flags & __GFP_OTHER_NODE)) {
|
||||
if (z->node != numa_node_id())
|
||||
local_stat = NUMA_OTHER;
|
||||
local_nid = preferred_zone->node;
|
||||
}
|
||||
|
||||
if (z->node == local_nid) {
|
||||
if (z->node == preferred_zone->node)
|
||||
__inc_zone_state(z, NUMA_HIT);
|
||||
__inc_zone_state(z, local_stat);
|
||||
} else {
|
||||
else {
|
||||
__inc_zone_state(z, NUMA_MISS);
|
||||
__inc_zone_state(preferred_zone, NUMA_FOREIGN);
|
||||
}
|
||||
__inc_zone_state(z, local_stat);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -2674,7 +2666,7 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
|
||||
}
|
||||
|
||||
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
|
||||
zone_statistics(preferred_zone, zone, gfp_flags);
|
||||
zone_statistics(preferred_zone, zone);
|
||||
local_irq_restore(flags);
|
||||
|
||||
VM_BUG_ON_PAGE(bad_range(zone, page), page);
|
||||
@ -3904,8 +3896,8 @@ EXPORT_SYMBOL(free_pages);
|
||||
* drivers to provide a backing region of memory for use as either an
|
||||
* sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
|
||||
*/
|
||||
static struct page *__page_frag_refill(struct page_frag_cache *nc,
|
||||
gfp_t gfp_mask)
|
||||
static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
struct page *page = NULL;
|
||||
gfp_t gfp = gfp_mask;
|
||||
@ -3925,22 +3917,23 @@ static struct page *__page_frag_refill(struct page_frag_cache *nc,
|
||||
return page;
|
||||
}
|
||||
|
||||
void __page_frag_drain(struct page *page, unsigned int order,
|
||||
unsigned int count)
|
||||
void __page_frag_cache_drain(struct page *page, unsigned int count)
|
||||
{
|
||||
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
|
||||
|
||||
if (page_ref_sub_and_test(page, count)) {
|
||||
unsigned int order = compound_order(page);
|
||||
|
||||
if (order == 0)
|
||||
free_hot_cold_page(page, false);
|
||||
else
|
||||
__free_pages_ok(page, order);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(__page_frag_drain);
|
||||
EXPORT_SYMBOL(__page_frag_cache_drain);
|
||||
|
||||
void *__alloc_page_frag(struct page_frag_cache *nc,
|
||||
unsigned int fragsz, gfp_t gfp_mask)
|
||||
void *page_frag_alloc(struct page_frag_cache *nc,
|
||||
unsigned int fragsz, gfp_t gfp_mask)
|
||||
{
|
||||
unsigned int size = PAGE_SIZE;
|
||||
struct page *page;
|
||||
@ -3948,7 +3941,7 @@ void *__alloc_page_frag(struct page_frag_cache *nc,
|
||||
|
||||
if (unlikely(!nc->va)) {
|
||||
refill:
|
||||
page = __page_frag_refill(nc, gfp_mask);
|
||||
page = __page_frag_cache_refill(nc, gfp_mask);
|
||||
if (!page)
|
||||
return NULL;
|
||||
|
||||
@ -3991,19 +3984,19 @@ refill:
|
||||
|
||||
return nc->va + offset;
|
||||
}
|
||||
EXPORT_SYMBOL(__alloc_page_frag);
|
||||
EXPORT_SYMBOL(page_frag_alloc);
|
||||
|
||||
/*
|
||||
* Frees a page fragment allocated out of either a compound or order 0 page.
|
||||
*/
|
||||
void __free_page_frag(void *addr)
|
||||
void page_frag_free(void *addr)
|
||||
{
|
||||
struct page *page = virt_to_head_page(addr);
|
||||
|
||||
if (unlikely(put_page_testzero(page)))
|
||||
__free_pages_ok(page, compound_order(page));
|
||||
}
|
||||
EXPORT_SYMBOL(__free_page_frag);
|
||||
EXPORT_SYMBOL(page_frag_free);
|
||||
|
||||
static void *make_alloc_exact(unsigned long addr, unsigned int order,
|
||||
size_t size)
|
||||
|
@ -2457,7 +2457,6 @@ union freelist_init_state {
|
||||
unsigned int pos;
|
||||
unsigned int *list;
|
||||
unsigned int count;
|
||||
unsigned int rand;
|
||||
};
|
||||
struct rnd_state rnd_state;
|
||||
};
|
||||
@ -2483,8 +2482,7 @@ static bool freelist_state_initialize(union freelist_init_state *state,
|
||||
} else {
|
||||
state->list = cachep->random_seq;
|
||||
state->count = count;
|
||||
state->pos = 0;
|
||||
state->rand = rand;
|
||||
state->pos = rand % count;
|
||||
ret = true;
|
||||
}
|
||||
return ret;
|
||||
@ -2493,7 +2491,9 @@ static bool freelist_state_initialize(union freelist_init_state *state,
|
||||
/* Get the next entry on the list and randomize it using a random shift */
|
||||
static freelist_idx_t next_random_slot(union freelist_init_state *state)
|
||||
{
|
||||
return (state->list[state->pos++] + state->rand) % state->count;
|
||||
if (state->pos >= state->count)
|
||||
state->pos = 0;
|
||||
return state->list[state->pos++];
|
||||
}
|
||||
|
||||
/* Swap two freelist entries */
|
||||
|
@ -943,11 +943,25 @@ bool reuse_swap_page(struct page *page, int *total_mapcount)
|
||||
count = page_trans_huge_mapcount(page, total_mapcount);
|
||||
if (count <= 1 && PageSwapCache(page)) {
|
||||
count += page_swapcount(page);
|
||||
if (count == 1 && !PageWriteback(page)) {
|
||||
if (count != 1)
|
||||
goto out;
|
||||
if (!PageWriteback(page)) {
|
||||
delete_from_swap_cache(page);
|
||||
SetPageDirty(page);
|
||||
} else {
|
||||
swp_entry_t entry;
|
||||
struct swap_info_struct *p;
|
||||
|
||||
entry.val = page_private(page);
|
||||
p = swap_info_get(entry);
|
||||
if (p->flags & SWP_STABLE_WRITES) {
|
||||
spin_unlock(&p->lock);
|
||||
return false;
|
||||
}
|
||||
spin_unlock(&p->lock);
|
||||
}
|
||||
}
|
||||
out:
|
||||
return count <= 1;
|
||||
}
|
||||
|
||||
@ -2448,6 +2462,10 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
|
||||
error = -ENOMEM;
|
||||
goto bad_swap;
|
||||
}
|
||||
|
||||
if (bdi_cap_stable_pages_required(inode_to_bdi(inode)))
|
||||
p->flags |= SWP_STABLE_WRITES;
|
||||
|
||||
if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
|
||||
int cpu;
|
||||
|
||||
|
27
mm/vmscan.c
27
mm/vmscan.c
@ -242,6 +242,16 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru)
|
||||
return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
|
||||
}
|
||||
|
||||
/*
 * lruvec_zone_lru_size - size of one LRU list restricted to a single zone
 * @lruvec: lru vector being queried
 * @lru: which LRU list to report
 * @zone_idx: index into the node's node_zones[] array
 *
 * When mem_cgroup_disabled() is false, the value comes from
 * mem_cgroup_get_zone_lru_size(). Otherwise it is read from the
 * zone_page_state() counter NR_ZONE_LRU_BASE + @lru of zone @zone_idx
 * in the lruvec's node.
 */
unsigned long lruvec_zone_lru_size(struct lruvec *lruvec, enum lru_list lru,
|
||||
int zone_idx)
|
||||
{
|
||||
if (!mem_cgroup_disabled())
|
||||
return mem_cgroup_get_zone_lru_size(lruvec, lru, zone_idx);
|
||||
|
||||
return zone_page_state(&lruvec_pgdat(lruvec)->node_zones[zone_idx],
|
||||
NR_ZONE_LRU_BASE + lru);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a shrinker callback to be called from the vm.
|
||||
*/
|
||||
@ -1382,8 +1392,7 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode)
|
||||
* be complete before mem_cgroup_update_lru_size due to a sanity check.
|
||||
*/
|
||||
static __always_inline void update_lru_sizes(struct lruvec *lruvec,
|
||||
enum lru_list lru, unsigned long *nr_zone_taken,
|
||||
unsigned long nr_taken)
|
||||
enum lru_list lru, unsigned long *nr_zone_taken)
|
||||
{
|
||||
int zid;
|
||||
|
||||
@ -1392,11 +1401,11 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
|
||||
continue;
|
||||
|
||||
__update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]);
|
||||
#ifdef CONFIG_MEMCG
|
||||
mem_cgroup_update_lru_size(lruvec, lru, zid, -nr_zone_taken[zid]);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMCG
|
||||
mem_cgroup_update_lru_size(lruvec, lru, -nr_taken);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1501,7 +1510,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
|
||||
*nr_scanned = scan;
|
||||
trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan,
|
||||
nr_taken, mode, is_file_lru(lru));
|
||||
update_lru_sizes(lruvec, lru, nr_zone_taken, nr_taken);
|
||||
update_lru_sizes(lruvec, lru, nr_zone_taken);
|
||||
return nr_taken;
|
||||
}
|
||||
|
||||
@ -2047,10 +2056,8 @@ static bool inactive_list_is_low(struct lruvec *lruvec, bool file,
|
||||
if (!managed_zone(zone))
|
||||
continue;
|
||||
|
||||
inactive_zone = zone_page_state(zone,
|
||||
NR_ZONE_LRU_BASE + (file * LRU_FILE));
|
||||
active_zone = zone_page_state(zone,
|
||||
NR_ZONE_LRU_BASE + (file * LRU_FILE) + LRU_ACTIVE);
|
||||
inactive_zone = lruvec_zone_lru_size(lruvec, file * LRU_FILE, zid);
|
||||
active_zone = lruvec_zone_lru_size(lruvec, (file * LRU_FILE) + LRU_ACTIVE, zid);
|
||||
|
||||
inactive -= min(inactive, inactive_zone);
|
||||
active -= min(active, active_zone);
|
||||
|
@ -369,7 +369,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
|
||||
|
||||
local_irq_save(flags);
|
||||
nc = this_cpu_ptr(&netdev_alloc_cache);
|
||||
data = __alloc_page_frag(nc, fragsz, gfp_mask);
|
||||
data = page_frag_alloc(nc, fragsz, gfp_mask);
|
||||
local_irq_restore(flags);
|
||||
return data;
|
||||
}
|
||||
@ -391,7 +391,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask)
|
||||
{
|
||||
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
|
||||
|
||||
return __alloc_page_frag(&nc->page, fragsz, gfp_mask);
|
||||
return page_frag_alloc(&nc->page, fragsz, gfp_mask);
|
||||
}
|
||||
|
||||
void *napi_alloc_frag(unsigned int fragsz)
|
||||
@ -441,7 +441,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
|
||||
local_irq_save(flags);
|
||||
|
||||
nc = this_cpu_ptr(&netdev_alloc_cache);
|
||||
data = __alloc_page_frag(nc, len, gfp_mask);
|
||||
data = page_frag_alloc(nc, len, gfp_mask);
|
||||
pfmemalloc = nc->pfmemalloc;
|
||||
|
||||
local_irq_restore(flags);
|
||||
@ -505,7 +505,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
|
||||
if (sk_memalloc_socks())
|
||||
gfp_mask |= __GFP_MEMALLOC;
|
||||
|
||||
data = __alloc_page_frag(&nc->page, len, gfp_mask);
|
||||
data = page_frag_alloc(&nc->page, len, gfp_mask);
|
||||
if (unlikely(!data))
|
||||
return NULL;
|
||||
|
||||
|
@ -655,7 +655,6 @@ static const struct {
|
||||
{ "__GFP_RECLAIM", "R" },
|
||||
{ "__GFP_DIRECT_RECLAIM", "DR" },
|
||||
{ "__GFP_KSWAPD_RECLAIM", "KR" },
|
||||
{ "__GFP_OTHER_NODE", "ON" },
|
||||
};
|
||||
|
||||
static size_t max_gfp_len;
|
||||
|
Loading…
x
Reference in New Issue
Block a user