mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-10 15:58:47 +00:00
825a46af5a
This patch provides the implementation and cpuset interface for an alternative memory allocation policy that can be applied to certain kinds of memory allocations, such as the page cache (file system buffers) and some slab caches (such as inode caches). The policy is called "memory spreading." If enabled, it spreads out these kinds of memory allocations over all the nodes allowed to a task, instead of preferring to place them on the node where the task is executing. All other kinds of allocations, including anonymous pages for a task's stack and data regions, are not affected by this policy choice, and continue to be allocated preferring the node local to execution, as modified by the NUMA mempolicy. There are two boolean flag files per cpuset that control where the kernel allocates pages for the file system buffers and related in-kernel data structures. They are called 'memory_spread_page' and 'memory_spread_slab'. If the per-cpuset boolean flag file 'memory_spread_page' is set, then the kernel will spread the file system buffers (page cache) evenly over all the nodes that the faulting task is allowed to use, instead of preferring to put those pages on the node where the task is running. If the per-cpuset boolean flag file 'memory_spread_slab' is set, then the kernel will spread some file system related slab caches, such as for inodes and dentries, evenly over all the nodes that the faulting task is allowed to use, instead of preferring to put those pages on the node where the task is running. The implementation is simple. Setting the cpuset flags 'memory_spread_page' or 'memory_spread_slab' turns on the per-process flags PF_SPREAD_PAGE or PF_SPREAD_SLAB, respectively, for each task that is in the cpuset or subsequently joins that cpuset. 
In subsequent patches, the page allocation calls for the affected page cache and slab caches are modified to perform an inline check for these flags, and if set, a call to a new routine cpuset_mem_spread_node() returns the node to prefer for the allocation. The cpuset_mem_spread_node() routine is also simple. It uses the value of a per-task rotor cpuset_mem_spread_rotor to select the next node in the current task's mems_allowed to prefer for the allocation. This policy can provide substantial improvements for jobs that need to place thread-local data on the corresponding node, but that need to access large file system data sets that need to be spread across the several nodes in the job's cpuset in order to fit. Without this patch, especially for jobs that might have one thread reading in the data set, the memory allocation across the nodes in the job's cpuset can become very uneven. A couple of Copyright year ranges are updated as well. And a couple of email addresses that can be found in the MAINTAINERS file are removed. Signed-off-by: Paul Jackson <pj@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
132 lines
3.2 KiB
C
132 lines
3.2 KiB
C
#ifndef _LINUX_CPUSET_H
|
|
#define _LINUX_CPUSET_H
|
|
/*
|
|
* cpuset interface
|
|
*
|
|
* Copyright (C) 2003 BULL SA
|
|
* Copyright (C) 2004-2006 Silicon Graphics, Inc.
|
|
*
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/nodemask.h>
|
|
|
|
#ifdef CONFIG_CPUSETS
|
|
|
|
extern int number_of_cpusets; /* How many cpusets are defined in system? */
|
|
|
|
extern int cpuset_init_early(void);
|
|
extern int cpuset_init(void);
|
|
extern void cpuset_init_smp(void);
|
|
extern void cpuset_fork(struct task_struct *p);
|
|
extern void cpuset_exit(struct task_struct *p);
|
|
extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
|
|
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
|
|
void cpuset_init_current_mems_allowed(void);
|
|
void cpuset_update_task_memory_state(void);
|
|
#define cpuset_nodes_subset_current_mems_allowed(nodes) \
|
|
nodes_subset((nodes), current->mems_allowed)
|
|
int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl);
|
|
|
|
/* Out-of-line check; defined outside this header. */
extern int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask);

/*
 * cpuset_zone_allowed - inline front end for __cpuset_zone_allowed().
 * @z:        zone passed through to __cpuset_zone_allowed()
 * @gfp_mask: allocation flags passed through to __cpuset_zone_allowed()
 *
 * Returns 1 without making the out-of-line call when number_of_cpusets
 * is at most 1.  Otherwise returns 1 if __cpuset_zone_allowed() returns
 * non-zero and 0 if it returns zero.
 */
static inline int cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
{
	return number_of_cpusets <= 1 || __cpuset_zone_allowed(z, gfp_mask);
}
|
|
|
|
/*
 * Takes a task pointer and returns an int.  The !CONFIG_CPUSETS stub
 * further down in this header always returns 1.
 * NOTE(review): the precise overlap rule is defined by the out-of-line
 * implementation, which is not visible from this header - confirm there.
 */
int cpuset_excl_nodes_overlap(const struct task_struct *p);
|
|
|
|
extern int cpuset_memory_pressure_enabled;
extern void __cpuset_memory_pressure_bump(void);

/*
 * cpuset_memory_pressure_bump - conditionally call the out-of-line bump.
 *
 * Calls __cpuset_memory_pressure_bump() only when
 * cpuset_memory_pressure_enabled is non-zero, so the common disabled case
 * costs a single test of that flag.
 *
 * Written as a static inline rather than a function-like macro: the two
 * symbols it uses are declared before use, the call is type-checked, and
 * the form matches the !CONFIG_CPUSETS variant of this function, which is
 * also a static inline.  Call sites are unchanged.
 */
static inline void cpuset_memory_pressure_bump(void)
{
	if (cpuset_memory_pressure_enabled)
		__cpuset_memory_pressure_bump();
}
|
|
|
|
/*
 * File operations table for cpuset.
 * NOTE(review): the name suggests it backs a /proc entry - confirm at
 * the definition.
 */
extern struct file_operations proc_cpuset_operations;

/*
 * Takes a task and a character buffer and returns a char pointer.  The
 * !CONFIG_CPUSETS stub in this header returns @buffer unchanged.
 */
char *cpuset_task_status_allowed(struct task_struct *task, char *buffer);

/* Paired lock/unlock entry points; both are no-ops without CONFIG_CPUSETS. */
void cpuset_lock(void);
void cpuset_unlock(void);

/*
 * Returns the node to prefer for a memory-spread allocation.  The
 * !CONFIG_CPUSETS stub in this header returns 0.
 */
int cpuset_mem_spread_node(void);
|
|
|
|
static inline int cpuset_do_page_mem_spread(void)
|
|
{
|
|
return current->flags & PF_SPREAD_PAGE;
|
|
}
|
|
|
|
static inline int cpuset_do_slab_mem_spread(void)
|
|
{
|
|
return current->flags & PF_SPREAD_SLAB;
|
|
}
|
|
|
|
#else /* !CONFIG_CPUSETS */
|
|
|
|
/*
 * !CONFIG_CPUSETS: the initialization routines report 0 and the
 * fork/exit hooks do nothing.
 */
static inline int cpuset_init_early(void)
{
	return 0;
}

static inline int cpuset_init(void)
{
	return 0;
}

static inline void cpuset_init_smp(void)
{
}

static inline void cpuset_fork(struct task_struct *p)
{
}

static inline void cpuset_exit(struct task_struct *p)
{
}
|
|
|
|
static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
|
|
{
|
|
return cpu_possible_map;
|
|
}
|
|
|
|
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
|
|
{
|
|
return node_possible_map;
|
|
}
|
|
|
|
/* !CONFIG_CPUSETS: no per-task memory state to set up or refresh. */
static inline void cpuset_init_current_mems_allowed(void)
{
}

static inline void cpuset_update_task_memory_state(void)
{
}

/* !CONFIG_CPUSETS: always 1; the @nodes argument is not evaluated. */
#define cpuset_nodes_subset_current_mems_allowed(nodes) (1)
|
|
|
|
/* !CONFIG_CPUSETS: @zl is ignored; always returns 1. */
static inline int
cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) { return 1; }
|
|
|
|
/* !CONFIG_CPUSETS: @z and @gfp_mask are ignored; always returns 1. */
static inline int
cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) { return 1; }
|
|
|
|
/* !CONFIG_CPUSETS: @p is ignored; always returns 1. */
static inline int
cpuset_excl_nodes_overlap(const struct task_struct *p) { return 1; }
|
|
|
|
/* !CONFIG_CPUSETS: does nothing. */
static inline void cpuset_memory_pressure_bump(void)
{
}
|
|
|
|
/*
 * !CONFIG_CPUSETS: writes nothing into @buffer and returns it unchanged;
 * @task is ignored.
 */
static inline char *
cpuset_task_status_allowed(struct task_struct *task, char *buffer)
{
	char *unchanged = buffer;

	return unchanged;
}
|
|
|
|
/* !CONFIG_CPUSETS: both lock and unlock do nothing. */
static inline void cpuset_lock(void)
{
}

static inline void cpuset_unlock(void)
{
}
|
|
|
|
/* !CONFIG_CPUSETS: always returns 0. */
static inline int cpuset_mem_spread_node(void) { return 0; }
|
|
|
|
/* !CONFIG_CPUSETS: always returns 0. */
static inline int cpuset_do_page_mem_spread(void) { return 0; }
|
|
|
|
/* !CONFIG_CPUSETS: always returns 0. */
static inline int cpuset_do_slab_mem_spread(void) { return 0; }
|
|
|
|
#endif /* !CONFIG_CPUSETS */
|
|
|
|
#endif /* _LINUX_CPUSET_H */
|