mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-10 07:50:04 +00:00
[PATCH] NUMA slab locking fixes: fix cpu down and up locking
This fixes locking and bugs in cpu_down and cpu_up paths of the NUMA slab allocator. Sonny Rao <sonny@burdell.org> reported problems sometime back on POWER5 boxes, when the last cpu on the nodes were being offlined. We could not reproduce the same on x86_64 because the cpumask (node_to_cpumask) was not being updated on cpu down. Since that issue is now fixed, we can reproduce Sonny's problems on x86_64 NUMA, and here is the fix. The problem earlier was on CPU_DOWN, if it was the last cpu on the node to go down, the array_caches (shared, alien) and the kmem_list3 of the node were being freed (kfree) with the kmem_list3 lock held. If the l3 or the array_caches were to come from the same cache being cleared, we hit on badness. This patch cleans up the locking in cpu_up and cpu_down path. We cannot really free l3 on cpu down because, there is no node offlining yet and even though a cpu is not yet up, node local memory can be allocated for it. So l3s are usually allocated at keme_cache_create and destroyed at kmem_cache_destroy. Hence, we don't need cachep->spinlock protection to get to the cachep->nodelist[nodeid] either. Patch survived onlining and offlining on a 4 core 2 node Tyan box with a 4 dbench process running all the time. Signed-off-by: Alok N Kataria <alokk@calsoftinc.com> Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org> Cc: Christoph Lameter <christoph@lameter.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
ca3b9b9173
commit
4484ebf12b
123
mm/slab.c
123
mm/slab.c
@ -884,14 +884,14 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
|
||||
}
|
||||
}
|
||||
|
||||
static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
|
||||
static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
|
||||
{
|
||||
int i = 0;
|
||||
struct array_cache *ac;
|
||||
unsigned long flags;
|
||||
|
||||
for_each_online_node(i) {
|
||||
ac = l3->alien[i];
|
||||
ac = alien[i];
|
||||
if (ac) {
|
||||
spin_lock_irqsave(&ac->lock, flags);
|
||||
__drain_alien_cache(cachep, ac, i);
|
||||
@ -901,8 +901,11 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
|
||||
}
|
||||
#else
|
||||
#define alloc_alien_cache(node, limit) do { } while (0)
|
||||
#define free_alien_cache(ac_ptr) do { } while (0)
|
||||
#define drain_alien_cache(cachep, l3) do { } while (0)
|
||||
#define drain_alien_cache(cachep, alien) do { } while (0)
|
||||
|
||||
static inline void free_alien_cache(struct array_cache **ac_ptr)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static int __devinit cpuup_callback(struct notifier_block *nfb,
|
||||
@ -936,6 +939,11 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
|
||||
l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
|
||||
((unsigned long)cachep) % REAPTIMEOUT_LIST3;
|
||||
|
||||
/*
|
||||
* The l3s don't come and go as CPUs come and
|
||||
* go. cache_chain_mutex is sufficient
|
||||
* protection here.
|
||||
*/
|
||||
cachep->nodelists[node] = l3;
|
||||
}
|
||||
|
||||
@ -950,26 +958,47 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
|
||||
& array cache's */
|
||||
list_for_each_entry(cachep, &cache_chain, next) {
|
||||
struct array_cache *nc;
|
||||
struct array_cache *shared;
|
||||
struct array_cache **alien;
|
||||
|
||||
nc = alloc_arraycache(node, cachep->limit,
|
||||
cachep->batchcount);
|
||||
cachep->batchcount);
|
||||
if (!nc)
|
||||
goto bad;
|
||||
shared = alloc_arraycache(node,
|
||||
cachep->shared * cachep->batchcount,
|
||||
0xbaadf00d);
|
||||
if (!shared)
|
||||
goto bad;
|
||||
#ifdef CONFIG_NUMA
|
||||
alien = alloc_alien_cache(node, cachep->limit);
|
||||
if (!alien)
|
||||
goto bad;
|
||||
#endif
|
||||
cachep->array[cpu] = nc;
|
||||
|
||||
l3 = cachep->nodelists[node];
|
||||
BUG_ON(!l3);
|
||||
if (!l3->shared) {
|
||||
if (!(nc = alloc_arraycache(node,
|
||||
cachep->shared *
|
||||
cachep->batchcount,
|
||||
0xbaadf00d)))
|
||||
goto bad;
|
||||
|
||||
/* we are serialised from CPU_DEAD or
|
||||
CPU_UP_CANCELLED by the cpucontrol lock */
|
||||
l3->shared = nc;
|
||||
spin_lock_irq(&l3->list_lock);
|
||||
if (!l3->shared) {
|
||||
/*
|
||||
* We are serialised from CPU_DEAD or
|
||||
* CPU_UP_CANCELLED by the cpucontrol lock
|
||||
*/
|
||||
l3->shared = shared;
|
||||
shared = NULL;
|
||||
}
|
||||
#ifdef CONFIG_NUMA
|
||||
if (!l3->alien) {
|
||||
l3->alien = alien;
|
||||
alien = NULL;
|
||||
}
|
||||
#endif
|
||||
spin_unlock_irq(&l3->list_lock);
|
||||
|
||||
kfree(shared);
|
||||
free_alien_cache(alien);
|
||||
}
|
||||
mutex_unlock(&cache_chain_mutex);
|
||||
break;
|
||||
@ -978,23 +1007,32 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
|
||||
break;
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
case CPU_DEAD:
|
||||
/*
|
||||
* Even if all the cpus of a node are down, we don't free the
|
||||
* kmem_list3 of any cache. This to avoid a race between
|
||||
* cpu_down, and a kmalloc allocation from another cpu for
|
||||
* memory from the node of the cpu going down. The list3
|
||||
* structure is usually allocated from kmem_cache_create() and
|
||||
* gets destroyed at kmem_cache_destroy().
|
||||
*/
|
||||
/* fall thru */
|
||||
case CPU_UP_CANCELED:
|
||||
mutex_lock(&cache_chain_mutex);
|
||||
|
||||
list_for_each_entry(cachep, &cache_chain, next) {
|
||||
struct array_cache *nc;
|
||||
struct array_cache *shared;
|
||||
struct array_cache **alien;
|
||||
cpumask_t mask;
|
||||
|
||||
mask = node_to_cpumask(node);
|
||||
spin_lock(&cachep->spinlock);
|
||||
/* cpu is dead; no one can alloc from it. */
|
||||
nc = cachep->array[cpu];
|
||||
cachep->array[cpu] = NULL;
|
||||
l3 = cachep->nodelists[node];
|
||||
|
||||
if (!l3)
|
||||
goto unlock_cache;
|
||||
goto free_array_cache;
|
||||
|
||||
spin_lock_irq(&l3->list_lock);
|
||||
|
||||
@ -1005,33 +1043,43 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
|
||||
|
||||
if (!cpus_empty(mask)) {
|
||||
spin_unlock_irq(&l3->list_lock);
|
||||
goto unlock_cache;
|
||||
goto free_array_cache;
|
||||
}
|
||||
|
||||
if (l3->shared) {
|
||||
shared = l3->shared;
|
||||
if (shared) {
|
||||
free_block(cachep, l3->shared->entry,
|
||||
l3->shared->avail, node);
|
||||
kfree(l3->shared);
|
||||
l3->shared = NULL;
|
||||
}
|
||||
if (l3->alien) {
|
||||
drain_alien_cache(cachep, l3);
|
||||
free_alien_cache(l3->alien);
|
||||
l3->alien = NULL;
|
||||
}
|
||||
|
||||
/* free slabs belonging to this node */
|
||||
if (__node_shrink(cachep, node)) {
|
||||
cachep->nodelists[node] = NULL;
|
||||
spin_unlock_irq(&l3->list_lock);
|
||||
kfree(l3);
|
||||
} else {
|
||||
spin_unlock_irq(&l3->list_lock);
|
||||
alien = l3->alien;
|
||||
l3->alien = NULL;
|
||||
|
||||
spin_unlock_irq(&l3->list_lock);
|
||||
|
||||
kfree(shared);
|
||||
if (alien) {
|
||||
drain_alien_cache(cachep, alien);
|
||||
free_alien_cache(alien);
|
||||
}
|
||||
unlock_cache:
|
||||
spin_unlock(&cachep->spinlock);
|
||||
free_array_cache:
|
||||
kfree(nc);
|
||||
}
|
||||
/*
|
||||
* In the previous loop, all the objects were freed to
|
||||
* the respective cache's slabs, now we can go ahead and
|
||||
* shrink each nodelist to its limit.
|
||||
*/
|
||||
list_for_each_entry(cachep, &cache_chain, next) {
|
||||
l3 = cachep->nodelists[node];
|
||||
if (!l3)
|
||||
continue;
|
||||
spin_lock_irq(&l3->list_lock);
|
||||
/* free slabs belonging to this node */
|
||||
__node_shrink(cachep, node);
|
||||
spin_unlock_irq(&l3->list_lock);
|
||||
}
|
||||
mutex_unlock(&cache_chain_mutex);
|
||||
break;
|
||||
#endif
|
||||
@ -2011,7 +2059,6 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
|
||||
|
||||
smp_call_function_all_cpus(do_drain, cachep);
|
||||
check_irq_on();
|
||||
spin_lock(&cachep->spinlock);
|
||||
for_each_online_node(node) {
|
||||
l3 = cachep->nodelists[node];
|
||||
if (l3) {
|
||||
@ -2019,10 +2066,9 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
|
||||
drain_array_locked(cachep, l3->shared, 1, node);
|
||||
spin_unlock_irq(&l3->list_lock);
|
||||
if (l3->alien)
|
||||
drain_alien_cache(cachep, l3);
|
||||
drain_alien_cache(cachep, l3->alien);
|
||||
}
|
||||
}
|
||||
spin_unlock(&cachep->spinlock);
|
||||
}
|
||||
|
||||
static int __node_shrink(struct kmem_cache *cachep, int node)
|
||||
@ -3440,7 +3486,7 @@ static void cache_reap(void *unused)
|
||||
|
||||
l3 = searchp->nodelists[numa_node_id()];
|
||||
if (l3->alien)
|
||||
drain_alien_cache(searchp, l3);
|
||||
drain_alien_cache(searchp, l3->alien);
|
||||
spin_lock_irq(&l3->list_lock);
|
||||
|
||||
drain_array_locked(searchp, cpu_cache_get(searchp), 0,
|
||||
@ -3598,7 +3644,8 @@ static int s_show(struct seq_file *m, void *p)
|
||||
num_slabs++;
|
||||
}
|
||||
free_objects += l3->free_objects;
|
||||
shared_avail += l3->shared->avail;
|
||||
if (l3->shared)
|
||||
shared_avail += l3->shared->avail;
|
||||
|
||||
spin_unlock_irq(&l3->list_lock);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user