mm/demotion: add pg_data_t member to track node memory tier details

Also update different helpers to use NODE_DATA()->memtier.  Since the
node-specific memtier can change when a NUMA node is reassigned to a
different memory tier, accessing NODE_DATA()->memtier needs to happen
under an RCU read lock or memory_tier_lock.

Link: https://lkml.kernel.org/r/20220818131042.113280-7-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Acked-by: Wei Xu <weixugc@google.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Hesham Almatary <hesham.almatary@huawei.com>
Cc: Jagdish Gediya <jvgediya.oss@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tim Chen <tim.c.chen@intel.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Aneesh Kumar K.V 2022-08-18 18:40:38 +05:30 committed by Andrew Morton
parent 6c542ab757
commit 7766cf7a7e
2 changed files with 38 additions and 5 deletions

View File

@ -1246,6 +1246,9 @@ typedef struct pglist_data {
/* Per-node vmstats */ /* Per-node vmstats */
struct per_cpu_nodestat __percpu *per_cpu_nodestats; struct per_cpu_nodestat __percpu *per_cpu_nodestats;
atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS];
#ifdef CONFIG_NUMA
struct memory_tier __rcu *memtier;
#endif
} pg_data_t; } pg_data_t;
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)

View File

@ -4,6 +4,7 @@
#include <linux/sysfs.h> #include <linux/sysfs.h>
#include <linux/kobject.h> #include <linux/kobject.h>
#include <linux/memory.h> #include <linux/memory.h>
#include <linux/mmzone.h>
#include <linux/memory-tiers.h> #include <linux/memory-tiers.h>
#include "internal.h" #include "internal.h"
@ -141,12 +142,18 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty
static struct memory_tier *__node_get_memory_tier(int node) static struct memory_tier *__node_get_memory_tier(int node)
{ {
struct memory_dev_type *memtype; pg_data_t *pgdat;
memtype = node_memory_types[node]; pgdat = NODE_DATA(node);
if (memtype && node_isset(node, memtype->nodes)) if (!pgdat)
return memtype->memtier; return NULL;
return NULL; /*
* Since we hold memory_tier_lock, we can avoid
* RCU read locks when accessing the details. No
* parallel updates are possible here.
*/
return rcu_dereference_check(pgdat->memtier,
lockdep_is_held(&memory_tier_lock));
} }
#ifdef CONFIG_MIGRATION #ifdef CONFIG_MIGRATION
@ -309,6 +316,8 @@ static struct memory_tier *set_node_memory_tier(int node)
{ {
struct memory_tier *memtier; struct memory_tier *memtier;
struct memory_dev_type *memtype; struct memory_dev_type *memtype;
pg_data_t *pgdat = NODE_DATA(node);
lockdep_assert_held_once(&memory_tier_lock); lockdep_assert_held_once(&memory_tier_lock);
@ -320,24 +329,45 @@ static struct memory_tier *set_node_memory_tier(int node)
memtype = node_memory_types[node].memtype; memtype = node_memory_types[node].memtype;
node_set(node, memtype->nodes); node_set(node, memtype->nodes);
memtier = find_create_memory_tier(memtype); memtier = find_create_memory_tier(memtype);
if (!IS_ERR(memtier))
rcu_assign_pointer(pgdat->memtier, memtier);
return memtier; return memtier;
} }
static void destroy_memory_tier(struct memory_tier *memtier) static void destroy_memory_tier(struct memory_tier *memtier)
{ {
list_del(&memtier->list); list_del(&memtier->list);
/*
* synchronize_rcu in clear_node_memory_tier makes sure
* we don't have rcu access to this memory tier.
*/
kfree(memtier); kfree(memtier);
} }
static bool clear_node_memory_tier(int node) static bool clear_node_memory_tier(int node)
{ {
bool cleared = false; bool cleared = false;
pg_data_t *pgdat;
struct memory_tier *memtier; struct memory_tier *memtier;
pgdat = NODE_DATA(node);
if (!pgdat)
return false;
/*
* Make sure that anybody looking at NODE_DATA who finds
* a valid memtier finds memory_dev_types with nodes still
* linked to the memtier. We achieve this by waiting for
* rcu read section to finish using synchronize_rcu.
* This also enables us to free the destroyed memory tier
* with kfree instead of kfree_rcu
*/
memtier = __node_get_memory_tier(node); memtier = __node_get_memory_tier(node);
if (memtier) { if (memtier) {
struct memory_dev_type *memtype; struct memory_dev_type *memtype;
rcu_assign_pointer(pgdat->memtier, NULL);
synchronize_rcu();
memtype = node_memory_types[node].memtype; memtype = node_memory_types[node].memtype;
node_clear(node, memtype->nodes); node_clear(node, memtype->nodes);
if (nodes_empty(memtype->nodes)) { if (nodes_empty(memtype->nodes)) {