Merge branch 'mvpp2-per-cpu-buffers'

Matteo Croce says:

====================
mvpp2: per-cpu buffers

This patchset workarounds an PP2 HW limitation which prevents to use
per-cpu rx buffers.
The first patch is just a refactor to prepare for the second one.
The second one allocates percpu buffers if the following conditions are met:
- CPU number is less or equal 4
- no port is using jumbo frames

If the following conditions are not met at load time, of jumbo frame is enabled
later on, the shared allocation is reverted.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-09-02 12:07:46 -07:00
commit 67538eb5c0
2 changed files with 237 additions and 41 deletions

View File

@ -683,6 +683,7 @@ enum mvpp2_prs_l3_cast {
#define MVPP2_BM_SHORT_BUF_NUM 2048
#define MVPP2_BM_POOL_SIZE_MAX (16*1024 - MVPP2_BM_POOL_PTR_ALIGN/4)
#define MVPP2_BM_POOL_PTR_ALIGN 128
#define MVPP2_BM_MAX_POOLS 8
/* BM cookie (32 bits) definition */
#define MVPP2_BM_COOKIE_POOL_OFFS 8
@ -787,6 +788,9 @@ struct mvpp2 {
/* Aggregated TXQs */
struct mvpp2_tx_queue *aggr_txqs;
/* Are we using page_pool with per-cpu pools? */
int percpu_pools;
/* BM pools */
struct mvpp2_bm_pool *bm_pools;

View File

@ -292,6 +292,26 @@ static void mvpp2_txq_inc_put(struct mvpp2_port *port,
txq_pcpu->txq_put_index = 0;
}
/* Get number of maximum RXQ */
static int mvpp2_get_nrxqs(struct mvpp2 *priv)
{
unsigned int nrxqs;
if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_SINGLE_MODE)
return 1;
/* According to the PPv2.2 datasheet and our experiments on
* PPv2.1, RX queues have an allocation granularity of 4 (when
* more than a single one on PPv2.2).
* Round up to nearest multiple of 4.
*/
nrxqs = (num_possible_cpus() + 3) & ~0x3;
if (nrxqs > MVPP2_PORT_MAX_RXQ)
nrxqs = MVPP2_PORT_MAX_RXQ;
return nrxqs;
}
/* Get number of physical egress port */
static inline int mvpp2_egress_port(struct mvpp2_port *port)
{
@ -323,8 +343,7 @@ static void mvpp2_frag_free(const struct mvpp2_bm_pool *pool, void *data)
/* Buffer Manager configuration routines */
/* Create pool */
static int mvpp2_bm_pool_create(struct platform_device *pdev,
struct mvpp2 *priv,
static int mvpp2_bm_pool_create(struct device *dev, struct mvpp2 *priv,
struct mvpp2_bm_pool *bm_pool, int size)
{
u32 val;
@ -343,7 +362,7 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev,
else
bm_pool->size_bytes = 2 * sizeof(u64) * size;
bm_pool->virt_addr = dma_alloc_coherent(&pdev->dev, bm_pool->size_bytes,
bm_pool->virt_addr = dma_alloc_coherent(dev, bm_pool->size_bytes,
&bm_pool->dma_addr,
GFP_KERNEL);
if (!bm_pool->virt_addr)
@ -351,9 +370,9 @@ static int mvpp2_bm_pool_create(struct platform_device *pdev,
if (!IS_ALIGNED((unsigned long)bm_pool->virt_addr,
MVPP2_BM_POOL_PTR_ALIGN)) {
dma_free_coherent(&pdev->dev, bm_pool->size_bytes,
dma_free_coherent(dev, bm_pool->size_bytes,
bm_pool->virt_addr, bm_pool->dma_addr);
dev_err(&pdev->dev, "BM pool %d is not %d bytes aligned\n",
dev_err(dev, "BM pool %d is not %d bytes aligned\n",
bm_pool->id, MVPP2_BM_POOL_PTR_ALIGN);
return -ENOMEM;
}
@ -468,15 +487,14 @@ static int mvpp2_check_hw_buf_num(struct mvpp2 *priv, struct mvpp2_bm_pool *bm_p
}
/* Cleanup pool */
static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
struct mvpp2 *priv,
static int mvpp2_bm_pool_destroy(struct device *dev, struct mvpp2 *priv,
struct mvpp2_bm_pool *bm_pool)
{
int buf_num;
u32 val;
buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
mvpp2_bm_bufs_free(&pdev->dev, priv, bm_pool, buf_num);
mvpp2_bm_bufs_free(dev, priv, bm_pool, buf_num);
/* Check buffer counters after free */
buf_num = mvpp2_check_hw_buf_num(priv, bm_pool);
@ -490,24 +508,26 @@ static int mvpp2_bm_pool_destroy(struct platform_device *pdev,
val |= MVPP2_BM_STOP_MASK;
mvpp2_write(priv, MVPP2_BM_POOL_CTRL_REG(bm_pool->id), val);
dma_free_coherent(&pdev->dev, bm_pool->size_bytes,
dma_free_coherent(dev, bm_pool->size_bytes,
bm_pool->virt_addr,
bm_pool->dma_addr);
return 0;
}
static int mvpp2_bm_pools_init(struct platform_device *pdev,
struct mvpp2 *priv)
static int mvpp2_bm_pools_init(struct device *dev, struct mvpp2 *priv)
{
int i, err, size;
int i, err, size, poolnum = MVPP2_BM_POOLS_NUM;
struct mvpp2_bm_pool *bm_pool;
if (priv->percpu_pools)
poolnum = mvpp2_get_nrxqs(priv) * 2;
/* Create all pools with maximum size */
size = MVPP2_BM_POOL_SIZE_MAX;
for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
for (i = 0; i < poolnum; i++) {
bm_pool = &priv->bm_pools[i];
bm_pool->id = i;
err = mvpp2_bm_pool_create(pdev, priv, bm_pool, size);
err = mvpp2_bm_pool_create(dev, priv, bm_pool, size);
if (err)
goto err_unroll_pools;
mvpp2_bm_pool_bufsize_set(priv, bm_pool, 0);
@ -515,17 +535,23 @@ static int mvpp2_bm_pools_init(struct platform_device *pdev,
return 0;
err_unroll_pools:
dev_err(&pdev->dev, "failed to create BM pool %d, size %d\n", i, size);
dev_err(dev, "failed to create BM pool %d, size %d\n", i, size);
for (i = i - 1; i >= 0; i--)
mvpp2_bm_pool_destroy(pdev, priv, &priv->bm_pools[i]);
mvpp2_bm_pool_destroy(dev, priv, &priv->bm_pools[i]);
return err;
}
static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
static int mvpp2_bm_init(struct device *dev, struct mvpp2 *priv)
{
int i, err;
int i, err, poolnum = MVPP2_BM_POOLS_NUM;
for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
if (priv->percpu_pools)
poolnum = mvpp2_get_nrxqs(priv) * 2;
dev_info(dev, "using %d %s buffers\n", poolnum,
priv->percpu_pools ? "per-cpu" : "shared");
for (i = 0; i < poolnum; i++) {
/* Mask BM all interrupts */
mvpp2_write(priv, MVPP2_BM_INTR_MASK_REG(i), 0);
/* Clear BM cause register */
@ -533,12 +559,12 @@ static int mvpp2_bm_init(struct platform_device *pdev, struct mvpp2 *priv)
}
/* Allocate and initialize BM pools */
priv->bm_pools = devm_kcalloc(&pdev->dev, MVPP2_BM_POOLS_NUM,
priv->bm_pools = devm_kcalloc(dev, poolnum,
sizeof(*priv->bm_pools), GFP_KERNEL);
if (!priv->bm_pools)
return -ENOMEM;
err = mvpp2_bm_pools_init(pdev, priv);
err = mvpp2_bm_pools_init(dev, priv);
if (err < 0)
return err;
return 0;
@ -679,6 +705,13 @@ static int mvpp2_bm_bufs_add(struct mvpp2_port *port,
phys_addr_t phys_addr;
void *buf;
if (port->priv->percpu_pools &&
bm_pool->pkt_size > MVPP2_BM_LONG_PKT_SIZE) {
netdev_err(port->dev,
"attempted to use jumbo frames with per-cpu pools");
return 0;
}
buf_size = MVPP2_RX_BUF_SIZE(bm_pool->pkt_size);
total_size = MVPP2_RX_TOTAL_SIZE(buf_size);
@ -722,7 +755,64 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
int num;
if (pool >= MVPP2_BM_POOLS_NUM) {
if ((port->priv->percpu_pools && pool > mvpp2_get_nrxqs(port->priv) * 2) ||
(!port->priv->percpu_pools && pool >= MVPP2_BM_POOLS_NUM)) {
netdev_err(port->dev, "Invalid pool %d\n", pool);
return NULL;
}
/* Allocate buffers in case BM pool is used as long pool, but packet
* size doesn't match MTU or BM pool hasn't being used yet
*/
if (new_pool->pkt_size == 0) {
int pkts_num;
/* Set default buffer number or free all the buffers in case
* the pool is not empty
*/
pkts_num = new_pool->buf_num;
if (pkts_num == 0) {
if (port->priv->percpu_pools) {
if (pool < port->nrxqs)
pkts_num = mvpp2_pools[MVPP2_BM_SHORT].buf_num;
else
pkts_num = mvpp2_pools[MVPP2_BM_LONG].buf_num;
} else {
pkts_num = mvpp2_pools[pool].buf_num;
}
} else {
mvpp2_bm_bufs_free(port->dev->dev.parent,
port->priv, new_pool, pkts_num);
}
new_pool->pkt_size = pkt_size;
new_pool->frag_size =
SKB_DATA_ALIGN(MVPP2_RX_BUF_SIZE(pkt_size)) +
MVPP2_SKB_SHINFO_SIZE;
/* Allocate buffers for this pool */
num = mvpp2_bm_bufs_add(port, new_pool, pkts_num);
if (num != pkts_num) {
WARN(1, "pool %d: %d of %d allocated\n",
new_pool->id, num, pkts_num);
return NULL;
}
}
mvpp2_bm_pool_bufsize_set(port->priv, new_pool,
MVPP2_RX_BUF_SIZE(new_pool->pkt_size));
return new_pool;
}
static struct mvpp2_bm_pool *
mvpp2_bm_pool_use_percpu(struct mvpp2_port *port, int type,
unsigned int pool, int pkt_size)
{
struct mvpp2_bm_pool *new_pool = &port->priv->bm_pools[pool];
int num;
if (pool > port->nrxqs * 2) {
netdev_err(port->dev, "Invalid pool %d\n", pool);
return NULL;
}
@ -738,7 +828,7 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
*/
pkts_num = new_pool->buf_num;
if (pkts_num == 0)
pkts_num = mvpp2_pools[pool].buf_num;
pkts_num = mvpp2_pools[type].buf_num;
else
mvpp2_bm_bufs_free(port->dev->dev.parent,
port->priv, new_pool, pkts_num);
@ -763,11 +853,11 @@ mvpp2_bm_pool_use(struct mvpp2_port *port, unsigned pool, int pkt_size)
return new_pool;
}
/* Initialize pools for swf */
static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
/* Initialize pools for swf, shared buffers variant */
static int mvpp2_swf_bm_pool_init_shared(struct mvpp2_port *port)
{
int rxq;
enum mvpp2_bm_pool_log_num long_log_pool, short_log_pool;
int rxq;
/* If port pkt_size is higher than 1518B:
* HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
@ -811,6 +901,47 @@ static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
return 0;
}
/* Initialize pools for swf, percpu buffers variant */
static int mvpp2_swf_bm_pool_init_percpu(struct mvpp2_port *port)
{
struct mvpp2_bm_pool *p;
int i;
for (i = 0; i < port->nrxqs; i++) {
p = mvpp2_bm_pool_use_percpu(port, MVPP2_BM_SHORT, i,
mvpp2_pools[MVPP2_BM_SHORT].pkt_size);
if (!p)
return -ENOMEM;
port->priv->bm_pools[i].port_map |= BIT(port->id);
mvpp2_rxq_short_pool_set(port, i, port->priv->bm_pools[i].id);
}
for (i = 0; i < port->nrxqs; i++) {
p = mvpp2_bm_pool_use_percpu(port, MVPP2_BM_LONG, i + port->nrxqs,
mvpp2_pools[MVPP2_BM_LONG].pkt_size);
if (!p)
return -ENOMEM;
port->priv->bm_pools[i + port->nrxqs].port_map |= BIT(port->id);
mvpp2_rxq_long_pool_set(port, i,
port->priv->bm_pools[i + port->nrxqs].id);
}
port->pool_long = NULL;
port->pool_short = NULL;
return 0;
}
static int mvpp2_swf_bm_pool_init(struct mvpp2_port *port)
{
if (port->priv->percpu_pools)
return mvpp2_swf_bm_pool_init_percpu(port);
else
return mvpp2_swf_bm_pool_init_shared(port);
}
static void mvpp2_set_hw_csum(struct mvpp2_port *port,
enum mvpp2_bm_pool_log_num new_long_pool)
{
@ -837,6 +968,9 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
enum mvpp2_bm_pool_log_num new_long_pool;
int pkt_size = MVPP2_RX_PKT_SIZE(mtu);
if (port->priv->percpu_pools)
goto out_set;
/* If port MTU is higher than 1518B:
* HW Long pool - SW Jumbo pool, HW Short pool - SW Long pool
* else: HW Long pool - SW Long pool, HW Short pool - SW Short pool
@ -866,6 +1000,7 @@ static int mvpp2_bm_update_mtu(struct net_device *dev, int mtu)
mvpp2_set_hw_csum(port, new_long_pool);
}
out_set:
dev->mtu = mtu;
dev->wanted_features = dev->features;
@ -3699,10 +3834,48 @@ static int mvpp2_set_mac_address(struct net_device *dev, void *p)
return err;
}
/* Shut down all the ports, reconfigure the pools as percpu or shared,
* then bring up again all ports.
*/
static int mvpp2_bm_switch_buffers(struct mvpp2 *priv, bool percpu)
{
int numbufs = MVPP2_BM_POOLS_NUM, i;
struct mvpp2_port *port = NULL;
bool status[MVPP2_MAX_PORTS];
for (i = 0; i < priv->port_count; i++) {
port = priv->port_list[i];
status[i] = netif_running(port->dev);
if (status[i])
mvpp2_stop(port->dev);
}
/* nrxqs is the same for all ports */
if (priv->percpu_pools)
numbufs = port->nrxqs * 2;
for (i = 0; i < numbufs; i++)
mvpp2_bm_pool_destroy(port->dev->dev.parent, priv, &priv->bm_pools[i]);
devm_kfree(port->dev->dev.parent, priv->bm_pools);
priv->percpu_pools = percpu;
mvpp2_bm_init(port->dev->dev.parent, priv);
for (i = 0; i < priv->port_count; i++) {
port = priv->port_list[i];
mvpp2_swf_bm_pool_init(port);
if (status[i])
mvpp2_open(port->dev);
}
return 0;
}
static int mvpp2_change_mtu(struct net_device *dev, int mtu)
{
struct mvpp2_port *port = netdev_priv(dev);
bool running = netif_running(dev);
struct mvpp2 *priv = port->priv;
int err;
if (!IS_ALIGNED(MVPP2_RX_PKT_SIZE(mtu), 8)) {
@ -3711,6 +3884,31 @@ static int mvpp2_change_mtu(struct net_device *dev, int mtu)
mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8);
}
if (MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE) {
if (priv->percpu_pools) {
netdev_warn(dev, "mtu %d too high, switching to shared buffers", mtu);
mvpp2_bm_switch_buffers(priv, false);
}
} else {
bool jumbo = false;
int i;
for (i = 0; i < priv->port_count; i++)
if (priv->port_list[i] != port &&
MVPP2_RX_PKT_SIZE(priv->port_list[i]->dev->mtu) >
MVPP2_BM_LONG_PKT_SIZE) {
jumbo = true;
break;
}
/* No port is using jumbo frames */
if (!jumbo) {
dev_info(port->dev->dev.parent,
"all ports have a low MTU, switching to per-cpu buffers");
mvpp2_bm_switch_buffers(priv, true);
}
}
if (running)
mvpp2_stop_dev(port);
@ -5017,18 +5215,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
}
ntxqs = MVPP2_MAX_TXQ;
if (priv->hw_version == MVPP22 && queue_mode == MVPP2_QDIST_SINGLE_MODE) {
nrxqs = 1;
} else {
/* According to the PPv2.2 datasheet and our experiments on
* PPv2.1, RX queues have an allocation granularity of 4 (when
* more than a single one on PPv2.2).
* Round up to nearest multiple of 4.
*/
nrxqs = (num_possible_cpus() + 3) & ~0x3;
if (nrxqs > MVPP2_PORT_MAX_RXQ)
nrxqs = MVPP2_PORT_MAX_RXQ;
}
nrxqs = mvpp2_get_nrxqs(priv);
dev = alloc_etherdev_mqs(sizeof(*port), ntxqs, nrxqs);
if (!dev)
@ -5190,7 +5377,8 @@ static int mvpp2_port_probe(struct platform_device *pdev,
dev->features |= NETIF_F_NTUPLE;
}
mvpp2_set_hw_csum(port, port->pool_long->id);
if (!port->priv->percpu_pools)
mvpp2_set_hw_csum(port, port->pool_long->id);
dev->vlan_features |= features;
dev->gso_max_segs = MVPP2_MAX_TSO_SEGS;
@ -5482,7 +5670,7 @@ static int mvpp2_init(struct platform_device *pdev, struct mvpp2 *priv)
mvpp2_write(priv, MVPP2_TX_SNOOP_REG, 0x1);
/* Buffer Manager initialization */
err = mvpp2_bm_init(pdev, priv);
err = mvpp2_bm_init(&pdev->dev, priv);
if (err < 0)
return err;
@ -5568,6 +5756,10 @@ static int mvpp2_probe(struct platform_device *pdev)
priv->sysctrl_base = NULL;
}
if (priv->hw_version == MVPP22 &&
mvpp2_get_nrxqs(priv) * 2 <= MVPP2_BM_MAX_POOLS)
priv->percpu_pools = 1;
mvpp2_setup_bm_pool();
@ -5749,7 +5941,7 @@ static int mvpp2_remove(struct platform_device *pdev)
for (i = 0; i < MVPP2_BM_POOLS_NUM; i++) {
struct mvpp2_bm_pool *bm_pool = &priv->bm_pools[i];
mvpp2_bm_pool_destroy(pdev, priv, bm_pool);
mvpp2_bm_pool_destroy(&pdev->dev, priv, bm_pool);
}
for (i = 0; i < MVPP2_MAX_THREADS; i++) {