Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "This has been a smaller cycle than normal. One new driver was
  accepted, which is unusual, and at least one more driver remains in
  review on the list.

  Summary:

   - Driver fixes for hns, hfi1, nes, rxe, i40iw, mlx5, cxgb4,
     vmw_pvrdma

   - Many patches from Matthew Wilcox converting radix tree and IDR users
     to use XArray (a conversion sketch follows this summary)

   - Introduction of tracepoints to the MAD layer

   - Build large SGLs at the start for DMA mapping and get the driver to
     split them

   - Generally clean SGL handling code throughout the subsystem

   - Support for restricting RDMA devices to net namespaces for
     containers

   - Progress to remove object allocation boilerplate code from drivers

   - Change in how the mlx5 driver shows representor ports linked to VFs

   - mlx5 uapi feature to access the on chip SW ICM memory

   - Add a new driver for 'EFA'. This is HW that supports user space
     packet processing through QPs in Amazon's cloud"
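
The IDR-to-XArray conversions mentioned above all follow the same shape,
visible in the cm.c, cma.c and mad.c hunks further down. The sketch below is
an illustrative distillation of that pattern rather than code from this pull;
the example_* names are hypothetical.

/* Illustrative sketch of the IDR -> XArray conversion pattern (assumed names). */
#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/xarray.h>

struct example_obj { int dummy; };

/* Before: an IDR with its own external lock and the idr_preload() dance. */
static DEFINE_SPINLOCK(example_lock);
static DEFINE_IDR(example_idr);

static int example_idr_add(struct example_obj *obj, int *id)
{
	idr_preload(GFP_KERNEL);
	spin_lock(&example_lock);
	*id = idr_alloc_cyclic(&example_idr, obj, 0, 0, GFP_NOWAIT);
	spin_unlock(&example_lock);
	idr_preload_end();
	return *id < 0 ? *id : 0;
}

/* After: an XArray, which carries its own lock and allocation state. */
static DEFINE_XARRAY_ALLOC(example_xa);
static u32 example_next;

static int example_xa_add(struct example_obj *obj, u32 *id)
{
	int err;

	/* xa_limit_32b bounds the allocated ID to 32 bits. */
	err = xa_alloc_cyclic(&example_xa, id, obj, xa_limit_32b,
			      &example_next, GFP_KERNEL);
	return err < 0 ? err : 0;	/* 1 only means the counter wrapped */
}

static void example_xa_del(u32 id)
{
	xa_erase(&example_xa, id);	/* replaces spin_lock + idr_remove */
}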

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (186 commits)
  RDMA/ipoib: Allow user space differentiate between valid dev_port
  IB/core, ipoib: Do not overreact to SM LID change event
  RDMA/device: Don't fire uevent before device is fully initialized
  lib/scatterlist: Remove leftover from sg_page_iter comment
  RDMA/efa: Add driver to Kconfig/Makefile
  RDMA/efa: Add the efa module
  RDMA/efa: Add EFA verbs implementation
  RDMA/efa: Add common command handlers
  RDMA/efa: Implement functions that submit and complete admin commands
  RDMA/efa: Add the ABI definitions
  RDMA/efa: Add the com service API definitions
  RDMA/efa: Add the efa_com.h file
  RDMA/efa: Add the efa.h header file
  RDMA/efa: Add EFA device definitions
  RDMA: Add EFA related definitions
  RDMA/umem: Remove hugetlb flag
  RDMA/bnxt_re: Use core helpers to get aligned DMA address
  RDMA/i40iw: Use core helpers to get aligned DMA address within a supported page size
  RDMA/verbs: Add a DMA iterator to return aligned contiguous memory blocks
  RDMA/umem: Add API to find best driver supported page size in an MR
  ...
Committed by Linus Torvalds on 2019-05-09 09:02:46 -07:00
251 changed files with 12549 additions and 4585 deletions


@@ -745,6 +745,15 @@ S: Supported
F: Documentation/networking/device_drivers/amazon/ena.txt
F: drivers/net/ethernet/amazon/
AMAZON RDMA EFA DRIVER
M: Gal Pressman <galpress@amazon.com>
R: Yossi Leybovich <sleybo@amazon.com>
L: linux-rdma@vger.kernel.org
Q: https://patchwork.kernel.org/project/linux-rdma/list/
S: Supported
F: drivers/infiniband/hw/efa/
F: include/uapi/rdma/efa-abi.h
AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
M: Tom Lendacky <thomas.lendacky@amd.com>
M: Gary Hook <gary.hook@amd.com>
@@ -4279,7 +4288,7 @@ S: Supported
F: drivers/scsi/cxgbi/cxgb3i
CXGB3 IWARP RNIC DRIVER (IW_CXGB3)
M: Steve Wise <swise@chelsio.com>
M: Potnuri Bharat Teja <bharat@chelsio.com>
L: linux-rdma@vger.kernel.org
W: http://www.openfabrics.org
S: Supported
@@ -4308,7 +4317,7 @@ S: Supported
F: drivers/scsi/cxgbi/cxgb4i
CXGB4 IWARP RNIC DRIVER (IW_CXGB4)
M: Steve Wise <swise@chelsio.com>
M: Potnuri Bharat Teja <bharat@chelsio.com>
L: linux-rdma@vger.kernel.org
W: http://www.openfabrics.org
S: Supported
@@ -7727,6 +7736,10 @@ F: drivers/infiniband/
F: include/uapi/linux/if_infiniband.h
F: include/uapi/rdma/
F: include/rdma/
F: include/trace/events/ib_mad.h
F: include/trace/events/ib_umad.h
F: samples/bpf/ibumad_kern.c
F: samples/bpf/ibumad_user.c
INGENIC JZ4780 DMA Driver
M: Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com>


@@ -93,6 +93,7 @@ source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
source "drivers/infiniband/hw/cxgb4/Kconfig"
source "drivers/infiniband/hw/efa/Kconfig"
source "drivers/infiniband/hw/i40iw/Kconfig"
source "drivers/infiniband/hw/mlx4/Kconfig"
source "drivers/infiniband/hw/mlx5/Kconfig"


@@ -45,6 +45,7 @@
#include <net/ipv6_stubs.h>
#include <net/ip6_route.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_sa.h>
#include <rdma/ib.h>
#include <rdma/rdma_netlink.h>


@@ -78,11 +78,22 @@ enum gid_table_entry_state {
GID_TABLE_ENTRY_PENDING_DEL = 3,
};
struct roce_gid_ndev_storage {
struct rcu_head rcu_head;
struct net_device *ndev;
};
struct ib_gid_table_entry {
struct kref kref;
struct work_struct del_work;
struct ib_gid_attr attr;
void *context;
/* Store the ndev pointer to release reference later on in
* call_rcu context because by that time gid_table_entry
* and attr might be already freed. So keep a copy of it.
* ndev_storage is freed by rcu callback.
*/
struct roce_gid_ndev_storage *ndev_storage;
enum gid_table_entry_state state;
};
@@ -206,6 +217,20 @@ static void schedule_free_gid(struct kref *kref)
queue_work(ib_wq, &entry->del_work);
}
static void put_gid_ndev(struct rcu_head *head)
{
struct roce_gid_ndev_storage *storage =
container_of(head, struct roce_gid_ndev_storage, rcu_head);
WARN_ON(!storage->ndev);
/* At this point its safe to release netdev reference,
* as all callers working on gid_attr->ndev are done
* using this netdev.
*/
dev_put(storage->ndev);
kfree(storage);
}
static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
{
struct ib_device *device = entry->attr.device;
@@ -228,8 +253,8 @@ static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
/* Now this index is ready to be allocated */
write_unlock_irq(&table->rwlock);
if (entry->attr.ndev)
dev_put(entry->attr.ndev);
if (entry->ndev_storage)
call_rcu(&entry->ndev_storage->rcu_head, put_gid_ndev);
kfree(entry);
}
@@ -266,14 +291,25 @@ static struct ib_gid_table_entry *
alloc_gid_entry(const struct ib_gid_attr *attr)
{
struct ib_gid_table_entry *entry;
struct net_device *ndev;
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return NULL;
ndev = rcu_dereference_protected(attr->ndev, 1);
if (ndev) {
entry->ndev_storage = kzalloc(sizeof(*entry->ndev_storage),
GFP_KERNEL);
if (!entry->ndev_storage) {
kfree(entry);
return NULL;
}
dev_hold(ndev);
entry->ndev_storage->ndev = ndev;
}
kref_init(&entry->kref);
memcpy(&entry->attr, attr, sizeof(*attr));
if (entry->attr.ndev)
dev_hold(entry->attr.ndev);
INIT_WORK(&entry->del_work, free_gid_work);
entry->state = GID_TABLE_ENTRY_INVALID;
return entry;
@@ -343,6 +379,7 @@ static int add_roce_gid(struct ib_gid_table_entry *entry)
static void del_gid(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table, int ix)
{
struct roce_gid_ndev_storage *ndev_storage;
struct ib_gid_table_entry *entry;
lockdep_assert_held(&table->lock);
@@ -360,6 +397,13 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
table->data_vec[ix] = NULL;
write_unlock_irq(&table->rwlock);
ndev_storage = entry->ndev_storage;
if (ndev_storage) {
entry->ndev_storage = NULL;
rcu_assign_pointer(entry->attr.ndev, NULL);
call_rcu(&ndev_storage->rcu_head, put_gid_ndev);
}
if (rdma_cap_roce_gid_table(ib_dev, port))
ib_dev->ops.del_gid(&entry->attr, &entry->context);
@@ -543,30 +587,11 @@ out_unlock:
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
union ib_gid *gid, struct ib_gid_attr *attr)
{
struct net_device *idev;
unsigned long mask;
int ret;
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE |
GID_ATTR_FIND_MASK_NETDEV;
idev = ib_device_get_netdev(ib_dev, port);
if (idev && attr->ndev != idev) {
union ib_gid default_gid;
/* Adding default GIDs is not permitted */
make_default_gid(idev, &default_gid);
if (!memcmp(gid, &default_gid, sizeof(*gid))) {
dev_put(idev);
return -EPERM;
}
}
if (idev)
dev_put(idev);
mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE |
GID_ATTR_FIND_MASK_NETDEV;
ret = __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
return ret;
return __ib_cache_gid_add(ib_dev, port, gid, attr, mask, false);
}
static int
@@ -1263,11 +1288,72 @@ struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr)
read_lock_irqsave(&table->rwlock, flags);
valid = is_gid_entry_valid(table->data_vec[attr->index]);
if (valid && attr->ndev && (READ_ONCE(attr->ndev->flags) & IFF_UP))
ndev = attr->ndev;
if (valid) {
ndev = rcu_dereference(attr->ndev);
if (!ndev ||
(ndev && ((READ_ONCE(ndev->flags) & IFF_UP) == 0)))
ndev = ERR_PTR(-ENODEV);
}
read_unlock_irqrestore(&table->rwlock, flags);
return ndev;
}
EXPORT_SYMBOL(rdma_read_gid_attr_ndev_rcu);
static int get_lower_dev_vlan(struct net_device *lower_dev, void *data)
{
u16 *vlan_id = data;
if (is_vlan_dev(lower_dev))
*vlan_id = vlan_dev_vlan_id(lower_dev);
/* We are interested only in first level vlan device, so
* always return 1 to stop iterating over next level devices.
*/
return 1;
}
/**
* rdma_read_gid_l2_fields - Read the vlan ID and source MAC address
* of a GID entry.
*
* @attr: GID attribute pointer whose L2 fields to be read
* @vlan_id: Pointer to vlan id to fill up if the GID entry has
* vlan id. It is optional.
* @smac: Pointer to smac to fill up for a GID entry. It is optional.
*
* rdma_read_gid_l2_fields() returns 0 on success and returns vlan id
* (if gid entry has vlan) and source MAC, or returns error.
*/
int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
u16 *vlan_id, u8 *smac)
{
struct net_device *ndev;
rcu_read_lock();
ndev = rcu_dereference(attr->ndev);
if (!ndev) {
rcu_read_unlock();
return -ENODEV;
}
if (smac)
ether_addr_copy(smac, ndev->dev_addr);
if (vlan_id) {
*vlan_id = 0xffff;
if (is_vlan_dev(ndev)) {
*vlan_id = vlan_dev_vlan_id(ndev);
} else {
/* If the netdev is upper device and if it's lower
* device is vlan device, consider vlan id of the
* the lower vlan device for this gid entry.
*/
netdev_walk_all_lower_dev_rcu(attr->ndev,
get_lower_dev_vlan, vlan_id);
}
}
rcu_read_unlock();
return 0;
}
EXPORT_SYMBOL(rdma_read_gid_l2_fields);
static int config_non_roce_gid_cache(struct ib_device *device,
u8 port, int gid_tbl_len)
@@ -1392,7 +1478,6 @@ static void ib_cache_event(struct ib_event_handler *handler,
event->event == IB_EVENT_PORT_ACTIVE ||
event->event == IB_EVENT_LID_CHANGE ||
event->event == IB_EVENT_PKEY_CHANGE ||
event->event == IB_EVENT_SM_CHANGE ||
event->event == IB_EVENT_CLIENT_REREGISTER ||
event->event == IB_EVENT_GID_CHANGE) {
work = kmalloc(sizeof *work, GFP_ATOMIC);
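
As a usage note for the rdma_read_gid_l2_fields() helper introduced above: the
hedged sketch below shows the intended call pattern described by its
kernel-doc. Only the helper itself comes from this diff; example_fill_l2() and
the assumption that the declaration lives in <rdma/ib_cache.h> are mine.

#include <linux/if_ether.h>	/* ETH_ALEN */
#include <rdma/ib_cache.h>	/* rdma_read_gid_l2_fields() (assumed location) */

/* Hypothetical RoCE driver helper: read SMAC/VLAN for a GID entry without
 * dereferencing gid_attr->ndev directly, which now requires RCU care. */
static int example_fill_l2(const struct ib_gid_attr *sgid_attr,
			   u8 smac[ETH_ALEN], u16 *vlan_id)
{
	int ret;

	ret = rdma_read_gid_l2_fields(sgid_attr, vlan_id, smac);
	if (ret)
		return ret;	/* e.g. -ENODEV: the backing netdev is gone */

	if (*vlan_id == 0xffff)	/* "no VLAN", per the implementation above */
		*vlan_id = 0;
	return 0;
}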


@@ -52,6 +52,7 @@
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"
#include "core_priv.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
@@ -124,7 +125,8 @@ static struct ib_cm {
struct rb_root remote_qp_table;
struct rb_root remote_id_table;
struct rb_root remote_sidr_table;
struct idr local_id_table;
struct xarray local_id_table;
u32 local_id_next;
__be32 random_id_operand;
struct list_head timewait_list;
struct workqueue_struct *wq;
@@ -219,7 +221,6 @@ struct cm_port {
struct cm_device {
struct list_head list;
struct ib_device *ib_device;
struct device *device;
u8 ack_delay;
int going_down;
struct cm_port *port[0];
@@ -598,35 +599,31 @@ static int cm_init_av_by_path(struct sa_path_rec *path,
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
unsigned long flags;
int id;
int err;
u32 id;
idr_preload(GFP_KERNEL);
spin_lock_irqsave(&cm.lock, flags);
id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);
spin_unlock_irqrestore(&cm.lock, flags);
idr_preload_end();
err = xa_alloc_cyclic_irq(&cm.local_id_table, &id, cm_id_priv,
xa_limit_32b, &cm.local_id_next, GFP_KERNEL);
cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
return id < 0 ? id : 0;
return err;
}
static u32 cm_local_id(__be32 local_id)
{
return (__force u32) (local_id ^ cm.random_id_operand);
}
static void cm_free_id(__be32 local_id)
{
spin_lock_irq(&cm.lock);
idr_remove(&cm.local_id_table,
(__force int) (local_id ^ cm.random_id_operand));
spin_unlock_irq(&cm.lock);
xa_erase_irq(&cm.local_id_table, cm_local_id(local_id));
}
static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
{
struct cm_id_private *cm_id_priv;
cm_id_priv = idr_find(&cm.local_id_table,
(__force int) (local_id ^ cm.random_id_operand));
cm_id_priv = xa_load(&cm.local_id_table, cm_local_id(local_id));
if (cm_id_priv) {
if (cm_id_priv->id.remote_id == remote_id)
atomic_inc(&cm_id_priv->refcount);
@@ -1988,11 +1985,12 @@ static int cm_req_handler(struct cm_work *work)
grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
gid_attr = grh->sgid_attr;
if (gid_attr && gid_attr->ndev) {
if (gid_attr &&
rdma_protocol_roce(work->port->cm_dev->ib_device,
work->port->port_num)) {
work->path[0].rec_type =
sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
} else {
/* If no GID attribute or ndev is null, it is not RoCE. */
cm_path_set_rec_type(work->port->cm_dev->ib_device,
work->port->port_num,
&work->path[0],
@@ -2824,9 +2822,8 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
spin_unlock_irq(&cm.lock);
return NULL;
}
cm_id_priv = idr_find(&cm.local_id_table, (__force int)
(timewait_info->work.local_id ^
cm.random_id_operand));
cm_id_priv = xa_load(&cm.local_id_table,
cm_local_id(timewait_info->work.local_id));
if (cm_id_priv) {
if (cm_id_priv->id.remote_id == remote_id)
atomic_inc(&cm_id_priv->refcount);
@@ -4276,18 +4273,6 @@ static struct kobj_type cm_counter_obj_type = {
.default_attrs = cm_counter_default_attrs
};
static void cm_release_port_obj(struct kobject *obj)
{
struct cm_port *cm_port;
cm_port = container_of(obj, struct cm_port, port_obj);
kfree(cm_port);
}
static struct kobj_type cm_port_obj_type = {
.release = cm_release_port_obj
};
static char *cm_devnode(struct device *dev, umode_t *mode)
{
if (mode)
@@ -4306,19 +4291,12 @@ static int cm_create_port_fs(struct cm_port *port)
{
int i, ret;
ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
&port->cm_dev->device->kobj,
"%d", port->port_num);
if (ret) {
kfree(port);
return ret;
}
for (i = 0; i < CM_COUNTER_GROUPS; i++) {
ret = kobject_init_and_add(&port->counter_group[i].obj,
&cm_counter_obj_type,
&port->port_obj,
"%s", counter_group_names[i]);
ret = ib_port_register_module_stat(port->cm_dev->ib_device,
port->port_num,
&port->counter_group[i].obj,
&cm_counter_obj_type,
counter_group_names[i]);
if (ret)
goto error;
}
@@ -4327,8 +4305,7 @@ static int cm_create_port_fs(struct cm_port *port)
error:
while (i--)
kobject_put(&port->counter_group[i].obj);
kobject_put(&port->port_obj);
ib_port_unregister_module_stat(&port->counter_group[i].obj);
return ret;
}
@@ -4338,9 +4315,8 @@ static void cm_remove_port_fs(struct cm_port *port)
int i;
for (i = 0; i < CM_COUNTER_GROUPS; i++)
kobject_put(&port->counter_group[i].obj);
ib_port_unregister_module_stat(&port->counter_group[i].obj);
kobject_put(&port->port_obj);
}
static void cm_add_one(struct ib_device *ib_device)
@@ -4367,13 +4343,6 @@ static void cm_add_one(struct ib_device *ib_device)
cm_dev->ib_device = ib_device;
cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
cm_dev->going_down = 0;
cm_dev->device = device_create(&cm_class, &ib_device->dev,
MKDEV(0, 0), NULL,
"%s", dev_name(&ib_device->dev));
if (IS_ERR(cm_dev->device)) {
kfree(cm_dev);
return;
}
set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
for (i = 1; i <= ib_device->phys_port_cnt; i++) {
@@ -4440,7 +4409,6 @@ error1:
cm_remove_port_fs(port);
}
free:
device_unregister(cm_dev->device);
kfree(cm_dev);
}
@@ -4494,7 +4462,6 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data)
cm_remove_port_fs(port);
}
device_unregister(cm_dev->device);
kfree(cm_dev);
}
@@ -4502,7 +4469,6 @@ static int __init ib_cm_init(void)
{
int ret;
memset(&cm, 0, sizeof cm);
INIT_LIST_HEAD(&cm.device_list);
rwlock_init(&cm.device_lock);
spin_lock_init(&cm.lock);
@@ -4512,7 +4478,7 @@ static int __init ib_cm_init(void)
cm.remote_id_table = RB_ROOT;
cm.remote_qp_table = RB_ROOT;
cm.remote_sidr_table = RB_ROOT;
idr_init(&cm.local_id_table);
xa_init_flags(&cm.local_id_table, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
INIT_LIST_HEAD(&cm.timewait_list);
@@ -4538,7 +4504,6 @@ error3:
error2:
class_unregister(&cm_class);
error1:
idr_destroy(&cm.local_id_table);
return ret;
}
@@ -4560,9 +4525,8 @@ static void __exit ib_cm_cleanup(void)
}
class_unregister(&cm_class);
idr_destroy(&cm.local_id_table);
WARN_ON(!xa_empty(&cm.local_id_table));
}
module_init(ib_cm_init);
module_exit(ib_cm_cleanup);


@@ -98,7 +98,7 @@ struct cm_req_msg {
u32 private_data[IB_CM_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
} __attribute__ ((packed));
} __packed;
static inline __be32 cm_req_get_local_qpn(struct cm_req_msg *req_msg)
{
@@ -423,7 +423,7 @@ enum cm_msg_response {
u8 private_data[IB_CM_MRA_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
} __packed;
static inline u8 cm_mra_get_msg_mraed(struct cm_mra_msg *mra_msg)
{
@@ -461,7 +461,7 @@ struct cm_rej_msg {
u8 private_data[IB_CM_REJ_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
} __packed;
static inline u8 cm_rej_get_msg_rejected(struct cm_rej_msg *rej_msg)
{
@@ -506,7 +506,7 @@ struct cm_rep_msg {
u8 private_data[IB_CM_REP_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
} __packed;
static inline __be32 cm_rep_get_local_qpn(struct cm_rep_msg *rep_msg)
{
@@ -614,7 +614,7 @@ struct cm_rtu_msg {
u8 private_data[IB_CM_RTU_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
} __packed;
struct cm_dreq_msg {
struct ib_mad_hdr hdr;
@@ -626,7 +626,7 @@ struct cm_dreq_msg {
u8 private_data[IB_CM_DREQ_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
} __packed;
static inline __be32 cm_dreq_get_remote_qpn(struct cm_dreq_msg *dreq_msg)
{
@@ -647,7 +647,7 @@ struct cm_drep_msg {
u8 private_data[IB_CM_DREP_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
} __packed;
struct cm_lap_msg {
struct ib_mad_hdr hdr;
@@ -675,7 +675,7 @@ struct cm_lap_msg {
u8 offset63;
u8 private_data[IB_CM_LAP_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
} __packed;
static inline __be32 cm_lap_get_remote_qpn(struct cm_lap_msg *lap_msg)
{
@@ -784,7 +784,7 @@ struct cm_apr_msg {
u8 info[IB_CM_APR_INFO_LENGTH];
u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
} __packed;
struct cm_sidr_req_msg {
struct ib_mad_hdr hdr;
@@ -795,7 +795,7 @@ struct cm_sidr_req_msg {
__be64 service_id;
u32 private_data[IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE / sizeof(u32)];
} __attribute__ ((packed));
} __packed;
struct cm_sidr_rep_msg {
struct ib_mad_hdr hdr;
@@ -811,7 +811,7 @@ struct cm_sidr_rep_msg {
u8 info[IB_CM_SIDR_REP_INFO_LENGTH];
u8 private_data[IB_CM_SIDR_REP_PRIVATE_DATA_SIZE];
} __attribute__ ((packed));
} __packed;
static inline __be32 cm_sidr_rep_get_qpn(struct cm_sidr_rep_msg *sidr_rep_msg)
{
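
The repeated change in this header is purely mechanical: the open-coded GCC
attribute is replaced by the kernel's __packed macro. A minimal illustration
(the struct names are made up):

#include <linux/compiler.h>	/* pulls in __packed -> __attribute__((__packed__)) */
#include <linux/types.h>

/* Identical layouts; __packed is simply the preferred spelling. */
struct example_hdr_old {
	u8     type;
	__be32 length;
} __attribute__ ((packed));

struct example_hdr_new {
	u8     type;
	__be32 length;	/* still at offset 1: no padding is inserted */
} __packed;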


@@ -39,7 +39,7 @@
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/igmp.h>
#include <linux/idr.h>
#include <linux/xarray.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
#include <linux/module.h>
@@ -191,10 +191,10 @@ static struct workqueue_struct *cma_wq;
static unsigned int cma_pernet_id;
struct cma_pernet {
struct idr tcp_ps;
struct idr udp_ps;
struct idr ipoib_ps;
struct idr ib_ps;
struct xarray tcp_ps;
struct xarray udp_ps;
struct xarray ipoib_ps;
struct xarray ib_ps;
};
static struct cma_pernet *cma_pernet(struct net *net)
@@ -202,7 +202,8 @@ static struct cma_pernet *cma_pernet(struct net *net)
return net_generic(net, cma_pernet_id);
}
static struct idr *cma_pernet_idr(struct net *net, enum rdma_ucm_port_space ps)
static
struct xarray *cma_pernet_xa(struct net *net, enum rdma_ucm_port_space ps)
{
struct cma_pernet *pernet = cma_pernet(net);
@@ -247,25 +248,25 @@ struct class_port_info_context {
static int cma_ps_alloc(struct net *net, enum rdma_ucm_port_space ps,
struct rdma_bind_list *bind_list, int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
struct xarray *xa = cma_pernet_xa(net, ps);
return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
return xa_insert(xa, snum, bind_list, GFP_KERNEL);
}
static struct rdma_bind_list *cma_ps_find(struct net *net,
enum rdma_ucm_port_space ps, int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
struct xarray *xa = cma_pernet_xa(net, ps);
return idr_find(idr, snum);
return xa_load(xa, snum);
}
static void cma_ps_remove(struct net *net, enum rdma_ucm_port_space ps,
int snum)
{
struct idr *idr = cma_pernet_idr(net, ps);
struct xarray *xa = cma_pernet_xa(net, ps);
idr_remove(idr, snum);
xa_erase(xa, snum);
}
enum {
@@ -615,6 +616,9 @@ cma_validate_port(struct ib_device *device, u8 port,
int dev_type = dev_addr->dev_type;
struct net_device *ndev = NULL;
if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net))
return ERR_PTR(-ENODEV);
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
return ERR_PTR(-ENODEV);
@@ -1173,18 +1177,31 @@ static inline bool cma_any_addr(const struct sockaddr *addr)
return cma_zero_addr(addr) || cma_loopback_addr(addr);
}
static int cma_addr_cmp(struct sockaddr *src, struct sockaddr *dst)
static int cma_addr_cmp(const struct sockaddr *src, const struct sockaddr *dst)
{
if (src->sa_family != dst->sa_family)
return -1;
switch (src->sa_family) {
case AF_INET:
return ((struct sockaddr_in *) src)->sin_addr.s_addr !=
((struct sockaddr_in *) dst)->sin_addr.s_addr;
case AF_INET6:
return ipv6_addr_cmp(&((struct sockaddr_in6 *) src)->sin6_addr,
&((struct sockaddr_in6 *) dst)->sin6_addr);
return ((struct sockaddr_in *)src)->sin_addr.s_addr !=
((struct sockaddr_in *)dst)->sin_addr.s_addr;
case AF_INET6: {
struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *)src;
struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *)dst;
bool link_local;
if (ipv6_addr_cmp(&src_addr6->sin6_addr,
&dst_addr6->sin6_addr))
return 1;
link_local = ipv6_addr_type(&dst_addr6->sin6_addr) &
IPV6_ADDR_LINKLOCAL;
/* Link local must match their scope_ids */
return link_local ? (src_addr6->sin6_scope_id !=
dst_addr6->sin6_scope_id) :
0;
}
default:
return ib_addr_cmp(&((struct sockaddr_ib *) src)->sib_addr,
&((struct sockaddr_ib *) dst)->sib_addr);
@@ -1469,6 +1486,7 @@ static struct net_device *
roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
{
const struct ib_gid_attr *sgid_attr = NULL;
struct net_device *ndev;
if (ib_event->event == IB_CM_REQ_RECEIVED)
sgid_attr = ib_event->param.req_rcvd.ppath_sgid_attr;
@@ -1477,8 +1495,15 @@ roce_get_net_dev_by_cm_event(const struct ib_cm_event *ib_event)
if (!sgid_attr)
return NULL;
dev_hold(sgid_attr->ndev);
return sgid_attr->ndev;
rcu_read_lock();
ndev = rdma_read_gid_attr_ndev_rcu(sgid_attr);
if (IS_ERR(ndev))
ndev = NULL;
else
dev_hold(ndev);
rcu_read_unlock();
return ndev;
}
static struct net_device *cma_get_net_dev(const struct ib_cm_event *ib_event,
@@ -3247,7 +3272,7 @@ static int cma_alloc_port(enum rdma_ucm_port_space ps,
goto err;
bind_list->ps = ps;
bind_list->port = (unsigned short)ret;
bind_list->port = snum;
cma_bind_port(bind_list, id_priv);
return 0;
err:
@@ -4655,10 +4680,10 @@ static int cma_init_net(struct net *net)
{
struct cma_pernet *pernet = cma_pernet(net);
idr_init(&pernet->tcp_ps);
idr_init(&pernet->udp_ps);
idr_init(&pernet->ipoib_ps);
idr_init(&pernet->ib_ps);
xa_init(&pernet->tcp_ps);
xa_init(&pernet->udp_ps);
xa_init(&pernet->ipoib_ps);
xa_init(&pernet->ib_ps);
return 0;
}
@@ -4667,10 +4692,10 @@ static void cma_exit_net(struct net *net)
{
struct cma_pernet *pernet = cma_pernet(net);
idr_destroy(&pernet->tcp_ps);
idr_destroy(&pernet->udp_ps);
idr_destroy(&pernet->ipoib_ps);
idr_destroy(&pernet->ib_ps);
WARN_ON(!xa_empty(&pernet->tcp_ps));
WARN_ON(!xa_empty(&pernet->udp_ps));
WARN_ON(!xa_empty(&pernet->ipoib_ps));
WARN_ON(!xa_empty(&pernet->ib_ps));
}
static struct pernet_operations cma_pernet_operations = {


@@ -55,6 +55,7 @@ struct pkey_index_qp_list {
};
extern const struct attribute_group ib_dev_attr_group;
extern bool ib_devices_shared_netns;
int ib_device_register_sysfs(struct ib_device *device);
void ib_device_unregister_sysfs(struct ib_device *device);
@@ -279,7 +280,8 @@ static inline void ib_mad_agent_security_change(void)
}
#endif
struct ib_device *ib_device_get_by_index(u32 ifindex);
struct ib_device *ib_device_get_by_index(const struct net *net, u32 index);
/* RDMA device netlink */
void nldev_init(void);
void nldev_exit(void);
@@ -302,6 +304,7 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
qp->device = dev;
qp->pd = pd;
qp->uobject = uobj;
qp->real_qp = qp;
/*
* We don't track XRC QPs for now, because they don't have PD
* and more importantly they are created internaly by driver,
@@ -336,4 +339,17 @@ int roce_resolve_route_from_path(struct sa_path_rec *rec,
const struct ib_gid_attr *attr);
struct net_device *rdma_read_gid_attr_ndev_rcu(const struct ib_gid_attr *attr);
void ib_free_port_attrs(struct ib_core_device *coredev);
int ib_setup_port_attrs(struct ib_core_device *coredev);
int rdma_compatdev_set(u8 enable);
int ib_port_register_module_stat(struct ib_device *device, u8 port_num,
struct kobject *kobj, struct kobj_type *ktype,
const char *name);
void ib_port_unregister_module_stat(struct kobject *kobj);
int ib_device_set_netns_put(struct sk_buff *skb,
struct ib_device *dev, u32 ns_fd);
#endif /* _CORE_PRIV_H */


@@ -128,15 +128,17 @@ static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
* @comp_vector: HCA completion vectors for this CQ
* @poll_ctx: context to poll the CQ from.
* @caller: module owner name.
* @udata: Valid user data or NULL for kernel object
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
* specified context. The ULP must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
int nr_cqe, int comp_vector,
enum ib_poll_context poll_ctx, const char *caller)
struct ib_cq *__ib_alloc_cq_user(struct ib_device *dev, void *private,
int nr_cqe, int comp_vector,
enum ib_poll_context poll_ctx,
const char *caller, struct ib_udata *udata)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
@@ -145,7 +147,7 @@ struct ib_cq *__ib_alloc_cq(struct ib_device *dev, void *private,
struct ib_cq *cq;
int ret = -ENOMEM;
cq = dev->ops.create_cq(dev, &cq_attr, NULL, NULL);
cq = dev->ops.create_cq(dev, &cq_attr, NULL);
if (IS_ERR(cq))
return cq;
@@ -193,16 +195,17 @@ out_free_wc:
kfree(cq->wc);
rdma_restrack_del(&cq->res);
out_destroy_cq:
cq->device->ops.destroy_cq(cq);
cq->device->ops.destroy_cq(cq, udata);
return ERR_PTR(ret);
}
EXPORT_SYMBOL(__ib_alloc_cq);
EXPORT_SYMBOL(__ib_alloc_cq_user);
/**
* ib_free_cq - free a completion queue
* @cq: completion queue to free.
* @udata: User data or NULL for kernel object
*/
void ib_free_cq(struct ib_cq *cq)
void ib_free_cq_user(struct ib_cq *cq, struct ib_udata *udata)
{
int ret;
@@ -225,7 +228,7 @@ void ib_free_cq(struct ib_cq *cq)
kfree(cq->wc);
rdma_restrack_del(&cq->res);
ret = cq->device->ops.destroy_cq(cq);
ret = cq->device->ops.destroy_cq(cq, udata);
WARN_ON_ONCE(ret);
}
EXPORT_SYMBOL(ib_free_cq);
EXPORT_SYMBOL(ib_free_cq_user);
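
For context on the _user suffix split above: in-kernel ULPs keep calling the
plain wrappers (ib_alloc_cq(), ib_free_cq()), which now pass a NULL udata
through to these functions. A hedged usage sketch follows; the example_* names
are invented, while the ib_* calls are the existing kernel API.

#include <rdma/ib_verbs.h>

struct example_ctx {
	struct ib_cqe cqe;	/* the CQ API keys completions off wr_cqe, not wr_id */
};

static void example_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct example_ctx *ctx =
		container_of(wc->wr_cqe, struct example_ctx, cqe);

	pr_debug("send completed with status %d (%p)\n", wc->status, ctx);
}

static int example_post_send(struct ib_qp *qp, struct example_ctx *ctx,
			     struct ib_sge *sge)
{
	struct ib_send_wr wr = {
		.wr_cqe	 = &ctx->cqe,
		.sg_list = sge,
		.num_sge = 1,
		.opcode	 = IB_WR_SEND,
	};

	ctx->cqe.done = example_send_done;
	return ib_post_send(qp, &wr, NULL);
}

static struct ib_cq *example_alloc_cq(struct ib_device *dev, void *priv)
{
	/* A kernel caller never supplies udata; __ib_alloc_cq_user() sees NULL. */
	return ib_alloc_cq(dev, priv, 256, 0, IB_POLL_SOFTIRQ);
}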

File diff suppressed because it is too large.


@@ -394,7 +394,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
/* destroy the listening endpoint */
cm_id->device->iwcm->destroy_listen(cm_id);
cm_id->device->ops.iw_destroy_listen(cm_id);
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_ESTABLISHED:
@@ -417,7 +417,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
*/
cm_id_priv->state = IW_CM_STATE_DESTROYING;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
cm_id->device->iwcm->reject(cm_id, NULL, 0);
cm_id->device->ops.iw_reject(cm_id, NULL, 0);
spin_lock_irqsave(&cm_id_priv->lock, flags);
break;
case IW_CM_STATE_CONN_SENT:
@@ -427,7 +427,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
break;
}
if (cm_id_priv->qp) {
cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
cm_id_priv->qp = NULL;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -504,7 +504,7 @@ static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr,
static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
{
const char *devname = dev_name(&cm_id->device->dev);
const char *ifname = cm_id->device->iwcm->ifname;
const char *ifname = cm_id->device->iw_ifname;
struct iwpm_dev_data pm_reg_msg = {};
struct iwpm_sa_data pm_msg;
int status;
@@ -526,7 +526,7 @@ static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
cm_id->mapped = true;
pm_msg.loc_addr = cm_id->local_addr;
pm_msg.rem_addr = cm_id->remote_addr;
pm_msg.flags = (cm_id->device->iwcm->driver_flags & IW_F_NO_PORT_MAP) ?
pm_msg.flags = (cm_id->device->iw_driver_flags & IW_F_NO_PORT_MAP) ?
IWPM_FLAGS_NO_PORT_MAP : 0;
if (active)
status = iwpm_add_and_query_mapping(&pm_msg,
@@ -577,7 +577,8 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
ret = iw_cm_map(cm_id, false);
if (!ret)
ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
ret = cm_id->device->ops.iw_create_listen(cm_id,
backlog);
if (ret)
cm_id_priv->state = IW_CM_STATE_IDLE;
spin_lock_irqsave(&cm_id_priv->lock, flags);
@@ -617,7 +618,7 @@ int iw_cm_reject(struct iw_cm_id *cm_id,
cm_id_priv->state = IW_CM_STATE_IDLE;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
ret = cm_id->device->iwcm->reject(cm_id, private_data,
ret = cm_id->device->ops.iw_reject(cm_id, private_data,
private_data_len);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
@@ -653,25 +654,25 @@ int iw_cm_accept(struct iw_cm_id *cm_id,
return -EINVAL;
}
/* Get the ib_qp given the QPN */
qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn);
if (!qp) {
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
wake_up_all(&cm_id_priv->connect_wait);
return -EINVAL;
}
cm_id->device->iwcm->add_ref(qp);
cm_id->device->ops.iw_add_ref(qp);
cm_id_priv->qp = qp;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
ret = cm_id->device->iwcm->accept(cm_id, iw_param);
ret = cm_id->device->ops.iw_accept(cm_id, iw_param);
if (ret) {
/* An error on accept precludes provider events */
BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
cm_id_priv->state = IW_CM_STATE_IDLE;
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id_priv->qp) {
cm_id->device->iwcm->rem_ref(qp);
cm_id->device->ops.iw_rem_ref(qp);
cm_id_priv->qp = NULL;
}
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
@@ -712,25 +713,25 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
}
/* Get the ib_qp given the QPN */
qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn);
if (!qp) {
ret = -EINVAL;
goto err;
}
cm_id->device->iwcm->add_ref(qp);
cm_id->device->ops.iw_add_ref(qp);
cm_id_priv->qp = qp;
cm_id_priv->state = IW_CM_STATE_CONN_SENT;
spin_unlock_irqrestore(&cm_id_priv->lock, flags);
ret = iw_cm_map(cm_id, true);
if (!ret)
ret = cm_id->device->iwcm->connect(cm_id, iw_param);
ret = cm_id->device->ops.iw_connect(cm_id, iw_param);
if (!ret)
return 0; /* success */
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id_priv->qp) {
cm_id->device->iwcm->rem_ref(qp);
cm_id->device->ops.iw_rem_ref(qp);
cm_id_priv->qp = NULL;
}
cm_id_priv->state = IW_CM_STATE_IDLE;
@@ -895,7 +896,7 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
} else {
/* REJECTED or RESET */
cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
cm_id_priv->qp = NULL;
cm_id_priv->state = IW_CM_STATE_IDLE;
}
@@ -946,7 +947,7 @@ static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
spin_lock_irqsave(&cm_id_priv->lock, flags);
if (cm_id_priv->qp) {
cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
cm_id_priv->id.device->ops.iw_rem_ref(cm_id_priv->qp);
cm_id_priv->qp = NULL;
}
switch (cm_id_priv->state) {


@@ -3,7 +3,7 @@
* Copyright (c) 2005 Intel Corporation. All rights reserved.
* Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2009 HNR Consulting. All rights reserved.
* Copyright (c) 2014 Intel Corporation. All rights reserved.
* Copyright (c) 2014,2018 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
@@ -38,10 +38,10 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/dma-mapping.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
#include <linux/xarray.h>
#include <rdma/ib_cache.h>
#include "mad_priv.h"
@@ -51,6 +51,32 @@
#include "opa_smi.h"
#include "agent.h"
#define CREATE_TRACE_POINTS
#include <trace/events/ib_mad.h>
#ifdef CONFIG_TRACEPOINTS
static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
struct ib_mad_qp_info *qp_info,
struct trace_event_raw_ib_mad_send_template *entry)
{
u16 pkey;
struct ib_device *dev = qp_info->port_priv->device;
u8 pnum = qp_info->port_priv->port_num;
struct ib_ud_wr *wr = &mad_send_wr->send_wr;
struct rdma_ah_attr attr = {};
rdma_query_ah(wr->ah, &attr);
/* These are common */
entry->sl = attr.sl;
ib_query_pkey(dev, pnum, wr->pkey_index, &pkey);
entry->pkey = pkey;
entry->rqpn = wr->remote_qpn;
entry->rqkey = wr->remote_qkey;
entry->dlid = rdma_ah_get_dlid(&attr);
}
#endif
static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
@@ -59,12 +85,9 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
/*
* The mlx4 driver uses the top byte to distinguish which virtual function
* generated the MAD, so we must avoid using it.
*/
#define AGENT_ID_LIMIT (1 << 24)
static DEFINE_IDR(ib_mad_clients);
/* Client ID 0 is used for snoop-only clients */
static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
static u32 ib_mad_client_next;
static struct list_head ib_mad_port_list;
/* Port list lock */
@@ -389,18 +412,17 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
goto error4;
}
idr_preload(GFP_KERNEL);
idr_lock(&ib_mad_clients);
ret2 = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0,
AGENT_ID_LIMIT, GFP_ATOMIC);
idr_unlock(&ib_mad_clients);
idr_preload_end();
/*
* The mlx4 driver uses the top byte to distinguish which virtual
* function generated the MAD, so we must avoid using it.
*/
ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid,
mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1),
&ib_mad_client_next, GFP_KERNEL);
if (ret2 < 0) {
ret = ERR_PTR(ret2);
goto error5;
}
mad_agent_priv->agent.hi_tid = ret2;
/*
* Make sure MAD registration (if supplied)
@@ -445,12 +467,11 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
}
spin_unlock_irq(&port_priv->reg_lock);
trace_ib_mad_create_agent(mad_agent_priv);
return &mad_agent_priv->agent;
error6:
spin_unlock_irq(&port_priv->reg_lock);
idr_lock(&ib_mad_clients);
idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
idr_unlock(&ib_mad_clients);
xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
error5:
ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
error4:
@@ -602,6 +623,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
struct ib_mad_port_private *port_priv;
/* Note that we could still be handling received MADs */
trace_ib_mad_unregister_agent(mad_agent_priv);
/*
* Canceling all sends results in dropping received response
@@ -614,9 +636,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
spin_lock_irq(&port_priv->reg_lock);
remove_mad_reg_req(mad_agent_priv);
spin_unlock_irq(&port_priv->reg_lock);
idr_lock(&ib_mad_clients);
idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
idr_unlock(&ib_mad_clients);
xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
flush_workqueue(port_priv->wq);
ib_cancel_rmpp_recvs(mad_agent_priv);
@@ -821,6 +841,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
if (opa && smp->class_version == OPA_SM_CLASS_VERSION) {
u32 opa_drslid;
trace_ib_mad_handle_out_opa_smi(opa_smp);
if ((opa_get_smp_direction(opa_smp)
? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) ==
OPA_LID_PERMISSIVE &&
@@ -846,6 +868,8 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD)
goto out;
} else {
trace_ib_mad_handle_out_ib_smi(smp);
if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
IB_LID_PERMISSIVE &&
smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) ==
@@ -1223,6 +1247,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
spin_lock_irqsave(&qp_info->send_queue.lock, flags);
if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
trace_ib_mad_ib_send_mad(mad_send_wr, qp_info);
ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
NULL);
list = &qp_info->send_queue.list;
@@ -1756,7 +1781,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
*/
hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
rcu_read_lock();
mad_agent = idr_find(&ib_mad_clients, hi_tid);
mad_agent = xa_load(&ib_mad_clients, hi_tid);
if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount))
mad_agent = NULL;
rcu_read_unlock();
@@ -2077,6 +2102,8 @@ static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv
enum smi_forward_action retsmi;
struct ib_smp *smp = (struct ib_smp *)recv->mad;
trace_ib_mad_handle_ib_smi(smp);
if (smi_handle_dr_smp_recv(smp,
rdma_cap_ib_switch(port_priv->device),
port_num,
@@ -2162,6 +2189,8 @@ handle_opa_smi(struct ib_mad_port_private *port_priv,
enum smi_forward_action retsmi;
struct opa_smp *smp = (struct opa_smp *)recv->mad;
trace_ib_mad_handle_opa_smi(smp);
if (opa_smi_handle_dr_smp_recv(smp,
rdma_cap_ib_switch(port_priv->device),
port_num,
@@ -2286,6 +2315,9 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
goto out;
trace_ib_mad_recv_done_handler(qp_info, wc,
(struct ib_mad_hdr *)recv->mad);
mad_size = recv->mad_size;
response = alloc_mad_private(mad_size, GFP_KERNEL);
if (!response)
@@ -2332,6 +2364,7 @@ static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad);
if (mad_agent) {
trace_ib_mad_recv_done_agent(mad_agent);
ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
/*
* recv is freed up in error cases in ib_mad_complete_recv
@@ -2496,6 +2529,9 @@ static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
send_queue = mad_list->mad_queue;
qp_info = send_queue->qp_info;
trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv);
trace_ib_mad_send_done_handler(mad_send_wr, wc);
retry:
ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
mad_send_wr->header_mapping,
@@ -2527,6 +2563,7 @@ retry:
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
if (queued_send_wr) {
trace_ib_mad_send_done_resend(queued_send_wr, qp_info);
ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
NULL);
if (ret) {
@@ -2574,6 +2611,7 @@ static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
if (mad_send_wr->retry) {
/* Repost send */
mad_send_wr->retry = 0;
trace_ib_mad_error_handler(mad_send_wr, qp_info);
ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
NULL);
if (!ret)
@@ -3356,9 +3394,6 @@ int ib_mad_init(void)
INIT_LIST_HEAD(&ib_mad_port_list);
/* Client ID 0 is used for snoop-only clients */
idr_alloc(&ib_mad_clients, NULL, 0, 0, GFP_KERNEL);
if (ib_register_client(&mad_client)) {
pr_err("Couldn't register ib_mad client\n");
return -EINVAL;
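
The trace_ib_mad_*() calls added throughout this file refer to events defined
in the new include/trace/events/ib_mad.h (see the MAINTAINERS hunk above). For
readers unfamiliar with the mechanism, a generic TRACE_EVENT skeleton looks
like the sketch below; this is illustrative boilerplate, not the actual ib_mad
event definitions.

/* trace/events/example.h -- hypothetical tracepoint header */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM example

#if !defined(_TRACE_EXAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_EXAMPLE_H

#include <linux/tracepoint.h>

TRACE_EVENT(example_send,
	TP_PROTO(u32 qpn, u16 dlid),
	TP_ARGS(qpn, dlid),
	TP_STRUCT__entry(
		__field(u32, qpn)
		__field(u16, dlid)
	),
	TP_fast_assign(
		__entry->qpn  = qpn;
		__entry->dlid = dlid;
	),
	TP_printk("qpn=%u dlid=%u", __entry->qpn, __entry->dlid)
);

#endif /* _TRACE_EXAMPLE_H */

/* This include must stay outside the guard. */
#include <trace/define_trace.h>

/* Exactly one .c file then emits the event bodies, as mad.c does above:
 *	#define CREATE_TRACE_POINTS
 *	#include <trace/events/example.h>
 * after which trace_example_send() is available to callers of the header.
 */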


@@ -73,14 +73,14 @@ struct ib_mad_private_header {
struct ib_mad_recv_wc recv_wc;
struct ib_wc wc;
u64 mapping;
} __attribute__ ((packed));
} __packed;
struct ib_mad_private {
struct ib_mad_private_header header;
size_t mad_size;
struct ib_grh grh;
u8 mad[0];
} __attribute__ ((packed));
} __packed;
struct ib_rmpp_segment {
struct list_head list;


@@ -804,7 +804,6 @@ static void mcast_event_handler(struct ib_event_handler *handler,
switch (event->event) {
case IB_EVENT_PORT_ERR:
case IB_EVENT_LID_CHANGE:
case IB_EVENT_SM_CHANGE:
case IB_EVENT_CLIENT_REREGISTER:
mcast_groups_event(&dev->port[index], MCAST_GROUP_ERROR);
break;


@@ -116,6 +116,10 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
.len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
[RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
.len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
[RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
};
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
@@ -198,6 +202,8 @@ static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
{
char fw[IB_FW_VERSION_NAME_MAX];
int ret = 0;
u8 port;
if (fill_nldev_handle(msg, device))
return -EMSGSIZE;
@@ -226,7 +232,25 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
return -EMSGSIZE;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
return -EMSGSIZE;
return 0;
/*
* Link type is determined on first port and mlx4 device
* which can potentially have two different link type for the same
* IB device is considered as better to be avoided in the future,
*/
port = rdma_start_port(device);
if (rdma_cap_opa_mad(device, port))
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
else if (rdma_protocol_ib(device, port))
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
else if (rdma_protocol_iwarp(device, port))
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
else if (rdma_protocol_roce(device, port))
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
else if (rdma_protocol_usnic(device, port))
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
"usnic");
return ret;
}
static int fill_port_info(struct sk_buff *msg,
@@ -615,7 +639,7 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = ib_device_get_by_index(index);
device = ib_device_get_by_index(sock_net(skb->sk), index);
if (!device)
return -EINVAL;
@@ -659,7 +683,7 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = ib_device_get_by_index(index);
device = ib_device_get_by_index(sock_net(skb->sk), index);
if (!device)
return -EINVAL;
@@ -669,9 +693,20 @@ static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
IB_DEVICE_NAME_MAX);
err = ib_device_rename(device, name);
goto done;
}
if (tb[RDMA_NLDEV_NET_NS_FD]) {
u32 ns_fd;
ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
err = ib_device_set_netns_put(skb, device, ns_fd);
goto put_done;
}
done:
ib_device_put(device);
put_done:
return err;
}
@@ -707,7 +742,7 @@ static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
/*
* There is no need to take lock, because
* we are relying on ib_core's lists_rwsem
* we are relying on ib_core's locking.
*/
return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
}
@@ -730,7 +765,7 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = ib_device_get_by_index(index);
device = ib_device_get_by_index(sock_net(skb->sk), index);
if (!device)
return -EINVAL;
@@ -784,7 +819,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
return -EINVAL;
ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = ib_device_get_by_index(ifindex);
device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
if (!device)
return -EINVAL;
@@ -839,7 +874,7 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = ib_device_get_by_index(index);
device = ib_device_get_by_index(sock_net(skb->sk), index);
if (!device)
return -EINVAL;
@@ -887,7 +922,6 @@ static int _nldev_res_get_dumpit(struct ib_device *device,
nlmsg_cancel(skb, nlh);
goto out;
}
nlmsg_end(skb, nlh);
idx++;
@@ -988,7 +1022,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = ib_device_get_by_index(index);
device = ib_device_get_by_index(sock_net(skb->sk), index);
if (!device)
return -EINVAL;
@@ -1085,7 +1119,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = ib_device_get_by_index(index);
device = ib_device_get_by_index(sock_net(skb->sk), index);
if (!device)
return -EINVAL;
@@ -1300,7 +1334,7 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
device = ib_device_get_by_index(index);
device = ib_device_get_by_index(sock_net(skb->sk), index);
if (!device)
return -EINVAL;
@@ -1313,6 +1347,55 @@ static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
static int nldev_get_sys_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
struct nlmsghdr *nlh;
int err;
err = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, NULL);
if (err)
return err;
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
RDMA_NLDEV_CMD_SYS_GET),
0, 0);
err = nla_put_u8(skb, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
(u8)ib_devices_shared_netns);
if (err) {
nlmsg_cancel(skb, nlh);
return err;
}
nlmsg_end(skb, nlh);
return skb->len;
}
static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
u8 enable;
int err;
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
nldev_policy, extack);
if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
return -EINVAL;
enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
/* Only 0 and 1 are supported */
if (enable > 1)
return -EINVAL;
err = rdma_compatdev_set(enable);
return err;
}
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
[RDMA_NLDEV_CMD_GET] = {
.doit = nldev_get_doit,
@@ -1358,6 +1441,13 @@ static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
.doit = nldev_res_get_pd_doit,
.dump = nldev_res_get_pd_dumpit,
},
[RDMA_NLDEV_CMD_SYS_GET] = {
.dump = nldev_get_sys_get_dumpit,
},
[RDMA_NLDEV_CMD_SYS_SET] = {
.doit = nldev_set_sys_set_doit,
.flags = RDMA_NL_ADMIN_PERM,
},
};
void __init nldev_init(void)


@@ -125,9 +125,10 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj,
* and consumes the kref on the uobj.
*/
static int uverbs_destroy_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason reason)
enum rdma_remove_reason reason,
struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_file *ufile = uobj->ufile;
struct ib_uverbs_file *ufile = attrs->ufile;
unsigned long flags;
int ret;
@@ -135,7 +136,8 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
assert_uverbs_usecnt(uobj, UVERBS_LOOKUP_WRITE);
if (uobj->object) {
ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason);
ret = uobj->uapi_object->type_class->destroy_hw(uobj, reason,
attrs);
if (ret) {
if (ib_is_destroy_retryable(ret, reason, uobj))
return ret;
@@ -196,9 +198,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
* version requires the caller to have already obtained an
* LOOKUP_DESTROY uobject kref.
*/
int uobj_destroy(struct ib_uobject *uobj)
int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_file *ufile = uobj->ufile;
struct ib_uverbs_file *ufile = attrs->ufile;
int ret;
down_read(&ufile->hw_destroy_rwsem);
@@ -207,7 +209,7 @@ int uobj_destroy(struct ib_uobject *uobj)
if (ret)
goto out_unlock;
ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY);
ret = uverbs_destroy_uobject(uobj, RDMA_REMOVE_DESTROY, attrs);
if (ret) {
atomic_set(&uobj->usecnt, 0);
goto out_unlock;
@@ -224,18 +226,17 @@ out_unlock:
* uverbs_put_destroy.
*/
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
u32 id,
const struct uverbs_attr_bundle *attrs)
u32 id, struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
int ret;
uobj = rdma_lookup_get_uobject(obj, attrs->ufile, id,
UVERBS_LOOKUP_DESTROY);
UVERBS_LOOKUP_DESTROY, attrs);
if (IS_ERR(uobj))
return uobj;
ret = uobj_destroy(uobj);
ret = uobj_destroy(uobj, attrs);
if (ret) {
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
return ERR_PTR(ret);
@@ -249,7 +250,7 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
* (negative errno on failure). For use by callers that do not need the uobj.
*/
int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
const struct uverbs_attr_bundle *attrs)
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
@@ -296,25 +297,13 @@ static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
static int idr_add_uobj(struct ib_uobject *uobj)
{
int ret;
idr_preload(GFP_KERNEL);
spin_lock(&uobj->ufile->idr_lock);
/*
* We start with allocating an idr pointing to NULL. This represents an
* object which isn't initialized yet. We'll replace it later on with
* the real object once we commit.
*/
ret = idr_alloc(&uobj->ufile->idr, NULL, 0,
min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
if (ret >= 0)
uobj->id = ret;
spin_unlock(&uobj->ufile->idr_lock);
idr_preload_end();
return ret < 0 ? ret : 0;
/*
* We start with allocating an idr pointing to NULL. This represents an
* object which isn't initialized yet. We'll replace it later on with
* the real object once we commit.
*/
return xa_alloc(&uobj->ufile->idr, &uobj->id, NULL, xa_limit_32b,
GFP_KERNEL);
}
/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
@@ -324,29 +313,20 @@ lookup_get_idr_uobject(const struct uverbs_api_object *obj,
enum rdma_lookup_mode mode)
{
struct ib_uobject *uobj;
unsigned long idrno = id;
if (id < 0 || id > ULONG_MAX)
return ERR_PTR(-EINVAL);
rcu_read_lock();
/* object won't be released as we're protected in rcu */
uobj = idr_find(&ufile->idr, idrno);
if (!uobj) {
uobj = ERR_PTR(-ENOENT);
goto free;
}
/*
* The idr_find is guaranteed to return a pointer to something that
* isn't freed yet, or NULL, as the free after idr_remove goes through
* kfree_rcu(). However the object may still have been released and
* kfree() could be called at any time.
*/
if (!kref_get_unless_zero(&uobj->ref))
uobj = xa_load(&ufile->idr, id);
if (!uobj || !kref_get_unless_zero(&uobj->ref))
uobj = ERR_PTR(-ENOENT);
free:
rcu_read_unlock();
return uobj;
}
@@ -393,12 +373,13 @@ lookup_get_fd_uobject(const struct uverbs_api_object *obj,
struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
struct ib_uverbs_file *ufile, s64 id,
enum rdma_lookup_mode mode)
enum rdma_lookup_mode mode,
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
int ret;
if (IS_ERR(obj) && PTR_ERR(obj) == -ENOMSG) {
if (obj == ERR_PTR(-ENOMSG)) {
/* must be UVERBS_IDR_ANY_OBJECT, see uapi_get_object() */
uobj = lookup_get_idr_uobject(NULL, ufile, id, mode);
if (IS_ERR(uobj))
@@ -431,6 +412,8 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_api_object *obj,
ret = uverbs_try_lock_object(uobj, mode);
if (ret)
goto free;
if (attrs)
attrs->context = uobj->context;
return uobj;
free:
@@ -438,38 +421,6 @@ free:
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
}
struct ib_uobject *_uobj_get_read(enum uverbs_default_objects type,
u32 object_id,
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
uobj = rdma_lookup_get_uobject(uobj_get_type(attrs, type), attrs->ufile,
object_id, UVERBS_LOOKUP_READ);
if (IS_ERR(uobj))
return uobj;
attrs->context = uobj->context;
return uobj;
}
struct ib_uobject *_uobj_get_write(enum uverbs_default_objects type,
u32 object_id,
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *uobj;
uobj = rdma_lookup_get_uobject(uobj_get_type(attrs, type), attrs->ufile,
object_id, UVERBS_LOOKUP_WRITE);
if (IS_ERR(uobj))
return uobj;
attrs->context = uobj->context;
return uobj;
}
static struct ib_uobject *
alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
@@ -489,14 +440,12 @@ alloc_begin_idr_uobject(const struct uverbs_api_object *obj,
ret = ib_rdmacg_try_charge(&uobj->cg_obj, uobj->context->device,
RDMACG_RESOURCE_HCA_OBJECT);
if (ret)
goto idr_remove;
goto remove;
return uobj;
idr_remove:
spin_lock(&ufile->idr_lock);
idr_remove(&ufile->idr, uobj->id);
spin_unlock(&ufile->idr_lock);
remove:
xa_erase(&ufile->idr, uobj->id);
uobj_put:
uverbs_uobject_put(uobj);
return ERR_PTR(ret);
@@ -526,7 +475,8 @@ alloc_begin_fd_uobject(const struct uverbs_api_object *obj,
}
struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
struct ib_uverbs_file *ufile)
struct ib_uverbs_file *ufile,
struct uverbs_attr_bundle *attrs)
{
struct ib_uobject *ret;
@@ -546,6 +496,8 @@ struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_api_object *obj,
up_read(&ufile->hw_destroy_rwsem);
return ret;
}
if (attrs)
attrs->context = ret->context;
return ret;
}
@@ -554,18 +506,17 @@ static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
RDMACG_RESOURCE_HCA_OBJECT);
spin_lock(&uobj->ufile->idr_lock);
idr_remove(&uobj->ufile->idr, uobj->id);
spin_unlock(&uobj->ufile->idr_lock);
xa_erase(&uobj->ufile->idr, uobj->id);
}
static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs)
{
const struct uverbs_obj_idr_type *idr_type =
container_of(uobj->uapi_object->type_attrs,
struct uverbs_obj_idr_type, type);
int ret = idr_type->destroy_object(uobj, why);
int ret = idr_type->destroy_object(uobj, why, attrs);
/*
* We can only fail gracefully if the user requested to destroy the
@@ -586,9 +537,7 @@ static int __must_check destroy_hw_idr_uobject(struct ib_uobject *uobj,
static void remove_handle_idr_uobject(struct ib_uobject *uobj)
{
spin_lock(&uobj->ufile->idr_lock);
idr_remove(&uobj->ufile->idr, uobj->id);
spin_unlock(&uobj->ufile->idr_lock);
xa_erase(&uobj->ufile->idr, uobj->id);
/* Matches the kref in alloc_commit_idr_uobject */
uverbs_uobject_put(uobj);
}
@@ -599,7 +548,8 @@ static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
}
static int __must_check destroy_hw_fd_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
enum rdma_remove_reason why,
struct uverbs_attr_bundle *attrs)
{
const struct uverbs_obj_fd_type *fd_type = container_of(
uobj->uapi_object->type_attrs, struct uverbs_obj_fd_type, type);
@@ -618,17 +568,17 @@ static void remove_handle_fd_uobject(struct ib_uobject *uobj)
static int alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
struct ib_uverbs_file *ufile = uobj->ufile;
void *old;
spin_lock(&ufile->idr_lock);
/*
* We already allocated this IDR with a NULL object, so
* this shouldn't fail.
*
* NOTE: Once we set the IDR we loose ownership of our kref on uobj.
* NOTE: Storing the uobj transfers our kref on uobj to the XArray.
* It will be put by remove_commit_idr_uobject()
*/
WARN_ON(idr_replace(&ufile->idr, uobj, uobj->id));
spin_unlock(&ufile->idr_lock);
old = xa_store(&ufile->idr, uobj->id, uobj, GFP_KERNEL);
WARN_ON(old != NULL);
return 0;
}
@@ -675,15 +625,16 @@ static int alloc_commit_fd_uobject(struct ib_uobject *uobj)
* caller can no longer assume uobj is valid. If this function fails it
* destroys the uboject, including the attached HW object.
*/
int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj,
struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_file *ufile = uobj->ufile;
struct ib_uverbs_file *ufile = attrs->ufile;
int ret;
/* alloc_commit consumes the uobj kref */
ret = uobj->uapi_object->type_class->alloc_commit(uobj);
if (ret) {
uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
up_read(&ufile->hw_destroy_rwsem);
return ret;
}
@@ -707,12 +658,13 @@ int __must_check rdma_alloc_commit_uobject(struct ib_uobject *uobj)
* This consumes the kref for uobj. It is up to the caller to unwind the HW
* object and anything else connected to uobj before calling this.
*/
void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
void rdma_alloc_abort_uobject(struct ib_uobject *uobj,
struct uverbs_attr_bundle *attrs)
{
struct ib_uverbs_file *ufile = uobj->ufile;
uobj->object = NULL;
uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT);
uverbs_destroy_uobject(uobj, RDMA_REMOVE_ABORT, attrs);
/* Matches the down_read in rdma_alloc_begin_uobject */
up_read(&ufile->hw_destroy_rwsem);
@@ -760,29 +712,28 @@ void rdma_lookup_put_uobject(struct ib_uobject *uobj,
void setup_ufile_idr_uobject(struct ib_uverbs_file *ufile)
{
spin_lock_init(&ufile->idr_lock);
idr_init(&ufile->idr);
xa_init_flags(&ufile->idr, XA_FLAGS_ALLOC);
}
void release_ufile_idr_uobject(struct ib_uverbs_file *ufile)
{
struct ib_uobject *entry;
int id;
unsigned long id;
/*
* At this point uverbs_cleanup_ufile() is guaranteed to have run, and
* there are no HW objects left, however the IDR is still populated
* there are no HW objects left, however the xarray is still populated
* with anything that has not been cleaned up by userspace. Since the
* kref on ufile is 0, nothing is allowed to call lookup_get.
*
* This is an optimized equivalent to remove_handle_idr_uobject
*/
idr_for_each_entry(&ufile->idr, entry, id) {
xa_for_each(&ufile->idr, id, entry) {
WARN_ON(entry->object);
uverbs_uobject_put(entry);
}
idr_destroy(&ufile->idr);
xa_destroy(&ufile->idr);
}
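For comparison with the old idr_for_each_entry()/idr_destroy() pair, here is a standalone sketch of the xa_for_each()/xa_destroy() teardown used above; example_release_all() and the kfree() stand-in for uverbs_uobject_put() are illustrative only.

#include <linux/xarray.h>

static void example_release_all(struct xarray *xa)
{
	unsigned long index;	/* the iteration index is unsigned long, as above */
	void *entry;

	/* xa_for_each() only visits slots that currently hold an entry. */
	xa_for_each(xa, index, entry)
		kfree(entry);	/* stand-in for uverbs_uobject_put() */

	/* Release the XArray's internal nodes once all entries are gone. */
	xa_destroy(xa);
}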
const struct uverbs_obj_type_class uverbs_idr_class = {
@@ -814,6 +765,10 @@ void uverbs_close_fd(struct file *f)
{
struct ib_uobject *uobj = f->private_data;
struct ib_uverbs_file *ufile = uobj->ufile;
struct uverbs_attr_bundle attrs = {
.context = uobj->context,
.ufile = ufile,
};
if (down_read_trylock(&ufile->hw_destroy_rwsem)) {
/*
@@ -823,7 +778,7 @@ void uverbs_close_fd(struct file *f)
* write lock here, or we have a kernel bug.
*/
WARN_ON(uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE));
uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE);
uverbs_destroy_uobject(uobj, RDMA_REMOVE_CLOSE, &attrs);
up_read(&ufile->hw_destroy_rwsem);
}
@@ -872,6 +827,7 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
{
struct ib_uobject *obj, *next_obj;
int ret = -EINVAL;
struct uverbs_attr_bundle attrs = { .ufile = ufile };
/*
* This shouldn't run while executing other commands on this
@@ -883,12 +839,13 @@ static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
* other threads (which might still use the FDs) chance to run.
*/
list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
attrs.context = obj->context;
/*
* if we hit this WARN_ON, that means we are
* racing with a lookup_get.
*/
WARN_ON(uverbs_try_lock_object(obj, UVERBS_LOOKUP_WRITE));
if (!uverbs_destroy_uobject(obj, reason))
if (!uverbs_destroy_uobject(obj, reason, &attrs))
ret = 0;
else
atomic_set(&obj->usecnt, 0);
@@ -967,26 +924,25 @@ const struct uverbs_obj_type_class uverbs_fd_class = {
EXPORT_SYMBOL(uverbs_fd_class);
struct ib_uobject *
uverbs_get_uobject_from_file(u16 object_id,
struct ib_uverbs_file *ufile,
enum uverbs_obj_access access, s64 id)
uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
s64 id, struct uverbs_attr_bundle *attrs)
{
const struct uverbs_api_object *obj =
uapi_get_object(ufile->device->uapi, object_id);
uapi_get_object(attrs->ufile->device->uapi, object_id);
switch (access) {
case UVERBS_ACCESS_READ:
return rdma_lookup_get_uobject(obj, ufile, id,
UVERBS_LOOKUP_READ);
return rdma_lookup_get_uobject(obj, attrs->ufile, id,
UVERBS_LOOKUP_READ, attrs);
case UVERBS_ACCESS_DESTROY:
/* Actual destruction is done inside uverbs_handle_method */
return rdma_lookup_get_uobject(obj, ufile, id,
UVERBS_LOOKUP_DESTROY);
return rdma_lookup_get_uobject(obj, attrs->ufile, id,
UVERBS_LOOKUP_DESTROY, attrs);
case UVERBS_ACCESS_WRITE:
return rdma_lookup_get_uobject(obj, ufile, id,
UVERBS_LOOKUP_WRITE);
return rdma_lookup_get_uobject(obj, attrs->ufile, id,
UVERBS_LOOKUP_WRITE, attrs);
case UVERBS_ACCESS_NEW:
return rdma_alloc_begin_uobject(obj, ufile);
return rdma_alloc_begin_uobject(obj, attrs->ufile, attrs);
default:
WARN_ON(true);
return ERR_PTR(-EOPNOTSUPP);
@@ -994,8 +950,8 @@ uverbs_get_uobject_from_file(u16 object_id,
}
int uverbs_finalize_object(struct ib_uobject *uobj,
enum uverbs_obj_access access,
bool commit)
enum uverbs_obj_access access, bool commit,
struct uverbs_attr_bundle *attrs)
{
int ret = 0;
@@ -1018,9 +974,9 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
break;
case UVERBS_ACCESS_NEW:
if (commit)
ret = rdma_alloc_commit_uobject(uobj);
ret = rdma_alloc_commit_uobject(uobj, attrs);
else
rdma_alloc_abort_uobject(uobj);
rdma_alloc_abort_uobject(uobj, attrs);
break;
default:
WARN_ON(true);
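Taken together, the signature changes in this file thread the uverbs_attr_bundle through the allocate/commit/abort path. The following is a hedged sketch of what a caller looks like after the change, assuming the declarations from rdma/uverbs_types.h are in scope; setup_hw_object() is a hypothetical driver step, not a function from this series.

static int example_create(const struct uverbs_api_object *obj,
			  struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj;
	int ret;

	/* The bundle, not a bare ib_uverbs_file, is now passed down. */
	uobj = rdma_alloc_begin_uobject(obj, attrs->ufile, attrs);
	if (IS_ERR(uobj))
		return PTR_ERR(uobj);

	ret = setup_hw_object(uobj, attrs);	/* hypothetical HW setup */
	if (ret) {
		rdma_alloc_abort_uobject(uobj, attrs);
		return ret;
	}

	/* Commit consumes the uobj reference, as documented above. */
	return rdma_alloc_commit_uobject(uobj, attrs);
}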

View File

@@ -48,7 +48,7 @@ struct ib_uverbs_device;
void uverbs_destroy_ufile_hw(struct ib_uverbs_file *ufile,
enum rdma_remove_reason reason);
int uobj_destroy(struct ib_uobject *uobj);
int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs);
/*
* uverbs_uobject_get is called in order to increase the reference count on
@@ -83,9 +83,8 @@ void uverbs_close_fd(struct file *f);
* uverbs_finalize_objects are called.
*/
struct ib_uobject *
uverbs_get_uobject_from_file(u16 object_id,
struct ib_uverbs_file *ufile,
enum uverbs_obj_access access, s64 id);
uverbs_get_uobject_from_file(u16 object_id, enum uverbs_obj_access access,
s64 id, struct uverbs_attr_bundle *attrs);
/*
* Note that certain finalize stages could return a status:
@@ -103,8 +102,8 @@ uverbs_get_uobject_from_file(u16 object_id,
* object.
*/
int uverbs_finalize_object(struct ib_uobject *uobj,
enum uverbs_obj_access access,
bool commit);
enum uverbs_obj_access access, bool commit,
struct uverbs_attr_bundle *attrs);
int uverbs_output_written(const struct uverbs_attr_bundle *bundle, size_t idx);

View File

@@ -40,7 +40,7 @@
#include <linux/slab.h>
#include <linux/dma-mapping.h>
#include <linux/kref.h>
#include <linux/idr.h>
#include <linux/xarray.h>
#include <linux/workqueue.h>
#include <uapi/linux/if_ether.h>
#include <rdma/ib_pack.h>
@@ -183,8 +183,7 @@ static struct ib_client sa_client = {
.remove = ib_sa_remove_one
};
static DEFINE_SPINLOCK(idr_lock);
static DEFINE_IDR(query_idr);
static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
static DEFINE_SPINLOCK(tid_lock);
static u32 tid;
@@ -1180,14 +1179,14 @@ void ib_sa_cancel_query(int id, struct ib_sa_query *query)
struct ib_mad_agent *agent;
struct ib_mad_send_buf *mad_buf;
spin_lock_irqsave(&idr_lock, flags);
if (idr_find(&query_idr, id) != query) {
spin_unlock_irqrestore(&idr_lock, flags);
xa_lock_irqsave(&queries, flags);
if (xa_load(&queries, id) != query) {
xa_unlock_irqrestore(&queries, flags);
return;
}
agent = query->port->agent;
mad_buf = query->mad_buf;
spin_unlock_irqrestore(&idr_lock, flags);
xa_unlock_irqrestore(&queries, flags);
/*
* If the query is still on the netlink request list, schedule
@@ -1363,21 +1362,14 @@ static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent)
static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
gfp_t gfp_mask)
{
bool preload = gfpflags_allow_blocking(gfp_mask);
unsigned long flags;
int ret, id;
if (preload)
idr_preload(gfp_mask);
spin_lock_irqsave(&idr_lock, flags);
id = idr_alloc(&query_idr, query, 0, 0, GFP_NOWAIT);
spin_unlock_irqrestore(&idr_lock, flags);
if (preload)
idr_preload_end();
if (id < 0)
return id;
xa_lock_irqsave(&queries, flags);
ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask);
xa_unlock_irqrestore(&queries, flags);
if (ret < 0)
return ret;
query->mad_buf->timeout_ms = timeout_ms;
query->mad_buf->context[0] = query;
@@ -1394,9 +1386,9 @@ static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms,
ret = ib_post_send_mad(query->mad_buf, NULL);
if (ret) {
spin_lock_irqsave(&idr_lock, flags);
idr_remove(&query_idr, id);
spin_unlock_irqrestore(&idr_lock, flags);
xa_lock_irqsave(&queries, flags);
__xa_erase(&queries, id);
xa_unlock_irqrestore(&queries, flags);
}
/*
@@ -2188,9 +2180,9 @@ static void send_handler(struct ib_mad_agent *agent,
break;
}
spin_lock_irqsave(&idr_lock, flags);
idr_remove(&query_idr, query->id);
spin_unlock_irqrestore(&idr_lock, flags);
xa_lock_irqsave(&queries, flags);
__xa_erase(&queries, query->id);
xa_unlock_irqrestore(&queries, flags);
free_mad(query);
if (query->client)
@@ -2475,5 +2467,5 @@ void ib_sa_cleanup(void)
destroy_workqueue(ib_nl_wq);
mcast_cleanup();
ib_unregister_client(&sa_client);
idr_destroy(&query_idr);
WARN_ON(!xa_empty(&queries));
}
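The allocation change in send_mad() above is worth spelling out: the XArray's own spinlock replaces the external idr_lock, and because __xa_alloc() may drop and retake that lock internally to allocate memory when the gfp mask allows sleeping, the old idr_preload() dance disappears. A minimal sketch of the same pattern, with example_xa/example_alloc_id() as placeholder names:

#include <linux/xarray.h>

static DEFINE_XARRAY_FLAGS(example_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);

static int example_alloc_id(void *item, u32 *id, gfp_t gfp)
{
	unsigned long flags;
	int ret;

	/*
	 * Take the XArray's lock IRQ-safely, matching XA_FLAGS_LOCK_IRQ and
	 * the old spin_lock_irqsave() usage in sa_query.c above.
	 */
	xa_lock_irqsave(&example_xa, flags);
	ret = __xa_alloc(&example_xa, id, item, xa_limit_32b, gfp);
	xa_unlock_irqrestore(&example_xa, flags);

	return ret;
}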

View File

@@ -349,10 +349,15 @@ static struct attribute *port_default_attrs[] = {
static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
{
if (!gid_attr->ndev)
return -EINVAL;
struct net_device *ndev;
size_t ret = -EINVAL;
return sprintf(buf, "%s\n", gid_attr->ndev->name);
rcu_read_lock();
ndev = rcu_dereference(gid_attr->ndev);
if (ndev)
ret = sprintf(buf, "%s\n", ndev->name);
rcu_read_unlock();
return ret;
}
static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
@@ -1015,8 +1020,10 @@ err_free_stats:
return;
}
static int add_port(struct ib_device *device, int port_num)
static int add_port(struct ib_core_device *coredev, int port_num)
{
struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
bool is_full_dev = &device->coredev == coredev;
struct ib_port *p;
struct ib_port_attr attr;
int i;
@@ -1034,7 +1041,7 @@ static int add_port(struct ib_device *device, int port_num)
p->port_num = port_num;
ret = kobject_init_and_add(&p->kobj, &port_type,
device->ports_kobj,
coredev->ports_kobj,
"%d", port_num);
if (ret) {
kfree(p);
@@ -1055,7 +1062,7 @@ static int add_port(struct ib_device *device, int port_num)
goto err_put;
}
if (device->ops.process_mad) {
if (device->ops.process_mad && is_full_dev) {
p->pma_table = get_counter_table(device, port_num);
ret = sysfs_create_group(&p->kobj, p->pma_table);
if (ret)
@@ -1111,7 +1118,7 @@ static int add_port(struct ib_device *device, int port_num)
if (ret)
goto err_free_pkey;
if (device->ops.init_port) {
if (device->ops.init_port && is_full_dev) {
ret = device->ops.init_port(device, port_num, &p->kobj);
if (ret)
goto err_remove_pkey;
@@ -1122,10 +1129,10 @@ static int add_port(struct ib_device *device, int port_num)
* port, so holder should be device. Therefore skip per port counter
* initialization.
*/
if (device->ops.alloc_hw_stats && port_num)
if (device->ops.alloc_hw_stats && port_num && is_full_dev)
setup_hw_stats(device, p, port_num);
list_add_tail(&p->kobj.entry, &device->port_list);
list_add_tail(&p->kobj.entry, &coredev->port_list);
kobject_uevent(&p->kobj, KOBJ_ADD);
return 0;
@@ -1194,6 +1201,7 @@ static ssize_t node_type_show(struct device *device,
case RDMA_NODE_RNIC: return sprintf(buf, "%d: RNIC\n", dev->node_type);
case RDMA_NODE_USNIC: return sprintf(buf, "%d: usNIC\n", dev->node_type);
case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type);
case RDMA_NODE_UNSPECIFIED: return sprintf(buf, "%d: unspecified\n", dev->node_type);
case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
default: return sprintf(buf, "%d: <unknown>\n", dev->node_type);
@@ -1279,11 +1287,11 @@ const struct attribute_group ib_dev_attr_group = {
.attrs = ib_dev_attrs,
};
static void ib_free_port_attrs(struct ib_device *device)
void ib_free_port_attrs(struct ib_core_device *coredev)
{
struct kobject *p, *t;
list_for_each_entry_safe(p, t, &device->port_list, entry) {
list_for_each_entry_safe(p, t, &coredev->port_list, entry) {
struct ib_port *port = container_of(p, struct ib_port, kobj);
list_del(&p->entry);
@@ -1303,20 +1311,22 @@ static void ib_free_port_attrs(struct ib_device *device)
kobject_put(p);
}
kobject_put(device->ports_kobj);
kobject_put(coredev->ports_kobj);
}
static int ib_setup_port_attrs(struct ib_device *device)
int ib_setup_port_attrs(struct ib_core_device *coredev)
{
struct ib_device *device = rdma_device_to_ibdev(&coredev->dev);
unsigned int port;
int ret;
device->ports_kobj = kobject_create_and_add("ports", &device->dev.kobj);
if (!device->ports_kobj)
coredev->ports_kobj = kobject_create_and_add("ports",
&coredev->dev.kobj);
if (!coredev->ports_kobj)
return -ENOMEM;
rdma_for_each_port (device, port) {
ret = add_port(device, port);
ret = add_port(coredev, port);
if (ret)
goto err_put;
}
@@ -1324,7 +1334,7 @@ static int ib_setup_port_attrs(struct ib_device *device)
return 0;
err_put:
ib_free_port_attrs(device);
ib_free_port_attrs(coredev);
return ret;
}
@@ -1332,7 +1342,7 @@ int ib_device_register_sysfs(struct ib_device *device)
{
int ret;
ret = ib_setup_port_attrs(device);
ret = ib_setup_port_attrs(&device->coredev);
if (ret)
return ret;
@@ -1348,5 +1358,48 @@ void ib_device_unregister_sysfs(struct ib_device *device)
free_hsag(&device->dev.kobj, device->hw_stats_ag);
kfree(device->hw_stats);
ib_free_port_attrs(device);
ib_free_port_attrs(&device->coredev);
}
/**
* ib_port_register_module_stat - add module counters under relevant port
* of IB device.
*
* @device: IB device to add counters
* @port_num: valid port number
* @kobj: pointer to the kobject to initialize
* @ktype: pointer to the ktype for this kobject.
* @name: the name of the kobject
*/
int ib_port_register_module_stat(struct ib_device *device, u8 port_num,
struct kobject *kobj, struct kobj_type *ktype,
const char *name)
{
struct kobject *p, *t;
int ret;
list_for_each_entry_safe(p, t, &device->coredev.port_list, entry) {
struct ib_port *port = container_of(p, struct ib_port, kobj);
if (port->port_num != port_num)
continue;
ret = kobject_init_and_add(kobj, ktype, &port->kobj, "%s",
name);
if (ret)
return ret;
}
return 0;
}
EXPORT_SYMBOL(ib_port_register_module_stat);
/**
* ib_port_unregister_module_stat - release module counters
* @kobj: pointer to the kobject to release
*/
void ib_port_unregister_module_stat(struct kobject *kobj)
{
kobject_put(kobj);
}
EXPORT_SYMBOL(ib_port_unregister_module_stat);
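A rough sketch of how a driver might use the two new helpers; drv_port_stats, drv_stats_ktype and the "driver_counters" name are all hypothetical, the prototypes added by this patch are assumed to be in scope, and a real ktype would also supply sysfs_ops and attributes.

#include <linux/kobject.h>
#include <linux/slab.h>

struct drv_port_stats {
	struct kobject kobj;
	/* driver-private counters would live here */
};

static void drv_stats_release(struct kobject *kobj)
{
	kfree(container_of(kobj, struct drv_port_stats, kobj));
}

static struct kobj_type drv_stats_ktype = {
	.release = drv_stats_release,
};

static int drv_register_port_stats(struct ib_device *ibdev, u8 port_num,
				   struct drv_port_stats *stats)
{
	/* Adds a "driver_counters" kobject under the matching sysfs port dir. */
	return ib_port_register_module_stat(ibdev, port_num, &stats->kobj,
					    &drv_stats_ktype, "driver_counters");
}

static void drv_unregister_port_stats(struct drv_port_stats *stats)
{
	/* Drops the reference taken by kobject_init_and_add(). */
	ib_port_unregister_module_stat(&stats->kobj);
}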

View File

@@ -42,7 +42,7 @@
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/cdev.h>
#include <linux/idr.h>
#include <linux/xarray.h>
#include <linux/mutex.h>
#include <linux/slab.h>
@@ -125,23 +125,22 @@ static struct ib_client ucm_client = {
.remove = ib_ucm_remove_one
};
static DEFINE_MUTEX(ctx_id_mutex);
static DEFINE_IDR(ctx_id_table);
static DEFINE_XARRAY_ALLOC(ctx_id_table);
static DECLARE_BITMAP(dev_map, IB_UCM_MAX_DEVICES);
static struct ib_ucm_context *ib_ucm_ctx_get(struct ib_ucm_file *file, int id)
{
struct ib_ucm_context *ctx;
mutex_lock(&ctx_id_mutex);
ctx = idr_find(&ctx_id_table, id);
xa_lock(&ctx_id_table);
ctx = xa_load(&ctx_id_table, id);
if (!ctx)
ctx = ERR_PTR(-ENOENT);
else if (ctx->file != file)
ctx = ERR_PTR(-EINVAL);
else
atomic_inc(&ctx->ref);
mutex_unlock(&ctx_id_mutex);
xa_unlock(&ctx_id_table);
return ctx;
}
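The lookup in ib_ucm_ctx_get() above also shows the general pattern for XArray lookups that hand out refcounted objects: xa_load() itself is lockless, but the xa_lock is held so the reference is taken before a concurrent xa_erase() can free the entry. A standalone sketch with a placeholder example_ctx type:

#include <linux/atomic.h>
#include <linux/err.h>
#include <linux/xarray.h>

struct example_ctx {
	atomic_t ref;
};

static struct example_ctx *example_ctx_get(struct xarray *xa, unsigned long id)
{
	struct example_ctx *ctx;

	xa_lock(xa);
	ctx = xa_load(xa, id);
	if (ctx)
		atomic_inc(&ctx->ref);	/* pin before dropping the lock */
	xa_unlock(xa);

	return ctx ?: ERR_PTR(-ENOENT);
}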
@@ -194,10 +193,7 @@ static struct ib_ucm_context *ib_ucm_ctx_alloc(struct ib_ucm_file *file)
ctx->file = file;
INIT_LIST_HEAD(&ctx->events);
mutex_lock(&ctx_id_mutex);
ctx->id = idr_alloc(&ctx_id_table, ctx, 0, 0, GFP_KERNEL);
mutex_unlock(&ctx_id_mutex);
if (ctx->id < 0)
if (xa_alloc(&ctx_id_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL))
goto error;
list_add_tail(&ctx->file_list, &file->ctxs);
@@ -514,9 +510,7 @@ static ssize_t ib_ucm_create_id(struct ib_ucm_file *file,
err2:
ib_destroy_cm_id(ctx->cm_id);
err1:
mutex_lock(&ctx_id_mutex);
idr_remove(&ctx_id_table, ctx->id);
mutex_unlock(&ctx_id_mutex);
xa_erase(&ctx_id_table, ctx->id);
kfree(ctx);
return result;
}
@@ -536,15 +530,15 @@ static ssize_t ib_ucm_destroy_id(struct ib_ucm_file *file,
if (copy_from_user(&cmd, inbuf, sizeof(cmd)))
return -EFAULT;
mutex_lock(&ctx_id_mutex);
ctx = idr_find(&ctx_id_table, cmd.id);
xa_lock(&ctx_id_table);
ctx = xa_load(&ctx_id_table, cmd.id);
if (!ctx)
ctx = ERR_PTR(-ENOENT);
else if (ctx->file != file)
ctx = ERR_PTR(-EINVAL);
else
idr_remove(&ctx_id_table, ctx->id);
mutex_unlock(&ctx_id_mutex);
__xa_erase(&ctx_id_table, ctx->id);
xa_unlock(&ctx_id_table);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
@@ -1189,10 +1183,7 @@ static int ib_ucm_close(struct inode *inode, struct file *filp)
struct ib_ucm_context, file_list);
mutex_unlock(&file->file_mutex);
mutex_lock(&ctx_id_mutex);
idr_remove(&ctx_id_table, ctx->id);
mutex_unlock(&ctx_id_mutex);
xa_erase(&ctx_id_table, ctx->id);
ib_destroy_cm_id(ctx->cm_id);
ib_ucm_cleanup_events(ctx);
kfree(ctx);
@@ -1352,7 +1343,7 @@ static void __exit ib_ucm_cleanup(void)
class_remove_file(&cm_class, &class_attr_abi_version.attr);
unregister_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_NUM_FIXED_MINOR);
unregister_chrdev_region(dynamic_ucm_dev, IB_UCM_NUM_DYNAMIC_MINOR);
idr_destroy(&ctx_id_table);
WARN_ON(!xa_empty(&ctx_id_table));
}
module_init(ib_ucm_init);

Some files were not shown because too many files have changed in this diff.