Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma updates from Jason Gunthorpe:
 "This has been a quiet cycle for RDMA, the big bulk is the usual
  smallish driver updates and bug fixes. About four new uAPI related
  things. Not as many syzkaller patches this time, the bugs it finds are
  getting harder to fix.

  Summary:

   - More work cleaning up the RDMA CM code

   - Usual driver bug fixes and cleanups for qedr, qib, hfi1, hns,
     i40iw, iw_cxgb4, mlx5, rxe

   - Driver specific resource tracking and reporting via netlink

   - Continued work for name space support from Parav

   - MPLS support for the verbs flow steering uAPI

   - A few tricky IPoIB fixes improving robustness

   - HFI1 driver support for the '16B' management packet format

   - Some auditing to not print kernel pointers via %llx or similar

   - Mark the entire 'UCM' user-space interface as BROKEN with the
     intent to remove it entirely. The user space side of this was long
     ago replaced with RDMA-CM and syzkaller is finding bugs in the
     residual UCM interface nobody wishes to fix because nobody uses it.

   - Purge more bogus BUG_ON's from Leon

   - 'flow counters' verbs uAPI

   - T10 fixups for iser/isert, these are Acked by Martin but going
     through the RDMA tree due to dependencies"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (138 commits)
  RDMA/mlx5: Update SPDX tags to show proper license
  RDMA/restrack: Change SPDX tag to properly reflect license
  IB/hfi1: Fix comment on default hdr entry size
  IB/hfi1: Rename exp_lock to exp_mutex
  IB/hfi1: Add bypass register defines and replace blind constants
  IB/hfi1: Remove unused variable
  IB/hfi1: Ensure VL index is within bounds
  IB/hfi1: Fix user context tail allocation for DMA_RTAIL
  IB/hns: Use zeroing memory allocator instead of allocator/memset
  infiniband: fix a possible use-after-free bug
  iw_cxgb4: add INFINIBAND_ADDR_TRANS dependency
  IB/isert: use T10-PI check mask definitions from core layer
  IB/iser: use T10-PI check mask definitions from core layer
  RDMA/core: introduce check masks for T10-PI offload
  IB/isert: fix T10-pi check mask setting
  IB/mlx5: Add counters read support
  IB/mlx5: Add flow counters read support
  IB/mlx5: Add flow counters binding support
  IB/mlx5: Add counters create and destroy support
  IB/uverbs: Add support for flow counters
  ...
This commit is contained in:
Linus Torvalds
2018-06-07 13:04:07 -07:00
144 changed files with 4480 additions and 1792 deletions
+11
View File
@@ -35,6 +35,17 @@ config INFINIBAND_USER_ACCESS
libibverbs, libibcm and a hardware driver library from
rdma-core <https://github.com/linux-rdma/rdma-core>.
config INFINIBAND_USER_ACCESS_UCM
bool "Userspace CM (UCM, DEPRECATED)"
depends on BROKEN
depends on INFINIBAND_USER_ACCESS
help
The UCM module has known security flaws, which no one is
interested in fixing. The user-space part of this code was
dropped from upstream a long time ago.
This option is DEPRECATED and planned to be removed.
config INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI
bool "Allow experimental legacy verbs in new ioctl uAPI (EXPERIMENTAL)"
depends on INFINIBAND_USER_ACCESS
+5 -4
View File
@@ -5,15 +5,16 @@ user_access-$(CONFIG_INFINIBAND_ADDR_TRANS) := rdma_ucm.o
obj-$(CONFIG_INFINIBAND) += ib_core.o ib_cm.o iw_cm.o \
$(infiniband-y)
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o $(user_access-y)
obj-$(CONFIG_INFINIBAND_USER_ACCESS_UCM) += ib_ucm.o $(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
multicast.o mad.o smi.o agent.o mad_rmpp.o \
security.o nldev.o restrack.o
nldev.o restrack.o
ib_core-$(CONFIG_SECURITY_INFINIBAND) += security.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o
ib_core-$(CONFIG_CGROUP_RDMA) += cgroup.o
@@ -36,4 +37,4 @@ ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_marshall.o \
rdma_core.o uverbs_std_types.o uverbs_ioctl.o \
uverbs_ioctl_merge.o uverbs_std_types_cq.o \
uverbs_std_types_flow_action.o uverbs_std_types_dm.o \
uverbs_std_types_mr.o
uverbs_std_types_mr.o uverbs_std_types_counters.o
+58 -112
View File
@@ -56,7 +56,6 @@ struct addr_req {
struct sockaddr_storage src_addr;
struct sockaddr_storage dst_addr;
struct rdma_dev_addr *addr;
struct rdma_addr_client *client;
void *context;
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context);
@@ -68,11 +67,8 @@ struct addr_req {
static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);
static void process_req(struct work_struct *work);
static DEFINE_MUTEX(lock);
static DEFINE_SPINLOCK(lock);
static LIST_HEAD(req_list);
static DECLARE_DELAYED_WORK(work, process_req);
static struct workqueue_struct *addr_wq;
static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
@@ -112,7 +108,7 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
memcpy(&gid, nla_data(curr), nla_len(curr));
}
mutex_lock(&lock);
spin_lock_bh(&lock);
list_for_each_entry(req, &req_list, list) {
if (nlh->nlmsg_seq != req->seq)
continue;
@@ -122,7 +118,7 @@ static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
found = 1;
break;
}
mutex_unlock(&lock);
spin_unlock_bh(&lock);
if (!found)
pr_info("Couldn't find request waiting for DGID: %pI6\n",
@@ -223,28 +219,6 @@ int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr)
}
EXPORT_SYMBOL(rdma_addr_size_kss);
static struct rdma_addr_client self;
void rdma_addr_register_client(struct rdma_addr_client *client)
{
atomic_set(&client->refcount, 1);
init_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_register_client);
static inline void put_client(struct rdma_addr_client *client)
{
if (atomic_dec_and_test(&client->refcount))
complete(&client->comp);
}
void rdma_addr_unregister_client(struct rdma_addr_client *client)
{
put_client(client);
wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(rdma_addr_unregister_client);
void rdma_copy_addr(struct rdma_dev_addr *dev_addr,
const struct net_device *dev,
const unsigned char *dst_dev_addr)
@@ -302,7 +276,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
}
EXPORT_SYMBOL(rdma_translate_ip);
static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
static void set_timeout(struct addr_req *req, unsigned long time)
{
unsigned long delay;
@@ -310,23 +284,15 @@ static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
if ((long)delay < 0)
delay = 0;
mod_delayed_work(addr_wq, delayed_work, delay);
mod_delayed_work(addr_wq, &req->work, delay);
}
static void queue_req(struct addr_req *req)
{
struct addr_req *temp_req;
mutex_lock(&lock);
list_for_each_entry_reverse(temp_req, &req_list, list) {
if (time_after_eq(req->timeout, temp_req->timeout))
break;
}
list_add(&req->list, &temp_req->list);
set_timeout(&req->work, req->timeout);
mutex_unlock(&lock);
spin_lock_bh(&lock);
list_add_tail(&req->list, &req_list);
set_timeout(req, req->timeout);
spin_unlock_bh(&lock);
}
static int ib_nl_fetch_ha(const struct dst_entry *dst,
@@ -584,7 +550,6 @@ static void process_one_req(struct work_struct *_work)
struct addr_req *req;
struct sockaddr *src_in, *dst_in;
mutex_lock(&lock);
req = container_of(_work, struct addr_req, work.work);
if (req->status == -ENODATA) {
@@ -596,72 +561,33 @@ static void process_one_req(struct work_struct *_work)
req->status = -ETIMEDOUT;
} else if (req->status == -ENODATA) {
/* requeue the work for retrying again */
set_timeout(&req->work, req->timeout);
mutex_unlock(&lock);
spin_lock_bh(&lock);
if (!list_empty(&req->list))
set_timeout(req, req->timeout);
spin_unlock_bh(&lock);
return;
}
}
list_del(&req->list);
mutex_unlock(&lock);
/*
* Although the work will normally have been canceled by the
* workqueue, it can still be requeued as long as it is on the
* req_list, so it could have been requeued before we grabbed &lock.
* We need to cancel it after it is removed from req_list to really be
* sure it is safe to free.
*/
cancel_delayed_work(&req->work);
req->callback(req->status, (struct sockaddr *)&req->src_addr,
req->addr, req->context);
put_client(req->client);
kfree(req);
}
req->callback = NULL;
static void process_req(struct work_struct *work)
{
struct addr_req *req, *temp_req;
struct sockaddr *src_in, *dst_in;
struct list_head done_list;
INIT_LIST_HEAD(&done_list);
mutex_lock(&lock);
list_for_each_entry_safe(req, temp_req, &req_list, list) {
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
req->status = addr_resolve(src_in, dst_in, req->addr,
true, req->seq);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA) {
set_timeout(&req->work, req->timeout);
continue;
}
}
list_move_tail(&req->list, &done_list);
}
mutex_unlock(&lock);
list_for_each_entry_safe(req, temp_req, &done_list, list) {
list_del(&req->list);
/* It is safe to cancel other work items from this work item
* because at a time there can be only one work item running
* with this single threaded work queue.
spin_lock_bh(&lock);
if (!list_empty(&req->list)) {
/*
* Although the work will normally have been canceled by the
* workqueue, it can still be requeued as long as it is on the
* req_list.
*/
cancel_delayed_work(&req->work);
req->callback(req->status, (struct sockaddr *) &req->src_addr,
req->addr, req->context);
put_client(req->client);
list_del_init(&req->list);
kfree(req);
}
spin_unlock_bh(&lock);
}
int rdma_resolve_ip(struct rdma_addr_client *client,
struct sockaddr *src_addr, struct sockaddr *dst_addr,
int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
struct rdma_dev_addr *addr, int timeout_ms,
void (*callback)(int status, struct sockaddr *src_addr,
struct rdma_dev_addr *addr, void *context),
@@ -693,8 +619,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
req->addr = addr;
req->callback = callback;
req->context = context;
req->client = client;
atomic_inc(&client->refcount);
INIT_DELAYED_WORK(&req->work, process_one_req);
req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
@@ -710,7 +634,6 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
break;
default:
ret = req->status;
atomic_dec(&client->refcount);
goto err;
}
return ret;
@@ -742,18 +665,36 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
struct addr_req *req, *temp_req;
struct addr_req *found = NULL;
mutex_lock(&lock);
spin_lock_bh(&lock);
list_for_each_entry_safe(req, temp_req, &req_list, list) {
if (req->addr == addr) {
req->status = -ECANCELED;
req->timeout = jiffies;
list_move(&req->list, &req_list);
set_timeout(&req->work, req->timeout);
/*
* Removing from the list means we take ownership of
* the req
*/
list_del_init(&req->list);
found = req;
break;
}
}
mutex_unlock(&lock);
spin_unlock_bh(&lock);
if (!found)
return;
/*
* sync canceling the work after removing it from the req_list
* guarantees no work is running and none will be started.
*/
cancel_delayed_work_sync(&found->work);
if (found->callback)
found->callback(-ECANCELED, (struct sockaddr *)&found->src_addr,
found->addr, found->context);
kfree(found);
}
EXPORT_SYMBOL(rdma_addr_cancel);
@@ -791,8 +732,8 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
dev_addr.net = &init_net;
init_completion(&ctx.comp);
ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
&dev_addr, 1000, resolve_cb, &ctx);
ret = rdma_resolve_ip(&sgid_addr._sockaddr, &dgid_addr._sockaddr,
&dev_addr, 1000, resolve_cb, &ctx);
if (ret)
return ret;
@@ -810,11 +751,17 @@ int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
static int netevent_callback(struct notifier_block *self, unsigned long event,
void *ctx)
{
struct addr_req *req;
if (event == NETEVENT_NEIGH_UPDATE) {
struct neighbour *neigh = ctx;
if (neigh->nud_state & NUD_VALID)
set_timeout(&work, jiffies);
if (neigh->nud_state & NUD_VALID) {
spin_lock_bh(&lock);
list_for_each_entry(req, &req_list, list)
set_timeout(req, jiffies);
spin_unlock_bh(&lock);
}
}
return 0;
}
@@ -830,14 +777,13 @@ int addr_init(void)
return -ENOMEM;
register_netevent_notifier(&nb);
rdma_addr_register_client(&self);
return 0;
}
void addr_cleanup(void)
{
rdma_addr_unregister_client(&self);
unregister_netevent_notifier(&nb);
destroy_workqueue(addr_wq);
WARN_ON(!list_empty(&req_list));
}
+55 -69
View File
@@ -125,6 +125,16 @@ const char *ib_cache_gid_type_str(enum ib_gid_type gid_type)
}
EXPORT_SYMBOL(ib_cache_gid_type_str);
/**
 * rdma_is_zero_gid - Test whether a GID equals the zero GID.
 * @gid: GID to check
 *
 * The comparison is a byte-wise memcmp() of @gid against zgid.
 *
 * Return: true if @gid matches zgid, false otherwise.
 */
bool rdma_is_zero_gid(const union ib_gid *gid)
{
	return memcmp(gid, &zgid, sizeof(*gid)) == 0;
}
EXPORT_SYMBOL(rdma_is_zero_gid);
int ib_cache_gid_parse_type_str(const char *buf)
{
unsigned int i;
@@ -149,6 +159,11 @@ int ib_cache_gid_parse_type_str(const char *buf)
}
EXPORT_SYMBOL(ib_cache_gid_parse_type_str);
/*
 * Look up the GID table cached for @port on @device.
 *
 * @port is an RDMA port number; it is converted to a zero-based index into
 * device->cache.ports[] by subtracting rdma_start_port(@device). No range
 * check is done here.
 */
static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
{
	int port_idx = port - rdma_start_port(device);

	return device->cache.ports[port_idx].gid;
}
static void del_roce_gid(struct ib_device *device, u8 port_num,
struct ib_gid_table *table, int ix)
{
@@ -231,7 +246,7 @@ static int add_modify_gid(struct ib_gid_table *table,
* So ignore such behavior for IB link layer and don't
* fail the call, but don't add such entry to GID cache.
*/
if (!memcmp(gid, &zgid, sizeof(*gid)))
if (rdma_is_zero_gid(gid))
return 0;
}
@@ -264,7 +279,7 @@ static void del_gid(struct ib_device *ib_dev, u8 port,
if (rdma_protocol_roce(ib_dev, port))
del_roce_gid(ib_dev, port, table, ix);
memcpy(&table->data_vec[ix].gid, &zgid, sizeof(zgid));
memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
table->data_vec[ix].context = NULL;
}
@@ -363,10 +378,10 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
* IB spec version 1.3 section 4.1.1 point (6) and
* section 12.7.10 and section 12.7.20
*/
if (!memcmp(gid, &zgid, sizeof(*gid)))
if (rdma_is_zero_gid(gid))
return -EINVAL;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
table = rdma_gid_table(ib_dev, port);
mutex_lock(&table->lock);
@@ -433,7 +448,7 @@ _ib_cache_gid_del(struct ib_device *ib_dev, u8 port,
int ret = 0;
int ix;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
table = rdma_gid_table(ib_dev, port);
mutex_lock(&table->lock);
@@ -472,7 +487,7 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
int ix;
bool deleted = false;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
table = rdma_gid_table(ib_dev, port);
mutex_lock(&table->lock);
@@ -496,7 +511,7 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
{
struct ib_gid_table *table;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
table = rdma_gid_table(ib_dev, port);
if (index < 0 || index >= table->sz)
return -EINVAL;
@@ -589,7 +604,7 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
if (!rdma_is_port_valid(ib_dev, port))
return -ENOENT;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
table = rdma_gid_table(ib_dev, port);
if (ndev)
mask |= GID_ATTR_FIND_MASK_NETDEV;
@@ -647,7 +662,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
!rdma_protocol_roce(ib_dev, port))
return -EPROTONOSUPPORT;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
table = rdma_gid_table(ib_dev, port);
read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
@@ -724,8 +739,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
mutex_lock(&table->lock);
for (i = 0; i < table->sz; ++i) {
if (memcmp(&table->data_vec[i].gid, &zgid,
sizeof(table->data_vec[i].gid))) {
if (!rdma_is_zero_gid(&table->data_vec[i].gid)) {
del_gid(ib_dev, port, table, i);
deleted = true;
}
@@ -747,7 +761,7 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
unsigned int gid_type;
unsigned long mask;
table = ib_dev->cache.ports[port - rdma_start_port(ib_dev)].gid;
table = rdma_gid_table(ib_dev, port);
mask = GID_ATTR_FIND_MASK_GID_TYPE |
GID_ATTR_FIND_MASK_DEFAULT |
@@ -772,8 +786,8 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
}
}
static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table)
static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table)
{
unsigned int i;
unsigned long roce_gid_type_mask;
@@ -783,8 +797,7 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
num_default_gids = hweight_long(roce_gid_type_mask);
for (i = 0; i < num_default_gids && i < table->sz; i++) {
struct ib_gid_table_entry *entry =
&table->data_vec[i];
struct ib_gid_table_entry *entry = &table->data_vec[i];
entry->props |= GID_TABLE_ENTRY_DEFAULT;
current_gid = find_next_bit(&roce_gid_type_mask,
@@ -792,48 +805,8 @@ static int gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
current_gid);
entry->attr.gid_type = current_gid++;
}
return 0;
}
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
u8 port;
struct ib_gid_table *table;
int err = 0;
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
u8 rdma_port = port + rdma_start_port(ib_dev);
table =
alloc_gid_table(
ib_dev->port_immutable[rdma_port].gid_tbl_len);
if (!table) {
err = -ENOMEM;
goto rollback_table_setup;
}
err = gid_table_reserve_default(ib_dev,
port + rdma_start_port(ib_dev),
table);
if (err)
goto rollback_table_setup;
ib_dev->cache.ports[port].gid = table;
}
return 0;
rollback_table_setup:
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
table = ib_dev->cache.ports[port].gid;
cleanup_gid_table_port(ib_dev, port + rdma_start_port(ib_dev),
table);
release_gid_table(table);
}
return err;
}
static void gid_table_release_one(struct ib_device *ib_dev)
{
@@ -847,6 +820,29 @@ static void gid_table_release_one(struct ib_device *ib_dev)
}
}
/*
 * Allocate one GID table per physical port of @ib_dev, reserve its default
 * entries and publish it in ib_dev->cache.ports[].
 *
 * Returns 0 on success. If any allocation fails, gid_table_release_one() is
 * called on the device and -ENOMEM is returned.
 */
static int _gid_table_setup_one(struct ib_device *ib_dev)
{
	u8 idx;

	for (idx = 0; idx < ib_dev->phys_port_cnt; idx++) {
		/* cache.ports[] is zero-based; port_immutable[] is not. */
		u8 rdma_port = idx + rdma_start_port(ib_dev);
		struct ib_gid_table *table;

		table = alloc_gid_table(
				ib_dev->port_immutable[rdma_port].gid_tbl_len);
		if (!table) {
			gid_table_release_one(ib_dev);
			return -ENOMEM;
		}

		gid_table_reserve_default(ib_dev, rdma_port, table);
		ib_dev->cache.ports[idx].gid = table;
	}

	return 0;
}
static void gid_table_cleanup_one(struct ib_device *ib_dev)
{
struct ib_gid_table *table;
@@ -886,7 +882,7 @@ int ib_get_cached_gid(struct ib_device *device,
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
table = device->cache.ports[port_num - rdma_start_port(device)].gid;
table = rdma_gid_table(device, port_num);
read_lock_irqsave(&table->rwlock, flags);
res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
read_unlock_irqrestore(&table->rwlock, flags);
@@ -1104,7 +1100,7 @@ static int config_non_roce_gid_cache(struct ib_device *device,
gid_attr.device = device;
gid_attr.port_num = port;
table = device->cache.ports[port - rdma_start_port(device)].gid;
table = rdma_gid_table(device, port);
mutex_lock(&table->lock);
for (i = 0; i < gid_tbl_len; ++i) {
@@ -1137,7 +1133,7 @@ static void ib_cache_update(struct ib_device *device,
if (!rdma_is_port_valid(device, port))
return;
table = device->cache.ports[port - rdma_start_port(device)].gid;
table = rdma_gid_table(device, port);
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
@@ -1300,13 +1296,3 @@ void ib_cache_cleanup_one(struct ib_device *device)
flush_workqueue(ib_wq);
gid_table_cleanup_one(device);
}
void __init ib_cache_setup(void)
{
roce_gid_mgmt_init();
}
void __exit ib_cache_cleanup(void)
{
roce_gid_mgmt_cleanup();
}
+51 -11
View File
@@ -452,6 +452,32 @@ static void cm_set_private_data(struct cm_id_private *cm_id_priv,
cm_id_priv->private_data_len = private_data_len;
}
static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
struct ib_grh *grh, struct cm_av *av)
{
struct rdma_ah_attr new_ah_attr;
int ret;
av->port = port;
av->pkey_index = wc->pkey_index;
/*
* av->ah_attr might be initialized based on past wc during incoming
* connect request or while sending out connect request. So initialize
* a new ah_attr on stack. If initialization fails, old ah_attr is
* used for sending any responses. If initialization is successful,
* than new ah_attr is used by overwriting old one.
*/
ret = ib_init_ah_attr_from_wc(port->cm_dev->ib_device,
port->port_num, wc,
grh, &new_ah_attr);
if (ret)
return ret;
memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
return 0;
}
static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
struct ib_grh *grh, struct cm_av *av)
{
@@ -509,6 +535,7 @@ static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
struct cm_id_private *cm_id_priv)
{
struct rdma_ah_attr new_ah_attr;
struct cm_device *cm_dev;
struct cm_port *port;
int ret;
@@ -524,15 +551,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
return ret;
av->port = port;
/*
* av->ah_attr might be initialized based on wc or during
* request processing time. So initialize a new ah_attr on stack.
* If initialization fails, old ah_attr is used for sending any
* responses. If initialization is successful, then the new ah_attr
* is used by overwriting the old one.
*/
ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
&av->ah_attr);
&new_ah_attr);
if (ret)
return ret;
av->timeout = path->packet_life_time + 1;
ret = add_cm_id_to_port_list(cm_id_priv, av, port);
return ret;
if (ret)
return ret;
memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
return 0;
}
static int cm_alloc_id(struct cm_id_private *cm_id_priv)
@@ -1669,7 +1707,9 @@ static void cm_process_work(struct cm_id_private *cm_id_priv,
spin_lock_irq(&cm_id_priv->lock);
work = cm_dequeue_work(cm_id_priv);
spin_unlock_irq(&cm_id_priv->lock);
BUG_ON(!work);
if (!work)
return;
ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
&work->cm_event);
cm_free_work(work);
@@ -3189,12 +3229,6 @@ static int cm_lap_handler(struct cm_work *work)
if (!cm_id_priv)
return -EINVAL;
ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
if (ret)
goto deref;
param = &work->cm_event.param.lap_rcvd;
memset(&work->path[0], 0, sizeof(work->path[1]));
cm_path_set_rec_type(work->port->cm_dev->ib_device,
@@ -3239,10 +3273,16 @@ static int cm_lap_handler(struct cm_work *work)
goto unlock;
}
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
ret = cm_init_av_for_lap(work->port, work->mad_recv_wc->wc,
work->mad_recv_wc->recv_buf.grh,
&cm_id_priv->av);
if (ret)
goto unlock;
cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
cm_id_priv);
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
ret = atomic_inc_and_test(&cm_id_priv->work_count);
if (!ret)
list_add_tail(&work->list, &cm_id_priv->work_list);
+30 -6
View File
@@ -146,6 +146,34 @@ const void *rdma_consumer_reject_data(struct rdma_cm_id *id,
}
EXPORT_SYMBOL(rdma_consumer_reject_data);
/**
 * rdma_iw_cm_id() - return the iw_cm_id pointer for this cm_id.
 * @id: Communication Identifier
 *
 * @id->device is dereferenced unconditionally.
 *
 * Return: the iw_cm_id held in the private part of @id when the device's
 * node type is RDMA_NODE_RNIC, NULL for any other node type.
 */
struct iw_cm_id *rdma_iw_cm_id(struct rdma_cm_id *id)
{
	struct rdma_id_private *priv;

	if (id->device->node_type != RDMA_NODE_RNIC)
		return NULL;

	priv = container_of(id, struct rdma_id_private, id);
	return priv->cm_id.iw;
}
EXPORT_SYMBOL(rdma_iw_cm_id);
/**
 * rdma_res_to_id() - return the rdma_cm_id pointer for this restrack.
 * @res: rdma resource tracking entry pointer
 *
 * @res must be the restrack entry embedded in a struct rdma_id_private;
 * the enclosing object is recovered with container_of().
 *
 * Return: pointer to the rdma_cm_id embedded in that same object.
 */
struct rdma_cm_id *rdma_res_to_id(struct rdma_restrack_entry *res)
{
	return &container_of(res, struct rdma_id_private, res)->id;
}
EXPORT_SYMBOL(rdma_res_to_id);
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device, void *client_data);
@@ -156,7 +184,6 @@ static struct ib_client cma_client = {
};
static struct ib_sa_client sa_client;
static struct rdma_addr_client addr_client;
static LIST_HEAD(dev_list);
static LIST_HEAD(listen_any_list);
static DEFINE_MUTEX(lock);
@@ -2103,7 +2130,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
event.param.conn.responder_resources = iw_event->ord;
break;
default:
BUG_ON(1);
goto out;
}
event.status = iw_event->status;
@@ -2936,7 +2963,7 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
if (dst_addr->sa_family == AF_IB) {
ret = cma_resolve_ib_addr(id_priv);
} else {
ret = rdma_resolve_ip(&addr_client, cma_src_addr(id_priv),
ret = rdma_resolve_ip(cma_src_addr(id_priv),
dst_addr, &id->route.addr.dev_addr,
timeout_ms, addr_handler, id_priv);
}
@@ -4573,7 +4600,6 @@ static int __init cma_init(void)
goto err_wq;
ib_sa_register_client(&sa_client);
rdma_addr_register_client(&addr_client);
register_netdevice_notifier(&cma_nb);
ret = ib_register_client(&cma_client);
@@ -4587,7 +4613,6 @@ static int __init cma_init(void)
err:
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
err_wq:
destroy_workqueue(cma_wq);
@@ -4600,7 +4625,6 @@ static void __exit cma_cleanup(void)
rdma_nl_unregister(RDMA_NL_RDMA_CM);
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
rdma_addr_unregister_client(&addr_client);
ib_sa_unregister_client(&sa_client);
unregister_pernet_subsys(&cma_pernet_operations);
destroy_workqueue(cma_wq);
-3
View File
@@ -88,9 +88,6 @@ int ib_device_register_sysfs(struct ib_device *device,
u8, struct kobject *));
void ib_device_unregister_sysfs(struct ib_device *device);
void ib_cache_setup(void);
void ib_cache_cleanup(void);
typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
struct net_device *idev, void *cookie);
+2 -2
View File
@@ -1225,7 +1225,7 @@ static int __init ib_core_init(void)
nldev_init();
rdma_nl_register(RDMA_NL_LS, ibnl_ls_cb_table);
ib_cache_setup();
roce_gid_mgmt_init();
return 0;
@@ -1248,7 +1248,7 @@ err:
static void __exit ib_core_cleanup(void)
{
ib_cache_cleanup();
roce_gid_mgmt_cleanup();
nldev_exit();
rdma_nl_unregister(RDMA_NL_LS);
unregister_lsm_notifier(&ibdev_lsm_nb);
+7 -5
View File
@@ -651,7 +651,6 @@ static void dequeue_mad(struct ib_mad_list_head *mad_list)
struct ib_mad_queue *mad_queue;
unsigned long flags;
BUG_ON(!mad_list->mad_queue);
mad_queue = mad_list->mad_queue;
spin_lock_irqsave(&mad_queue->lock, flags);
list_del(&mad_list->list);
@@ -1557,7 +1556,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
mad_reg_req->oui, 3)) {
method = &(*vendor_table)->vendor_class[
vclass]->method_table[i];
BUG_ON(!*method);
if (!*method)
goto error3;
goto check_in_use;
}
}
@@ -1567,10 +1567,12 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
vclass]->oui[i])) {
method = &(*vendor_table)->vendor_class[
vclass]->method_table[i];
BUG_ON(*method);
/* Allocate method table for this OUI */
if ((ret = allocate_method_table(method)))
goto error3;
if (!*method) {
ret = allocate_method_table(method);
if (ret)
goto error3;
}
memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
mad_reg_req->oui, 3);
goto check_in_use;
+109 -13
View File
@@ -98,8 +98,83 @@ static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
[RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
.len = IFNAMSIZ },
[RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
[RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
.len = RDMA_NLDEV_ATTR_ENTRY_STRLEN },
[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
[RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
[RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
[RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
[RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
};
/*
 * Emit the leading attributes of a driver-specific entry: the entry name
 * as RDMA_NLDEV_ATTR_DRIVER_STRING and, unless @print_type is
 * RDMA_NLDEV_PRINT_TYPE_UNSPEC, the print type as
 * RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE.
 *
 * Returns 0 on success, -EMSGSIZE if either attribute could not be added.
 */
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
				      enum rdma_nldev_print_type print_type)
{
	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
		return -EMSGSIZE;

	/* UNSPEC is conveyed by omitting the print-type attribute. */
	if (print_type == RDMA_NLDEV_PRINT_TYPE_UNSPEC)
		return 0;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
		return -EMSGSIZE;

	return 0;
}
/*
 * Add a named 32-bit driver value to @msg: first the name (and optional
 * print type), then the value as RDMA_NLDEV_ATTR_DRIVER_U32.
 *
 * Returns 0 on success, -EMSGSIZE if any attribute could not be added.
 */
static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
				   enum rdma_nldev_print_type print_type,
				   u32 value)
{
	/* Short-circuit keeps the order: name/print type, then the value. */
	if (put_driver_name_print_type(msg, name, print_type) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
		return -EMSGSIZE;

	return 0;
}
/*
 * Add a named 64-bit driver value to @msg: first the name (and optional
 * print type), then the value as RDMA_NLDEV_ATTR_DRIVER_U64, using
 * RDMA_NLDEV_ATTR_PAD as the padding attribute.
 *
 * Returns 0 on success, -EMSGSIZE if any attribute could not be added.
 */
static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
				   enum rdma_nldev_print_type print_type,
				   u64 value)
{
	/* Short-circuit keeps the order: name/print type, then the value. */
	if (put_driver_name_print_type(msg, name, print_type) ||
	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
			      RDMA_NLDEV_ATTR_PAD))
		return -EMSGSIZE;

	return 0;
}
/**
 * rdma_nl_put_driver_u32() - add a named u32 driver attribute to a message
 * @msg: netlink message being filled
 * @name: name emitted as RDMA_NLDEV_ATTR_DRIVER_STRING
 * @value: value emitted as RDMA_NLDEV_ATTR_DRIVER_U32
 *
 * The print type passed down is RDMA_NLDEV_PRINT_TYPE_UNSPEC, so no
 * print-type attribute is added.
 *
 * Return: 0 on success, -EMSGSIZE if an attribute could not be added.
 */
int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);
/**
 * rdma_nl_put_driver_u32_hex() - add a named u32 driver attribute tagged hex
 * @msg: netlink message being filled
 * @name: name emitted as RDMA_NLDEV_ATTR_DRIVER_STRING
 * @value: value emitted as RDMA_NLDEV_ATTR_DRIVER_U32
 *
 * Same as the plain u32 variant, except that a
 * RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE attribute carrying
 * RDMA_NLDEV_PRINT_TYPE_HEX is also added.
 *
 * Return: 0 on success, -EMSGSIZE if an attribute could not be added.
 */
int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
u32 value)
{
return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
/**
 * rdma_nl_put_driver_u64() - add a named u64 driver attribute to a message
 * @msg: netlink message being filled
 * @name: name emitted as RDMA_NLDEV_ATTR_DRIVER_STRING
 * @value: value emitted as RDMA_NLDEV_ATTR_DRIVER_U64
 *
 * The print type passed down is RDMA_NLDEV_PRINT_TYPE_UNSPEC, so no
 * print-type attribute is added.
 *
 * Return: 0 on success, -EMSGSIZE if an attribute could not be added.
 */
int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);
/**
 * rdma_nl_put_driver_u64_hex() - add a named u64 driver attribute tagged hex
 * @msg: netlink message being filled
 * @name: name emitted as RDMA_NLDEV_ATTR_DRIVER_STRING
 * @value: value emitted as RDMA_NLDEV_ATTR_DRIVER_U64
 *
 * Same as the plain u64 variant, except that a
 * RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE attribute carrying
 * RDMA_NLDEV_PRINT_TYPE_HEX is also added.
 *
 * Return: 0 on success, -EMSGSIZE if an attribute could not be added.
 */
int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
{
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
@@ -122,7 +197,8 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
device->attrs.device_cap_flags, 0))
device->attrs.device_cap_flags,
RDMA_NLDEV_ATTR_PAD))
return -EMSGSIZE;
ib_get_device_fw_str(device, fw);
@@ -131,10 +207,12 @@ static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
be64_to_cpu(device->node_guid), 0))
be64_to_cpu(device->node_guid),
RDMA_NLDEV_ATTR_PAD))
return -EMSGSIZE;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
be64_to_cpu(device->attrs.sys_image_guid), 0))
be64_to_cpu(device->attrs.sys_image_guid),
RDMA_NLDEV_ATTR_PAD))
return -EMSGSIZE;
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
return -EMSGSIZE;
@@ -161,11 +239,11 @@ static int fill_port_info(struct sk_buff *msg,
BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
(u64)attr.port_cap_flags, 0))
(u64)attr.port_cap_flags, RDMA_NLDEV_ATTR_PAD))
return -EMSGSIZE;
if (rdma_protocol_ib(device, port) &&
nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
attr.subnet_prefix, 0))
attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
return -EMSGSIZE;
if (rdma_protocol_ib(device, port)) {
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
@@ -209,8 +287,8 @@ static int fill_res_info_entry(struct sk_buff *msg,
if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
goto err;
if (nla_put_u64_64bit(msg,
RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr, 0))
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
RDMA_NLDEV_ATTR_PAD))
goto err;
nla_nest_end(msg, entry_attr);
@@ -282,6 +360,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_qp *qp = container_of(res, struct ib_qp, res);
struct rdma_restrack_root *resroot = &qp->device->res;
struct ib_qp_init_attr qp_init_attr;
struct nlattr *entry_attr;
struct ib_qp_attr qp_attr;
@@ -331,6 +410,9 @@ static int fill_res_qp_entry(struct sk_buff *msg, struct netlink_callback *cb,
if (fill_res_name_pid(msg, res))
goto err;
if (resroot->fill_res_entry(msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
@@ -346,6 +428,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg,
{
struct rdma_id_private *id_priv =
container_of(res, struct rdma_id_private, res);
struct rdma_restrack_root *resroot = &id_priv->id.device->res;
struct rdma_cm_id *cm_id = &id_priv->id;
struct nlattr *entry_attr;
@@ -387,6 +470,9 @@ static int fill_res_cm_id_entry(struct sk_buff *msg,
if (fill_res_name_pid(msg, res))
goto err;
if (resroot->fill_res_entry(msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
@@ -400,6 +486,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_cq *cq = container_of(res, struct ib_cq, res);
struct rdma_restrack_root *resroot = &cq->device->res;
struct nlattr *entry_attr;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_CQ_ENTRY);
@@ -409,7 +496,7 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
goto err;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
atomic_read(&cq->usecnt), 0))
atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
goto err;
/* Poll context is only valid for kernel CQs */
@@ -420,6 +507,9 @@ static int fill_res_cq_entry(struct sk_buff *msg, struct netlink_callback *cb,
if (fill_res_name_pid(msg, res))
goto err;
if (resroot->fill_res_entry(msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
@@ -433,6 +523,7 @@ static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_mr *mr = container_of(res, struct ib_mr, res);
struct rdma_restrack_root *resroot = &mr->pd->device->res;
struct nlattr *entry_attr;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_MR_ENTRY);
@@ -444,17 +535,18 @@ static int fill_res_mr_entry(struct sk_buff *msg, struct netlink_callback *cb,
goto err;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
goto err;
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_IOVA,
mr->iova, 0))
goto err;
}
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length, 0))
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
RDMA_NLDEV_ATTR_PAD))
goto err;
if (fill_res_name_pid(msg, res))
goto err;
if (resroot->fill_res_entry(msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
@@ -468,6 +560,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
struct rdma_restrack_entry *res, uint32_t port)
{
struct ib_pd *pd = container_of(res, struct ib_pd, res);
struct rdma_restrack_root *resroot = &pd->device->res;
struct nlattr *entry_attr;
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_PD_ENTRY);
@@ -484,7 +577,7 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
goto err;
}
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
atomic_read(&pd->usecnt), 0))
atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
goto err;
if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
@@ -494,6 +587,9 @@ static int fill_res_pd_entry(struct sk_buff *msg, struct netlink_callback *cb,
if (fill_res_name_pid(msg, res))
goto err;
if (resroot->fill_res_entry(msg, res))
goto err;
nla_nest_end(msg, entry_attr);
return 0;
+8 -1
View File
@@ -1,4 +1,4 @@
/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2017-2018 Mellanox Technologies. All rights reserved.
*/
@@ -12,9 +12,16 @@
#include "cma_priv.h"
/*
 * Default fill_res_entry callback installed by rdma_restrack_init().
 * It leaves @msg untouched, ignores @entry and always reports success (0).
 */
static int
fill_res_noop(struct sk_buff *msg, struct rdma_restrack_entry *entry)
{
	return 0;
}
/*
 * rdma_restrack_init - set up a restrack root
 * @res: root to initialise
 *
 * Installs fill_res_noop() as the fill_res_entry callback and initialises
 * the root's rwsem.
 */
void rdma_restrack_init(struct rdma_restrack_root *res)
{
	res->fill_res_entry = fill_res_noop;
	init_rwsem(&res->rwsem);
}
static const char *type2str(enum rdma_restrack_type type)
-2
View File
@@ -44,8 +44,6 @@
static struct workqueue_struct *gid_cache_wq;
static struct workqueue_struct *gid_cache_wq;
enum gid_op_type {
GID_DEL = 0,
GID_ADD
-4
View File
@@ -30,8 +30,6 @@
* SOFTWARE.
*/
#ifdef CONFIG_SECURITY_INFINIBAND
#include <linux/security.h>
#include <linux/completion.h>
#include <linux/list.h>
@@ -751,5 +749,3 @@ int ib_mad_enforce_security(struct ib_mad_agent_private *map, u16 pkey_index)
pkey_index,
map->agent.security);
}
#endif /* CONFIG_SECURITY_INFINIBAND */
+5 -1
View File
@@ -235,7 +235,7 @@ static struct ucma_multicast* ucma_alloc_multicast(struct ucma_context *ctx)
return NULL;
mutex_lock(&mut);
mc->id = idr_alloc(&multicast_idr, mc, 0, 0, GFP_KERNEL);
mc->id = idr_alloc(&multicast_idr, NULL, 0, 0, GFP_KERNEL);
mutex_unlock(&mut);
if (mc->id < 0)
goto error;
@@ -1421,6 +1421,10 @@ static ssize_t ucma_process_join(struct ucma_file *file,
goto err3;
}
mutex_lock(&mut);
idr_replace(&multicast_idr, mc, mc->id);
mutex_unlock(&mut);
mutex_unlock(&file->mut);
ucma_put_ctx(ctx);
return 0;
+1 -12
View File
@@ -64,8 +64,6 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
}
sg_free_table(&umem->sg_head);
return;
}
/**
@@ -119,16 +117,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
umem->length = size;
umem->address = addr;
umem->page_shift = PAGE_SHIFT;
/*
* We ask for writable memory if any of the following
* access flags are set. "Local write" and "remote write"
* obviously require write access. "Remote atomic" can do
* things like fetch and add, which will modify memory, and
* "MW bind" can change permissions by binding a window.
*/
umem->writable = !!(access &
(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
umem->writable = ib_access_writable(access);
if (access & IB_ACCESS_ON_DEMAND) {
ret = ib_umem_odp_get(context, umem, access);
+2
View File
@@ -263,6 +263,7 @@ struct ib_uverbs_flow_spec {
struct ib_uverbs_flow_spec_action_tag flow_tag;
struct ib_uverbs_flow_spec_action_drop drop;
struct ib_uverbs_flow_spec_action_handle action;
struct ib_uverbs_flow_spec_action_count flow_count;
};
};
@@ -287,6 +288,7 @@ extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_XRCD);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_DM);
extern const struct uverbs_object_def UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS);
#define IB_UVERBS_DECLARE_CMD(name) \
ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
+99 -12
View File
@@ -2748,43 +2748,82 @@ out_put:
/*
 * Bookkeeping for the objects a user flow holds a usecnt reference on.
 *
 * Both arrays are allocated separately by flow_resources_alloc() and are
 * filled by flow_resources_add(); ib_uverbs_flow_resources_free() drops the
 * references and frees the arrays.
 */
struct ib_uflow_resources {
	size_t			max;		/* capacity, set from num_specs */
	size_t			num;		/* total entries added so far */
	size_t			collection_num;	/* used slots in collection[] */
	size_t			counters_num;	/* used slots in counters[] */
	struct ib_counters	**counters;	/* referenced counters objects */
	struct ib_flow_action	**collection;	/* referenced flow actions */
};
/*
 * flow_resources_alloc - allocate tracking state for up to @num_specs objects
 * @num_specs: number of slots to reserve in each of the two arrays
 *
 * The structure itself is zero-allocated, so num, collection_num and
 * counters_num all start at 0; max records @num_specs.
 *
 * Returns the new structure, or NULL if any of the three allocations fails
 * (whatever was already allocated is freed before returning).
 */
static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
{
	struct ib_uflow_resources *resources;

	resources = kzalloc(sizeof(*resources), GFP_KERNEL);
	if (!resources)
		goto err_res;

	resources->counters =
		kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL);
	if (!resources->counters)
		goto err_cnt;

	resources->collection =
		kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL);
	if (!resources->collection)
		goto err_collection;

	resources->max = num_specs;

	return resources;

err_collection:
	kfree(resources->counters);
err_cnt:
	kfree(resources);
err_res:
	return NULL;
}
/*
 * ib_uverbs_flow_resources_free - release everything tracked in @uflow_res
 * @uflow_res: tracking structure obtained from flow_resources_alloc()
 *
 * Drops exactly one usecnt reference per recorded flow action and per
 * recorded counters object (matching the single atomic_inc() done for each
 * in flow_resources_add()), then frees both arrays and the structure.
 */
void ib_uverbs_flow_resources_free(struct ib_uflow_resources *uflow_res)
{
	unsigned int i;

	for (i = 0; i < uflow_res->collection_num; i++)
		atomic_dec(&uflow_res->collection[i]->usecnt);

	for (i = 0; i < uflow_res->counters_num; i++)
		atomic_dec(&uflow_res->counters[i]->usecnt);

	kfree(uflow_res->collection);
	kfree(uflow_res->counters);
	kfree(uflow_res);
}
static void flow_resources_add(struct ib_uflow_resources *uflow_res,
struct ib_flow_action *action)
enum ib_flow_spec_type type,
void *ibobj)
{
WARN_ON(uflow_res->num >= uflow_res->max);
atomic_inc(&action->usecnt);
uflow_res->collection[uflow_res->num++] = action;
switch (type) {
case IB_FLOW_SPEC_ACTION_HANDLE:
atomic_inc(&((struct ib_flow_action *)ibobj)->usecnt);
uflow_res->collection[uflow_res->collection_num++] =
(struct ib_flow_action *)ibobj;
break;
case IB_FLOW_SPEC_ACTION_COUNT:
atomic_inc(&((struct ib_counters *)ibobj)->usecnt);
uflow_res->counters[uflow_res->counters_num++] =
(struct ib_counters *)ibobj;
break;
default:
WARN_ON(1);
}
uflow_res->num++;
}
static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
@@ -2821,9 +2860,29 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
return -EINVAL;
ib_spec->action.size =
sizeof(struct ib_flow_spec_action_handle);
flow_resources_add(uflow_res, ib_spec->action.act);
flow_resources_add(uflow_res,
IB_FLOW_SPEC_ACTION_HANDLE,
ib_spec->action.act);
uobj_put_obj_read(ib_spec->action.act);
break;
case IB_FLOW_SPEC_ACTION_COUNT:
if (kern_spec->flow_count.size !=
sizeof(struct ib_uverbs_flow_spec_action_count))
return -EINVAL;
ib_spec->flow_count.counters =
uobj_get_obj_read(counters,
UVERBS_OBJECT_COUNTERS,
kern_spec->flow_count.handle,
ucontext);
if (!ib_spec->flow_count.counters)
return -EINVAL;
ib_spec->flow_count.size =
sizeof(struct ib_flow_spec_action_count);
flow_resources_add(uflow_res,
IB_FLOW_SPEC_ACTION_COUNT,
ib_spec->flow_count.counters);
uobj_put_obj_read(ib_spec->flow_count.counters);
break;
default:
return -EINVAL;
}
@@ -2948,6 +3007,28 @@ int ib_uverbs_kern_spec_to_ib_spec_filter(enum ib_flow_spec_type type,
memcpy(&ib_spec->esp.val, kern_spec_val, actual_filter_sz);
memcpy(&ib_spec->esp.mask, kern_spec_mask, actual_filter_sz);
break;
case IB_FLOW_SPEC_GRE:
ib_filter_sz = offsetof(struct ib_flow_gre_filter, real_sz);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
if (actual_filter_sz <= 0)
return -EINVAL;
ib_spec->gre.size = sizeof(struct ib_flow_spec_gre);
memcpy(&ib_spec->gre.val, kern_spec_val, actual_filter_sz);
memcpy(&ib_spec->gre.mask, kern_spec_mask, actual_filter_sz);
break;
case IB_FLOW_SPEC_MPLS:
ib_filter_sz = offsetof(struct ib_flow_mpls_filter, real_sz);
actual_filter_sz = spec_filter_size(kern_spec_mask,
kern_filter_sz,
ib_filter_sz);
if (actual_filter_sz <= 0)
return -EINVAL;
ib_spec->mpls.size = sizeof(struct ib_flow_spec_mpls);
memcpy(&ib_spec->mpls.val, kern_spec_val, actual_filter_sz);
memcpy(&ib_spec->mpls.mask, kern_spec_mask, actual_filter_sz);
break;
default:
return -EINVAL;
}
@@ -3507,6 +3588,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
uflow_res);
if (err)
goto err_free;
flow_attr->size +=
((union ib_flow_spec *) ib_spec)->size;
cmd.flow_attr.size -= ((struct ib_uverbs_flow_spec *)kern_spec)->size;
@@ -3519,11 +3601,16 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
err = -EINVAL;
goto err_free;
}
flow_id = ib_create_flow(qp, flow_attr, IB_FLOW_DOMAIN_USER);
flow_id = qp->device->create_flow(qp, flow_attr,
IB_FLOW_DOMAIN_USER, uhw);
if (IS_ERR(flow_id)) {
err = PTR_ERR(flow_id);
goto err_free;
}
atomic_inc(&qp->usecnt);
flow_id->qp = qp;
flow_id->uobject = uobj;
uobj->object = flow_id;
uflow = container_of(uobj, typeof(*uflow), uobject);
+41 -1
View File
@@ -41,6 +41,8 @@
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/file.h>
#include <linux/cdev.h>
#include <linux/anon_inodes.h>
@@ -1090,6 +1092,44 @@ err:
return;
}
/*
 * Call the driver's disassociate_ucontext() for @ibcontext while holding the
 * owning mm's mmap_sem for writing.
 *
 * If the task identified by ibcontext->tgid cannot be looked up, nothing is
 * done. If the task exists but has no mm, the driver callback is NOT
 * invoked; instead the task is polled every 1-2 ms until it is gone or its
 * state is TASK_DEAD.
 */
static void ib_uverbs_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
	struct ib_device *ib_dev = ibcontext->device;
	struct task_struct *task;
	struct mm_struct *mm;

	task = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
	if (!task)
		return;

	mm = get_task_mm(task);
	if (mm) {
		down_write(&mm->mmap_sem);
		ib_dev->disassociate_ucontext(ibcontext);
		up_write(&mm->mmap_sem);
		mmput(mm);
		put_task_struct(task);
		return;
	}

	pr_info("no mm, disassociate ucontext is pending task termination\n");
	for (;;) {
		/* Drop the current reference before sleeping and retrying. */
		put_task_struct(task);
		usleep_range(1000, 2000);
		task = get_pid_task(ibcontext->tgid, PIDTYPE_PID);

		if (!task) {
			pr_info("disassociate ucontext done, task was terminated\n");
			return;
		}

		if (task->state == TASK_DEAD) {
			pr_info("disassociate ucontext done, task was terminated\n");
			/* The task was found but is dead: release its struct. */
			put_task_struct(task);
			return;
		}
	}
}
static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
struct ib_device *ib_dev)
{
@@ -1130,7 +1170,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
* (e.g mmput).
*/
ib_uverbs_event_handler(&file->event_handler, &event);
ib_dev->disassociate_ucontext(ucontext);
ib_uverbs_disassociate_ucontext(ucontext);
mutex_lock(&file->cleanup_mutex);
ib_uverbs_cleanup_ucontext(file, ucontext, true);
mutex_unlock(&file->cleanup_mutex);
+2 -1
View File
@@ -302,7 +302,8 @@ static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
&UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
&UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
&UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
&UVERBS_OBJECT(UVERBS_OBJECT_DM));
&UVERBS_OBJECT(UVERBS_OBJECT_DM),
&UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
{
@@ -0,0 +1,157 @@
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
* Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "uverbs.h"
#include <rdma/uverbs_std_types.h>
/*
 * Free callback for counters uobjects.
 *
 * When the removal reason is RDMA_REMOVE_DESTROY and the counters object
 * still has a non-zero usecnt, refuse with -EBUSY. In every other case hand
 * the object to the device's destroy_counters() and return its result.
 */
static int uverbs_free_counters(struct ib_uobject *uobject,
				enum rdma_remove_reason why)
{
	struct ib_counters *cntrs = uobject->object;

	if (why == RDMA_REMOVE_DESTROY) {
		if (atomic_read(&cntrs->usecnt))
			return -EBUSY;
	}

	return cntrs->device->destroy_counters(cntrs);
}
/*
 * Handler for UVERBS_METHOD_COUNTERS_CREATE.
 *
 * Obtains a new ib_counters object from the device's create_counters()
 * callback and links it with the uobject carried in
 * UVERBS_ATTR_CREATE_COUNTERS_HANDLE; usecnt starts at 0.
 *
 * Returns 0 on success, -EOPNOTSUPP if the device has no create_counters
 * callback, or the error encoded in the pointer the callback returned.
 */
static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_CREATE)(struct ib_device *ib_dev,
							 struct ib_uverbs_file *file,
							 struct uverbs_attr_bundle *attrs)
{
	struct ib_uobject *uobj;
	struct ib_counters *cntrs;

	/*
	 * This check should be removed once the infrastructure
	 * has the ability to remove methods from the parse tree when
	 * such a condition is met.
	 */
	if (!ib_dev->create_counters)
		return -EOPNOTSUPP;

	uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_CREATE_COUNTERS_HANDLE);

	cntrs = ib_dev->create_counters(ib_dev, attrs);
	if (IS_ERR(cntrs))
		return PTR_ERR(cntrs);

	cntrs->device = ib_dev;
	cntrs->uobject = uobj;
	uobj->object = cntrs;
	atomic_set(&cntrs->usecnt, 0);

	return 0;
}
static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(struct ib_device *ib_dev,
struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs)
{
struct ib_counters_read_attr read_attr = {};
const struct uverbs_attr *uattr;
struct ib_counters *counters =
uverbs_attr_get_obj(attrs, UVERBS_ATTR_READ_COUNTERS_HANDLE);
int ret;
if (!ib_dev->read_counters)
return -EOPNOTSUPP;
if (!atomic_read(&counters->usecnt))
return -EINVAL;
ret = uverbs_copy_from(&read_attr.flags, attrs,
UVERBS_ATTR_READ_COUNTERS_FLAGS);
if (ret)
return ret;
uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF);
read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64);
read_attr.counters_buff = kcalloc(read_attr.ncounters,
sizeof(u64), GFP_KERNEL);
if (!read_attr.counters_buff)
return -ENOMEM;
ret = ib_dev->read_counters(counters,
&read_attr,
attrs);
if (ret)
goto err_read;
ret = uverbs_copy_to(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF,
read_attr.counters_buff,
read_attr.ncounters * sizeof(u64));
err_read:
kfree(read_attr.counters_buff);
return ret;
}
/*
 * Method description for UVERBS_METHOD_COUNTERS_CREATE: one mandatory IDR
 * handle attribute for a UVERBS_OBJECT_COUNTERS object, declared with
 * UVERBS_ACCESS_NEW.
 */
static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE,
&UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
UVERBS_OBJECT_COUNTERS,
UVERBS_ACCESS_NEW,
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
/*
 * Method description for UVERBS_METHOD_COUNTERS_DESTROY. It names
 * uverbs_destroy_def_handler as its handler and takes one mandatory IDR
 * handle attribute for a UVERBS_OBJECT_COUNTERS object, declared with
 * UVERBS_ACCESS_DESTROY.
 */
static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY,
uverbs_destroy_def_handler,
&UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
UVERBS_OBJECT_COUNTERS,
UVERBS_ACCESS_DESTROY,
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
/*
 * Passed as the second argument of UVERBS_ATTR_SIZE() for the read buffer
 * attribute below (presumably the maximum accepted length in bytes -
 * confirm against the UVERBS_ATTR_SIZE definition).
 */
#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX
/*
 * Method description for UVERBS_METHOD_COUNTERS_READ. Three mandatory
 * attributes: the counters IDR handle (UVERBS_ACCESS_READ), an output
 * pointer attribute for the counters buffer, and an input pointer attribute
 * of type __u32 for the flags.
 */
static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ,
&UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
UVERBS_OBJECT_COUNTERS,
UVERBS_ACCESS_READ,
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
&UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE),
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
&UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
UVERBS_ATTR_TYPE(__u32),
UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
/*
 * Object description for UVERBS_OBJECT_COUNTERS: an IDR-allocated type that
 * names uverbs_free_counters() in its type definition, exposing the create,
 * destroy and read methods declared above.
 */
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
&UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));

Some files were not shown because too many files have changed in this diff.