Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma

Pull rdma updates from Doug Ledford:
 "Initial roundup of 4.5 merge window patches

   - Remove usage of ib_query_device and instead store attributes in
     ib_device struct

   - Move iopoll out of block and into lib, rename to irqpoll, and use
     in several places in the rdma stack as our new completion queue
     polling library mechanism.  Update the other block drivers that
     already used iopoll to use the new mechanism too.

   - Replace the per-entry GID table locks with a single GID table lock

   - IPoIB multicast cleanup

   - Cleanups to the IB MR facility

   - Add support for 64bit extended IB counters

   - Fix for netlink oops while parsing RDMA nl messages

   - RoCEv2 support for the core IB code

   - mlx4 RoCEv2 support

   - mlx5 RoCEv2 support

   - Cross Channel support for mlx5

   - Timestamp support for mlx5

   - Atomic support for mlx5

   - Raw QP support for mlx5

   - MAINTAINERS update for mlx4/mlx5

   - Misc ocrdma, qib, nes, usNIC, cxgb3, cxgb4, mlx4, mlx5 updates

   - Add support for remote invalidate to the iSER driver (pushed
     through the RDMA tree due to dependencies, acknowledged by nab)

   - Update to NFSoRDMA (pushed through the RDMA tree due to
     dependencies, acknowledged by Bruce)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (169 commits)
  IB/mlx5: Unify CQ create flags check
  IB/mlx5: Expose Raw Packet QP to user space consumers
  {IB, net}/mlx5: Move the modify QP operation table to mlx5_ib
  IB/mlx5: Support setting Ethernet priority for Raw Packet QPs
  IB/mlx5: Add Raw Packet QP query functionality
  IB/mlx5: Add create and destroy functionality for Raw Packet QP
  IB/mlx5: Refactor mlx5_ib_qp to accommodate other QP types
  IB/mlx5: Allocate a Transport Domain for each ucontext
  net/mlx5_core: Warn on unsupported events of QP/RQ/SQ
  net/mlx5_core: Add RQ and SQ event handling
  net/mlx5_core: Export transport objects
  IB/mlx5: Expose CQE version to user-space
  IB/mlx5: Add CQE version 1 support to user QPs and SRQs
  IB/mlx5: Fix data validation in mlx5_ib_alloc_ucontext
  IB/sa: Fix netlink local service GFP crash
  IB/srpt: Remove redundant wc array
  IB/qib: Improve ipoib UD performance
  IB/mlx4: Advertise RoCE v2 support
  IB/mlx4: Create and use another QP1 for RoCEv2
  IB/mlx4: Enable send of RoCE QP1 packets with IP/UDP headers
  ...
This commit is contained in:
Linus Torvalds
2016-01-23 18:45:06 -08:00
174 changed files with 7122 additions and 4681 deletions
@@ -0,0 +1,22 @@
What: /config/rdma_cm
Date: November 29, 2015
KernelVersion: 4.4.0
Description: Interface is used to configure RDMA-cable HCAs in respect to
RDMA-CM attributes.
Attributes are visible only when configfs is mounted. To mount
configfs in /config directory use:
# mount -t configfs none /config/
In order to set parameters related to a specific HCA, a directory
for this HCA has to be created:
mkdir -p /config/rdma_cm/<hca>
What: /config/rdma_cm/<hca>/ports/<port-num>/default_roce_mode
Date: November 29, 2015
KernelVersion: 4.4.0
Description: RDMA-CM based connections from HCA <hca> at port <port-num>
will be initiated with this RoCE type as default.
The possible RoCE types are either "IB/RoCE v1" or "RoCE v2".
This parameter has RW access.
@@ -0,0 +1,16 @@
What: /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/ndevs/<gid-index>
Date: November 29, 2015
KernelVersion: 4.4.0
Contact: linux-rdma@vger.kernel.org
Description: The net-device's name associated with the GID resides
at index <gid-index>.
What: /sys/class/infiniband/<hca>/ports/<port-number>/gid_attrs/types/<gid-index>
Date: November 29, 2015
KernelVersion: 4.4.0
Contact: linux-rdma@vger.kernel.org
Description: The RoCE type of the associated GID resides at index <gid-index>.
This could either be "IB/RoCE v1" for IB and RoCE v1 based GODs
or "RoCE v2" for RoCE v2 based GIDs.
@@ -15,7 +15,6 @@ Sleeping and interrupt context
modify_ah
query_ah
destroy_ah
bind_mw
post_send
post_recv
poll_cq
@@ -31,7 +30,6 @@ Sleeping and interrupt context
ib_modify_ah
ib_query_ah
ib_destroy_ah
ib_bind_mw
ib_post_send
ib_post_recv
ib_req_notify_cq
+1 -1
View File
@@ -90,7 +90,7 @@ BLOCK_SOFTIRQ: Do all of the following:
from being initiated from tasks that might run on the CPU to
be de-jittered. (It is OK to force this CPU offline and then
bring it back online before you start your application.)
BLOCK_IOPOLL_SOFTIRQ: Do all of the following:
IRQ_POLL_SOFTIRQ: Do all of the following:
1. Force block-device interrupts onto some other CPU.
2. Initiate any block I/O and block-I/O polling on other CPUs.
3. Once your application has started, prevent CPU-hotplug operations
+25 -7
View File
@@ -7151,27 +7151,45 @@ W: https://linuxtv.org
S: Odd Fixes
F: drivers/media/radio/radio-miropcm20*
Mellanox MLX5 core VPI driver
M: Eli Cohen <eli@mellanox.com>
MELLANOX MLX4 core VPI driver
M: Yishai Hadas <yishaih@mellanox.com>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
W: http://www.mellanox.com
Q: http://patchwork.ozlabs.org/project/netdev/list/
S: Supported
F: drivers/net/ethernet/mellanox/mlx4/
F: include/linux/mlx4/
MELLANOX MLX4 IB driver
M: Yishai Hadas <yishaih@mellanox.com>
L: linux-rdma@vger.kernel.org
W: http://www.mellanox.com
Q: http://patchwork.kernel.org/project/linux-rdma/list/
T: git git://openfabrics.org/~eli/connect-ib.git
S: Supported
F: drivers/infiniband/hw/mlx4/
F: include/linux/mlx4/
MELLANOX MLX5 core VPI driver
M: Matan Barak <matanb@mellanox.com>
M: Leon Romanovsky <leonro@mellanox.com>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
W: http://www.mellanox.com
Q: http://patchwork.ozlabs.org/project/netdev/list/
S: Supported
F: drivers/net/ethernet/mellanox/mlx5/core/
F: include/linux/mlx5/
Mellanox MLX5 IB driver
M: Eli Cohen <eli@mellanox.com>
MELLANOX MLX5 IB driver
M: Matan Barak <matanb@mellanox.com>
M: Leon Romanovsky <leonro@mellanox.com>
L: linux-rdma@vger.kernel.org
W: http://www.mellanox.com
Q: http://patchwork.kernel.org/project/linux-rdma/list/
T: git git://openfabrics.org/~eli/connect-ib.git
S: Supported
F: include/linux/mlx5/
F: drivers/infiniband/hw/mlx5/
F: include/linux/mlx5/
MELEXIS MLX90614 DRIVER
M: Crt Mori <cmo@melexis.com>
+1 -1
View File
@@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
blk-lib.o blk-mq.o blk-mq-tag.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
+10
View File
@@ -5,6 +5,7 @@ menuconfig INFINIBAND
depends on NET
depends on INET
depends on m || IPV6 != m
select IRQ_POLL
---help---
Core support for InfiniBand (IB). Make sure to also select
any protocols you wish to use as well as drivers for your
@@ -54,6 +55,15 @@ config INFINIBAND_ADDR_TRANS
depends on INFINIBAND
default y
config INFINIBAND_ADDR_TRANS_CONFIGFS
bool
depends on INFINIBAND_ADDR_TRANS && CONFIGFS_FS && !(INFINIBAND=y && CONFIGFS_FS=m)
default y
---help---
ConfigFS support for RDMA communication manager (CM).
This allows the user to config the default GID type that the CM
uses for each device, when initiaing new connections.
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
+3 -1
View File
@@ -8,7 +8,7 @@ obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o \
$(user_access-y)
ib_core-y := packer.o ud_header.o verbs.o sysfs.o \
ib_core-y := packer.o ud_header.o verbs.o cq.o sysfs.o \
device.o fmr_pool.o cache.o netlink.o \
roce_gid_mgmt.o
ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
@@ -24,6 +24,8 @@ iw_cm-y := iwcm.o iwpm_util.o iwpm_msg.o
rdma_cm-y := cma.o
rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
rdma_ucm-y := ucma.o
ib_addr-y := addr.o
+146 -50
View File
@@ -121,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
}
EXPORT_SYMBOL(rdma_copy_addr);
int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
int rdma_translate_ip(const struct sockaddr *addr,
struct rdma_dev_addr *dev_addr,
u16 *vlan_id)
{
struct net_device *dev;
@@ -139,7 +140,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
switch (addr->sa_family) {
case AF_INET:
dev = ip_dev_find(dev_addr->net,
((struct sockaddr_in *) addr)->sin_addr.s_addr);
((const struct sockaddr_in *)addr)->sin_addr.s_addr);
if (!dev)
return ret;
@@ -154,7 +155,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
rcu_read_lock();
for_each_netdev_rcu(dev_addr->net, dev) {
if (ipv6_chk_addr(dev_addr->net,
&((struct sockaddr_in6 *) addr)->sin6_addr,
&((const struct sockaddr_in6 *)addr)->sin6_addr,
dev, 1)) {
ret = rdma_copy_addr(dev_addr, dev, NULL);
if (vlan_id)
@@ -198,7 +199,8 @@ static void queue_req(struct addr_req *req)
mutex_unlock(&lock);
}
static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
const void *daddr)
{
struct neighbour *n;
int ret;
@@ -222,8 +224,9 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, v
}
static int addr4_resolve(struct sockaddr_in *src_in,
struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr)
const struct sockaddr_in *dst_in,
struct rdma_dev_addr *addr,
struct rtable **prt)
{
__be32 src_ip = src_in->sin_addr.s_addr;
__be32 dst_ip = dst_in->sin_addr.s_addr;
@@ -243,33 +246,29 @@ static int addr4_resolve(struct sockaddr_in *src_in,
src_in->sin_family = AF_INET;
src_in->sin_addr.s_addr = fl4.saddr;
if (rt->dst.dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
}
/* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
* routable) and we could set the network type accordingly.
*/
if (rt->rt_uses_gateway)
addr->network = RDMA_NETWORK_IPV4;
/* If the device does ARP internally, return 'done' */
if (rt->dst.dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, rt->dst.dev, NULL);
goto put;
}
addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr);
put:
ip_rt_put(rt);
*prt = rt;
return 0;
out:
return ret;
}
#if IS_ENABLED(CONFIG_IPV6)
static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
const struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr,
struct dst_entry **pdst)
{
struct flowi6 fl6;
struct dst_entry *dst;
struct rt6_info *rt;
int ret;
memset(&fl6, 0, sizeof fl6);
@@ -281,6 +280,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
if ((ret = dst->error))
goto put;
rt = (struct rt6_info *)dst;
if (ipv6_addr_any(&fl6.saddr)) {
ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
&fl6.daddr, 0, &fl6.saddr);
@@ -291,43 +291,111 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
src_in->sin6_addr = fl6.saddr;
}
if (dst->dev->flags & IFF_LOOPBACK) {
ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
goto put;
}
/* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
* routable) and we could set the network type accordingly.
*/
if (rt->rt6i_flags & RTF_GATEWAY)
addr->network = RDMA_NETWORK_IPV6;
/* If the device does ARP internally, return 'done' */
if (dst->dev->flags & IFF_NOARP) {
ret = rdma_copy_addr(addr, dst->dev, NULL);
goto put;
}
addr->hoplimit = ip6_dst_hoplimit(dst);
ret = dst_fetch_ha(dst, addr, &fl6.daddr);
*pdst = dst;
return 0;
put:
dst_release(dst);
return ret;
}
#else
static int addr6_resolve(struct sockaddr_in6 *src_in,
struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr)
const struct sockaddr_in6 *dst_in,
struct rdma_dev_addr *addr,
struct dst_entry **pdst)
{
return -EADDRNOTAVAIL;
}
#endif
static int addr_resolve(struct sockaddr *src_in,
struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
static int addr_resolve_neigh(struct dst_entry *dst,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr)
{
if (dst->dev->flags & IFF_LOOPBACK) {
int ret;
ret = rdma_translate_ip(dst_in, addr, NULL);
if (!ret)
memcpy(addr->dst_dev_addr, addr->src_dev_addr,
MAX_ADDR_LEN);
return ret;
}
/* If the device doesn't do ARP internally */
if (!(dst->dev->flags & IFF_NOARP)) {
const struct sockaddr_in *dst_in4 =
(const struct sockaddr_in *)dst_in;
const struct sockaddr_in6 *dst_in6 =
(const struct sockaddr_in6 *)dst_in;
return dst_fetch_ha(dst, addr,
dst_in->sa_family == AF_INET ?
(const void *)&dst_in4->sin_addr.s_addr :
(const void *)&dst_in6->sin6_addr);
}
return rdma_copy_addr(addr, dst->dev, NULL);
}
static int addr_resolve(struct sockaddr *src_in,
const struct sockaddr *dst_in,
struct rdma_dev_addr *addr,
bool resolve_neigh)
{
struct net_device *ndev;
struct dst_entry *dst;
int ret;
if (src_in->sa_family == AF_INET) {
return addr4_resolve((struct sockaddr_in *) src_in,
(struct sockaddr_in *) dst_in, addr);
} else
return addr6_resolve((struct sockaddr_in6 *) src_in,
(struct sockaddr_in6 *) dst_in, addr);
struct rtable *rt = NULL;
const struct sockaddr_in *dst_in4 =
(const struct sockaddr_in *)dst_in;
ret = addr4_resolve((struct sockaddr_in *)src_in,
dst_in4, addr, &rt);
if (ret)
return ret;
if (resolve_neigh)
ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
ndev = rt->dst.dev;
dev_hold(ndev);
ip_rt_put(rt);
} else {
const struct sockaddr_in6 *dst_in6 =
(const struct sockaddr_in6 *)dst_in;
ret = addr6_resolve((struct sockaddr_in6 *)src_in,
dst_in6, addr,
&dst);
if (ret)
return ret;
if (resolve_neigh)
ret = addr_resolve_neigh(dst, dst_in, addr);
ndev = dst->dev;
dev_hold(ndev);
dst_release(dst);
}
addr->bound_dev_if = ndev->ifindex;
addr->net = dev_net(ndev);
dev_put(ndev);
return ret;
}
static void process_req(struct work_struct *work)
@@ -343,7 +411,8 @@ static void process_req(struct work_struct *work)
if (req->status == -ENODATA) {
src_in = (struct sockaddr *) &req->src_addr;
dst_in = (struct sockaddr *) &req->dst_addr;
req->status = addr_resolve(src_in, dst_in, req->addr);
req->status = addr_resolve(src_in, dst_in, req->addr,
true);
if (req->status && time_after_eq(jiffies, req->timeout))
req->status = -ETIMEDOUT;
else if (req->status == -ENODATA)
@@ -403,7 +472,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
req->client = client;
atomic_inc(&client->refcount);
req->status = addr_resolve(src_in, dst_in, addr);
req->status = addr_resolve(src_in, dst_in, addr, true);
switch (req->status) {
case 0:
req->timeout = jiffies;
@@ -425,6 +494,26 @@ err:
}
EXPORT_SYMBOL(rdma_resolve_ip);
int rdma_resolve_ip_route(struct sockaddr *src_addr,
const struct sockaddr *dst_addr,
struct rdma_dev_addr *addr)
{
struct sockaddr_storage ssrc_addr = {};
struct sockaddr *src_in = (struct sockaddr *)&ssrc_addr;
if (src_addr) {
if (src_addr->sa_family != dst_addr->sa_family)
return -EINVAL;
memcpy(src_in, src_addr, rdma_addr_size(src_addr));
} else {
src_in->sa_family = dst_addr->sa_family;
}
return addr_resolve(src_in, dst_addr, addr, false);
}
EXPORT_SYMBOL(rdma_resolve_ip_route);
void rdma_addr_cancel(struct rdma_dev_addr *addr)
{
struct addr_req *req, *temp_req;
@@ -456,8 +545,10 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
complete(&((struct resolve_cb_context *)context)->comp);
}
int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
u8 *dmac, u16 *vlan_id, int if_index)
int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid,
const union ib_gid *dgid,
u8 *dmac, u16 *vlan_id, int *if_index,
int *hoplimit)
{
int ret = 0;
struct rdma_dev_addr dev_addr;
@@ -475,7 +566,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
rdma_gid2ip(&dgid_addr._sockaddr, dgid);
memset(&dev_addr, 0, sizeof(dev_addr));
dev_addr.bound_dev_if = if_index;
if (if_index)
dev_addr.bound_dev_if = *if_index;
dev_addr.net = &init_net;
ctx.addr = &dev_addr;
@@ -491,12 +583,16 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
if (!dev)
return -ENODEV;
if (if_index)
*if_index = dev_addr.bound_dev_if;
if (vlan_id)
*vlan_id = rdma_vlan_dev_vlan_id(dev);
if (hoplimit)
*hoplimit = dev_addr.hoplimit;
dev_put(dev);
return ret;
}
EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
EXPORT_SYMBOL(rdma_addr_find_l2_eth_by_grh);
int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
{
File diff suppressed because it is too large Load Diff
+31 -18
View File
@@ -364,7 +364,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
read_lock_irqsave(&cm.device_lock, flags);
list_for_each_entry(cm_dev, &cm.device_list, list) {
if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
ndev, &p, NULL)) {
path->gid_type, ndev, &p, NULL)) {
port = cm_dev->port[p-1];
break;
}
@@ -782,11 +782,11 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
/* Check if the device started its remove_one */
spin_lock_irq(&cm.lock);
spin_lock_irqsave(&cm.lock, flags);
if (!cm_dev->going_down)
queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
msecs_to_jiffies(wait_time));
spin_unlock_irq(&cm.lock);
spin_unlock_irqrestore(&cm.lock, flags);
cm_id_priv->timewait_info = NULL;
}
@@ -1600,6 +1600,8 @@ static int cm_req_handler(struct cm_work *work)
struct ib_cm_id *cm_id;
struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_req_msg *req_msg;
union ib_gid gid;
struct ib_gid_attr gid_attr;
int ret;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1639,11 +1641,31 @@ static int cm_req_handler(struct cm_work *work)
cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
work->port->port_num,
cm_id_priv->av.ah_attr.grh.sgid_index,
&gid, &gid_attr);
if (!ret) {
if (gid_attr.ndev) {
work->path[0].ifindex = gid_attr.ndev->ifindex;
work->path[0].net = dev_net(gid_attr.ndev);
dev_put(gid_attr.ndev);
}
work->path[0].gid_type = gid_attr.gid_type;
ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
}
if (ret) {
ib_get_cached_gid(work->port->cm_dev->ib_device,
work->port->port_num, 0, &work->path[0].sgid,
NULL);
int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
work->port->port_num, 0,
&work->path[0].sgid,
&gid_attr);
if (!err && gid_attr.ndev) {
work->path[0].ifindex = gid_attr.ndev->ifindex;
work->path[0].net = dev_net(gid_attr.ndev);
dev_put(gid_attr.ndev);
}
work->path[0].gid_type = gid_attr.gid_type;
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
&work->path[0].sgid, sizeof work->path[0].sgid,
NULL, 0);
@@ -3482,6 +3504,7 @@ int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
EXPORT_SYMBOL(ib_cm_notify);
static void cm_recv_handler(struct ib_mad_agent *mad_agent,
struct ib_mad_send_buf *send_buf,
struct ib_mad_recv_wc *mad_recv_wc)
{
struct cm_port *port = mad_agent->context;
@@ -3731,16 +3754,6 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
}
EXPORT_SYMBOL(ib_cm_init_qp_attr);
static void cm_get_ack_delay(struct cm_device *cm_dev)
{
struct ib_device_attr attr;
if (ib_query_device(cm_dev->ib_device, &attr))
cm_dev->ack_delay = 0; /* acks will rely on packet life time */
else
cm_dev->ack_delay = attr.local_ca_ack_delay;
}
static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
char *buf)
{
@@ -3852,7 +3865,7 @@ static void cm_add_one(struct ib_device *ib_device)
return;
cm_dev->ib_device = ib_device;
cm_get_ack_delay(cm_dev);
cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
cm_dev->going_down = 0;
cm_dev->device = device_create(&cm_class, &ib_device->dev,
MKDEV(0, 0), NULL,
+238 -27
View File
@@ -38,6 +38,7 @@
#include <linux/in6.h>
#include <linux/mutex.h>
#include <linux/random.h>
#include <linux/igmp.h>
#include <linux/idr.h>
#include <linux/inetdevice.h>
#include <linux/slab.h>
@@ -60,6 +61,8 @@
#include <rdma/ib_sa.h>
#include <rdma/iw_cm.h>
#include "core_priv.h"
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("Generic RDMA CM Agent");
MODULE_LICENSE("Dual BSD/GPL");
@@ -150,6 +153,7 @@ struct cma_device {
struct completion comp;
atomic_t refcount;
struct list_head id_list;
enum ib_gid_type *default_gid_type;
};
struct rdma_bind_list {
@@ -185,6 +189,67 @@ enum {
CMA_OPTION_AFONLY,
};
void cma_ref_dev(struct cma_device *cma_dev)
{
atomic_inc(&cma_dev->refcount);
}
struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
void *cookie)
{
struct cma_device *cma_dev;
struct cma_device *found_cma_dev = NULL;
mutex_lock(&lock);
list_for_each_entry(cma_dev, &dev_list, list)
if (filter(cma_dev->device, cookie)) {
found_cma_dev = cma_dev;
break;
}
if (found_cma_dev)
cma_ref_dev(found_cma_dev);
mutex_unlock(&lock);
return found_cma_dev;
}
int cma_get_default_gid_type(struct cma_device *cma_dev,
unsigned int port)
{
if (port < rdma_start_port(cma_dev->device) ||
port > rdma_end_port(cma_dev->device))
return -EINVAL;
return cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)];
}
int cma_set_default_gid_type(struct cma_device *cma_dev,
unsigned int port,
enum ib_gid_type default_gid_type)
{
unsigned long supported_gids;
if (port < rdma_start_port(cma_dev->device) ||
port > rdma_end_port(cma_dev->device))
return -EINVAL;
supported_gids = roce_gid_type_mask_support(cma_dev->device, port);
if (!(supported_gids & 1 << default_gid_type))
return -EINVAL;
cma_dev->default_gid_type[port - rdma_start_port(cma_dev->device)] =
default_gid_type;
return 0;
}
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev)
{
return cma_dev->device;
}
/*
* Device removal can occur at anytime, so we need extra handling to
* serialize notifying the user of device removal with other callbacks.
@@ -228,6 +293,7 @@ struct rdma_id_private {
u8 tos;
u8 reuseaddr;
u8 afonly;
enum ib_gid_type gid_type;
};
struct cma_multicast {
@@ -239,6 +305,7 @@ struct cma_multicast {
void *context;
struct sockaddr_storage addr;
struct kref mcref;
bool igmp_joined;
};
struct cma_work {
@@ -335,18 +402,48 @@ static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
static int cma_igmp_send(struct net_device *ndev, union ib_gid *mgid, bool join)
{
atomic_inc(&cma_dev->refcount);
struct in_device *in_dev = NULL;
if (ndev) {
rtnl_lock();
in_dev = __in_dev_get_rtnl(ndev);
if (in_dev) {
if (join)
ip_mc_inc_group(in_dev,
*(__be32 *)(mgid->raw + 12));
else
ip_mc_dec_group(in_dev,
*(__be32 *)(mgid->raw + 12));
}
rtnl_unlock();
}
return (in_dev) ? 0 : -ENODEV;
}
static void _cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
cma_ref_dev(cma_dev);
id_priv->cma_dev = cma_dev;
id_priv->gid_type = 0;
id_priv->id.device = cma_dev->device;
id_priv->id.route.addr.dev_addr.transport =
rdma_node_get_transport(cma_dev->device->node_type);
list_add_tail(&id_priv->list, &cma_dev->id_list);
}
static inline void cma_deref_dev(struct cma_device *cma_dev)
static void cma_attach_to_dev(struct rdma_id_private *id_priv,
struct cma_device *cma_dev)
{
_cma_attach_to_dev(id_priv, cma_dev);
id_priv->gid_type =
cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(cma_dev->device)];
}
void cma_deref_dev(struct cma_device *cma_dev)
{
if (atomic_dec_and_test(&cma_dev->refcount))
complete(&cma_dev->comp);
@@ -441,6 +538,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
}
static inline int cma_validate_port(struct ib_device *device, u8 port,
enum ib_gid_type gid_type,
union ib_gid *gid, int dev_type,
int bound_if_index)
{
@@ -453,10 +551,25 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
return ret;
if (dev_type == ARPHRD_ETHER)
if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
ndev = dev_get_by_index(&init_net, bound_if_index);
if (ndev && ndev->flags & IFF_LOOPBACK) {
pr_info("detected loopback device\n");
dev_put(ndev);
ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL);
if (!device->get_netdev)
return -EOPNOTSUPP;
ndev = device->get_netdev(device, port);
if (!ndev)
return -ENODEV;
}
} else {
gid_type = IB_GID_TYPE_IB;
}
ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
ndev, NULL);
if (ndev)
dev_put(ndev);
@@ -490,7 +603,10 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
ret = cma_validate_port(cma_dev->device, port, gidp,
ret = cma_validate_port(cma_dev->device, port,
rdma_protocol_ib(cma_dev->device, port) ?
IB_GID_TYPE_IB :
listen_id_priv->gid_type, gidp,
dev_addr->dev_type,
dev_addr->bound_dev_if);
if (!ret) {
@@ -509,8 +625,11 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
ret = cma_validate_port(cma_dev->device, port, gidp,
dev_addr->dev_type,
ret = cma_validate_port(cma_dev->device, port,
rdma_protocol_ib(cma_dev->device, port) ?
IB_GID_TYPE_IB :
cma_dev->default_gid_type[port - 1],
gidp, dev_addr->dev_type,
dev_addr->bound_dev_if);
if (!ret) {
id_priv->id.port_num = port;
@@ -1437,8 +1556,24 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
id_priv->id.port_num)) {
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
} else
} else {
if (mc->igmp_joined) {
struct rdma_dev_addr *dev_addr =
&id_priv->id.route.addr.dev_addr;
struct net_device *ndev = NULL;
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(&init_net,
dev_addr->bound_dev_if);
if (ndev) {
cma_igmp_send(ndev,
&mc->multicast.ib->rec.mgid,
false);
dev_put(ndev);
}
}
kref_put(&mc->mcref, release_mc);
}
}
}
@@ -1896,7 +2031,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
struct rdma_id_private *listen_id, *conn_id;
struct rdma_cm_event event;
int ret;
struct ib_device_attr attr;
struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
@@ -1938,13 +2072,6 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
memcpy(cma_src_addr(conn_id), laddr, rdma_addr_size(laddr));
memcpy(cma_dst_addr(conn_id), raddr, rdma_addr_size(raddr));
ret = ib_query_device(conn_id->id.device, &attr);
if (ret) {
mutex_unlock(&conn_id->handler_mutex);
rdma_destroy_id(new_cm_id);
goto out;
}
memset(&event, 0, sizeof event);
event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
event.param.conn.private_data = iw_event->private_data;
@@ -2051,7 +2178,7 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
memcpy(cma_src_addr(dev_id_priv), cma_src_addr(id_priv),
rdma_addr_size(cma_src_addr(id_priv)));
cma_attach_to_dev(dev_id_priv, cma_dev);
_cma_attach_to_dev(dev_id_priv, cma_dev);
list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
atomic_inc(&id_priv->refcount);
dev_id_priv->internal_id = 1;
@@ -2321,8 +2448,23 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
if (addr->dev_addr.bound_dev_if) {
ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
if (!ndev)
return -ENODEV;
if (ndev->flags & IFF_LOOPBACK) {
dev_put(ndev);
if (!id_priv->id.device->get_netdev)
return -EOPNOTSUPP;
ndev = id_priv->id.device->get_netdev(id_priv->id.device,
id_priv->id.port_num);
if (!ndev)
return -ENODEV;
}
route->path_rec->net = &init_net;
route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
route->path_rec->ifindex = ndev->ifindex;
route->path_rec->gid_type = id_priv->gid_type;
}
if (!ndev) {
ret = -ENODEV;
@@ -2336,7 +2478,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
&route->path_rec->dgid);
route->path_rec->hop_limit = 1;
/* Use the hint from IP Stack to select GID Type */
if (route->path_rec->gid_type < ib_network_to_gid_type(addr->dev_addr.network))
route->path_rec->gid_type = ib_network_to_gid_type(addr->dev_addr.network);
if (((struct sockaddr *)&id_priv->id.route.addr.dst_addr)->sa_family != AF_IB)
/* TODO: get the hoplimit from the inet/inet6 device */
route->path_rec->hop_limit = addr->dev_addr.hoplimit;
else
route->path_rec->hop_limit = 1;
route->path_rec->reversible = 1;
route->path_rec->pkey = cpu_to_be16(0xffff);
route->path_rec->mtu_selector = IB_SA_EQ;
@@ -3534,12 +3683,23 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
event.status = status;
event.param.ud.private_data = mc->context;
if (!status) {
struct rdma_dev_addr *dev_addr =
&id_priv->id.route.addr.dev_addr;
struct net_device *ndev =
dev_get_by_index(&init_net, dev_addr->bound_dev_if);
enum ib_gid_type gid_type =
id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
ib_init_ah_from_mcmember(id_priv->id.device,
id_priv->id.port_num, &multicast->rec,
ndev, gid_type,
&event.param.ud.ah_attr);
event.param.ud.qp_num = 0xFFFFFF;
event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
if (ndev)
dev_put(ndev);
} else
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
@@ -3672,9 +3832,10 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
{
struct iboe_mcast_work *work;
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int err;
int err = 0;
struct sockaddr *addr = (struct sockaddr *)&mc->addr;
struct net_device *ndev = NULL;
enum ib_gid_type gid_type;
if (cma_zero_addr((struct sockaddr *)&mc->addr))
return -EINVAL;
@@ -3704,9 +3865,25 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
mc->multicast.ib->rec.hop_limit = 1;
mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
rdma_start_port(id_priv->cma_dev->device)];
if (addr->sa_family == AF_INET) {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
true);
if (!err) {
mc->igmp_joined = true;
mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
}
} else {
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
err = -ENOTSUPP;
}
dev_put(ndev);
if (!mc->multicast.ib->rec.mtu) {
err = -EINVAL;
if (err || !mc->multicast.ib->rec.mtu) {
if (!err)
err = -EINVAL;
goto out2;
}
rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
@@ -3745,7 +3922,7 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
memcpy(&mc->addr, addr, rdma_addr_size(addr));
mc->context = context;
mc->id_priv = id_priv;
mc->igmp_joined = false;
spin_lock(&id_priv->lock);
list_add(&mc->list, &id_priv->mc_list);
spin_unlock(&id_priv->lock);
@@ -3790,9 +3967,25 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
if (rdma_cap_ib_mcast(id->device, id->port_num)) {
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
} else if (rdma_protocol_roce(id->device, id->port_num))
kref_put(&mc->mcref, release_mc);
} else if (rdma_protocol_roce(id->device, id->port_num)) {
if (mc->igmp_joined) {
struct rdma_dev_addr *dev_addr =
&id->route.addr.dev_addr;
struct net_device *ndev = NULL;
if (dev_addr->bound_dev_if)
ndev = dev_get_by_index(&init_net,
dev_addr->bound_dev_if);
if (ndev) {
cma_igmp_send(ndev,
&mc->multicast.ib->rec.mgid,
false);
dev_put(ndev);
}
mc->igmp_joined = false;
}
kref_put(&mc->mcref, release_mc);
}
return;
}
}
@@ -3861,12 +4054,27 @@ static void cma_add_one(struct ib_device *device)
{
struct cma_device *cma_dev;
struct rdma_id_private *id_priv;
unsigned int i;
unsigned long supported_gids = 0;
cma_dev = kmalloc(sizeof *cma_dev, GFP_KERNEL);
if (!cma_dev)
return;
cma_dev->device = device;
cma_dev->default_gid_type = kcalloc(device->phys_port_cnt,
sizeof(*cma_dev->default_gid_type),
GFP_KERNEL);
if (!cma_dev->default_gid_type) {
kfree(cma_dev);
return;
}
for (i = rdma_start_port(device); i <= rdma_end_port(device); i++) {
supported_gids = roce_gid_type_mask_support(device, i);
WARN_ON(!supported_gids);
cma_dev->default_gid_type[i - rdma_start_port(device)] =
find_first_bit(&supported_gids, BITS_PER_LONG);
}
init_completion(&cma_dev->comp);
atomic_set(&cma_dev->refcount, 1);
@@ -3946,6 +4154,7 @@ static void cma_remove_one(struct ib_device *device, void *client_data)
mutex_unlock(&lock);
cma_process_remove(cma_dev);
kfree(cma_dev->default_gid_type);
kfree(cma_dev);
}
@@ -4079,6 +4288,7 @@ static int __init cma_init(void)
if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
cma_configfs_init();
return 0;
@@ -4093,6 +4303,7 @@ err_wq:
static void __exit cma_cleanup(void)
{
cma_configfs_exit();
ibnl_remove_client(RDMA_NL_RDMA_CM);
ib_unregister_client(&cma_client);
unregister_netdevice_notifier(&cma_nb);
+321
View File
@@ -0,0 +1,321 @@
/*
* Copyright (c) 2015, Mellanox Technologies inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/module.h>
#include <linux/configfs.h>
#include <rdma/ib_verbs.h>
#include "core_priv.h"
struct cma_device;
struct cma_dev_group;
struct cma_dev_port_group {
unsigned int port_num;
struct cma_dev_group *cma_dev_group;
struct config_group group;
};
struct cma_dev_group {
char name[IB_DEVICE_NAME_MAX];
struct config_group device_group;
struct config_group ports_group;
struct config_group *default_dev_group[2];
struct config_group **default_ports_group;
struct cma_dev_port_group *ports;
};
static struct cma_dev_port_group *to_dev_port_group(struct config_item *item)
{
struct config_group *group;
if (!item)
return NULL;
group = container_of(item, struct config_group, cg_item);
return container_of(group, struct cma_dev_port_group, group);
}
static bool filter_by_name(struct ib_device *ib_dev, void *cookie)
{
return !strcmp(ib_dev->name, cookie);
}
static int cma_configfs_params_get(struct config_item *item,
struct cma_device **pcma_dev,
struct cma_dev_port_group **pgroup)
{
struct cma_dev_port_group *group = to_dev_port_group(item);
struct cma_device *cma_dev;
if (!group)
return -ENODEV;
cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
group->cma_dev_group->name);
if (!cma_dev)
return -ENODEV;
*pcma_dev = cma_dev;
*pgroup = group;
return 0;
}
static void cma_configfs_params_put(struct cma_device *cma_dev)
{
cma_deref_dev(cma_dev);
}
static ssize_t default_roce_mode_show(struct config_item *item,
char *buf)
{
struct cma_device *cma_dev;
struct cma_dev_port_group *group;
int gid_type;
ssize_t ret;
ret = cma_configfs_params_get(item, &cma_dev, &group);
if (ret)
return ret;
gid_type = cma_get_default_gid_type(cma_dev, group->port_num);
cma_configfs_params_put(cma_dev);
if (gid_type < 0)
return gid_type;
return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_type));
}
static ssize_t default_roce_mode_store(struct config_item *item,
const char *buf, size_t count)
{
struct cma_device *cma_dev;
struct cma_dev_port_group *group;
int gid_type = ib_cache_gid_parse_type_str(buf);
ssize_t ret;
if (gid_type < 0)
return -EINVAL;
ret = cma_configfs_params_get(item, &cma_dev, &group);
if (ret)
return ret;
ret = cma_set_default_gid_type(cma_dev, group->port_num, gid_type);
cma_configfs_params_put(cma_dev);
return !ret ? strnlen(buf, count) : ret;
}
CONFIGFS_ATTR(, default_roce_mode);
static struct configfs_attribute *cma_configfs_attributes[] = {
&attr_default_roce_mode,
NULL,
};
static struct config_item_type cma_port_group_type = {
.ct_attrs = cma_configfs_attributes,
.ct_owner = THIS_MODULE
};
static int make_cma_ports(struct cma_dev_group *cma_dev_group,
struct cma_device *cma_dev)
{
struct ib_device *ibdev;
unsigned int i;
unsigned int ports_num;
struct cma_dev_port_group *ports;
struct config_group **ports_group;
int err;
ibdev = cma_get_ib_dev(cma_dev);
if (!ibdev)
return -ENODEV;
ports_num = ibdev->phys_port_cnt;
ports = kcalloc(ports_num, sizeof(*cma_dev_group->ports),
GFP_KERNEL);
ports_group = kcalloc(ports_num + 1, sizeof(*ports_group), GFP_KERNEL);
if (!ports || !ports_group) {
err = -ENOMEM;
goto free;
}
for (i = 0; i < ports_num; i++) {
char port_str[10];
ports[i].port_num = i + 1;
snprintf(port_str, sizeof(port_str), "%u", i + 1);
ports[i].cma_dev_group = cma_dev_group;
config_group_init_type_name(&ports[i].group,
port_str,
&cma_port_group_type);
ports_group[i] = &ports[i].group;
}
ports_group[i] = NULL;
cma_dev_group->default_ports_group = ports_group;
cma_dev_group->ports = ports;
return 0;
free:
kfree(ports);
kfree(ports_group);
cma_dev_group->ports = NULL;
cma_dev_group->default_ports_group = NULL;
return err;
}
static void release_cma_dev(struct config_item *item)
{
struct config_group *group = container_of(item, struct config_group,
cg_item);
struct cma_dev_group *cma_dev_group = container_of(group,
struct cma_dev_group,
device_group);
kfree(cma_dev_group);
};
static void release_cma_ports_group(struct config_item *item)
{
struct config_group *group = container_of(item, struct config_group,
cg_item);
struct cma_dev_group *cma_dev_group = container_of(group,
struct cma_dev_group,
ports_group);
kfree(cma_dev_group->ports);
kfree(cma_dev_group->default_ports_group);
cma_dev_group->ports = NULL;
cma_dev_group->default_ports_group = NULL;
};
static struct configfs_item_operations cma_ports_item_ops = {
.release = release_cma_ports_group
};
static struct config_item_type cma_ports_group_type = {
.ct_item_ops = &cma_ports_item_ops,
.ct_owner = THIS_MODULE
};
static struct configfs_item_operations cma_device_item_ops = {
.release = release_cma_dev
};
static struct config_item_type cma_device_group_type = {
.ct_item_ops = &cma_device_item_ops,
.ct_owner = THIS_MODULE
};
static struct config_group *make_cma_dev(struct config_group *group,
const char *name)
{
int err = -ENODEV;
struct cma_device *cma_dev = cma_enum_devices_by_ibdev(filter_by_name,
(void *)name);
struct cma_dev_group *cma_dev_group = NULL;
if (!cma_dev)
goto fail;
cma_dev_group = kzalloc(sizeof(*cma_dev_group), GFP_KERNEL);
if (!cma_dev_group) {
err = -ENOMEM;
goto fail;
}
strncpy(cma_dev_group->name, name, sizeof(cma_dev_group->name));
err = make_cma_ports(cma_dev_group, cma_dev);
if (err)
goto fail;
cma_dev_group->ports_group.default_groups =
cma_dev_group->default_ports_group;
config_group_init_type_name(&cma_dev_group->ports_group, "ports",
&cma_ports_group_type);
cma_dev_group->device_group.default_groups
= cma_dev_group->default_dev_group;
cma_dev_group->default_dev_group[0] = &cma_dev_group->ports_group;
cma_dev_group->default_dev_group[1] = NULL;
config_group_init_type_name(&cma_dev_group->device_group, name,
&cma_device_group_type);
cma_deref_dev(cma_dev);
return &cma_dev_group->device_group;
fail:
if (cma_dev)
cma_deref_dev(cma_dev);
kfree(cma_dev_group);
return ERR_PTR(err);
}
static struct configfs_group_operations cma_subsys_group_ops = {
.make_group = make_cma_dev,
};
static struct config_item_type cma_subsys_type = {
.ct_group_ops = &cma_subsys_group_ops,
.ct_owner = THIS_MODULE,
};
static struct configfs_subsystem cma_subsys = {
.su_group = {
.cg_item = {
.ci_namebuf = "rdma_cm",
.ci_type = &cma_subsys_type,
},
},
};
int __init cma_configfs_init(void)
{
config_group_init(&cma_subsys.su_group);
mutex_init(&cma_subsys.su_mutex);
return configfs_register_subsystem(&cma_subsys);
}
void __exit cma_configfs_exit(void)
{
configfs_unregister_subsystem(&cma_subsys);
}
+45
View File
@@ -38,6 +38,32 @@
#include <rdma/ib_verbs.h>
#if IS_ENABLED(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS)
int cma_configfs_init(void);
void cma_configfs_exit(void);
#else
static inline int cma_configfs_init(void)
{
return 0;
}
static inline void cma_configfs_exit(void)
{
}
#endif
struct cma_device;
void cma_ref_dev(struct cma_device *cma_dev);
void cma_deref_dev(struct cma_device *cma_dev);
typedef bool (*cma_device_filter)(struct ib_device *, void *);
struct cma_device *cma_enum_devices_by_ibdev(cma_device_filter filter,
void *cookie);
int cma_get_default_gid_type(struct cma_device *cma_dev,
unsigned int port);
int cma_set_default_gid_type(struct cma_device *cma_dev,
unsigned int port,
enum ib_gid_type default_gid_type);
struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev);
int ib_device_register_sysfs(struct ib_device *device,
int (*port_callback)(struct ib_device *,
u8, struct kobject *));
@@ -70,8 +96,13 @@ enum ib_cache_gid_default_mode {
IB_CACHE_GID_DEFAULT_MODE_DELETE
};
int ib_cache_gid_parse_type_str(const char *buf);
const char *ib_cache_gid_type_str(enum ib_gid_type gid_type);
void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
struct net_device *ndev,
unsigned long gid_type_mask,
enum ib_cache_gid_default_mode mode);
int ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
@@ -87,9 +118,23 @@ int roce_gid_mgmt_init(void);
void roce_gid_mgmt_cleanup(void);
int roce_rescan_device(struct ib_device *ib_dev);
unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port);
int ib_cache_setup_one(struct ib_device *device);
void ib_cache_cleanup_one(struct ib_device *device);
void ib_cache_release_one(struct ib_device *device);
static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
struct net_device *upper)
{
struct net_device *_upper = NULL;
struct list_head *iter;
netdev_for_each_all_upper_dev_rcu(dev, _upper, iter)
if (_upper == upper)
break;
return _upper == upper;
}
#endif /* _CORE_PRIV_H */
+209
View File
@@ -0,0 +1,209 @@
/*
* Copyright (c) 2015 HGST, a Western Digital Company.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <linux/module.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <rdma/ib_verbs.h>
/* # of WCs to poll for with a single call to ib_poll_cq */
#define IB_POLL_BATCH 16
/* # of WCs to iterate over before yielding */
#define IB_POLL_BUDGET_IRQ 256
#define IB_POLL_BUDGET_WORKQUEUE 65536
#define IB_POLL_FLAGS \
(IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS)
static int __ib_process_cq(struct ib_cq *cq, int budget)
{
int i, n, completed = 0;
while ((n = ib_poll_cq(cq, IB_POLL_BATCH, cq->wc)) > 0) {
for (i = 0; i < n; i++) {
struct ib_wc *wc = &cq->wc[i];
if (wc->wr_cqe)
wc->wr_cqe->done(cq, wc);
else
WARN_ON_ONCE(wc->status == IB_WC_SUCCESS);
}
completed += n;
if (n != IB_POLL_BATCH ||
(budget != -1 && completed >= budget))
break;
}
return completed;
}
/**
* ib_process_direct_cq - process a CQ in caller context
* @cq: CQ to process
* @budget: number of CQEs to poll for
*
* This function is used to process all outstanding CQ entries on a
* %IB_POLL_DIRECT CQ. It does not offload CQ processing to a different
* context and does not ask for completion interrupts from the HCA.
*
* Note: for compatibility reasons -1 can be passed in %budget for unlimited
* polling. Do not use this feature in new code, it will be removed soon.
*/
int ib_process_cq_direct(struct ib_cq *cq, int budget)
{
WARN_ON_ONCE(cq->poll_ctx != IB_POLL_DIRECT);
return __ib_process_cq(cq, budget);
}
EXPORT_SYMBOL(ib_process_cq_direct);
static void ib_cq_completion_direct(struct ib_cq *cq, void *private)
{
WARN_ONCE(1, "got unsolicited completion for CQ 0x%p\n", cq);
}
static int ib_poll_handler(struct irq_poll *iop, int budget)
{
struct ib_cq *cq = container_of(iop, struct ib_cq, iop);
int completed;
completed = __ib_process_cq(cq, budget);
if (completed < budget) {
irq_poll_complete(&cq->iop);
if (ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
irq_poll_sched(&cq->iop);
}
return completed;
}
static void ib_cq_completion_softirq(struct ib_cq *cq, void *private)
{
irq_poll_sched(&cq->iop);
}
static void ib_cq_poll_work(struct work_struct *work)
{
struct ib_cq *cq = container_of(work, struct ib_cq, work);
int completed;
completed = __ib_process_cq(cq, IB_POLL_BUDGET_WORKQUEUE);
if (completed >= IB_POLL_BUDGET_WORKQUEUE ||
ib_req_notify_cq(cq, IB_POLL_FLAGS) > 0)
queue_work(ib_comp_wq, &cq->work);
}
static void ib_cq_completion_workqueue(struct ib_cq *cq, void *private)
{
queue_work(ib_comp_wq, &cq->work);
}
/**
* ib_alloc_cq - allocate a completion queue
* @dev: device to allocate the CQ for
* @private: driver private data, accessible from cq->cq_context
* @nr_cqe: number of CQEs to allocate
* @comp_vector: HCA completion vectors for this CQ
* @poll_ctx: context to poll the CQ from.
*
* This is the proper interface to allocate a CQ for in-kernel users. A
* CQ allocated with this interface will automatically be polled from the
* specified context. The ULP needs must use wr->wr_cqe instead of wr->wr_id
* to use this CQ abstraction.
*/
struct ib_cq *ib_alloc_cq(struct ib_device *dev, void *private,
int nr_cqe, int comp_vector, enum ib_poll_context poll_ctx)
{
struct ib_cq_init_attr cq_attr = {
.cqe = nr_cqe,
.comp_vector = comp_vector,
};
struct ib_cq *cq;
int ret = -ENOMEM;
cq = dev->create_cq(dev, &cq_attr, NULL, NULL);
if (IS_ERR(cq))
return cq;
cq->device = dev;
cq->uobject = NULL;
cq->event_handler = NULL;
cq->cq_context = private;
cq->poll_ctx = poll_ctx;
atomic_set(&cq->usecnt, 0);
cq->wc = kmalloc_array(IB_POLL_BATCH, sizeof(*cq->wc), GFP_KERNEL);
if (!cq->wc)
goto out_destroy_cq;
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
cq->comp_handler = ib_cq_completion_direct;
break;
case IB_POLL_SOFTIRQ:
cq->comp_handler = ib_cq_completion_softirq;
irq_poll_init(&cq->iop, IB_POLL_BUDGET_IRQ, ib_poll_handler);
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
break;
case IB_POLL_WORKQUEUE:
cq->comp_handler = ib_cq_completion_workqueue;
INIT_WORK(&cq->work, ib_cq_poll_work);
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
break;
default:
ret = -EINVAL;
goto out_free_wc;
}
return cq;
out_free_wc:
kfree(cq->wc);
out_destroy_cq:
cq->device->destroy_cq(cq);
return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_alloc_cq);
/**
* ib_free_cq - free a completion queue
* @cq: completion queue to free.
*/
void ib_free_cq(struct ib_cq *cq)
{
int ret;
if (WARN_ON_ONCE(atomic_read(&cq->usecnt)))
return;
switch (cq->poll_ctx) {
case IB_POLL_DIRECT:
break;
case IB_POLL_SOFTIRQ:
irq_poll_disable(&cq->iop);
break;
case IB_POLL_WORKQUEUE:
flush_work(&cq->work);
break;
default:
WARN_ON_ONCE(1);
}
kfree(cq->wc);
ret = cq->device->destroy_cq(cq);
WARN_ON_ONCE(ret);
}
EXPORT_SYMBOL(ib_free_cq);
+28 -23
View File
@@ -58,6 +58,7 @@ struct ib_client_data {
bool going_down;
};
struct workqueue_struct *ib_comp_wq;
struct workqueue_struct *ib_wq;
EXPORT_SYMBOL_GPL(ib_wq);
@@ -325,6 +326,7 @@ int ib_register_device(struct ib_device *device,
{
int ret;
struct ib_client *client;
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
mutex_lock(&device_mutex);
@@ -352,6 +354,13 @@ int ib_register_device(struct ib_device *device,
goto out;
}
memset(&device->attrs, 0, sizeof(device->attrs));
ret = device->query_device(device, &device->attrs, &uhw);
if (ret) {
printk(KERN_WARNING "Couldn't query the device attributes\n");
goto out;
}
ret = ib_device_register_sysfs(device, port_callback);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
@@ -627,25 +636,6 @@ void ib_dispatch_event(struct ib_event *event)
}
EXPORT_SYMBOL(ib_dispatch_event);
/**
* ib_query_device - Query IB device attributes
* @device:Device to query
* @device_attr:Device attributes
*
* ib_query_device() returns the attributes of a device through the
* @device_attr pointer.
*/
int ib_query_device(struct ib_device *device,
struct ib_device_attr *device_attr)
{
struct ib_udata uhw = {.outlen = 0, .inlen = 0};
memset(device_attr, 0, sizeof(*device_attr));
return device->query_device(device, device_attr, &uhw);
}
EXPORT_SYMBOL(ib_query_device);
/**
* ib_query_port - Query IB port attributes
* @device:Device to query
@@ -825,26 +815,31 @@ EXPORT_SYMBOL(ib_modify_port);
* a specified GID value occurs.
* @device: The device to query.
* @gid: The GID value to search for.
* @gid_type: Type of GID.
* @ndev: The ndev related to the GID to search for.
* @port_num: The port number of the device where the GID value was found.
* @index: The index into the GID table where the GID was found. This
* parameter may be NULL.
*/
int ib_find_gid(struct ib_device *device, union ib_gid *gid,
struct net_device *ndev, u8 *port_num, u16 *index)
enum ib_gid_type gid_type, struct net_device *ndev,
u8 *port_num, u16 *index)
{
union ib_gid tmp_gid;
int ret, port, i;
for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
if (rdma_cap_roce_gid_table(device, port)) {
if (!ib_find_cached_gid_by_port(device, gid, port,
if (!ib_find_cached_gid_by_port(device, gid, gid_type, port,
ndev, index)) {
*port_num = port;
return 0;
}
}
if (gid_type != IB_GID_TYPE_IB)
continue;
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
if (ret)
@@ -954,10 +949,18 @@ static int __init ib_core_init(void)
if (!ib_wq)
return -ENOMEM;
ib_comp_wq = alloc_workqueue("ib-comp-wq",
WQ_UNBOUND | WQ_HIGHPRI | WQ_MEM_RECLAIM,
WQ_UNBOUND_MAX_ACTIVE);
if (!ib_comp_wq) {
ret = -ENOMEM;
goto err;
}
ret = class_register(&ib_class);
if (ret) {
printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
goto err;
goto err_comp;
}
ret = ibnl_init();
@@ -972,7 +975,8 @@ static int __init ib_core_init(void)
err_sysfs:
class_unregister(&ib_class);
err_comp:
destroy_workqueue(ib_comp_wq);
err:
destroy_workqueue(ib_wq);
return ret;
@@ -983,6 +987,7 @@ static void __exit ib_core_cleanup(void)
ib_cache_cleanup();
ibnl_cleanup();
class_unregister(&ib_class);
destroy_workqueue(ib_comp_wq);
/* Make sure that any pending umem accounting work is done. */
destroy_workqueue(ib_wq);
}
+2 -18
View File
@@ -212,7 +212,6 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
{
struct ib_device *device;
struct ib_fmr_pool *pool;
struct ib_device_attr *attr;
int i;
int ret;
int max_remaps;
@@ -228,25 +227,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd,
return ERR_PTR(-ENOSYS);
}
attr = kmalloc(sizeof *attr, GFP_KERNEL);
if (!attr) {
printk(KERN_WARNING PFX "couldn't allocate device attr struct\n");
return ERR_PTR(-ENOMEM);
}
ret = ib_query_device(device, attr);
if (ret) {
printk(KERN_WARNING PFX "couldn't query device: %d\n", ret);
kfree(attr);
return ERR_PTR(ret);
}
if (!attr->max_map_per_fmr)
if (!device->attrs.max_map_per_fmr)
max_remaps = IB_FMR_MAX_REMAPS;
else
max_remaps = attr->max_map_per_fmr;
kfree(attr);
max_remaps = device->attrs.max_map_per_fmr;
pool = kmalloc(sizeof *pool, GFP_KERNEL);
if (!pool) {
+68 -94
View File
@@ -84,6 +84,9 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
u8 mgmt_class);
static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
struct ib_mad_agent_private *agent_priv);
static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
struct ib_wc *wc);
static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
/*
* Returns a ib_mad_port_private structure or NULL for a device/port
@@ -681,7 +684,7 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
atomic_inc(&mad_snoop_priv->refcount);
spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent, NULL,
mad_recv_wc);
deref_snoop_agent(mad_snoop_priv);
spin_lock_irqsave(&qp_info->snoop_lock, flags);
@@ -689,12 +692,11 @@ static void snoop_recv(struct ib_mad_qp_info *qp_info,
spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
}
static void build_smp_wc(struct ib_qp *qp,
u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
struct ib_wc *wc)
static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
u16 pkey_index, u8 port_num, struct ib_wc *wc)
{
memset(wc, 0, sizeof *wc);
wc->wr_id = wr_id;
wc->wr_cqe = cqe;
wc->status = IB_WC_SUCCESS;
wc->opcode = IB_WC_RECV;
wc->pkey_index = pkey_index;
@@ -832,7 +834,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
}
build_smp_wc(mad_agent_priv->agent.qp,
send_wr->wr.wr_id, drslid,
send_wr->wr.wr_cqe, drslid,
send_wr->pkey_index,
send_wr->port_num, &mad_wc);
@@ -1039,7 +1041,9 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
mad_send_wr->send_wr.wr.num_sge = 2;
mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
@@ -1151,8 +1155,9 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
/* Set WR ID to find mad_send_wr upon completion */
qp_info = mad_send_wr->mad_agent_priv->qp_info;
mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
mad_agent = mad_send_wr->send_buf.mad_agent;
sge = mad_send_wr->sg_list;
@@ -1982,9 +1987,9 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
/* user rmpp is in effect
* and this is an active RMPP MAD
*/
mad_recv_wc->wc->wr_id = 0;
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
mad_recv_wc);
mad_agent_priv->agent.recv_handler(
&mad_agent_priv->agent, NULL,
mad_recv_wc);
atomic_dec(&mad_agent_priv->refcount);
} else {
/* not user rmpp, revert to normal behavior and
@@ -1998,9 +2003,10 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
/* Defined behavior is to complete response before request */
mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
mad_recv_wc);
mad_agent_priv->agent.recv_handler(
&mad_agent_priv->agent,
&mad_send_wr->send_buf,
mad_recv_wc);
atomic_dec(&mad_agent_priv->refcount);
mad_send_wc.status = IB_WC_SUCCESS;
@@ -2009,7 +2015,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
}
} else {
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
mad_recv_wc);
deref_mad_agent(mad_agent_priv);
}
@@ -2172,13 +2178,14 @@ handle_smi(struct ib_mad_port_private *port_priv,
return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
}
static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
struct ib_wc *wc)
static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_mad_port_private *port_priv = cq->cq_context;
struct ib_mad_list_head *mad_list =
container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
struct ib_mad_qp_info *qp_info;
struct ib_mad_private_header *mad_priv_hdr;
struct ib_mad_private *recv, *response = NULL;
struct ib_mad_list_head *mad_list;
struct ib_mad_agent_private *mad_agent;
int port_num;
int ret = IB_MAD_RESULT_SUCCESS;
@@ -2186,7 +2193,17 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
u16 resp_mad_pkey_index = 0;
bool opa;
mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
if (list_empty_careful(&port_priv->port_list))
return;
if (wc->status != IB_WC_SUCCESS) {
/*
* Receive errors indicate that the QP has entered the error
* state - error handling/shutdown code will cleanup
*/
return;
}
qp_info = mad_list->mad_queue->qp_info;
dequeue_mad(mad_list);
@@ -2227,7 +2244,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
response = alloc_mad_private(mad_size, GFP_KERNEL);
if (!response) {
dev_err(&port_priv->device->dev,
"ib_mad_recv_done_handler no memory for response buffer\n");
"%s: no memory for response buffer\n", __func__);
goto out;
}
@@ -2413,11 +2430,12 @@ done:
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}
static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
struct ib_wc *wc)
static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_mad_port_private *port_priv = cq->cq_context;
struct ib_mad_list_head *mad_list =
container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
struct ib_mad_send_wr_private *mad_send_wr, *queued_send_wr;
struct ib_mad_list_head *mad_list;
struct ib_mad_qp_info *qp_info;
struct ib_mad_queue *send_queue;
struct ib_send_wr *bad_send_wr;
@@ -2425,7 +2443,14 @@ static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
unsigned long flags;
int ret;
mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
if (list_empty_careful(&port_priv->port_list))
return;
if (wc->status != IB_WC_SUCCESS) {
if (!ib_mad_send_error(port_priv, wc))
return;
}
mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
mad_list);
send_queue = mad_list->mad_queue;
@@ -2490,24 +2515,15 @@ static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
}
static void mad_error_handler(struct ib_mad_port_private *port_priv,
struct ib_wc *wc)
static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
struct ib_wc *wc)
{
struct ib_mad_list_head *mad_list;
struct ib_mad_qp_info *qp_info;
struct ib_mad_list_head *mad_list =
container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
struct ib_mad_send_wr_private *mad_send_wr;
int ret;
/* Determine if failure was a send or receive */
mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
qp_info = mad_list->mad_queue->qp_info;
if (mad_list->mad_queue == &qp_info->recv_queue)
/*
* Receive errors indicate that the QP has entered the error
* state - error handling/shutdown code will cleanup
*/
return;
/*
* Send errors will transition the QP to SQE - move
* QP to RTS and repost flushed work requests
@@ -2522,10 +2538,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
mad_send_wr->retry = 0;
ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
&bad_send_wr);
if (ret)
ib_mad_send_done_handler(port_priv, wc);
} else
ib_mad_send_done_handler(port_priv, wc);
if (!ret)
return false;
}
} else {
struct ib_qp_attr *attr;
@@ -2539,42 +2554,14 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
kfree(attr);
if (ret)
dev_err(&port_priv->device->dev,
"mad_error_handler - ib_modify_qp to RTS : %d\n",
ret);
"%s - ib_modify_qp to RTS: %d\n",
__func__, ret);
else
mark_sends_for_retry(qp_info);
}
ib_mad_send_done_handler(port_priv, wc);
}
}
/*
* IB MAD completion callback
*/
static void ib_mad_completion_handler(struct work_struct *work)
{
struct ib_mad_port_private *port_priv;
struct ib_wc wc;
port_priv = container_of(work, struct ib_mad_port_private, work);
ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
if (wc.status == IB_WC_SUCCESS) {
switch (wc.opcode) {
case IB_WC_SEND:
ib_mad_send_done_handler(port_priv, &wc);
break;
case IB_WC_RECV:
ib_mad_recv_done_handler(port_priv, &wc);
break;
default:
BUG_ON(1);
break;
}
} else
mad_error_handler(port_priv, &wc);
}
return true;
}
static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
@@ -2716,7 +2703,7 @@ static void local_completions(struct work_struct *work)
* before request
*/
build_smp_wc(recv_mad_agent->agent.qp,
(unsigned long) local->mad_send_wr,
local->mad_send_wr->send_wr.wr.wr_cqe,
be16_to_cpu(IB_LID_PERMISSIVE),
local->mad_send_wr->send_wr.pkey_index,
recv_mad_agent->agent.port_num, &wc);
@@ -2744,6 +2731,7 @@ static void local_completions(struct work_struct *work)
IB_MAD_SNOOP_RECVS);
recv_mad_agent->agent.recv_handler(
&recv_mad_agent->agent,
&local->mad_send_wr->send_buf,
&local->mad_priv->header.recv_wc);
spin_lock_irqsave(&recv_mad_agent->lock, flags);
atomic_dec(&recv_mad_agent->refcount);
@@ -2855,17 +2843,6 @@ static void timeout_sends(struct work_struct *work)
spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
}
static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
{
struct ib_mad_port_private *port_priv = cq->cq_context;
unsigned long flags;
spin_lock_irqsave(&ib_mad_port_list_lock, flags);
if (!list_empty(&port_priv->port_list))
queue_work(port_priv->wq, &port_priv->work);
spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
}
/*
* Allocate receive MADs and post receive WRs for them
*/
@@ -2913,8 +2890,9 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
break;
}
mad_priv->header.mapping = sg_list.addr;
recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
mad_priv->header.mad_list.mad_queue = recv_queue;
mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
/* Post receive WR */
spin_lock_irqsave(&recv_queue->lock, flags);
@@ -3151,7 +3129,6 @@ static int ib_mad_port_open(struct ib_device *device,
unsigned long flags;
char name[sizeof "ib_mad123"];
int has_smi;
struct ib_cq_init_attr cq_attr = {};
if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
return -EFAULT;
@@ -3179,10 +3156,8 @@ static int ib_mad_port_open(struct ib_device *device,
if (has_smi)
cq_size *= 2;
cq_attr.cqe = cq_size;
port_priv->cq = ib_create_cq(port_priv->device,
ib_mad_thread_completion_handler,
NULL, port_priv, &cq_attr);
port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
IB_POLL_WORKQUEUE);
if (IS_ERR(port_priv->cq)) {
dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
@@ -3211,7 +3186,6 @@ static int ib_mad_port_open(struct ib_device *device,
ret = -ENOMEM;
goto error8;
}
INIT_WORK(&port_priv->work, ib_mad_completion_handler);
spin_lock_irqsave(&ib_mad_port_list_lock, flags);
list_add_tail(&port_priv->port_list, &ib_mad_port_list);
@@ -3238,7 +3212,7 @@ error7:
error6:
ib_dealloc_pd(port_priv->pd);
error4:
ib_destroy_cq(port_priv->cq);
ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
error3:
@@ -3271,7 +3245,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
ib_dealloc_pd(port_priv->pd);
ib_destroy_cq(port_priv->cq);
ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
/* XXX: Handle deallocation of MAD registration tables */
+1 -1
View File
@@ -64,6 +64,7 @@
struct ib_mad_list_head {
struct list_head list;
struct ib_cqe cqe;
struct ib_mad_queue *mad_queue;
};
@@ -204,7 +205,6 @@ struct ib_mad_port_private {
struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
struct list_head agent_list;
struct workqueue_struct *wq;
struct work_struct work;
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
};
+15 -2
View File
@@ -723,14 +723,27 @@ EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
struct ib_ah_attr *ah_attr)
{
int ret;
u16 gid_index;
u8 p;
ret = ib_find_cached_gid(device, &rec->port_gid,
NULL, &p, &gid_index);
if (rdma_protocol_roce(device, port_num)) {
ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
gid_type, port_num,
ndev,
&gid_index);
} else if (rdma_protocol_ib(device, port_num)) {
ret = ib_find_cached_gid(device, &rec->port_gid,
IB_GID_TYPE_IB, NULL, &p,
&gid_index);
} else {
ret = -EINVAL;
}
if (ret)
return ret;

Some files were not shown because too many files have changed in this diff Show More