Merge tag 'iommu-updates-v3.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu

Pull IOMMU updates from Joerg Roedel:
 "The most important part of these updates is the IOMMU groups code
  enhancement written by Alex Williamson.  It abstracts the problem that
  a given hardware IOMMU can't isolate any given device from any other
  device (e.g.  32 bit PCI devices can't usually be isolated).  Devices
  that can't be isolated are grouped together.  This code is required
  for the upcoming VFIO framework.

  Another IOMMU-API change written by me is the introduction of domain
  attributes.  This makes it easier to handle GART-like IOMMUs with the
  IOMMU-API because now the start-address and the size of the domain
  address space can be queried.

  Besides that there are a few cleanups and fixes for the NVidia Tegra
  IOMMU drivers and the reworked init-code for the AMD IOMMU.  The
  latter is from my patch-set to support interrupt remapping.  The rest
  of this patch-set requires x86 changes which are not mergabe yet.  So
  full support for interrupt remapping with AMD IOMMUs will come in a
  future merge window."

* tag 'iommu-updates-v3.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (33 commits)
  iommu/amd: Fix hotplug with iommu=pt
  iommu/amd: Add missing spin_lock initialization
  iommu/amd: Convert iommu initialization to state machine
  iommu/amd: Introduce amd_iommu_init_dma routine
  iommu/amd: Move unmap_flush message to amd_iommu_init_dma_ops()
  iommu/amd: Split enable_iommus() routine
  iommu/amd: Introduce early_amd_iommu_init routine
  iommu/amd: Move informational prinks out of iommu_enable
  iommu/amd: Split out PCI related parts of IOMMU initialization
  iommu/amd: Use acpi_get_table instead of acpi_table_parse
  iommu/amd: Fix sparse warnings
  iommu/tegra: Don't call alloc_pdir with as->lock
  iommu/tegra: smmu: Fix unsleepable memory allocation at alloc_pdir()
  iommu/tegra: smmu: Remove unnecessary sanity check at alloc_pdir()
  iommu/exynos: Implement DOMAIN_ATTR_GEOMETRY attribute
  iommu/tegra: Implement DOMAIN_ATTR_GEOMETRY attribute
  iommu/msm: Implement DOMAIN_ATTR_GEOMETRY attribute
  iommu/omap: Implement DOMAIN_ATTR_GEOMETRY attribute
  iommu/vt-d: Implement DOMAIN_ATTR_GEOMETRY attribute
  iommu/amd: Implement DOMAIN_ATTR_GEOMETRY attribute
  ...
This commit is contained in:
Linus Torvalds
2012-07-24 16:24:11 -07:00
25 changed files with 1535 additions and 489 deletions
@@ -0,0 +1,14 @@
What: /sys/kernel/iommu_groups/
Date: May 2012
KernelVersion: v3.5
Contact: Alex Williamson <alex.williamson@redhat.com>
Description: /sys/kernel/iommu_groups/ contains a number of sub-
directories, each representing an IOMMU group. The
name of the sub-directory matches the iommu_group_id()
for the group, which is an integer value. Within each
subdirectory is another directory named "devices" with
links to the sysfs devices contained in this group.
The group directory also optionally contains a "name"
file if the IOMMU driver has chosen to register a more
common name for the group.
Users:
@@ -0,0 +1,21 @@
NVIDIA Tegra 30 IOMMU H/W, SMMU (System Memory Management Unit)
Required properties:
- compatible : "nvidia,tegra30-smmu"
- reg : Should contain 3 register banks(address and length) for each
of the SMMU register blocks.
- interrupts : Should contain MC General interrupt.
- nvidia,#asids : # of ASIDs
- dma-window : IOVA start address and length.
- nvidia,ahb : phandle to the ahb bus connected to SMMU.
Example:
smmu {
compatible = "nvidia,tegra30-smmu";
reg = <0x7000f010 0x02c
0x7000f1f0 0x010
0x7000f228 0x05c>;
nvidia,#asids = <4>; /* # of ASIDs */
dma-window = <0 0x40000000>; /* IOVA start & length */
nvidia,ahb = <&ahb>;
};
-1
View File
@@ -1134,7 +1134,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
forcesac
soft
pt [x86, IA-64]
group_mf [x86, IA-64]
io7= [HW] IO7 for Marvel based alpha systems
-2
View File
@@ -11,12 +11,10 @@ extern void no_iommu_init(void);
extern int force_iommu, no_iommu;
extern int iommu_pass_through;
extern int iommu_detected;
extern int iommu_group_mf;
#else
#define iommu_pass_through (0)
#define no_iommu (1)
#define iommu_detected (0)
#define iommu_group_mf (0)
#endif
extern void iommu_dma_init(void);
extern void machvec_init(const char *name);
-1
View File
@@ -32,7 +32,6 @@ int force_iommu __read_mostly;
#endif
int iommu_pass_through;
int iommu_group_mf;
/* Dummy device used for NULL arguments (normally ISA). Better would
be probably a smaller DMA mask, but this is bug-to-bug compatible
-1
View File
@@ -5,7 +5,6 @@ extern struct dma_map_ops nommu_dma_ops;
extern int force_iommu, no_iommu;
extern int iommu_detected;
extern int iommu_pass_through;
extern int iommu_group_mf;
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
-11
View File
@@ -45,15 +45,6 @@ int iommu_detected __read_mostly = 0;
*/
int iommu_pass_through __read_mostly;
/*
* Group multi-function PCI devices into a single device-group for the
* iommu_device_group interface. This tells the iommu driver to pretend
* it cannot distinguish between functions of a device, exposing only one
* group for the device. Useful for disallowing use of individual PCI
* functions from userspace drivers.
*/
int iommu_group_mf __read_mostly;
extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
/* Dummy device used for NULL arguments (normally ISA). */
@@ -194,8 +185,6 @@ static __init int iommu_setup(char *p)
#endif
if (!strncmp(p, "pt", 2))
iommu_pass_through = 1;
if (!strncmp(p, "group_mf", 8))
iommu_group_mf = 1;
gart_parse_options(p);
+5 -1
View File
@@ -13,6 +13,10 @@ menuconfig IOMMU_SUPPORT
if IOMMU_SUPPORT
config OF_IOMMU
def_bool y
depends on OF
# MSM IOMMU support
config MSM_IOMMU
bool "MSM IOMMU Support"
@@ -154,7 +158,7 @@ config TEGRA_IOMMU_GART
config TEGRA_IOMMU_SMMU
bool "Tegra SMMU IOMMU Support"
depends on ARCH_TEGRA_3x_SOC
depends on ARCH_TEGRA_3x_SOC && TEGRA_AHB
select IOMMU_API
help
Enables support for remapping discontiguous physical memory
+1
View File
@@ -1,4 +1,5 @@
obj-$(CONFIG_IOMMU_API) += iommu.o
obj-$(CONFIG_OF_IOMMU) += of_iommu.o
obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
+71 -28
View File
@@ -256,11 +256,21 @@ static bool check_device(struct device *dev)
return true;
}
static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
{
pci_dev_put(*from);
*from = to;
}
#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
static int iommu_init_device(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_dev *dma_pdev, *pdev = to_pci_dev(dev);
struct iommu_dev_data *dev_data;
struct iommu_group *group;
u16 alias;
int ret;
if (dev->archdata.iommu)
return 0;
@@ -281,8 +291,43 @@ static int iommu_init_device(struct device *dev)
return -ENOTSUPP;
}
dev_data->alias_data = alias_data;
dma_pdev = pci_get_bus_and_slot(alias >> 8, alias & 0xff);
} else
dma_pdev = pci_dev_get(pdev);
swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
if (dma_pdev->multifunction &&
!pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))
swap_pci_ref(&dma_pdev,
pci_get_slot(dma_pdev->bus,
PCI_DEVFN(PCI_SLOT(dma_pdev->devfn),
0)));
while (!pci_is_root_bus(dma_pdev->bus)) {
if (pci_acs_path_enabled(dma_pdev->bus->self,
NULL, REQ_ACS_FLAGS))
break;
swap_pci_ref(&dma_pdev, pci_dev_get(dma_pdev->bus->self));
}
group = iommu_group_get(&dma_pdev->dev);
pci_dev_put(dma_pdev);
if (!group) {
group = iommu_group_alloc();
if (IS_ERR(group))
return PTR_ERR(group);
}
ret = iommu_group_add_device(group, dev);
iommu_group_put(group);
if (ret)
return ret;
if (pci_iommuv2_capable(pdev)) {
struct amd_iommu *iommu;
@@ -311,6 +356,8 @@ static void iommu_ignore_device(struct device *dev)
static void iommu_uninit_device(struct device *dev)
{
iommu_group_remove_device(dev);
/*
* Nothing to do here - we keep dev_data around for unplugged devices
* and reuse it when the device is re-plugged - not doing so would
@@ -384,7 +431,6 @@ DECLARE_STATS_COUNTER(invalidate_iotlb);
DECLARE_STATS_COUNTER(invalidate_iotlb_all);
DECLARE_STATS_COUNTER(pri_requests);
static struct dentry *stats_dir;
static struct dentry *de_fflush;
@@ -2073,7 +2119,7 @@ out_err:
/* FIXME: Move this to PCI code */
#define PCI_PRI_TLP_OFF (1 << 15)
bool pci_pri_tlp_required(struct pci_dev *pdev)
static bool pci_pri_tlp_required(struct pci_dev *pdev)
{
u16 status;
int pos;
@@ -2254,6 +2300,18 @@ static int device_change_notifier(struct notifier_block *nb,
iommu_init_device(dev);
/*
* dev_data is still NULL and
* got initialized in iommu_init_device
*/
dev_data = get_dev_data(dev);
if (iommu_pass_through || dev_data->iommu_v2) {
dev_data->passthrough = true;
attach_device(dev, pt_domain);
break;
}
domain = domain_for_device(dev);
/* allocate a protection domain if a device is added */
@@ -2271,10 +2329,7 @@ static int device_change_notifier(struct notifier_block *nb,
dev_data = get_dev_data(dev);
if (!dev_data->passthrough)
dev->archdata.dma_ops = &amd_iommu_dma_ops;
else
dev->archdata.dma_ops = &nommu_dma_ops;
dev->archdata.dma_ops = &amd_iommu_dma_ops;
break;
case BUS_NOTIFY_DEL_DEVICE:
@@ -2972,6 +3027,11 @@ int __init amd_iommu_init_dma_ops(void)
amd_iommu_stats_init();
if (amd_iommu_unmap_flush)
pr_info("AMD-Vi: IO/TLB flush on unmap enabled\n");
else
pr_info("AMD-Vi: Lazy IO/TLB flushing enabled\n");
return 0;
free_domains:
@@ -3078,6 +3138,10 @@ static int amd_iommu_domain_init(struct iommu_domain *dom)
dom->priv = domain;
dom->geometry.aperture_start = 0;
dom->geometry.aperture_end = ~0ULL;
dom->geometry.force_aperture = true;
return 0;
out_free:
@@ -3236,26 +3300,6 @@ static int amd_iommu_domain_has_cap(struct iommu_domain *domain,
return 0;
}
static int amd_iommu_device_group(struct device *dev, unsigned int *groupid)
{
struct iommu_dev_data *dev_data = dev->archdata.iommu;
struct pci_dev *pdev = to_pci_dev(dev);
u16 devid;
if (!dev_data)
return -ENODEV;
if (pdev->is_virtfn || !iommu_group_mf)
devid = dev_data->devid;
else
devid = calc_devid(pdev->bus->number,
PCI_DEVFN(PCI_SLOT(pdev->devfn), 0));
*groupid = amd_iommu_alias_table[devid];
return 0;
}
static struct iommu_ops amd_iommu_ops = {
.domain_init = amd_iommu_domain_init,
.domain_destroy = amd_iommu_domain_destroy,
@@ -3265,7 +3309,6 @@ static struct iommu_ops amd_iommu_ops = {
.unmap = amd_iommu_unmap,
.iova_to_phys = amd_iommu_iova_to_phys,
.domain_has_cap = amd_iommu_domain_has_cap,
.device_group = amd_iommu_device_group,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
};
File diff suppressed because it is too large Load Diff
+10 -3
View File
@@ -487,7 +487,7 @@ struct amd_iommu {
/* physical address of MMIO space */
u64 mmio_phys;
/* virtual address of MMIO space */
u8 *mmio_base;
u8 __iomem *mmio_base;
/* capabilities of that IOMMU read from ACPI */
u32 cap;
@@ -501,6 +501,9 @@ struct amd_iommu {
/* IOMMUv2 */
bool is_iommu_v2;
/* PCI device id of the IOMMU device */
u16 devid;
/*
* Capability pointer. There could be more than one IOMMU per PCI
* device function if there are more than one AMD IOMMU capability
@@ -530,8 +533,6 @@ struct amd_iommu {
u32 evt_buf_size;
/* event buffer virtual address */
u8 *evt_buf;
/* MSI number for event interrupt */
u16 evt_msi_num;
/* Base of the PPR log, if present */
u8 *ppr_log;
@@ -664,6 +665,12 @@ extern bool amd_iommu_force_isolation;
/* Max levels of glxval supported */
extern int amd_iommu_max_glx_val;
/*
* This function flushes all internal caches of
* the IOMMU used by this driver.
*/
extern void iommu_flush_all_caches(struct amd_iommu *iommu);
/* takes bus and device/function and returns the device id
* FIXME: should that be in generic PCI code? */
static inline u16 calc_devid(u8 bus, u8 devfn)
+3 -1
View File
@@ -81,7 +81,7 @@ struct fault {
u16 flags;
};
struct device_state **state_table;
static struct device_state **state_table;
static spinlock_t state_lock;
/* List and lock for all pasid_states */
@@ -681,6 +681,8 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, int pasid,
atomic_set(&pasid_state->count, 1);
init_waitqueue_head(&pasid_state->wq);
spin_lock_init(&pasid_state->lock);
pasid_state->task = task;
pasid_state->mm = get_task_mm(task);
pasid_state->device_state = dev_state;
+4
View File
@@ -732,6 +732,10 @@ static int exynos_iommu_domain_init(struct iommu_domain *domain)
spin_lock_init(&priv->pgtablelock);
INIT_LIST_HEAD(&priv->clients);
dom->geometry.aperture_start = 0;
dom->geometry.aperture_end = ~0UL;
dom->geometry.force_aperture = true;
domain->priv = priv;
return 0;
+59 -36
View File
@@ -3932,6 +3932,10 @@ static int intel_iommu_domain_init(struct iommu_domain *domain)
domain_update_iommu_cap(dmar_domain);
domain->priv = dmar_domain;
domain->geometry.aperture_start = 0;
domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
domain->geometry.force_aperture = true;
return 0;
}
@@ -4090,52 +4094,70 @@ static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
return 0;
}
/*
* Group numbers are arbitrary. Device with the same group number
* indicate the iommu cannot differentiate between them. To avoid
* tracking used groups we just use the seg|bus|devfn of the lowest
* level we're able to differentiate devices
*/
static int intel_iommu_device_group(struct device *dev, unsigned int *groupid)
static void swap_pci_ref(struct pci_dev **from, struct pci_dev *to)
{
pci_dev_put(*from);
*from = to;
}
#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
static int intel_iommu_add_device(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_dev *bridge;
union {
struct {
u8 devfn;
u8 bus;
u16 segment;
} pci;
u32 group;
} id;
struct pci_dev *bridge, *dma_pdev;
struct iommu_group *group;
int ret;
if (iommu_no_mapping(dev))
return -ENODEV;
id.pci.segment = pci_domain_nr(pdev->bus);
id.pci.bus = pdev->bus->number;
id.pci.devfn = pdev->devfn;
if (!device_to_iommu(id.pci.segment, id.pci.bus, id.pci.devfn))
if (!device_to_iommu(pci_domain_nr(pdev->bus),
pdev->bus->number, pdev->devfn))
return -ENODEV;
bridge = pci_find_upstream_pcie_bridge(pdev);
if (bridge) {
if (pci_is_pcie(bridge)) {
id.pci.bus = bridge->subordinate->number;
id.pci.devfn = 0;
} else {
id.pci.bus = bridge->bus->number;
id.pci.devfn = bridge->devfn;
}
if (pci_is_pcie(bridge))
dma_pdev = pci_get_domain_bus_and_slot(
pci_domain_nr(pdev->bus),
bridge->subordinate->number, 0);
else
dma_pdev = pci_dev_get(bridge);
} else
dma_pdev = pci_dev_get(pdev);
swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
if (dma_pdev->multifunction &&
!pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS))
swap_pci_ref(&dma_pdev,
pci_get_slot(dma_pdev->bus,
PCI_DEVFN(PCI_SLOT(dma_pdev->devfn),
0)));
while (!pci_is_root_bus(dma_pdev->bus)) {
if (pci_acs_path_enabled(dma_pdev->bus->self,
NULL, REQ_ACS_FLAGS))
break;
swap_pci_ref(&dma_pdev, pci_dev_get(dma_pdev->bus->self));
}
if (!pdev->is_virtfn && iommu_group_mf)
id.pci.devfn = PCI_DEVFN(PCI_SLOT(id.pci.devfn), 0);
group = iommu_group_get(&dma_pdev->dev);
pci_dev_put(dma_pdev);
if (!group) {
group = iommu_group_alloc();
if (IS_ERR(group))
return PTR_ERR(group);
}
*groupid = id.group;
ret = iommu_group_add_device(group, dev);
return 0;
iommu_group_put(group);
return ret;
}
static void intel_iommu_remove_device(struct device *dev)
{
iommu_group_remove_device(dev);
}
static struct iommu_ops intel_iommu_ops = {
@@ -4147,7 +4169,8 @@ static struct iommu_ops intel_iommu_ops = {
.unmap = intel_iommu_unmap,
.iova_to_phys = intel_iommu_iova_to_phys,
.domain_has_cap = intel_iommu_domain_has_cap,
.device_group = intel_iommu_device_group,
.add_device = intel_iommu_add_device,
.remove_device = intel_iommu_remove_device,
.pgsize_bitmap = INTEL_IOMMU_PGSIZES,
};
+587 -36
View File
File diff suppressed because it is too large Load Diff
+5
View File
@@ -1,6 +1,11 @@
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/msi.h>
#include <asm/hw_irq.h>
#include <asm/irq_remapping.h>
#include "irq_remapping.h"
+5
View File
@@ -226,6 +226,11 @@ static int msm_iommu_domain_init(struct iommu_domain *domain)
memset(priv->pgtable, 0, SZ_16K);
domain->priv = priv;
domain->geometry.aperture_start = 0;
domain->geometry.aperture_end = (1ULL << 32) - 1;
domain->geometry.force_aperture = true;
return 0;
fail_nomem:
+90
View File
@@ -0,0 +1,90 @@
/*
* OF helpers for IOMMU
*
* Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <linux/export.h>
#include <linux/limits.h>
#include <linux/of.h>
/**
* of_get_dma_window - Parse *dma-window property and returns 0 if found.
*
* @dn: device node
* @prefix: prefix for property name if any
* @index: index to start to parse
* @busno: Returns busno if supported. Otherwise pass NULL
* @addr: Returns address that DMA starts
* @size: Returns the range that DMA can handle
*
* This supports different formats flexibly. "prefix" can be
* configured if any. "busno" and "index" are optionally
* specified. Set 0(or NULL) if not used.
*/
int of_get_dma_window(struct device_node *dn, const char *prefix, int index,
unsigned long *busno, dma_addr_t *addr, size_t *size)
{
const __be32 *dma_window, *end;
int bytes, cur_index = 0;
char propname[NAME_MAX], addrname[NAME_MAX], sizename[NAME_MAX];
if (!dn || !addr || !size)
return -EINVAL;
if (!prefix)
prefix = "";
snprintf(propname, sizeof(propname), "%sdma-window", prefix);
snprintf(addrname, sizeof(addrname), "%s#dma-address-cells", prefix);
snprintf(sizename, sizeof(sizename), "%s#dma-size-cells", prefix);
dma_window = of_get_property(dn, propname, &bytes);
if (!dma_window)
return -ENODEV;
end = dma_window + bytes / sizeof(*dma_window);
while (dma_window < end) {
u32 cells;
const void *prop;
/* busno is one cell if supported */
if (busno)
*busno = be32_to_cpup(dma_window++);
prop = of_get_property(dn, addrname, NULL);
if (!prop)
prop = of_get_property(dn, "#address-cells", NULL);
cells = prop ? be32_to_cpup(prop) : of_n_addr_cells(dn);
if (!cells)
return -EINVAL;
*addr = of_read_number(dma_window, cells);
dma_window += cells;
prop = of_get_property(dn, sizename, NULL);
cells = prop ? be32_to_cpup(prop) : of_n_size_cells(dn);
if (!cells)
return -EINVAL;
*size = of_read_number(dma_window, cells);
dma_window += cells;
if (cur_index++ == index)
break;
}
return 0;
}
EXPORT_SYMBOL_GPL(of_get_dma_window);
+4
View File
@@ -1148,6 +1148,10 @@ static int omap_iommu_domain_init(struct iommu_domain *domain)
domain->priv = omap_domain;
domain->geometry.aperture_start = 0;
domain->geometry.aperture_end = (1ULL << 32) - 1;
domain->geometry.force_aperture = true;
return 0;
fail_nomem:

Some files were not shown because too many files have changed in this diff Show More