Merge branch 'perf/urgent' into perf/core

Conflicts:
	tools/perf/util/python.c

Merge reason: resolve the conflict with perf/urgent.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Ingo Molnar
2011-06-04 12:28:05 +02:00
47 changed files with 844 additions and 203 deletions
+4 -1
View File
@@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
With this option on every unmap_single operation will
result in a hardware IOTLB flush operation as opposed
to batching them for performance.
sp_off [Default Off]
By default, super page will be supported if Intel IOMMU
has the capability. With this option, super page will
not be supported.
intremap= [X86-64, Intel-IOMMU]
Format: { on (default) | off | nosid }
on enable Interrupt Remapping (default)
+1 -1
View File
@@ -1,5 +1,5 @@
# This creates the demonstration utility "lguest" which runs a Linux guest.
# Missing headers? Add "-I../../include -I../../arch/x86/include"
# Missing headers? Add "-I../../../include -I../../../arch/x86/include"
CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE
all: lguest
+2 -20
View File
@@ -49,7 +49,7 @@
#include <linux/virtio_rng.h>
#include <linux/virtio_ring.h>
#include <asm/bootparam.h>
#include "../../include/linux/lguest_launcher.h"
#include "../../../include/linux/lguest_launcher.h"
/*L:110
* We can ignore the 42 include files we need for this program, but I do want
* to draw attention to the use of kernel-style types.
@@ -135,9 +135,6 @@ struct device {
/* Is it operational */
bool running;
/* Does Guest want an intrrupt on empty? */
bool irq_on_empty;
/* Device-specific data. */
void *priv;
};
@@ -637,10 +634,7 @@ static void trigger_irq(struct virtqueue *vq)
/* If they don't want an interrupt, don't send one... */
if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
/* ... unless they've asked us to force one on empty. */
if (!vq->dev->irq_on_empty
|| lg_last_avail(vq) != vq->vring.avail->idx)
return;
return;
}
/* Send the Guest an interrupt tell them we used something up. */
@@ -1057,15 +1051,6 @@ static void create_thread(struct virtqueue *vq)
close(vq->eventfd);
}
static bool accepted_feature(struct device *dev, unsigned int bit)
{
const u8 *features = get_feature_bits(dev) + dev->feature_len;
if (dev->feature_len < bit / CHAR_BIT)
return false;
return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT));
}
static void start_device(struct device *dev)
{
unsigned int i;
@@ -1079,8 +1064,6 @@ static void start_device(struct device *dev)
verbose(" %02x", get_feature_bits(dev)
[dev->feature_len+i]);
dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
for (vq = dev->vq; vq; vq = vq->next) {
if (vq->service)
create_thread(vq);
@@ -1564,7 +1547,6 @@ static void setup_tun_net(char *arg)
/* Set up the tun device. */
configure_device(ipfd, tapif, ip);
add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
/* Expect Guest to handle everything except UFO */
add_feature(dev, VIRTIO_NET_F_CSUM);
add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
+2 -1
View File
@@ -320,11 +320,12 @@
#define __NR_clock_adjtime 1328
#define __NR_syncfs 1329
#define __NR_setns 1330
#define __NR_sendmmsg 1331
#ifdef __KERNEL__
#define NR_syscalls 307 /* length of syscall table */
#define NR_syscalls 308 /* length of syscall table */
/*
* The following defines stop scripts/checksyscalls.sh from complaining about
+1
View File
@@ -1776,6 +1776,7 @@ sys_call_table:
data8 sys_clock_adjtime
data8 sys_syncfs
data8 sys_setns // 1330
data8 sys_sendmmsg
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
+2 -1
View File
@@ -715,7 +715,8 @@ static struct syscore_ops pmacpic_syscore_ops = {
static int __init init_pmacpic_syscore(void)
{
register_syscore_ops(&pmacpic_syscore_ops);
if (pmac_irq_hw[0])
register_syscore_ops(&pmacpic_syscore_ops);
return 0;
}
+2
View File
@@ -8,6 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_pvclock.o = -pg
@@ -28,6 +29,7 @@ CFLAGS_paravirt.o := $(nostackp)
GCOV_PROFILE_vsyscall_64.o := n
GCOV_PROFILE_hpet.o := n
GCOV_PROFILE_tsc.o := n
GCOV_PROFILE_vread_tsc_64.o := n
GCOV_PROFILE_paravirt.o := n
# vread_tsc_64 is hot and should be fully optimized:
+1 -1
View File
@@ -642,7 +642,7 @@ static int __init idle_setup(char *str)
boot_option_idle_override = IDLE_POLL;
} else if (!strcmp(str, "mwait")) {
boot_option_idle_override = IDLE_FORCE_MWAIT;
WARN_ONCE(1, "\idle=mwait\" will be removed in 2012\"\n");
WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n");
} else if (!strcmp(str, "halt")) {
/*
* When the boot option of idle=halt is added, halt is
+1 -1
View File
@@ -1332,7 +1332,7 @@ static inline void mwait_play_dead(void)
void *mwait_ptr;
struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))
if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
return;
if (!this_cpu_has(X86_FEATURE_CLFLSH))
return;
+1
View File
@@ -993,6 +993,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
static void lguest_time_init(void)
{
/* Set up the timer interrupt (0) to go to our simple timer routine */
lguest_setup_irq(0);
irq_set_handler(0, lguest_time_irq);
clocksource_register_hz(&lguest_clock, NSEC_PER_SEC);
-1
View File
@@ -321,7 +321,6 @@ static void pcd_init_units(void)
strcpy(disk->disk_name, cd->name); /* umm... */
disk->fops = &pcd_bdops;
disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
disk->events = DISK_EVENT_MEDIA_CHANGE;
}
}
+80 -11
View File
@@ -6,10 +6,13 @@
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
#include <linux/string_helpers.h>
#include <scsi/scsi_cmnd.h>
#define PART_BITS 4
static int major, index;
struct workqueue_struct *virtblk_wq;
struct virtio_blk
{
@@ -26,6 +29,9 @@ struct virtio_blk
mempool_t *pool;
/* Process context for config space updates */
struct work_struct config_work;
/* What host tells us, plus 2 for header & tailer. */
unsigned int sg_elems;
@@ -141,7 +147,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96);
sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
sizeof(vbr->in_hdr));
}
@@ -291,6 +297,46 @@ static ssize_t virtblk_serial_show(struct device *dev,
}
DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
static void virtblk_config_changed_work(struct work_struct *work)
{
struct virtio_blk *vblk =
container_of(work, struct virtio_blk, config_work);
struct virtio_device *vdev = vblk->vdev;
struct request_queue *q = vblk->disk->queue;
char cap_str_2[10], cap_str_10[10];
u64 capacity, size;
/* Host must always specify the capacity. */
vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
&capacity, sizeof(capacity));
/* If capacity is too big, truncate with warning. */
if ((sector_t)capacity != capacity) {
dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
(unsigned long long)capacity);
capacity = (sector_t)-1;
}
size = capacity * queue_logical_block_size(q);
string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
dev_notice(&vdev->dev,
"new size: %llu %d-byte logical blocks (%s/%s)\n",
(unsigned long long)capacity,
queue_logical_block_size(q),
cap_str_10, cap_str_2);
set_capacity(vblk->disk, capacity);
}
static void virtblk_config_changed(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
queue_work(virtblk_wq, &vblk->config_work);
}
static int __devinit virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
@@ -327,6 +373,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
vblk->vdev = vdev;
vblk->sg_elems = sg_elems;
sg_init_table(vblk->sg, vblk->sg_elems);
INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
/* We expect one virtqueue, for output. */
vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
@@ -477,6 +524,8 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
flush_work(&vblk->config_work);
/* Nothing should be pending. */
BUG_ON(!list_empty(&vblk->reqs));
@@ -508,27 +557,47 @@ static unsigned int features[] = {
* Use __refdata to avoid this warning.
*/
static struct virtio_driver __refdata virtio_blk = {
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtblk_probe,
.remove = __devexit_p(virtblk_remove),
.feature_table = features,
.feature_table_size = ARRAY_SIZE(features),
.driver.name = KBUILD_MODNAME,
.driver.owner = THIS_MODULE,
.id_table = id_table,
.probe = virtblk_probe,
.remove = __devexit_p(virtblk_remove),
.config_changed = virtblk_config_changed,
};
static int __init init(void)
{
int error;
virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
if (!virtblk_wq)
return -ENOMEM;
major = register_blkdev(0, "virtblk");
if (major < 0)
return major;
return register_virtio_driver(&virtio_blk);
if (major < 0) {
error = major;
goto out_destroy_workqueue;
}
error = register_virtio_driver(&virtio_blk);
if (error)
goto out_unregister_blkdev;
return 0;
out_unregister_blkdev:
unregister_blkdev(major, "virtblk");
out_destroy_workqueue:
destroy_workqueue(virtblk_wq);
return error;
}
static void __exit fini(void)
{
unregister_blkdev(major, "virtblk");
unregister_virtio_driver(&virtio_blk);
destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);
-1
View File
@@ -627,7 +627,6 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
gendisk->fops = &viocd_fops;
gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE |
GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
gendisk->events = DISK_EVENT_MEDIA_CHANGE;
set_capacity(gendisk, 0);
gendisk->private_data = d;
d->viocd_disk = gendisk;
-5
View File
@@ -1677,17 +1677,12 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
portdev->config.max_nr_ports = 1;
if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) {
multiport = true;
vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT;
vdev->config->get(vdev, offsetof(struct virtio_console_config,
max_nr_ports),
&portdev->config.max_nr_ports,
sizeof(portdev->config.max_nr_ports));
}
/* Let the Host know we support multiple ports.*/
vdev->config->finalize_features(vdev);
err = init_vqs(portdev);
if (err < 0) {
dev_err(&vdev->dev, "Error %d initializing vqs\n", err);
-1
View File
@@ -1782,7 +1782,6 @@ static int ide_cd_probe(ide_drive_t *drive)
ide_cd_read_toc(drive, &sense);
g->fops = &idecd_ops;
g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
g->events = DISK_EVENT_MEDIA_CHANGE;
add_disk(g);
return 0;
+4 -1
View File
@@ -304,7 +304,10 @@ static int check_and_rewind_pc(char *put_str, char *arg)
return 1;
}
/* Readjust the instruction pointer if needed */
instruction_pointer_set(&kgdbts_regs, ip + offset);
ip += offset;
#ifdef GDB_ADJUSTS_BREAK_OFFSET
instruction_pointer_set(&kgdbts_regs, ip);
#endif
return 0;
}
+1 -1
View File
@@ -609,7 +609,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
* before it gets out of hand. Naturally, this wastes entries. */
if (capacity < 2+MAX_SKB_FRAGS) {
netif_stop_queue(dev);
if (unlikely(!virtqueue_enable_cb(vi->svq))) {
if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) {
/* More just got used, free them then recheck. */
capacity += free_old_xmit_skbs(vi);
if (capacity >= 2+MAX_SKB_FRAGS) {
+1 -6
View File
@@ -698,12 +698,7 @@ int __init detect_intel_iommu(void)
{
#ifdef CONFIG_INTR_REMAP
struct acpi_table_dmar *dmar;
/*
* for now we will disable dma-remapping when interrupt
* remapping is enabled.
* When support for queued invalidation for IOTLB invalidation
* is added, we will not need this any more.
*/
dmar = (struct acpi_table_dmar *) dmar_tbl;
if (ret && cpu_has_x2apic && dmar->flags & 0x1)
printk(KERN_INFO
+201 -39
View File
@@ -47,6 +47,8 @@
#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE
#define IS_BRIDGE_HOST_DEVICE(pdev) \
((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
@@ -116,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level)
return (pfn + level_size(level) - 1) & level_mask(level);
}
static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
{
return 1 << ((lvl - 1) * LEVEL_STRIDE);
}
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
@@ -142,6 +149,12 @@ static struct intel_iommu **g_iommus;
static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
/*
* set to 1 to panic kernel if can't successfully enable VT-d
* (used when kernel is launched w/ TXT)
*/
static int force_on = 0;
/*
* 0: Present
* 1-11: Reserved
@@ -338,6 +351,9 @@ struct dmar_domain {
int iommu_coherency;/* indicate coherency of iommu access */
int iommu_snooping; /* indicate snooping control feature*/
int iommu_count; /* reference count of iommu */
int iommu_superpage;/* Level of superpages supported:
0 == 4KiB (no superpages), 1 == 2MiB,
2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
spinlock_t iommu_lock; /* protect iommu set in domain */
u64 max_addr; /* maximum mapped address */
};
@@ -387,6 +403,7 @@ int dmar_disabled = 1;
static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
static int intel_iommu_superpage = 1;
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
@@ -417,6 +434,10 @@ static int __init intel_iommu_setup(char *str)
printk(KERN_INFO
"Intel-IOMMU: disable batched IOTLB flush\n");
intel_iommu_strict = 1;
} else if (!strncmp(str, "sp_off", 6)) {
printk(KERN_INFO
"Intel-IOMMU: disable supported super page\n");
intel_iommu_superpage = 0;
}
str += strcspn(str, ",");
@@ -555,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain)
}
}
static void domain_update_iommu_superpage(struct dmar_domain *domain)
{
int i, mask = 0xf;
if (!intel_iommu_superpage) {
domain->iommu_superpage = 0;
return;
}
domain->iommu_superpage = 4; /* 1TiB */
for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
mask |= cap_super_page_val(g_iommus[i]->cap);
if (!mask) {
break;
}
}
domain->iommu_superpage = fls(mask);
}
/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain_update_iommu_snooping(domain);
domain_update_iommu_superpage(domain);
}
static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
@@ -689,23 +731,31 @@ out:
}
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
unsigned long pfn)
unsigned long pfn, int large_level)
{
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
struct dma_pte *parent, *pte = NULL;
int level = agaw_to_level(domain->agaw);
int offset;
int offset, target_level;
BUG_ON(!domain->pgd);
BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
parent = domain->pgd;
/* Search pte */
if (!large_level)
target_level = 1;
else
target_level = large_level;
while (level > 0) {
void *tmp_page;
offset = pfn_level_offset(pfn, level);
pte = &parent[offset];
if (level == 1)
if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
break;
if (level == target_level)
break;
if (!dma_pte_present(pte)) {
@@ -733,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
return pte;
}
/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
unsigned long pfn,
int level)
int level, int *large_page)
{
struct dma_pte *parent, *pte = NULL;
int total = agaw_to_level(domain->agaw);
@@ -749,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
if (level == total)
return pte;
if (!dma_pte_present(pte))
if (!dma_pte_present(pte)) {
*large_page = total;
break;
}
if (pte->val & DMA_PTE_LARGE_PAGE) {
*large_page = total;
return pte;
}
parent = phys_to_virt(dma_pte_addr(pte));
total--;
}
@@ -763,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
unsigned long last_pfn)
{
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
unsigned int large_page = 1;
struct dma_pte *first_pte, *pte;
BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
@@ -771,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
/* we don't need lock here; nobody else touches the iova range */
do {
first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
large_page = 1;
first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
if (!pte) {
start_pfn = align_to_level(start_pfn + 1, 2);
start_pfn = align_to_level(start_pfn + 1, large_page + 1);
continue;
}
do {
do {
dma_clear_pte(pte);
start_pfn++;
start_pfn += lvl_to_nr_pages(large_page);
pte++;
} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
@@ -798,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
int total = agaw_to_level(domain->agaw);
int level;
unsigned long tmp;
int large_page = 2;
BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
@@ -813,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
return;
do {
first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
large_page = level;
first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
if (large_page > level)
level = large_page + 1;
if (!pte) {
tmp = align_to_level(tmp + 1, level + 1);
continue;
@@ -1397,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
else
domain->iommu_snooping = 0;
domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
domain->iommu_count = 1;
domain->nid = iommu->node;
@@ -1417,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain)
if (!domain)
return;
/* Flush any lazy unmaps that may reference this domain */
if (!intel_iommu_strict)
flush_unmaps_timeout(0);
domain_remove_dev_info(domain);
/* destroy iovas */
put_iova_domain(&domain->iovad);
@@ -1648,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr,
return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
}
/* Return largest possible superpage level for a given mapping */
static inline int hardware_largepage_caps(struct dmar_domain *domain,
unsigned long iov_pfn,
unsigned long phy_pfn,
unsigned long pages)
{
int support, level = 1;
unsigned long pfnmerge;
support = domain->iommu_superpage;
/* To use a large page, the virtual *and* physical addresses
must be aligned to 2MiB/1GiB/etc. Lower bits set in either
of them will mean we have to use smaller pages. So just
merge them and check both at once. */
pfnmerge = iov_pfn | phy_pfn;
while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
pages >>= VTD_STRIDE_SHIFT;
if (!pages)
break;
pfnmerge >>= VTD_STRIDE_SHIFT;
level++;
support--;
}
return level;
}
static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
struct scatterlist *sg, unsigned long phys_pfn,
unsigned long nr_pages, int prot)
@@ -1656,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
phys_addr_t uninitialized_var(pteval);
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
unsigned long sg_res;
unsigned int largepage_lvl = 0;
unsigned long lvl_pages = 0;
BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
@@ -1671,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
}
while (nr_pages--) {
while (nr_pages > 0) {
uint64_t tmp;
if (!sg_res) {
@@ -1679,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
sg->dma_length = sg->length;
pteval = page_to_phys(sg_page(sg)) | prot;
phys_pfn = pteval >> VTD_PAGE_SHIFT;
}
if (!pte) {
first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
if (!pte)
return -ENOMEM;
/* It is large page*/
if (largepage_lvl > 1)
pteval |= DMA_PTE_LARGE_PAGE;
else
pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
}
/* We don't need lock here, nobody else
* touches the iova range
@@ -1699,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
}
WARN_ON(1);
}
lvl_pages = lvl_to_nr_pages(largepage_lvl);
BUG_ON(nr_pages < lvl_pages);
BUG_ON(sg_res < lvl_pages);
nr_pages -= lvl_pages;
iov_pfn += lvl_pages;
phys_pfn += lvl_pages;
pteval += lvl_pages * VTD_PAGE_SIZE;
sg_res -= lvl_pages;
/* If the next PTE would be the first in a new page, then we
need to flush the cache on the entries we've just written.
And then we'll need to recalculate 'pte', so clear it and
let it get set again in the if (!pte) block above.
If we're done (!nr_pages) we need to flush the cache too.
Also if we've been setting superpages, we may need to
recalculate 'pte' and switch back to smaller pages for the
end of the mapping, if the trailing size is not enough to
use another superpage (i.e. sg_res < lvl_pages). */
pte++;
if (!nr_pages || first_pte_in_page(pte)) {
if (!nr_pages || first_pte_in_page(pte) ||
(largepage_lvl > 1 && sg_res < lvl_pages)) {
domain_flush_cache(domain, first_pte,
(void *)pte - (void *)first_pte);
pte = NULL;
}
iov_pfn++;
pteval += VTD_PAGE_SIZE;
sg_res--;
if (!sg_res)
if (!sg_res && nr_pages)
sg = sg_next(sg);
}
return 0;
@@ -2016,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
return 0;
return iommu_prepare_identity_map(pdev, rmrr->base_address,
rmrr->end_address + 1);
rmrr->end_address);
}
#ifdef CONFIG_DMAR_FLOPPY_WA
@@ -2030,7 +2162,7 @@ static inline void iommu_prepare_isa(void)
return;
printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
if (ret)
printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
@@ -2106,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev)
if (likely(!iommu_identity_mapping))
return 0;
info = pdev->dev.archdata.iommu;
if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
return (info->domain == si_domain);
list_for_each_entry(info, &si_domain->devices, link)
if (info->dev == pdev)
return 1;
return 0;
}
@@ -2187,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
* Assume that they will -- if they turn out not to be, then we can
* take them out of the 1:1 domain later.
*/
if (!startup)
return pdev->dma_mask > DMA_BIT_MASK(32);
if (!startup) {
/*
* If the device's dma_mask is less than the system's memory
* size then this is not a candidate for identity mapping.
*/
u64 dma_mask = pdev->dma_mask;
if (pdev->dev.coherent_dma_mask &&
pdev->dev.coherent_dma_mask < dma_mask)
dma_mask = pdev->dev.coherent_dma_mask;
return dma_mask >= dma_get_required_mask(&pdev->dev);
}
return 1;
}
@@ -2203,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
return -EFAULT;
for_each_pci_dev(pdev) {
/* Skip Host/PCI Bridge devices */
if (IS_BRIDGE_HOST_DEVICE(pdev))
continue;
if (iommu_should_identity_map(pdev, 1)) {
printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
hw ? "hardware" : "software", pci_name(pdev));
@@ -2218,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw)
return 0;
}
static int __init init_dmars(int force_on)
static int __init init_dmars(void)
{
struct dmar_drhd_unit *drhd;
struct dmar_rmrr_unit *rmrr;
@@ -2592,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
iommu = domain_get_iommu(domain);
size = aligned_nrpages(paddr, size);
iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
pdev->dma_mask);
iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
if (!iova)
goto error;
@@ -3118,7 +3263,17 @@ static int init_iommu_hw(void)
if (iommu->qi)
dmar_reenable_qi(iommu);
for_each_active_iommu(iommu, drhd) {
for_each_iommu(iommu, drhd) {
if (drhd->ignored) {
/*
* we always have to disable PMRs or DMA may fail on
* this device
*/
if (force_on)
iommu_disable_protect_mem_regions(iommu);
continue;
}
iommu_flush_write_buffer(iommu);
iommu_set_root_entry(iommu);
@@ -3127,7 +3282,8 @@ static int init_iommu_hw(void)
DMA_CCMD_GLOBAL_INVL);
iommu->flush.flush_iotlb(iommu, 0, 0, 0,
DMA_TLB_GLOBAL_FLUSH);
iommu_enable_translation(iommu);
if (iommu_enable_translation(iommu))
return 1;
iommu_disable_protect_mem_regions(iommu);
}
@@ -3194,7 +3350,10 @@ static void iommu_resume(void)
unsigned long flag;
if (init_iommu_hw()) {
WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
if (force_on)
panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
else
WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
return;
}
@@ -3271,7 +3430,6 @@ static struct notifier_block device_nb = {
int __init intel_iommu_init(void)
{
int ret = 0;
int force_on = 0;
/* VT-d is required for a TXT/tboot launch, so enforce that */
force_on = tboot_force_iommu();
@@ -3309,7 +3467,7 @@ int __init intel_iommu_init(void)
init_no_remapping_devices();
ret = init_dmars(force_on);
ret = init_dmars();
if (ret) {
if (force_on)
panic("tboot: Failed to initialize DMARs\n");
@@ -3380,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
spin_lock_irqsave(&device_domain_lock, flags);
list_for_each_safe(entry, tmp, &domain->devices) {
info = list_entry(entry, struct device_domain_info, link);
/* No need to compare PCI domain; it has to be the same */
if (info->bus == pdev->bus->number &&
if (info->segment == pci_domain_nr(pdev->bus) &&
info->bus == pdev->bus->number &&
info->devfn == pdev->devfn) {
list_del(&info->link);
list_del(&info->global);
@@ -3419,10 +3577,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain,
domain_update_iommu_cap(domain);
spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
spin_lock_irqsave(&iommu->lock, tmp_flags);
clear_bit(domain->id, iommu->domain_ids);
iommu->domains[domain->id] = NULL;
spin_unlock_irqrestore(&iommu->lock, tmp_flags);
if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
!(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
spin_lock_irqsave(&iommu->lock, tmp_flags);
clear_bit(domain->id, iommu->domain_ids);
iommu->domains[domain->id] = NULL;
spin_unlock_irqrestore(&iommu->lock, tmp_flags);
}
}
spin_unlock_irqrestore(&device_domain_lock, flags);
@@ -3505,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
domain->iommu_count = 0;
domain->iommu_coherency = 0;
domain->iommu_snooping = 0;
domain->iommu_superpage = 0;
domain->max_addr = 0;
domain->nid = -1;
@@ -3720,7 +3882,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
struct dma_pte *pte;
u64 phys = 0;
pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
if (pte)
phys = dma_pte_addr(pte);
+10 -2
View File
@@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
curr = iovad->cached32_node;
cached_iova = container_of(curr, struct iova, node);
if (free->pfn_lo >= cached_iova->pfn_lo)
iovad->cached32_node = rb_next(&free->node);
if (free->pfn_lo >= cached_iova->pfn_lo) {
struct rb_node *node = rb_next(&free->node);
struct iova *iova = container_of(node, struct iova, node);
/* only cache if it's below 32bit pfn */
if (node && iova->pfn_lo < iovad->dma_32bit_pfn)
iovad->cached32_node = node;
else
iovad->cached32_node = NULL;
}
}
/* Computes the padding size required, to make the

Some files were not shown because too many files have changed in this diff Show More