From 9fa65e09730a2e63c5b45d008f269b2b271b74bc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 28 Jun 2013 16:04:02 +0200 Subject: [PATCH 01/80] writeback: Fix periodic writeback after fs mount commit a5faeaf9109578e65e1a32e2a3e76c8b47e7dcb6 upstream. Code in blkdev.c moves a device inode to default_backing_dev_info when the last reference to the device is put and moves the device inode back to its bdi when the first reference is acquired. This includes moving to wb.b_dirty list if the device inode is dirty. The code however doesn't setup timer to wake corresponding flusher thread and while wb.b_dirty list is non-empty __mark_inode_dirty() will not set it up either. Thus periodic writeback is effectively disabled until a sync(2) call which can lead to unexpected data loss in case of crash or power failure. Fix the problem by setting up a timer for periodic writeback in case we add the first dirty inode to wb.b_dirty list in bdev_inode_switch_bdi(). Reported-by: Bert De Jonghe Signed-off-by: Jan Kara Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/block_dev.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index 2091db8cdd78..85f5c85ec91c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -58,17 +58,24 @@ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { struct backing_dev_info *old = inode->i_data.backing_dev_info; + bool wakeup_bdi = false; if (unlikely(dst == old)) /* deadlock avoidance */ return; bdi_lock_two(&old->wb, &dst->wb); spin_lock(&inode->i_lock); inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) + if (inode->i_state & I_DIRTY) { + if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) + wakeup_bdi = true; list_move(&inode->i_wb_list, &dst->wb.b_dirty); + } spin_unlock(&inode->i_lock); spin_unlock(&old->wb.list_lock); spin_unlock(&dst->wb.list_lock); + + if (wakeup_bdi) + bdi_wakeup_thread_delayed(dst); } /* Kill _all_ buffers and pagecache , dirty or not.. */ From b692b29868295c504e1e9c306762aa89ab3997a2 Mon Sep 17 00:00:00 2001 From: Olivier DANET Date: Wed, 10 Jul 2013 13:56:10 -0700 Subject: [PATCH 02/80] sparc32: vm_area_struct access for old Sun SPARCs. upstream commit 961246b4ed8da3bcf4ee1eb9147f341013553e3c. Commit e4c6bfd2d79d063017ab19a18915f0bc759f32d9 ("mm: rearrange vm_area_struct for fewer cache misses") changed the layout of the vm_area_struct structure, it broke several SPARC32 assembly routines which used numerical constants for accessing the vm_mm field. This patch defines the VMA_VM_MM constant to replace the immediate values. Signed-off-by: Olivier DANET Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- arch/sparc/kernel/asm-offsets.c | 2 ++ arch/sparc/mm/hypersparc.S | 8 ++++---- arch/sparc/mm/swift.S | 8 ++++---- arch/sparc/mm/tsunami.S | 6 +++--- arch/sparc/mm/viking.S | 10 +++++----- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c index 961b87f99e69..f76389a32342 100644 --- a/arch/sparc/kernel/asm-offsets.c +++ b/arch/sparc/kernel/asm-offsets.c @@ -49,6 +49,8 @@ int foo(void) DEFINE(AOFF_task_thread, offsetof(struct task_struct, thread)); BLANK(); DEFINE(AOFF_mm_context, offsetof(struct mm_struct, context)); + BLANK(); + DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); /* DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28); */ return 0; diff --git a/arch/sparc/mm/hypersparc.S b/arch/sparc/mm/hypersparc.S index 44aad32eeb4e..969f96450f69 100644 --- a/arch/sparc/mm/hypersparc.S +++ b/arch/sparc/mm/hypersparc.S @@ -74,7 +74,7 @@ hypersparc_flush_cache_mm_out: /* The things we do for performance... */ hypersparc_flush_cache_range: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 #ifndef CONFIG_SMP ld [%o0 + AOFF_mm_context], %g1 cmp %g1, -1 @@ -163,7 +163,7 @@ hypersparc_flush_cache_range_out: */ /* Verified, my ass... */ hypersparc_flush_cache_page: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 ld [%o0 + AOFF_mm_context], %g2 #ifndef CONFIG_SMP cmp %g2, -1 @@ -284,7 +284,7 @@ hypersparc_flush_tlb_mm_out: sta %g5, [%g1] ASI_M_MMUREGS hypersparc_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 @@ -307,7 +307,7 @@ hypersparc_flush_tlb_range_out: sta %g5, [%g1] ASI_M_MMUREGS hypersparc_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 diff --git a/arch/sparc/mm/swift.S b/arch/sparc/mm/swift.S index c801c3953a00..5d2b88d39424 100644 --- a/arch/sparc/mm/swift.S +++ b/arch/sparc/mm/swift.S @@ -105,7 +105,7 @@ swift_flush_cache_mm_out: .globl swift_flush_cache_range swift_flush_cache_range: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 sub %o2, %o1, %o2 sethi %hi(4096), %o3 cmp %o2, %o3 @@ -116,7 +116,7 @@ swift_flush_cache_range: .globl swift_flush_cache_page swift_flush_cache_page: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 70: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -219,7 +219,7 @@ swift_flush_sig_insns: .globl swift_flush_tlb_range .globl swift_flush_tlb_all swift_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 swift_flush_tlb_mm: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -233,7 +233,7 @@ swift_flush_tlb_all_out: .globl swift_flush_tlb_page swift_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 diff --git a/arch/sparc/mm/tsunami.S b/arch/sparc/mm/tsunami.S index 4e55e8f76648..bf10a345fa8b 100644 --- a/arch/sparc/mm/tsunami.S +++ b/arch/sparc/mm/tsunami.S @@ -24,7 +24,7 @@ /* Sliiick... */ tsunami_flush_cache_page: tsunami_flush_cache_range: - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 tsunami_flush_cache_mm: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -46,7 +46,7 @@ tsunami_flush_sig_insns: /* More slick stuff... */ tsunami_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 tsunami_flush_tlb_mm: ld [%o0 + AOFF_mm_context], %g2 cmp %g2, -1 @@ -65,7 +65,7 @@ tsunami_flush_tlb_out: /* This one can be done in a fine grained manner... */ tsunami_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 andn %o1, (PAGE_SIZE - 1), %o1 diff --git a/arch/sparc/mm/viking.S b/arch/sparc/mm/viking.S index bf8ee0613ae7..852257fcc82b 100644 --- a/arch/sparc/mm/viking.S +++ b/arch/sparc/mm/viking.S @@ -108,7 +108,7 @@ viking_mxcc_flush_page: viking_flush_cache_page: viking_flush_cache_range: #ifndef CONFIG_SMP - ld [%o0 + 0x0], %o0 /* XXX vma->vm_mm, GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 #endif viking_flush_cache_mm: #ifndef CONFIG_SMP @@ -148,7 +148,7 @@ viking_flush_tlb_mm: #endif viking_flush_tlb_range: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 @@ -173,7 +173,7 @@ viking_flush_tlb_range: #endif viking_flush_tlb_page: - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 mov SRMMU_CTX_REG, %g1 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 @@ -239,7 +239,7 @@ sun4dsmp_flush_tlb_range: tst %g5 bne 3f mov SRMMU_CTX_REG, %g1 - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 sethi %hi(~((1 << SRMMU_PGDIR_SHIFT) - 1)), %o4 @@ -265,7 +265,7 @@ sun4dsmp_flush_tlb_page: tst %g5 bne 2f mov SRMMU_CTX_REG, %g1 - ld [%o0 + 0x00], %o0 /* XXX vma->vm_mm GROSS XXX */ + ld [%o0 + VMA_VM_MM], %o0 ld [%o0 + AOFF_mm_context], %o3 lda [%g1] ASI_M_MMUREGS, %g5 and %o1, PAGE_MASK, %o1 From ce08aa04215ea3309c47677f55d3b52d31d6a97a Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 27 Jun 2013 22:46:04 +0200 Subject: [PATCH 03/80] ipv6: only apply anti-spoofing checks to not-pointopoint tunnels [ Upstream commit 5c29fb12e8fb8a8105ea048cb160fd79a85a52bb ] Because of commit 218774dc341f219bfcf940304a081b121a0e8099 ("ipv6: add anti-spoofing checks for 6to4 and 6rd") the sit driver dropped packets for 2002::/16 destinations and sources even when configured to work as a tunnel with fixed endpoint. We may only apply the 6rd/6to4 anti-spoofing checks if the device is not in pointopoint mode. This was an oversight from me in the above commit, sorry. Thanks to Roman Mamedov for reporting this! Reported-by: Roman Mamedov Cc: David Miller Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 335363478bbf..60df36d15390 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -589,7 +589,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel->dev->stats.rx_errors++; goto out; } - } else { + } else if (!(tunnel->dev->flags&IFF_POINTOPOINT)) { if (is_spoofed_6rd(tunnel, iph->saddr, &ipv6_hdr(skb)->saddr) || is_spoofed_6rd(tunnel, iph->daddr, From ac294f13d331cb3e315fd922ae505619f853818b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 28 Jun 2013 02:37:42 -0700 Subject: [PATCH 04/80] neighbour: fix a race in neigh_destroy() [ Upstream commit c9ab4d85de222f3390c67aedc9c18a50e767531e ] There is a race in neighbour code, because neigh_destroy() uses skb_queue_purge(&neigh->arp_queue) without holding neighbour lock, while other parts of the code assume neighbour rwlock is what protects arp_queue Convert all skb_queue_purge() calls to the __skb_queue_purge() variant Use __skb_queue_head_init() instead of skb_queue_head_init() to make clear we do not use arp_queue.lock And hold neigh->lock in neigh_destroy() to close the race. Reported-by: Joe Jin Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5c56b217b999..ce90b0264db5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -231,7 +231,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) we must kill timers etc. and move it to safe state. */ - skb_queue_purge(&n->arp_queue); + __skb_queue_purge(&n->arp_queue); n->arp_queue_len_bytes = 0; n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) @@ -286,7 +286,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device if (!n) goto out_entries; - skb_queue_head_init(&n->arp_queue); + __skb_queue_head_init(&n->arp_queue); rwlock_init(&n->lock); seqlock_init(&n->ha_lock); n->updated = n->used = now; @@ -708,7 +708,9 @@ void neigh_destroy(struct neighbour *neigh) if (neigh_del_timer(neigh)) pr_warn("Impossible event\n"); - skb_queue_purge(&neigh->arp_queue); + write_lock_bh(&neigh->lock); + __skb_queue_purge(&neigh->arp_queue); + write_unlock_bh(&neigh->lock); neigh->arp_queue_len_bytes = 0; if (dev->netdev_ops->ndo_neigh_destroy) @@ -858,7 +860,7 @@ static void neigh_invalidate(struct neighbour *neigh) neigh->ops->error_report(neigh, skb); write_lock(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } @@ -1210,7 +1212,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, write_lock_bh(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } out: From ea4c218f2c3e67f1bca8078ff1f80d7e0c2ab791 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 28 Jun 2013 12:13:52 -0400 Subject: [PATCH 05/80] x25: Fix broken locking in ioctl error paths. [ Upstream commit 4ccb93ce7439b63c31bc7597bfffd13567fa483d ] Two of the x25 ioctl cases have error paths that break out of the function without unlocking the socket, leading to this warning: ================================================ [ BUG: lock held when returning to user space! ] 3.10.0-rc7+ #36 Not tainted ------------------------------------------------ trinity-child2/31407 is leaving the kernel with locks still held! 1 lock held by trinity-child2/31407: #0: (sk_lock-AF_X25){+.+.+.}, at: [] x25_ioctl+0x8a/0x740 [x25] Signed-off-by: Dave Jones Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/x25/af_x25.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 37ca9694aabe..22c88d2e6846 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1583,11 +1583,11 @@ out_cud_release: case SIOCX25CALLACCPTAPPRV: { rc = -EINVAL; lock_sock(sk); - if (sk->sk_state != TCP_CLOSE) - break; - clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + if (sk->sk_state == TCP_CLOSE) { + clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + rc = 0; + } release_sock(sk); - rc = 0; break; } @@ -1595,14 +1595,15 @@ out_cud_release: rc = -EINVAL; lock_sock(sk); if (sk->sk_state != TCP_ESTABLISHED) - break; + goto out_sendcallaccpt_release; /* must call accptapprv above */ if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) - break; + goto out_sendcallaccpt_release; x25_write_internal(sk, X25_CALL_ACCEPTED); x25->state = X25_STATE_3; - release_sock(sk); rc = 0; +out_sendcallaccpt_release: + release_sock(sk); break; } From 51778da544ca660615ea02b09d395430159b4d0c Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 29 Jun 2013 00:15:51 +0800 Subject: [PATCH 06/80] net: Swap ver and type in pppoe_hdr [ Upstream commit b1a5a34bd0b8767ea689e68f8ea513e9710b671e ] Ver and type in pppoe_hdr should be swapped as defined by RFC2516 section-4. Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/if_pppox.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/if_pppox.h b/include/uapi/linux/if_pppox.h index 0b46fd57c8f6..e36a4aecd311 100644 --- a/include/uapi/linux/if_pppox.h +++ b/include/uapi/linux/if_pppox.h @@ -135,11 +135,11 @@ struct pppoe_tag { struct pppoe_hdr { #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 ver : 4; __u8 type : 4; + __u8 ver : 4; #elif defined(__BIG_ENDIAN_BITFIELD) - __u8 type : 4; __u8 ver : 4; + __u8 type : 4; #else #error "Please fix " #endif From 3d3fa8bca30d8fd9cf5b95bd3eb9d7e2a50dec1c Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Jun 2013 12:02:59 +0800 Subject: [PATCH 07/80] gre: fix a regression in ioctl [ Upstream commit 6c734fb8592f6768170e48e7102cb2f0a1bb9759 ] When testing GRE tunnel, I got: # ip tunnel show get tunnel gre0 failed: Invalid argument get tunnel gre1 failed: Invalid argument This is a regression introduced by commit c54419321455631079c7d ("GRE: Refactor GRE tunneling code.") because previously we only check the parameters for SIOCADDTUNNEL and SIOCCHGTUNNEL, after that commit, the check is moved for all commands. So, just check for SIOCADDTUNNEL and SIOCCHGTUNNEL. After this patch I got: # ip tunnel show gre0: gre/ip remote any local any ttl inherit nopmtudisc gre1: gre/ip remote 192.168.122.101 local 192.168.122.45 ttl inherit Signed-off-by: Cong Wang Cc: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_gre.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2a83591492dd..855004f08325 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -503,10 +503,11 @@ static int ipgre_tunnel_ioctl(struct net_device *dev, if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { - return -EINVAL; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) + return -EINVAL; } p.i_flags = gre_flags_to_tnl_flags(p.i_flags); p.o_flags = gre_flags_to_tnl_flags(p.o_flags); From da04e7df0494a9a6ccfe2aada8a262e38c284914 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 29 Jun 2013 13:00:57 +0800 Subject: [PATCH 08/80] vti: remove duplicated code to fix a memory leak [ Upstream commit ab6c7a0a43c2eaafa57583822b619b22637b49c7 ] vti module allocates dev->tstats twice: in vti_fb_tunnel_init() and in vti_tunnel_init(), this lead to a memory leak of dev->tstats. Just remove the duplicated operations in vti_fb_tunnel_init(). (candidate for -stable) Signed-off-by: Cong Wang Cc: Stephen Hemminger Cc: Saurabh Mohan Acked-by: Stephen Hemminger Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_vti.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c118f6b576bb..17cc0ffa8c0d 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -606,17 +606,10 @@ static int __net_init vti_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id); - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - iph->version = 4; iph->protocol = IPPROTO_IPIP; iph->ihl = 5; - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - dev_hold(dev); rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); return 0; From 36bddbad5049d3f3916abc9f594526d47ad3ab84 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Sat, 29 Jun 2013 21:30:49 +0800 Subject: [PATCH 09/80] ipv6,mcast: always hold idev->lock before mca_lock [ Upstream commit 8965779d2c0e6ab246c82a405236b1fb2adae6b2, with some bits from commit b7b1bfce0bb68bd8f6e62a28295922785cc63781 ("ipv6: split duplicate address detection and router solicitation timer") to get the __ipv6_get_lladdr() used by this patch. ] dingtianhong reported the following deadlock detected by lockdep: ====================================================== [ INFO: possible circular locking dependency detected ] 3.4.24.05-0.1-default #1 Not tainted ------------------------------------------------------- ksoftirqd/0/3 is trying to acquire lock: (&ndev->lock){+.+...}, at: [] ipv6_get_lladdr+0x74/0x120 but task is already holding lock: (&mc->mca_lock){+.+...}, at: [] mld_send_report+0x40/0x150 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&mc->mca_lock){+.+...}: [] validate_chain+0x637/0x730 [] __lock_acquire+0x2f7/0x500 [] lock_acquire+0x114/0x150 [] rt_spin_lock+0x4a/0x60 [] igmp6_group_added+0x3b/0x120 [] ipv6_mc_up+0x38/0x60 [] ipv6_find_idev+0x3d/0x80 [] addrconf_notify+0x3d5/0x4b0 [] notifier_call_chain+0x3f/0x80 [] raw_notifier_call_chain+0x11/0x20 [] call_netdevice_notifiers+0x32/0x60 [] __dev_notify_flags+0x34/0x80 [] dev_change_flags+0x40/0x70 [] do_setlink+0x237/0x8a0 [] rtnl_newlink+0x3ec/0x600 [] rtnetlink_rcv_msg+0x160/0x310 [] netlink_rcv_skb+0x89/0xb0 [] rtnetlink_rcv+0x27/0x40 [] netlink_unicast+0x140/0x180 [] netlink_sendmsg+0x33e/0x380 [] sock_sendmsg+0x112/0x130 [] __sys_sendmsg+0x44e/0x460 [] sys_sendmsg+0x44/0x70 [] system_call_fastpath+0x16/0x1b -> #0 (&ndev->lock){+.+...}: [] check_prev_add+0x3de/0x440 [] validate_chain+0x637/0x730 [] __lock_acquire+0x2f7/0x500 [] lock_acquire+0x114/0x150 [] rt_read_lock+0x42/0x60 [] ipv6_get_lladdr+0x74/0x120 [] mld_newpack+0xb6/0x160 [] add_grhead+0xab/0xc0 [] add_grec+0x3ab/0x460 [] mld_send_report+0x5a/0x150 [] igmp6_timer_handler+0x4e/0xb0 [] call_timer_fn+0xca/0x1d0 [] run_timer_softirq+0x1df/0x2e0 [] handle_pending_softirqs+0xf7/0x1f0 [] __do_softirq_common+0x7b/0xf0 [] __thread_do_softirq+0x1af/0x210 [] run_ksoftirqd+0xe1/0x1f0 [] kthread+0xae/0xc0 [] kernel_thread_helper+0x4/0x10 actually we can just hold idev->lock before taking pmc->mca_lock, and avoid taking idev->lock again when iterating idev->addr_list, since the upper callers of mld_newpack() already take read_lock_bh(&idev->lock). Reported-by: dingtianhong Cc: dingtianhong Cc: Hideaki YOSHIFUJI Cc: David S. Miller Cc: Hannes Frederic Sowa Tested-by: Ding Tianhong Tested-by: Chen Weilong Signed-off-by: Cong Wang Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/addrconf.h | 3 +++ net/ipv6/addrconf.c | 28 ++++++++++++++++++---------- net/ipv6/mcast.c | 18 ++++++++++-------- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 21f702704f24..01b1a1ad77d2 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -86,6 +86,9 @@ extern int ipv6_dev_get_saddr(struct net *net, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); +extern int __ipv6_get_lladdr(struct inet6_dev *idev, + struct in6_addr *addr, + unsigned char banned_flags); extern int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, unsigned char banned_flags); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4ab4c38958c6..fb8c94c4ab86 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1448,6 +1448,23 @@ try_nextdev: } EXPORT_SYMBOL(ipv6_dev_get_saddr); +int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, + unsigned char banned_flags) +{ + struct inet6_ifaddr *ifp; + int err = -EADDRNOTAVAIL; + + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & banned_flags)) { + *addr = ifp->addr; + err = 0; + break; + } + } + return err; +} + int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, unsigned char banned_flags) { @@ -1457,17 +1474,8 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - struct inet6_ifaddr *ifp; - read_lock_bh(&idev->lock); - list_for_each_entry(ifp, &idev->addr_list, if_list) { - if (ifp->scope == IFA_LINK && - !(ifp->flags & banned_flags)) { - *addr = ifp->addr; - err = 0; - break; - } - } + err = __ipv6_get_lladdr(idev, addr, banned_flags); read_unlock_bh(&idev->lock); } rcu_read_unlock(); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bfa6cc36ef2a..c3998c2bbc5a 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1343,8 +1343,9 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, hdr->daddr = *daddr; } -static struct sk_buff *mld_newpack(struct net_device *dev, int size) +static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) { + struct net_device *dev = idev->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.igmp_sk; struct sk_buff *skb; @@ -1369,7 +1370,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) skb_reserve(skb, hlen); - if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { + if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { /* : * use unspecified address as the source address * when a valid link-local address is not available. @@ -1465,7 +1466,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_grec *pgr; if (!skb) - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(pmc->idev, dev->mtu); if (!skb) return NULL; pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec)); @@ -1485,7 +1486,8 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) { - struct net_device *dev = pmc->idev->dev; + struct inet6_dev *idev = pmc->idev; + struct net_device *dev = idev->dev; struct mld2_report *pmr; struct mld2_grec *pgr = NULL; struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; @@ -1514,7 +1516,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(idev, dev->mtu); } } first = 1; @@ -1541,7 +1543,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, pgr->grec_nsrcs = htons(scount); if (skb) mld_sendpack(skb); - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(idev, dev->mtu); first = 1; scount = 0; } @@ -1596,8 +1598,8 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) struct sk_buff *skb = NULL; int type; + read_lock_bh(&idev->lock); if (!pmc) { - read_lock_bh(&idev->lock); for (pmc=idev->mc_list; pmc; pmc=pmc->next) { if (pmc->mca_flags & MAF_NOREPORT) continue; @@ -1609,7 +1611,6 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->mca_lock); } - read_unlock_bh(&idev->lock); } else { spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) @@ -1619,6 +1620,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->mca_lock); } + read_unlock_bh(&idev->lock); if (skb) mld_sendpack(skb); } From edb42cad3392ba6a1cd3a688cfb7af16d6b2137f Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 2 Jul 2013 10:57:33 -0700 Subject: [PATCH 10/80] ip_tunnels: Use skb-len to PMTU check. [ Upstream commit 23a3647bc4f93bac3776c66dc2c7f7f68b3cd662 ] In path mtu check, ip header total length works for gre device but not for gre-tap device. Use skb len which is consistent for all tunneling types. This is old bug in gre. This also fixes mtu calculation bug introduced by commit c54419321455631079c7d (GRE: Refactor GRE tunneling code). Reported-by: Timo Teras Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 97 ++++++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 43 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 7fa8f08fa7ae..d05bd02886ed 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -486,6 +486,53 @@ drop: } EXPORT_SYMBOL_GPL(ip_tunnel_rcv); +static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, + struct rtable *rt, __be16 df) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int pkt_size = skb->len - tunnel->hlen; + int mtu; + + if (df) + mtu = dst_mtu(&rt->dst) - dev->hard_header_len + - sizeof(struct iphdr) - tunnel->hlen; + else + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IP)) { + if (!skb_is_gso(skb) && + (df & htons(IP_DF)) && mtu < pkt_size) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + return -E2BIG; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && + mtu >= IPV6_MIN_MTU) { + if ((tunnel->parms.iph.daddr && + !ipv4_is_multicast(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); + } + } + + if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && + mtu < pkt_size) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + return -E2BIG; + } + } +#endif + return 0; +} + void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params) { @@ -499,7 +546,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct net_device *tdev; /* Device to other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; - int mtu; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); @@ -579,50 +625,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - df = tnl_params->frag_off; - if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - - sizeof(struct iphdr); - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if (skb->protocol == htons(ETH_P_IP)) { - df |= (inner_iph->frag_off&htons(IP_DF)); - - if (!skb_is_gso(skb) && - (inner_iph->frag_off&htons(IP_DF)) && - mtu < ntohs(inner_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + ip_rt_put(rt); + goto tx_error; } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - - if (rt6 && mtu < dst_mtu(skb_dst(skb)) && - mtu >= IPV6_MIN_MTU) { - if ((tunnel->parms.iph.daddr && - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || - rt6->rt6i_dst.plen == 128) { - rt6->rt6i_flags |= RTF_MODIFIED; - dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); - } - } - - if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && - mtu < skb->len) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ip_rt_put(rt); - goto tx_error; - } - } -#endif if (tunnel->err_count > 0) { if (time_before(jiffies, @@ -646,6 +653,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } + df = tnl_params->frag_off; + if (skb->protocol == htons(ETH_P_IP)) + df |= (inner_iph->frag_off&htons(IP_DF)); + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) + rt->dst.header_len; if (max_headroom > dev->needed_headroom) { From ed7f614ab8c605aadfa163d468a980642b3929da Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 2 Jul 2013 09:02:07 +0800 Subject: [PATCH 11/80] l2tp: add missing .owner to struct pppox_proto [ Upstream commit e1558a93b61962710733dc8c11a2bc765607f1cd ] Add missing .owner of struct pppox_proto. This prevents the module from being removed from underneath its users. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/l2tp/l2tp_ppp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 8dec6876dc50..5ebee2ded9e9 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -1793,7 +1793,8 @@ static const struct proto_ops pppol2tp_ops = { static const struct pppox_proto pppol2tp_proto = { .create = pppol2tp_create, - .ioctl = pppol2tp_ioctl + .ioctl = pppol2tp_ioctl, + .owner = THIS_MODULE, }; #ifdef CONFIG_L2TP_V3 From 0e7eadef8d9f5c3ebb664afcbaaad031321bfa58 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 2 Jul 2013 14:49:34 +0800 Subject: [PATCH 12/80] ipip: fix a regression in ioctl [ Upstream commit 3b7b514f44bff05d26a6499c4d4fac2a83938e6e ] This is a regression introduced by commit fd58156e456d9f68fe0448 (IPIP: Use ip-tunneling code.) Similar to GRE tunnel, previously we only check the parameters for SIOCADDTUNNEL and SIOCCHGTUNNEL, after that commit, the check is moved for all commands. So, just check for SIOCADDTUNNEL and SIOCCHGTUNNEL. Also, the check for i_key, o_key etc. is suspicious too, which did not exist before, reset them before passing to ip_tunnel_ioctl(). Signed-off-by: Cong Wang Cc: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipip.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 77bfcce64fe5..7cfc45624b6d 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -240,11 +240,13 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) - return -EINVAL; - if (p.i_key || p.o_key || p.i_flags || p.o_flags) - return -EINVAL; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + return -EINVAL; + } + + p.i_key = p.o_key = p.i_flags = p.o_flags = 0; if (p.iph.ttl) p.iph.frag_off |= htons(IP_DF); From 07243c3d963dc56facf2512f51783448f5d9cf6f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 1 Jul 2013 20:21:30 +0200 Subject: [PATCH 13/80] ipv6: call udp_push_pending_frames when uncorking a socket with AF_INET pending data [ Upstream commit 8822b64a0fa64a5dd1dfcf837c5b0be83f8c05d1 ] We accidentally call down to ip6_push_pending_frames when uncorking pending AF_INET data on a ipv6 socket. This results in the following splat (from Dave Jones): skbuff: skb_under_panic: text:ffffffff816765f6 len:48 put:40 head:ffff88013deb6df0 data:ffff88013deb6dec tail:0x2c end:0xc0 dev: ------------[ cut here ]------------ kernel BUG at net/core/skbuff.c:126! invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC Modules linked in: dccp_ipv4 dccp 8021q garp bridge stp dlci mpoa snd_seq_dummy sctp fuse hidp tun bnep nfnetlink scsi_transport_iscsi rfcomm can_raw can_bcm af_802154 appletalk caif_socket can caif ipt_ULOG x25 rose af_key pppoe pppox ipx phonet irda llc2 ppp_generic slhc p8023 psnap p8022 llc crc_ccitt atm bluetooth +netrom ax25 nfc rfkill rds af_rxrpc coretemp hwmon kvm_intel kvm crc32c_intel snd_hda_codec_realtek ghash_clmulni_intel microcode pcspkr snd_hda_codec_hdmi snd_hda_intel snd_hda_codec snd_hwdep usb_debug snd_seq snd_seq_device snd_pcm e1000e snd_page_alloc snd_timer ptp snd pps_core soundcore xfs libcrc32c CPU: 2 PID: 8095 Comm: trinity-child2 Not tainted 3.10.0-rc7+ #37 task: ffff8801f52c2520 ti: ffff8801e6430000 task.ti: ffff8801e6430000 RIP: 0010:[] [] skb_panic+0x63/0x65 RSP: 0018:ffff8801e6431de8 EFLAGS: 00010282 RAX: 0000000000000086 RBX: ffff8802353d3cc0 RCX: 0000000000000006 RDX: 0000000000003b90 RSI: ffff8801f52c2ca0 RDI: ffff8801f52c2520 RBP: ffff8801e6431e08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff88022ea0c800 R13: ffff88022ea0cdf8 R14: ffff8802353ecb40 R15: ffffffff81cc7800 FS: 00007f5720a10740(0000) GS:ffff880244c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000005862000 CR3: 000000022843c000 CR4: 00000000001407e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600 Stack: ffff88013deb6dec 000000000000002c 00000000000000c0 ffffffff81a3f6e4 ffff8801e6431e18 ffffffff8159a9aa ffff8801e6431e90 ffffffff816765f6 ffffffff810b756b 0000000700000002 ffff8801e6431e40 0000fea9292aa8c0 Call Trace: [] skb_push+0x3a/0x40 [] ip6_push_pending_frames+0x1f6/0x4d0 [] ? mark_held_locks+0xbb/0x140 [] udp_v6_push_pending_frames+0x2b9/0x3d0 [] ? udplite_getfrag+0x20/0x20 [] udp_lib_setsockopt+0x1aa/0x1f0 [] ? fget_light+0x387/0x4f0 [] udpv6_setsockopt+0x34/0x40 [] sock_common_setsockopt+0x14/0x20 [] SyS_setsockopt+0x71/0xd0 [] tracesys+0xdd/0xe2 Code: 00 00 48 89 44 24 10 8b 87 d8 00 00 00 48 89 44 24 08 48 8b 87 e8 00 00 00 48 c7 c7 c0 04 aa 81 48 89 04 24 31 c0 e8 e1 7e ff ff <0f> 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 48 89 e5 0f 0b 55 RIP [] skb_panic+0x63/0x65 RSP This patch adds a check if the pending data is of address family AF_INET and directly calls udp_push_ending_frames from udp_v6_push_pending_frames if that is the case. This bug was found by Dave Jones with trinity. (Also move the initialization of fl6 below the AF_INET check, even if not strictly necessary.) Signed-off-by: Hannes Frederic Sowa Cc: Dave Jones Cc: YOSHIFUJI Hideaki Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/udp.h | 1 + net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 7 ++++++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 065f379c6503..ad99eedc6168 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -181,6 +181,7 @@ extern int udp_get_port(struct sock *sk, unsigned short snum, extern void udp_err(struct sk_buff *, u32); extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len); +extern int udp_push_pending_frames(struct sock *sk); extern void udp_flush_pending_frames(struct sock *sk); extern int udp_rcv(struct sk_buff *skb); extern int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0bf5d399a03c..93b731d53221 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -799,7 +799,7 @@ send: /* * Push out all pending data as one UDP datagram. Socket is locked. */ -static int udp_push_pending_frames(struct sock *sk) +int udp_push_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -818,6 +818,7 @@ out: up->pending = 0; return err; } +EXPORT_SYMBOL(udp_push_pending_frames); int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 42923b14dfa6..e7b28f9bb02b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -955,11 +955,16 @@ static int udp_v6_push_pending_frames(struct sock *sk) struct udphdr *uh; struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + struct flowi6 *fl6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; + if (up->pending == AF_INET) + return udp_push_pending_frames(sk); + + fl6 = &inet->cork.fl.u.ip6; + /* Grab the skbuff where UDP header space exists. */ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) goto out; From 7852c5bf367a01828edd02974c0a185b1be7b577 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 2 Jul 2013 08:04:05 +0200 Subject: [PATCH 14/80] ipv6: ip6_append_data_mtu did not care about pmtudisc and frag_size [ Upstream commit 75a493e60ac4bbe2e977e7129d6d8cbb0dd236be ] If the socket had an IPV6_MTU value set, ip6_append_data_mtu lost track of this when appending the second frame on a corked socket. This results in the following splat: [37598.993962] ------------[ cut here ]------------ [37598.994008] kernel BUG at net/core/skbuff.c:2064! [37598.994008] invalid opcode: 0000 [#1] SMP [37598.994008] Modules linked in: tcp_lp uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_core videodev media vfat fat usb_storage fuse ebtable_nat xt_CHECKSUM bridge stp llc ipt_MASQUERADE nf_conntrack_netbios_ns nf_conntrack_broadcast ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat +nf_nat_ipv4 nf_nat iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i cxgb3 mdio libcxgbi ib_iser rdma_cm ib_addr iw_cm ib_cm ib_sa ib_mad ib_core iscsi_tcp libiscsi_tcp libiscsi +scsi_transport_iscsi rfcomm bnep iTCO_wdt iTCO_vendor_support snd_hda_codec_conexant arc4 iwldvm mac80211 snd_hda_intel acpi_cpufreq mperf coretemp snd_hda_codec microcode cdc_wdm cdc_acm [37598.994008] snd_hwdep cdc_ether snd_seq snd_seq_device usbnet mii joydev btusb snd_pcm bluetooth i2c_i801 e1000e lpc_ich mfd_core ptp iwlwifi pps_core snd_page_alloc mei cfg80211 snd_timer thinkpad_acpi snd tpm_tis soundcore rfkill tpm tpm_bios vhost_net tun macvtap macvlan kvm_intel kvm uinput binfmt_misc +dm_crypt i915 i2c_algo_bit drm_kms_helper drm i2c_core wmi video [37598.994008] CPU 0 [37598.994008] Pid: 27320, comm: t2 Not tainted 3.9.6-200.fc18.x86_64 #1 LENOVO 27744PG/27744PG [37598.994008] RIP: 0010:[] [] skb_copy_and_csum_bits+0x325/0x330 [37598.994008] RSP: 0018:ffff88003670da18 EFLAGS: 00010202 [37598.994008] RAX: ffff88018105c018 RBX: 0000000000000004 RCX: 00000000000006c0 [37598.994008] RDX: ffff88018105a6c0 RSI: ffff88018105a000 RDI: ffff8801e1b0aa00 [37598.994008] RBP: ffff88003670da78 R08: 0000000000000000 R09: ffff88018105c040 [37598.994008] R10: ffff8801e1b0aa00 R11: 0000000000000000 R12: 000000000000fff8 [37598.994008] R13: 00000000000004fc R14: 00000000ffff0504 R15: 0000000000000000 [37598.994008] FS: 00007f28eea59740(0000) GS:ffff88023bc00000(0000) knlGS:0000000000000000 [37598.994008] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [37598.994008] CR2: 0000003d935789e0 CR3: 00000000365cb000 CR4: 00000000000407f0 [37598.994008] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [37598.994008] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [37598.994008] Process t2 (pid: 27320, threadinfo ffff88003670c000, task ffff88022c162ee0) [37598.994008] Stack: [37598.994008] ffff88022e098a00 ffff88020f973fc0 0000000000000008 00000000000004c8 [37598.994008] ffff88020f973fc0 00000000000004c4 ffff88003670da78 ffff8801e1b0a200 [37598.994008] 0000000000000018 00000000000004c8 ffff88020f973fc0 00000000000004c4 [37598.994008] Call Trace: [37598.994008] [] ip6_append_data+0xccf/0xfe0 [37598.994008] [] ? ip_copy_metadata+0x1a0/0x1a0 [37598.994008] [] ? _raw_spin_lock_bh+0x16/0x40 [37598.994008] [] udpv6_sendmsg+0x1ed/0xc10 [37598.994008] [] ? sock_has_perm+0x75/0x90 [37598.994008] [] inet_sendmsg+0x63/0xb0 [37598.994008] [] ? selinux_socket_sendmsg+0x23/0x30 [37598.994008] [] sock_sendmsg+0xb0/0xe0 [37598.994008] [] ? __switch_to+0x181/0x4a0 [37598.994008] [] sys_sendto+0x12d/0x180 [37598.994008] [] ? __audit_syscall_entry+0x94/0xf0 [37598.994008] [] ? syscall_trace_enter+0x231/0x240 [37598.994008] [] tracesys+0xdd/0xe2 [37598.994008] Code: fe 07 00 00 48 c7 c7 04 28 a6 81 89 45 a0 4c 89 4d b8 44 89 5d a8 e8 1b ac b1 ff 44 8b 5d a8 4c 8b 4d b8 8b 45 a0 e9 cf fe ff ff <0f> 0b 66 0f 1f 84 00 00 00 00 00 66 66 66 66 90 55 48 89 e5 48 [37598.994008] RIP [] skb_copy_and_csum_bits+0x325/0x330 [37598.994008] RSP [37599.007323] ---[ end trace d69f6a17f8ac8eee ]--- While there, also check if path mtu discovery is activated for this socket. The logic was adapted from ip6_append_data when first writing on the corked socket. This bug was introduced with commit 0c1833797a5a6ec23ea9261d979aa18078720b74 ("ipv6: fix incorrect ipsec fragment"). v2: a) Replace IPV6_PMTU_DISC_DO with IPV6_PMTUDISC_PROBE. b) Don't pass ipv6_pinfo to ip6_append_data_mtu (suggestion by Gao feng, thanks!). c) Change mtu to unsigned int, else we get a warning about non-matching types because of the min()-macro type-check. Acked-by: Gao feng Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_output.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d5d20cde8d92..6e3ddf806ec2 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1098,11 +1098,12 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; } -static void ip6_append_data_mtu(int *mtu, +static void ip6_append_data_mtu(unsigned int *mtu, int *maxfraglen, unsigned int fragheaderlen, struct sk_buff *skb, - struct rt6_info *rt) + struct rt6_info *rt, + bool pmtuprobe) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { @@ -1114,7 +1115,9 @@ static void ip6_append_data_mtu(int *mtu, * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = dst_mtu(rt->dst.path); + *mtu = min(*mtu, pmtuprobe ? + rt->dst.dev->mtu : + dst_mtu(rt->dst.path)); } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1131,11 +1134,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen; + unsigned int maxfraglen, fragheaderlen, mtu; int exthdrlen; int dst_exthdrlen; int hh_len; - int mtu; int copy; int err; int offset = 0; @@ -1292,7 +1294,9 @@ alloc_new_skb: /* update mtu and maxfraglen if necessary */ if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, - fragheaderlen, skb, rt); + fragheaderlen, skb, rt, + np->pmtudisc == + IPV6_PMTUDISC_PROBE); skb_prev = skb; From c3a5491228f90fd82e67b7ccd2fcbaf984d44c07 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 3 Jul 2013 20:45:04 +0200 Subject: [PATCH 15/80] ipv6: rt6_check_neigh should successfully verify neigh if no NUD information are available [ Upstream commit 3630d40067a21d4dfbadc6002bb469ce26ac5d52 ] After the removal of rt->n we do not create a neighbour entry at route insertion time (rt6_bind_neighbour is gone). As long as no neighbour is created because of "useful traffic" we skip this routing entry because rt6_check_neigh cannot pick up a valid neighbour (neigh == NULL) and thus returns false. This change was introduced by commit 887c95cc1da53f66a5890fdeab13414613010097 ("ipv6: Complete neighbour entry removal from dst_entry.") To quote RFC4191: "If the host has no information about the router's reachability, then the host assumes the router is reachable." and also: "A host MUST NOT probe a router's reachability in the absence of useful traffic that the host would have sent to the router if it were reachable." So, just assume the router is reachable and let's rt6_probe do the rest. We don't need to create a neighbour on route insertion time. If we don't compile with CONFIG_IPV6_ROUTER_PREF (RFC4191 support) a neighbour is only valid if its nud_state is NUD_VALID. I did not find any references that we should probe the router on route insertion time via the other RFCs. So skip this route in that case. v2: a) use IS_ENABLED instead of #ifdefs (thanks to Sergei Shtylyov) Reported-by: Pierre Emeriaud Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad0aa6b0b86a..7f1332fc4346 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -547,6 +547,8 @@ static inline bool rt6_check_neigh(struct rt6_info *rt) ret = true; #endif read_unlock(&neigh->lock); + } else if (IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + ret = true; } rcu_read_unlock_bh(); From 1c6d3d1d21e522c9722cd6bdfd57afb171786028 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 4 Jul 2013 23:48:46 +0100 Subject: [PATCH 16/80] sfc: Fix memory leak when discarding scattered packets [ Upstream commit 734d4e159b283a4ae4d007b7e7a91d84398ccb92 ] Commit 2768935a4660 ('sfc: reuse pages to avoid DMA mapping/unmapping costs') did not fully take account of DMA scattering which was introduced immediately before. If a received packet is invalid and must be discarded, we only drop a reference to the first buffer's page, but we need to drop a reference for each buffer the packet used. I think this bug was missed partly because efx_recycle_rx_buffers() was not renamed and so no longer does what its name says. It does not change the state of buffers, but only prepares the underlying pages for recycling. Rename it accordingly. Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/rx.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index a7dfe36cabf4..5173eaac5bca 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -282,9 +282,9 @@ static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue, } /* Recycle the pages that are used by buffers that have just been received. */ -static void efx_recycle_rx_buffers(struct efx_channel *channel, - struct efx_rx_buffer *rx_buf, - unsigned int n_frags) +static void efx_recycle_rx_pages(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) { struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); @@ -294,6 +294,20 @@ static void efx_recycle_rx_buffers(struct efx_channel *channel, } while (--n_frags); } +static void efx_discard_rx_packet(struct efx_channel *channel, + struct efx_rx_buffer *rx_buf, + unsigned int n_frags) +{ + struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + + efx_recycle_rx_pages(channel, rx_buf, n_frags); + + do { + efx_free_rx_buffer(rx_buf); + rx_buf = efx_rx_buf_next(rx_queue, rx_buf); + } while (--n_frags); +} + /** * efx_fast_push_rx_descriptors - push new RX descriptors quickly * @rx_queue: RX descriptor queue @@ -533,8 +547,7 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, */ if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) { efx_rx_flush_packet(channel); - put_page(rx_buf->page); - efx_recycle_rx_buffers(channel, rx_buf, n_frags); + efx_discard_rx_packet(channel, rx_buf, n_frags); return; } @@ -570,9 +583,9 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index, efx_sync_rx_buffer(efx, rx_buf, rx_buf->len); } - /* All fragments have been DMA-synced, so recycle buffers and pages. */ + /* All fragments have been DMA-synced, so recycle pages. */ rx_buf = efx_rx_buffer(rx_queue, index); - efx_recycle_rx_buffers(channel, rx_buf, n_frags); + efx_recycle_rx_pages(channel, rx_buf, n_frags); /* Pipeline receives so that we give time for packet headers to be * prefetched into cache. From d1a2a1a6efee375e9ac00ec0a718503fd05b4302 Mon Sep 17 00:00:00 2001 From: Jongsung Kim Date: Tue, 9 Jul 2013 17:36:00 +0900 Subject: [PATCH 17/80] net/cadence/macb: fix bug/typo in extracting gem_irq_read_clear bit [ Upstream commit 01276ed2424eb78c95461545410923d5da154d31 ] Signed-off-by: Jongsung Kim Acked-by: Nicolas Ferre Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cadence/macb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index c89aa41dd448..b4e0dc832c69 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -1070,7 +1070,7 @@ static void macb_configure_dma(struct macb *bp) static void macb_configure_caps(struct macb *bp) { if (macb_is_gem(bp)) { - if (GEM_BF(IRQCOR, gem_readl(bp, DCFG1)) == 0) + if (GEM_BFEXT(IRQCOR, gem_readl(bp, DCFG1)) == 0) bp->caps |= MACB_CAPS_ISR_CLEAR_ON_WRITE; } } From c23b1ece6112ed0f227fdc881db33c6427b65222 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 9 Jul 2013 13:19:18 +0300 Subject: [PATCH 18/80] virtio: support unlocked queue poll [ Upstream commit cc229884d3f77ec3b1240e467e0236c3e0647c0c ] This adds a way to check ring empty state after enable_cb outside any locks. Will be used by virtio_net. Note: there's room for more optimization: caller is likely to have a memory barrier already, which means we might be able to get rid of a barrier here. Deferring this optimization until we do some benchmarking. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/virtio/virtio_ring.c | 70 ++++++++++++++++++++++++++---------- include/linux/virtio.h | 4 +++ 2 files changed, 55 insertions(+), 19 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 5217baf5528c..37d58f84dc50 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -606,6 +606,55 @@ void virtqueue_disable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_disable_cb); +/** + * virtqueue_enable_cb_prepare - restart callbacks after disable_cb + * @vq: the struct virtqueue we're talking about. + * + * This re-enables callbacks; it returns current queue state + * in an opaque unsigned value. This value should be later tested by + * virtqueue_poll, to detect a possible race between the driver checking for + * more work, and enabling callbacks. + * + * Caller must ensure we don't call this with other virtqueue + * operations at the same time (except where noted). + */ +unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 last_used_idx; + + START_USE(vq); + + /* We optimistically turn back on interrupts, then check if there was + * more to do. */ + /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ + vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + vring_used_event(&vq->vring) = last_used_idx = vq->last_used_idx; + END_USE(vq); + return last_used_idx; +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare); + +/** + * virtqueue_poll - query pending used buffers + * @vq: the struct virtqueue we're talking about. + * @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare). + * + * Returns "true" if there are pending used buffers in the queue. + * + * This does not need to be serialized. + */ +bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + + virtio_mb(vq->weak_barriers); + return (u16)last_used_idx != vq->vring.used->idx; +} +EXPORT_SYMBOL_GPL(virtqueue_poll); + /** * virtqueue_enable_cb - restart callbacks after disable_cb. * @vq: the struct virtqueue we're talking about. @@ -619,25 +668,8 @@ EXPORT_SYMBOL_GPL(virtqueue_disable_cb); */ bool virtqueue_enable_cb(struct virtqueue *_vq) { - struct vring_virtqueue *vq = to_vvq(_vq); - - START_USE(vq); - - /* We optimistically turn back on interrupts, then check if there was - * more to do. */ - /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to - * either clear the flags bit or point the event index at the next - * entry. Always do both to keep code simple. */ - vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; - vring_used_event(&vq->vring) = vq->last_used_idx; - virtio_mb(vq->weak_barriers); - if (unlikely(more_used(vq))) { - END_USE(vq); - return false; - } - - END_USE(vq); - return true; + unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq); + return !virtqueue_poll(_vq, last_used_idx); } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 9ff8645b7e0b..72398eea6e86 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -70,6 +70,10 @@ void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); +unsigned virtqueue_enable_cb_prepare(struct virtqueue *vq); + +bool virtqueue_poll(struct virtqueue *vq, unsigned); + bool virtqueue_enable_cb_delayed(struct virtqueue *vq); void *virtqueue_detach_unused_buf(struct virtqueue *vq); From 2b0e8a4ff83fd85afe5d0b84a2e1c6faa5fa56b8 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 9 Jul 2013 08:13:04 +0300 Subject: [PATCH 19/80] virtio_net: fix race in RX VQ processing [ Upstream commit cbdadbbf0c790f79350a8f36029208944c5487d0 ] virtio net called virtqueue_enable_cq on RX path after napi_complete, so with NAPI_STATE_SCHED clear - outside the implicit napi lock. This violates the requirement to synchronize virtqueue_enable_cq wrt virtqueue_add_buf. In particular, used event can move backwards, causing us to lose interrupts. In a debug build, this can trigger panic within START_USE. Jason Wang reports that he can trigger the races artificially, by adding udelay() in virtqueue_enable_cb() after virtio_mb(). However, we must call napi_complete to clear NAPI_STATE_SCHED before polling the virtqueue for used buffers, otherwise napi_schedule_prep in a callback will fail, causing us to lose RX events. To fix, call virtqueue_enable_cb_prepare with NAPI_STATE_SCHED set (under napi lock), later call virtqueue_poll with NAPI_STATE_SCHED clear (outside the lock). Reported-by: Jason Wang Tested-by: Jason Wang Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/virtio_net.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c9e00387d999..42d670a468f8 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -602,7 +602,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget) container_of(napi, struct receive_queue, napi); struct virtnet_info *vi = rq->vq->vdev->priv; void *buf; - unsigned int len, received = 0; + unsigned int r, len, received = 0; again: while (received < budget && @@ -619,8 +619,9 @@ again: /* Out of packets? */ if (received < budget) { + r = virtqueue_enable_cb_prepare(rq->vq); napi_complete(napi); - if (unlikely(!virtqueue_enable_cb(rq->vq)) && + if (unlikely(virtqueue_poll(rq->vq, r)) && napi_schedule_prep(napi)) { virtqueue_disable_cb(rq->vq); __napi_schedule(napi); From f5ce1d2513b74d4603769ed99636dc52144f02c4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 25 Jun 2013 17:29:46 +0300 Subject: [PATCH 20/80] vhost-net: fix use-after-free in vhost_net_flush [ Upstream commit c38e39c378f46f00ce922dd40a91043a9925c28d ] vhost_net_ubuf_put_and_wait has a confusing name: it will actually also free it's argument. Thus since commit 1280c27f8e29acf4af2da914e80ec27c3dbd5c01 "vhost-net: flush outstanding DMAs on memory change" vhost_net_flush tries to use the argument after passing it to vhost_net_ubuf_put_and_wait, this results in use after free. To fix, don't free the argument in vhost_net_ubuf_put_and_wait, add an new API for callers that want to free ubufs. Acked-by: Asias He Acked-by: Jason Wang Signed-off-by: Michael S. Tsirkin Signed-off-by: Greg Kroah-Hartman --- drivers/vhost/net.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index f80d3dd41d8c..8ca5ac71b845 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -150,6 +150,11 @@ static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs) { kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal); wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); +} + +static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs) +{ + vhost_net_ubuf_put_and_wait(ubufs); kfree(ubufs); } @@ -948,7 +953,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) mutex_unlock(&vq->mutex); if (oldubufs) { - vhost_net_ubuf_put_and_wait(oldubufs); + vhost_net_ubuf_put_wait_and_free(oldubufs); mutex_lock(&vq->mutex); vhost_zerocopy_signal_used(n, vq); mutex_unlock(&vq->mutex); @@ -966,7 +971,7 @@ err_used: rcu_assign_pointer(vq->private_data, oldsock); vhost_net_enable_vq(n, vq); if (ubufs) - vhost_net_ubuf_put_and_wait(ubufs); + vhost_net_ubuf_put_wait_and_free(ubufs); err_ubufs: fput(sock->file); err_vq: From c51a7a308e2ff38ae5ad3005a7ce8c333779f8fd Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 1 Jul 2013 16:49:22 -0500 Subject: [PATCH 21/80] sunvnet: vnet_port_remove must call unregister_netdev [ Upstream commit aabb9875d02559ab9b928cd6f259a5cc4c21a589 ] The missing call to unregister_netdev() leaves the interface active after the driver is unloaded by rmmod. Signed-off-by: Dave Kleikamp Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sun/sunvnet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 1df0ff3839e8..3df56840a3b9 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -1239,6 +1239,8 @@ static int vnet_port_remove(struct vio_dev *vdev) dev_set_drvdata(&vdev->dev, NULL); kfree(port); + + unregister_netdev(vp->dev); } return 0; } From 6683151aeea6c7aade12dde7ec2008fc380f0728 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Wed, 10 Jul 2013 12:04:02 +0800 Subject: [PATCH 22/80] ifb: fix rcu_sched self-detected stalls [ Upstream commit 440d57bc5ff55ec1efb3efc9cbe9420b4bbdfefa ] According to the commit 16b0dc29c1af9df341428f4c49ada4f626258082 (dummy: fix rcu_sched self-detected stalls) Eric Dumazet fix the problem in dummy, but the ifb will occur the same problem like the dummy modules. Trying to "modprobe ifb numifbs=30000" triggers : INFO: rcu_sched self-detected stall on CPU After this splat, RTNL is locked and reboot is needed. We must call cond_resched() to avoid this, even holding RTNL. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ifb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index dc9f6a45515d..a11f7a45cb5f 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -292,8 +292,10 @@ static int __init ifb_init_module(void) rtnl_lock(); err = __rtnl_link_register(&ifb_link_ops); - for (i = 0; i < numifbs && !err; i++) + for (i = 0; i < numifbs && !err; i++) { err = ifb_init_one(i); + cond_resched(); + } if (err) __rtnl_link_unregister(&ifb_link_ops); rtnl_unlock(); From d09ec76aebab878e0dc583484152e20a2172a7b4 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 10 Jul 2013 13:43:27 +0800 Subject: [PATCH 23/80] tuntap: correctly linearize skb when zerocopy is used [ Upstream commit 3dd5c3308e8b671e8e8882ba972f51cefbe9fd0d ] Userspace may produce vectors greater than MAX_SKB_FRAGS. When we try to linearize parts of the skb to let the rest of iov to be fit in the frags, we need count copylen into linear when calling tun_alloc_skb() instead of partly counting it into data_len. Since this breaks zerocopy_sg_from_iovec() since its inner counter assumes nr_frags should be zero at beginning. This cause nr_frags to be increased wrongly without setting the correct frags. This bug were introduced from 0690899b4d4501b3505be069b9a687e68ccbe15b (tun: experimental zero copy tx support) Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9c61f8734a40..c3cb60b1d1a2 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1044,7 +1044,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, { struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) }; struct sk_buff *skb; - size_t len = total_len, align = NET_SKB_PAD; + size_t len = total_len, align = NET_SKB_PAD, linear; struct virtio_net_hdr gso = { 0 }; int offset = 0; int copylen; @@ -1108,10 +1108,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, copylen = gso.hdr_len; if (!copylen) copylen = GOODCOPY_LEN; - } else + linear = copylen; + } else { copylen = len; + linear = gso.hdr_len; + } - skb = tun_alloc_skb(tfile, align, copylen, gso.hdr_len, noblock); + skb = tun_alloc_skb(tfile, align, copylen, linear, noblock); if (IS_ERR(skb)) { if (PTR_ERR(skb) != -EAGAIN) tun->dev->stats.rx_dropped++; From bd31fdd2f48f4e42c6a19d24fa85e59eb2ccf583 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 10 Jul 2013 13:43:28 +0800 Subject: [PATCH 24/80] macvtap: correctly linearize skb when zerocopy is used [ Upstream commit 61d46bf979d5cd7c164709a80ad5676a35494aae ] Userspace may produce vectors greater than MAX_SKB_FRAGS. When we try to linearize parts of the skb to let the rest of iov to be fit in the frags, we need count copylen into linear when calling macvtap_alloc_skb() instead of partly counting it into data_len. Since this breaks zerocopy_sg_from_iovec() since its inner counter assumes nr_frags should be zero at beginning. This cause nr_frags to be increased wrongly without setting the correct frags. This bug were introduced from b92946e2919134ebe2a4083e4302236295ea2a73 (macvtap: zerocopy: validate vectors before building skb). Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvtap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index b6dd6a75919a..502d9486395f 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -647,6 +647,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, int vnet_hdr_len = 0; int copylen = 0; bool zerocopy = false; + size_t linear; if (q->flags & IFF_VNET_HDR) { vnet_hdr_len = q->vnet_hdr_sz; @@ -701,11 +702,14 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, copylen = vnet_hdr.hdr_len; if (!copylen) copylen = GOODCOPY_LEN; - } else + linear = copylen; + } else { copylen = len; + linear = vnet_hdr.hdr_len; + } skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen, - vnet_hdr.hdr_len, noblock, &err); + linear, noblock, &err); if (!skb) goto err; From a025e28ad81b611a908bf234579b2fc58efcb371 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 10 Jul 2013 23:00:57 +0200 Subject: [PATCH 25/80] ipv6: in case of link failure remove route directly instead of letting it expire [ Upstream commit 1eb4f758286884e7566627164bca4c4a16952a83 ] We could end up expiring a route which is part of an ecmp route set. Doing so would invalidate the rt->rt6i_nsiblings calculations and could provoke the following panic: [ 80.144667] ------------[ cut here ]------------ [ 80.145172] kernel BUG at net/ipv6/ip6_fib.c:733! [ 80.145172] invalid opcode: 0000 [#1] SMP [ 80.145172] Modules linked in: 8021q nf_conntrack_netbios_ns nf_conntrack_broadcast ipt_MASQUERADE ip6table_mangle ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 iptable_nat nf_nat_ipv4 nf_nat iptable_mangle nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables +snd_hda_intel snd_hda_codec snd_hwdep snd_seq snd_seq_device snd_pcm snd_page_alloc snd_timer virtio_balloon snd soundcore i2c_piix4 i2c_core virtio_net virtio_blk [ 80.145172] CPU: 1 PID: 786 Comm: ping6 Not tainted 3.10.0+ #118 [ 80.145172] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 80.145172] task: ffff880117fa0000 ti: ffff880118770000 task.ti: ffff880118770000 [ 80.145172] RIP: 0010:[] [] fib6_add+0x75d/0x830 [ 80.145172] RSP: 0018:ffff880118771798 EFLAGS: 00010202 [ 80.145172] RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88011350e480 [ 80.145172] RDX: ffff88011350e238 RSI: 0000000000000004 RDI: ffff88011350f738 [ 80.145172] RBP: ffff880118771848 R08: ffff880117903280 R09: 0000000000000001 [ 80.145172] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88011350f680 [ 80.145172] R13: ffff880117903280 R14: ffff880118771890 R15: ffff88011350ef90 [ 80.145172] FS: 00007f02b5127740(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000 [ 80.145172] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 80.145172] CR2: 00007f981322a000 CR3: 00000001181b1000 CR4: 00000000000006e0 [ 80.145172] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 80.145172] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 80.145172] Stack: [ 80.145172] 0000000000000001 ffff880100000000 ffff880100000000 ffff880117903280 [ 80.145172] 0000000000000000 ffff880119a4cf00 0000000000000400 00000000000007fa [ 80.145172] 0000000000000000 0000000000000000 0000000000000000 ffff88011350f680 [ 80.145172] Call Trace: [ 80.145172] [] ? rt6_bind_peer+0x4b/0x90 [ 80.145172] [] __ip6_ins_rt+0x45/0x70 [ 80.145172] [] ip6_ins_rt+0x35/0x40 [ 80.145172] [] ip6_pol_route.isra.44+0x3a4/0x4b0 [ 80.145172] [] ip6_pol_route_output+0x2a/0x30 [ 80.145172] [] fib6_rule_action+0xd7/0x210 [ 80.145172] [] ? ip6_pol_route_input+0x30/0x30 [ 80.145172] [] fib_rules_lookup+0xc6/0x140 [ 80.145172] [] fib6_rule_lookup+0x44/0x80 [ 80.145172] [] ? ip6_pol_route_input+0x30/0x30 [ 80.145172] [] ip6_route_output+0x73/0xb0 [ 80.145172] [] ip6_dst_lookup_tail+0x2c3/0x2e0 [ 80.145172] [] ? list_del+0x11/0x40 [ 80.145172] [] ? remove_wait_queue+0x3c/0x50 [ 80.145172] [] ip6_dst_lookup_flow+0x3d/0xa0 [ 80.145172] [] rawv6_sendmsg+0x267/0xc20 [ 80.145172] [] inet_sendmsg+0x63/0xb0 [ 80.145172] [] ? selinux_socket_sendmsg+0x23/0x30 [ 80.145172] [] sock_sendmsg+0xa6/0xd0 [ 80.145172] [] SYSC_sendto+0x128/0x180 [ 80.145172] [] ? update_curr+0xec/0x170 [ 80.145172] [] ? kvm_clock_get_cycles+0x9/0x10 [ 80.145172] [] ? __getnstimeofday+0x3e/0xd0 [ 80.145172] [] SyS_sendto+0xe/0x10 [ 80.145172] [] system_call_fastpath+0x16/0x1b [ 80.145172] Code: fe ff ff 41 f6 45 2a 06 0f 85 ca fe ff ff 49 8b 7e 08 4c 89 ee e8 94 ef ff ff e9 b9 fe ff ff 48 8b 82 28 05 00 00 e9 01 ff ff ff <0f> 0b 49 8b 54 24 30 0d 00 00 40 00 89 83 14 01 00 00 48 89 53 [ 80.145172] RIP [] fib6_add+0x75d/0x830 [ 80.145172] RSP [ 80.387413] ---[ end trace 02f20b7a8b81ed95 ]--- [ 80.390154] Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Hannes Frederic Sowa Cc: Nicolas Dichtel Cc: YOSHIFUJI Hideaki Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 7f1332fc4346..262d6d8c7e89 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1076,10 +1076,13 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { - if (rt->rt6i_flags & RTF_CACHE) - rt6_update_expires(rt, 0); - else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) + if (rt->rt6i_flags & RTF_CACHE) { + dst_hold(&rt->dst); + if (ip6_del_rt(rt)) + dst_free(&rt->dst); + } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { rt->rt6i_node->fn_sernum = -1; + } } } From 53effe1697676189395fc9f8a02c749e19cc9daa Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 11 Jul 2013 13:16:54 -0400 Subject: [PATCH 26/80] 9p: fix off by one causing access violations and memory corruption [ Upstream commit 110ecd69a9feea82a152bbf9b12aba57e6396883 ] p9_release_pages() would attempt to dereference one value past the end of pages[]. This would cause the following crashes: [ 6293.171817] BUG: unable to handle kernel paging request at ffff8807c96f3000 [ 6293.174146] IP: [] p9_release_pages+0x3b/0x60 [ 6293.176447] PGD 79c5067 PUD 82c1e3067 PMD 82c197067 PTE 80000007c96f3060 [ 6293.180060] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 6293.180060] Modules linked in: [ 6293.180060] CPU: 62 PID: 174043 Comm: modprobe Tainted: G W 3.10.0-next-20130710-sasha #3954 [ 6293.180060] task: ffff8807b803b000 ti: ffff880787dde000 task.ti: ffff880787dde000 [ 6293.180060] RIP: 0010:[] [] p9_release_pages+0x3b/0x60 [ 6293.214316] RSP: 0000:ffff880787ddfc28 EFLAGS: 00010202 [ 6293.214316] RAX: 0000000000000001 RBX: ffff8807c96f2ff8 RCX: 0000000000000000 [ 6293.222017] RDX: ffff8807b803b000 RSI: 0000000000000001 RDI: ffffea001c7e3d40 [ 6293.222017] RBP: ffff880787ddfc48 R08: 0000000000000000 R09: 0000000000000000 [ 6293.222017] R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000001 [ 6293.222017] R13: 0000000000000001 R14: ffff8807cc50c070 R15: ffff8807cc50c070 [ 6293.222017] FS: 00007f572641d700(0000) GS:ffff8807f3600000(0000) knlGS:0000000000000000 [ 6293.256784] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 6293.256784] CR2: ffff8807c96f3000 CR3: 00000007c8e81000 CR4: 00000000000006e0 [ 6293.256784] Stack: [ 6293.256784] ffff880787ddfcc8 ffff880787ddfcc8 0000000000000000 ffff880787ddfcc8 [ 6293.256784] ffff880787ddfd48 ffffffff84128be8 ffff880700000002 0000000000000001 [ 6293.256784] ffff8807b803b000 ffff880787ddfce0 0000100000000000 0000000000000000 [ 6293.256784] Call Trace: [ 6293.256784] [] p9_virtio_zc_request+0x598/0x630 [ 6293.256784] [] ? wake_up_bit+0x40/0x40 [ 6293.256784] [] p9_client_zc_rpc+0x111/0x3a0 [ 6293.256784] [] ? sched_clock_cpu+0x108/0x120 [ 6293.256784] [] p9_client_read+0xe1/0x2c0 [ 6293.256784] [] v9fs_file_read+0x90/0xc0 [ 6293.256784] [] vfs_read+0xc3/0x130 [ 6293.256784] [] ? trace_hardirqs_on+0xd/0x10 [ 6293.256784] [] SyS_read+0x62/0xa0 [ 6293.256784] [] tracesys+0xdd/0xe2 [ 6293.256784] Code: 66 90 48 89 fb 41 89 f5 48 8b 3f 48 85 ff 74 29 85 f6 74 25 45 31 e4 66 0f 1f 84 00 00 00 00 00 e8 eb 14 12 fd 41 ff c4 49 63 c4 <48> 8b 3c c3 48 85 ff 74 05 45 39 e5 75 e7 48 83 c4 08 5b 41 5c [ 6293.256784] RIP [] p9_release_pages+0x3b/0x60 [ 6293.256784] RSP [ 6293.256784] CR2: ffff8807c96f3000 [ 6293.256784] ---[ end trace 50822ee72cd360fc ]--- Signed-off-by: Sasha Levin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/9p/trans_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index de8df957867d..2ee3879161b1 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -24,11 +24,11 @@ */ void p9_release_pages(struct page **pages, int nr_pages) { - int i = 0; - while (pages[i] && nr_pages--) { - put_page(pages[i]); - i++; - } + int i; + + for (i = 0; i < nr_pages; i++) + if (pages[i]) + put_page(pages[i]); } EXPORT_SYMBOL(p9_release_pages); From 61b6f1280da836ca67e6e4cfaefd65e989f6bfbc Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Thu, 11 Jul 2013 15:53:21 +0200 Subject: [PATCH 27/80] alx: fix lockdep annotation [ Upstream commit a8798a5c77c9981e88caef1373a3310bf8aed219 ] Move spin_lock_init to be called before the spinlocks are used, preventing a lockdep splat. Signed-off-by: Maarten Lankhorst Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/atheros/alx/main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 418de8b13165..d30085c2b454 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1303,6 +1303,8 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) SET_NETDEV_DEV(netdev, &pdev->dev); alx = netdev_priv(netdev); + spin_lock_init(&alx->hw.mdio_lock); + spin_lock_init(&alx->irq_lock); alx->dev = netdev; alx->hw.pdev = pdev; alx->msg_enable = NETIF_MSG_LINK | NETIF_MSG_HW | NETIF_MSG_IFUP | @@ -1385,9 +1387,6 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&alx->link_check_wk, alx_link_check); INIT_WORK(&alx->reset_wk, alx_reset); - spin_lock_init(&alx->hw.mdio_lock); - spin_lock_init(&alx->irq_lock); - netif_carrier_off(netdev); err = register_netdev(netdev); From b4f1489ed58cb6337c663d6eabd8be86c84bf9f1 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 11 Jul 2013 12:43:42 +0200 Subject: [PATCH 28/80] ipv6: fix route selection if kernel is not compiled with CONFIG_IPV6_ROUTER_PREF [ Upstream commit afc154e978de1eb11c555bc8bcec1552f75ebc43 ] This is a follow-up patch to 3630d40067a21d4dfbadc6002bb469ce26ac5d52 ("ipv6: rt6_check_neigh should successfully verify neigh if no NUD information are available"). Since the removal of rt->n in rt6_info we can end up with a dst == NULL in rt6_check_neigh. In case the kernel is not compiled with CONFIG_IPV6_ROUTER_PREF we should also select a route with unkown NUD state but we must not avoid doing round robin selection on routes with the same target. So introduce and pass down a boolean ``do_rr'' to indicate when we should update rt->rr_ptr. As soon as no route is valid we do backtracking and do a lookup on a higher level in the fib trie. v2: a) Improved rt6_check_neigh logic (no need to create neighbour there) and documented return values. v3: a) Introduce enum rt6_nud_state to get rid of the magic numbers (thanks to David Miller). b) Update and shorten commit message a bit to actualy reflect the source. Reported-by: Pierre Emeriaud Cc: YOSHIFUJI Hideaki Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 69 ++++++++++++++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 262d6d8c7e89..bacce6c08644 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -65,6 +65,12 @@ #include #endif +enum rt6_nud_state { + RT6_NUD_FAIL_HARD = -2, + RT6_NUD_FAIL_SOFT = -1, + RT6_NUD_SUCCEED = 1 +}; + static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); @@ -527,28 +533,29 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) return 0; } -static inline bool rt6_check_neigh(struct rt6_info *rt) +static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) { struct neighbour *neigh; - bool ret = false; + enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) - return true; + return RT6_NUD_SUCCEED; rcu_read_lock_bh(); neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) - ret = true; + ret = RT6_NUD_SUCCEED; #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) - ret = true; + ret = RT6_NUD_SUCCEED; #endif read_unlock(&neigh->lock); - } else if (IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { - ret = true; + } else { + ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? + RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT; } rcu_read_unlock_bh(); @@ -562,43 +569,52 @@ static int rt6_score_route(struct rt6_info *rt, int oif, m = rt6_check_dev(rt, oif); if (!m && (strict & RT6_LOOKUP_F_IFACE)) - return -1; + return RT6_NUD_FAIL_HARD; #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; #endif - if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE)) - return -1; + if (strict & RT6_LOOKUP_F_REACHABLE) { + int n = rt6_check_neigh(rt); + if (n < 0) + return n; + } return m; } static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, - int *mpri, struct rt6_info *match) + int *mpri, struct rt6_info *match, + bool *do_rr) { int m; + bool match_do_rr = false; if (rt6_check_expired(rt)) goto out; m = rt6_score_route(rt, oif, strict); - if (m < 0) + if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + match_do_rr = true; + m = 0; /* lowest valid score */ + } else if (m < 0) { goto out; - - if (m > *mpri) { - if (strict & RT6_LOOKUP_F_REACHABLE) - rt6_probe(match); - *mpri = m; - match = rt; - } else if (strict & RT6_LOOKUP_F_REACHABLE) { - rt6_probe(rt); } + if (strict & RT6_LOOKUP_F_REACHABLE) + rt6_probe(rt); + + if (m > *mpri) { + *do_rr = match_do_rr; + *mpri = m; + match = rt; + } out: return match; } static struct rt6_info *find_rr_leaf(struct fib6_node *fn, struct rt6_info *rr_head, - u32 metric, int oif, int strict) + u32 metric, int oif, int strict, + bool *do_rr) { struct rt6_info *rt, *match; int mpri = -1; @@ -606,10 +622,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; for (rt = rr_head; rt && rt->rt6i_metric == metric; rt = rt->dst.rt6_next) - match = find_match(rt, oif, strict, &mpri, match); + match = find_match(rt, oif, strict, &mpri, match, do_rr); for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; rt = rt->dst.rt6_next) - match = find_match(rt, oif, strict, &mpri, match); + match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; } @@ -618,15 +634,16 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) { struct rt6_info *match, *rt0; struct net *net; + bool do_rr = false; rt0 = fn->rr_ptr; if (!rt0) fn->rr_ptr = rt0 = fn->leaf; - match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); + match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict, + &do_rr); - if (!match && - (strict & RT6_LOOKUP_F_REACHABLE)) { + if (do_rr) { struct rt6_info *next = rt0->dst.rt6_next; /* no entries matched; do round-robin */ From 629b1d3db71d24e52478b94c71202034eaefed80 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Thu, 11 Jul 2013 19:04:02 +0800 Subject: [PATCH 29/80] dummy: fix oops when loading the dummy failed [ Upstream commit 2c8a01894a12665d8059fad8f0a293c98a264121 ] We rename the dummy in modprobe.conf like this: install dummy0 /sbin/modprobe -o dummy0 --ignore-install dummy install dummy1 /sbin/modprobe -o dummy1 --ignore-install dummy We got oops when we run the command: modprobe dummy0 modprobe dummy1 ------------[ cut here ]------------ [ 3302.187584] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 [ 3302.195411] IP: [] __rtnl_link_unregister+0x9a/0xd0 [ 3302.201844] PGD 85c94a067 PUD 8517bd067 PMD 0 [ 3302.206305] Oops: 0002 [#1] SMP [ 3302.299737] task: ffff88105ccea300 ti: ffff880eba4a0000 task.ti: ffff880eba4a0000 [ 3302.307186] RIP: 0010:[] [] __rtnl_link_unregister+0x9a/0xd0 [ 3302.316044] RSP: 0018:ffff880eba4a1dd8 EFLAGS: 00010246 [ 3302.321332] RAX: 0000000000000000 RBX: ffffffff81a9d738 RCX: 0000000000000002 [ 3302.328436] RDX: 0000000000000000 RSI: ffffffffa04d602c RDI: ffff880eba4a1dd8 [ 3302.335541] RBP: ffff880eba4a1e18 R08: dead000000200200 R09: dead000000100100 [ 3302.342644] R10: 0000000000000080 R11: 0000000000000003 R12: ffffffff81a9d788 [ 3302.349748] R13: ffffffffa04d7020 R14: ffffffff81a9d670 R15: ffff880eba4a1dd8 [ 3302.364910] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3302.370630] CR2: 0000000000000008 CR3: 000000085e15e000 CR4: 00000000000427e0 [ 3302.377734] DR0: 0000000000000003 DR1: 00000000000000b0 DR2: 0000000000000001 [ 3302.384838] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 3302.391940] Stack: [ 3302.393944] ffff880eba4a1dd8 ffff880eba4a1dd8 ffff880eba4a1e18 ffffffffa04d70c0 [ 3302.401350] 00000000ffffffef ffffffffa01a8000 0000000000000000 ffffffff816111c8 [ 3302.408758] ffff880eba4a1e48 ffffffffa01a80be ffff880eba4a1e48 ffffffffa04d70c0 [ 3302.416164] Call Trace: [ 3302.418605] [] ? 0xffffffffa01a7fff [ 3302.423727] [] dummy_init_module+0xbe/0x1000 [dummy0] [ 3302.430405] [] ? 0xffffffffa01a7fff [ 3302.435535] [] do_one_initcall+0x152/0x1b0 [ 3302.441263] [] do_init_module+0x7b/0x200 [ 3302.446824] [] load_module+0x4e2/0x530 [ 3302.452215] [] ? ddebug_dyndbg_boot_param_cb+0x60/0x60 [ 3302.458979] [] SyS_init_module+0xd1/0x130 [ 3302.464627] [] system_call_fastpath+0x16/0x1b [ 3302.490090] RIP [] __rtnl_link_unregister+0x9a/0xd0 [ 3302.496607] RSP [ 3302.500084] CR2: 0000000000000008 [ 3302.503466] ---[ end trace 8342d49cd49f78ed ]--- The reason is that when loading dummy, if __rtnl_link_register() return failed, the init_module should return and avoid take the wrong path. Signed-off-by: Tan Xiaojun Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dummy.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 42aa54af6842..b710c6b2d659 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -185,6 +185,8 @@ static int __init dummy_init_module(void) rtnl_lock(); err = __rtnl_link_register(&dummy_link_ops); + if (err < 0) + goto out; for (i = 0; i < numdummies && !err; i++) { err = dummy_init_one(); @@ -192,6 +194,8 @@ static int __init dummy_init_module(void) } if (err < 0) __rtnl_link_unregister(&dummy_link_ops); + +out: rtnl_unlock(); return err; From f84ddbc58cfd8551757efe3c61242c4d6e9918ce Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Thu, 11 Jul 2013 19:04:06 +0800 Subject: [PATCH 30/80] ifb: fix oops when loading the ifb failed [ Upstream commit f2966cd5691058b8674a20766525bedeaea9cbcf ] If __rtnl_link_register() return faild when loading the ifb, it will take the wrong path and get oops, so fix it just like dummy. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ifb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index a11f7a45cb5f..a3bed28197d2 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -291,6 +291,8 @@ static int __init ifb_init_module(void) rtnl_lock(); err = __rtnl_link_register(&ifb_link_ops); + if (err < 0) + goto out; for (i = 0; i < numifbs && !err; i++) { err = ifb_init_one(i); @@ -298,6 +300,8 @@ static int __init ifb_init_module(void) } if (err) __rtnl_link_unregister(&ifb_link_ops); + +out: rtnl_unlock(); return err; From 6afbcb59964ec9dc3dd63ed96a9e2cda31f8a072 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 11 Jul 2013 13:12:22 -0700 Subject: [PATCH 31/80] gre: Fix MTU sizing check for gretap tunnels [ Upstream commit 8c91e162e058bb91b7766f26f4d5823a21941026 ] This change fixes an MTU sizing issue seen with gretap tunnels when non-gso packets are sent from the interface. In my case I was able to reproduce the issue by simply sending a ping of 1421 bytes with the gretap interface created on a device with a standard 1500 mtu. This fix is based on the fact that the tunnel mtu is already adjusted by dev->hard_header_len so it would make sense that any packets being compared against that mtu should also be adjusted by hard_header_len and the tunnel header instead of just the tunnel header. Signed-off-by: Alexander Duyck Reported-by: Cong Wang Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index d05bd02886ed..cbfc37f5f05a 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -490,7 +490,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, struct rtable *rt, __be16 df) { struct ip_tunnel *tunnel = netdev_priv(dev); - int pkt_size = skb->len - tunnel->hlen; + int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; int mtu; if (df) From d6245ef7c307dfe1ae238a64980cf2469ecc5f68 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 12 Jul 2013 23:46:33 +0200 Subject: [PATCH 32/80] ipv6: only static routes qualify for equal cost multipathing [ Upstream commit 307f2fb95e9b96b3577916e73d92e104f8f26494 ] Static routes in this case are non-expiring routes which did not get configured by autoconf or by icmpv6 redirects. To make sure we actually get an ecmp route while searching for the first one in this fib6_node's leafs, also make sure it matches the ecmp route assumptions. v2: a) Removed RTF_EXPIRE check in dst.from chain. The check of RTF_ADDRCONF already ensures that this route, even if added again without RTF_EXPIRES (in case of a RA announcement with infinite timeout), does not cause the rt6i_nsiblings logic to go wrong if a later RA updates the expiration time later. v3: a) Allow RTF_EXPIRES routes to enter the ecmp route set. We have to do so, because an pmtu event could update the RTF_EXPIRES flag and we would not count this route, if another route joins this set. We now filter only for RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC, which are flags that don't get changed after rt6_info construction. Cc: Nicolas Dichtel Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 192dd1a0e188..5fc9c7a68d8d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -632,6 +632,12 @@ insert_above: return ln; } +static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) +{ + return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == + RTF_GATEWAY; +} + /* * Insert routing information in a node. */ @@ -646,6 +652,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, int add = (!info->nlh || (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; + bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); ins = &fn->leaf; @@ -691,9 +698,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * To avoid long list, we only had siblings if the * route have a gateway. */ - if (rt->rt6i_flags & RTF_GATEWAY && - !(rt->rt6i_flags & RTF_EXPIRES) && - !(iter->rt6i_flags & RTF_EXPIRES)) + if (rt_can_ecmp && + rt6_qualify_for_ecmp(iter)) rt->rt6i_nsiblings++; } @@ -715,7 +721,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, /* Find the first route that have the same metric */ sibling = fn->leaf; while (sibling) { - if (sibling->rt6i_metric == rt->rt6i_metric) { + if (sibling->rt6i_metric == rt->rt6i_metric && + rt6_qualify_for_ecmp(sibling)) { list_add_tail(&rt->rt6i_siblings, &sibling->rt6i_siblings); break; From dc419b2d5cac92acac3228a46df12720cac358f5 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 12 Jul 2013 10:58:48 -0400 Subject: [PATCH 33/80] atl1e: fix dma mapping warnings [ Upstream commit 352900b583b2852152a1e05ea0e8b579292e731e ] Recently had this backtrace reported: WARNING: at lib/dma-debug.c:937 check_unmap+0x47d/0x930() Hardware name: System Product Name ATL1E 0000:02:00.0: DMA-API: device driver failed to check map error[device address=0x00000000cbfd1000] [size=90 bytes] [mapped as single] Modules linked in: xt_conntrack nf_conntrack ebtable_filter ebtables ip6table_filter ip6_tables snd_hda_codec_hdmi snd_hda_codec_realtek iTCO_wdt iTCO_vendor_support snd_hda_intel acpi_cpufreq mperf coretemp btrfs zlib_deflate snd_hda_codec snd_hwdep microcode raid6_pq libcrc32c snd_seq usblp serio_raw xor snd_seq_device joydev snd_pcm snd_page_alloc snd_timer snd lpc_ich i2c_i801 soundcore mfd_core atl1e asus_atk0110 ata_generic pata_acpi radeon i2c_algo_bit drm_kms_helper ttm drm i2c_core pata_marvell uinput Pid: 314, comm: systemd-journal Not tainted 3.9.0-0.rc6.git2.3.fc19.x86_64 #1 Call Trace: [] warn_slowpath_common+0x66/0x80 [] warn_slowpath_fmt+0x4c/0x50 [] check_unmap+0x47d/0x930 [] ? sched_clock_cpu+0xa8/0x100 [] debug_dma_unmap_page+0x5f/0x70 [] ? unmap_single+0x20/0x30 [] atl1e_intr+0x3a1/0x5b0 [atl1e] [] ? trace_hardirqs_off+0xd/0x10 [] handle_irq_event_percpu+0x56/0x390 [] handle_irq_event+0x3d/0x60 [] handle_fasteoi_irq+0x5a/0x100 [] handle_irq+0xbf/0x150 [] ? file_sb_list_del+0x3f/0x50 [] ? irq_enter+0x50/0xa0 [] do_IRQ+0x4d/0xc0 [] ? file_sb_list_del+0x3f/0x50 [] common_interrupt+0x72/0x72 [] ? lock_release+0xc2/0x310 [] lg_local_unlock_cpu+0x24/0x50 [] file_sb_list_del+0x3f/0x50 [] fput+0x2d/0xc0 [] filp_close+0x61/0x90 [] __close_fd+0x8d/0x150 [] sys_close+0x20/0x50 [] system_call_fastpath+0x16/0x1b The usual straighforward failure to check for dma_mapping_error after a map operation is completed. This patch should fix it, the reporter wandered off after filing this bz: https://bugzilla.redhat.com/show_bug.cgi?id=954170 and I don't have hardware to test, but the fix is pretty straightforward, so I figured I'd post it for review. Signed-off-by: Neil Horman CC: Jay Cliburn CC: Chris Snook CC: "David S. Miller" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/atheros/atl1e/atl1e_main.c | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 0688bb82b442..8116cb84cdb8 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1665,8 +1665,8 @@ check_sum: return 0; } -static void atl1e_tx_map(struct atl1e_adapter *adapter, - struct sk_buff *skb, struct atl1e_tpd_desc *tpd) +static int atl1e_tx_map(struct atl1e_adapter *adapter, + struct sk_buff *skb, struct atl1e_tpd_desc *tpd) { struct atl1e_tpd_desc *use_tpd = NULL; struct atl1e_tx_buffer *tx_buffer = NULL; @@ -1677,6 +1677,7 @@ static void atl1e_tx_map(struct atl1e_adapter *adapter, u16 nr_frags; u16 f; int segment; + int ring_start = adapter->tx_ring.next_to_use; nr_frags = skb_shinfo(skb)->nr_frags; segment = (tpd->word3 >> TPD_SEGMENT_EN_SHIFT) & TPD_SEGMENT_EN_MASK; @@ -1689,6 +1690,9 @@ static void atl1e_tx_map(struct atl1e_adapter *adapter, tx_buffer->length = map_len; tx_buffer->dma = pci_map_single(adapter->pdev, skb->data, hdr_len, PCI_DMA_TODEVICE); + if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) + return -ENOSPC; + ATL1E_SET_PCIMAP_TYPE(tx_buffer, ATL1E_TX_PCIMAP_SINGLE); mapped_len += map_len; use_tpd->buffer_addr = cpu_to_le64(tx_buffer->dma); @@ -1715,6 +1719,13 @@ static void atl1e_tx_map(struct atl1e_adapter *adapter, tx_buffer->dma = pci_map_single(adapter->pdev, skb->data + mapped_len, map_len, PCI_DMA_TODEVICE); + + if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) { + /* Reset the tx rings next pointer */ + adapter->tx_ring.next_to_use = ring_start; + return -ENOSPC; + } + ATL1E_SET_PCIMAP_TYPE(tx_buffer, ATL1E_TX_PCIMAP_SINGLE); mapped_len += map_len; use_tpd->buffer_addr = cpu_to_le64(tx_buffer->dma); @@ -1750,6 +1761,13 @@ static void atl1e_tx_map(struct atl1e_adapter *adapter, (i * MAX_TX_BUF_LEN), tx_buffer->length, DMA_TO_DEVICE); + + if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) { + /* Reset the ring next to use pointer */ + adapter->tx_ring.next_to_use = ring_start; + return -ENOSPC; + } + ATL1E_SET_PCIMAP_TYPE(tx_buffer, ATL1E_TX_PCIMAP_PAGE); use_tpd->buffer_addr = cpu_to_le64(tx_buffer->dma); use_tpd->word2 = (use_tpd->word2 & (~TPD_BUFLEN_MASK)) | @@ -1767,6 +1785,7 @@ static void atl1e_tx_map(struct atl1e_adapter *adapter, /* The last buffer info contain the skb address, so it will be free after unmap */ tx_buffer->skb = skb; + return 0; } static void atl1e_tx_queue(struct atl1e_adapter *adapter, u16 count, @@ -1834,10 +1853,13 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } - atl1e_tx_map(adapter, skb, tpd); + if (atl1e_tx_map(adapter, skb, tpd)) + goto out; + atl1e_tx_queue(adapter, tpd_req, tpd); netdev->trans_start = jiffies; /* NETIF_F_LLTX driver :( */ +out: spin_unlock_irqrestore(&adapter->tx_lock, flags); return NETDEV_TX_OK; } From da7e35cee6bf8aae938baa597863691bfffc26eb Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 16 Jul 2013 10:49:41 -0400 Subject: [PATCH 34/80] atl1e: unmap partially mapped skb on dma error and free skb [ Upstream commit 584ec4355355ffac43571b02a314d43eb2f7fcbf ] Ben Hutchings pointed out that my recent update to atl1e in commit 352900b583b2852152a1e05ea0e8b579292e731e ("atl1e: fix dma mapping warnings") was missing a bit of code. Specifically it reset the hardware tx ring to its origional state when we hit a dma error, but didn't unmap any exiting mappings from the operation. This patch fixes that up. It also remembers to free the skb in the event that an error occurs, so we don't leak. Untested, as I don't have hardware. I think its pretty straightforward, but please review closely. Signed-off-by: Neil Horman CC: Ben Hutchings CC: Jay Cliburn CC: Chris Snook CC: "David S. Miller" Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/atheros/atl1e/atl1e_main.c | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 8116cb84cdb8..c23bb02e3ed0 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1678,6 +1678,7 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter, u16 f; int segment; int ring_start = adapter->tx_ring.next_to_use; + int ring_end; nr_frags = skb_shinfo(skb)->nr_frags; segment = (tpd->word3 >> TPD_SEGMENT_EN_SHIFT) & TPD_SEGMENT_EN_MASK; @@ -1721,6 +1722,15 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter, map_len, PCI_DMA_TODEVICE); if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) { + /* We need to unwind the mappings we've done */ + ring_end = adapter->tx_ring.next_to_use; + adapter->tx_ring.next_to_use = ring_start; + while (adapter->tx_ring.next_to_use != ring_end) { + tpd = atl1e_get_tpd(adapter); + tx_buffer = atl1e_get_tx_buffer(adapter, tpd); + pci_unmap_single(adapter->pdev, tx_buffer->dma, + tx_buffer->length, PCI_DMA_TODEVICE); + } /* Reset the tx rings next pointer */ adapter->tx_ring.next_to_use = ring_start; return -ENOSPC; @@ -1763,6 +1773,16 @@ static int atl1e_tx_map(struct atl1e_adapter *adapter, DMA_TO_DEVICE); if (dma_mapping_error(&adapter->pdev->dev, tx_buffer->dma)) { + /* We need to unwind the mappings we've done */ + ring_end = adapter->tx_ring.next_to_use; + adapter->tx_ring.next_to_use = ring_start; + while (adapter->tx_ring.next_to_use != ring_end) { + tpd = atl1e_get_tpd(adapter); + tx_buffer = atl1e_get_tx_buffer(adapter, tpd); + dma_unmap_page(&adapter->pdev->dev, tx_buffer->dma, + tx_buffer->length, DMA_TO_DEVICE); + } + /* Reset the ring next to use pointer */ adapter->tx_ring.next_to_use = ring_start; return -ENOSPC; @@ -1853,8 +1873,10 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } - if (atl1e_tx_map(adapter, skb, tpd)) + if (atl1e_tx_map(adapter, skb, tpd)) { + dev_kfree_skb_any(skb); goto out; + } atl1e_tx_queue(adapter, tpd_req, tpd); From b3923f8211bc60ff3b4567fafe0814beb6bc5a50 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 15 Jul 2013 20:03:19 -0700 Subject: [PATCH 35/80] ipv4: set transport header earlier [ Upstream commit 21d1196a35f5686c4323e42a62fdb4b23b0ab4a3 ] commit 45f00f99d6e ("ipv4: tcp: clean up tcp_v4_early_demux()") added a performance regression for non GRO traffic, basically disabling IP early demux. IPv6 stack resets transport header in ip6_rcv() before calling IP early demux in ip6_rcv_finish(), while IPv4 does this only in ip_local_deliver_finish(), _after_ IP early demux. GRO traffic happened to enable IP early demux because transport header is also set in inet_gro_receive() Instead of reverting the faulty commit, we can make IPv4/IPv6 behave the same : transport_header should be set in ip_rcv() instead of ip_local_deliver_finish() ip_local_deliver_finish() can also use skb_network_header_len() which is faster than ip_hdrlen() Signed-off-by: Eric Dumazet Cc: Neal Cardwell Cc: Tom Herbert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ip_input.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 3da817b89e9b..15e3e683adec 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -190,10 +190,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); - __skb_pull(skb, ip_hdrlen(skb)); - - /* Point into the IP datagram, just past the header. */ - skb_reset_transport_header(skb); + __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); { @@ -437,6 +434,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto drop; } + skb->transport_header = skb->network_header + iph->ihl*4; + /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); From e0ca176c17745ac3e0c5a9969aba3c983e6c69c5 Mon Sep 17 00:00:00 2001 From: Sarveshwar Bandi Date: Tue, 16 Jul 2013 12:44:02 +0530 Subject: [PATCH 36/80] be2net: Fix to avoid hardware workaround when not needed [ Upstream commit 52fe29e4bb614367c108b717c6d7fe5953eb7af3 ] Hardware workaround requesting hardware to skip vlan insertion is necessary only when umc or qnq is enabled. Enabling this workaround in other scenarios could cause controller to stall. Signed-off-by: Sarveshwar Bandi Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/emulex/benet/be_main.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index a0b4be51f0d1..6e4342695fa7 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -782,16 +782,22 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, if (vlan_tx_tag_present(skb)) vlan_tag = be_get_tx_vlan_tag(adapter, skb); - else if (qnq_async_evt_rcvd(adapter) && adapter->pvid) - vlan_tag = adapter->pvid; + + if (qnq_async_evt_rcvd(adapter) && adapter->pvid) { + if (!vlan_tag) + vlan_tag = adapter->pvid; + /* f/w workaround to set skip_hw_vlan = 1, informs the F/W to + * skip VLAN insertion + */ + if (skip_hw_vlan) + *skip_hw_vlan = true; + } if (vlan_tag) { skb = __vlan_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); if (unlikely(!skb)) return skb; skb->vlan_tci = 0; - if (skip_hw_vlan) - *skip_hw_vlan = true; } /* Insert the outer VLAN, if any */ From 98bec4a114dfd772390a07735606109447481872 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Tue, 16 Jul 2013 23:01:20 -0700 Subject: [PATCH 37/80] hyperv: Fix the NETIF_F_SG flag setting in netvsc [ Upstream commit f45708209dc445bac0844f6ce86e315a2ffe8a29 ] SG mode is not currently supported by netvsc, so remove this flag for now. Otherwise, it will be unconditionally enabled by commit ec5f0615642 "Kill link between CSUM and SG features" Previously, the SG feature is disabled because CSUM is not set here. Signed-off-by: Haiyang Zhang Reviewed-by: K. Y. Srinivasan Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/hyperv/netvsc_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 4dccead586be..23a0fff0df52 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -431,8 +431,8 @@ static int netvsc_probe(struct hv_device *dev, net->netdev_ops = &device_ops; /* TODO: Add GSO and Checksum offload */ - net->hw_features = NETIF_F_SG; - net->features = NETIF_F_SG | NETIF_F_HW_VLAN_CTAG_TX; + net->hw_features = 0; + net->features = NETIF_F_HW_VLAN_CTAG_TX; SET_ETHTOOL_OPS(net, ðtool_ops); SET_NETDEV_DEV(net, &dev->device); From c1d220fb027abb92452bf3a59fed7308647b39be Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Tue, 16 Jul 2013 08:52:30 +0200 Subject: [PATCH 38/80] pkt_sched: sch_qfq: remove a source of high packet delay/jitter [ Upstream commit 87f40dd6ce7042caca0b3b557e8923127f51f902 ] QFQ+ inherits from QFQ a design choice that may cause a high packet delay/jitter and a severe short-term unfairness. As QFQ, QFQ+ uses a special quantity, the system virtual time, to track the service provided by the ideal system it approximates. When a packet is dequeued, this quantity must be incremented by the size of the packet, divided by the sum of the weights of the aggregates waiting to be served. Tracking this sum correctly is a non-trivial task, because, to preserve tight service guarantees, the decrement of this sum must be delayed in a special way [1]: this sum can be decremented only after that its value would decrease also in the ideal system approximated by QFQ+. For efficiency, QFQ+ keeps track only of the 'instantaneous' weight sum, increased and decreased immediately as the weight of an aggregate changes, and as an aggregate is created or destroyed (which, in its turn, happens as a consequence of some class being created/destroyed/changed). However, to avoid the problems caused to service guarantees by these immediate decreases, QFQ+ increments the system virtual time using the maximum value allowed for the weight sum, 2^10, in place of the dynamic, instantaneous value. The instantaneous value of the weight sum is used only to check whether a request of weight increase or a class creation can be satisfied. Unfortunately, the problems caused by this choice are worse than the temporary degradation of the service guarantees that may occur, when a class is changed or destroyed, if the instantaneous value of the weight sum was used to update the system virtual time. In fact, the fraction of the link bandwidth guaranteed by QFQ+ to each aggregate is equal to the ratio between the weight of the aggregate and the sum of the weights of the competing aggregates. The packet delay guaranteed to the aggregate is instead inversely proportional to the guaranteed bandwidth. By using the maximum possible value, and not the actual value of the weight sum, QFQ+ provides each aggregate with the worst possible service guarantees, and not with service guarantees related to the actual set of competing aggregates. To see the consequences of this fact, consider the following simple example. Suppose that only the following aggregates are backlogged, i.e., that only the classes in the following aggregates have packets to transmit: one aggregate with weight 10, say A, and ten aggregates with weight 1, say B1, B2, ..., B10. In particular, suppose that these aggregates are always backlogged. Given the weight distribution, the smoothest and fairest service order would be: A B1 A B2 A B3 A B4 A B5 A B6 A B7 A B8 A B9 A B10 A B1 A B2 ... QFQ+ would provide exactly this optimal service if it used the actual value for the weight sum instead of the maximum possible value, i.e., 11 instead of 2^10. In contrast, since QFQ+ uses the latter value, it serves aggregates as follows (easy to prove and to reproduce experimentally): A B1 B2 B3 B4 B5 B6 B7 B8 B9 B10 A A A A A A A A A A B1 B2 ... B10 A A ... By replacing 10 with N in the above example, and by increasing N, one can increase at will the maximum packet delay and the jitter experienced by the classes in aggregate A. This patch addresses this issue by just using the above 'instantaneous' value of the weight sum, instead of the maximum possible value, when updating the system virtual time. After the instantaneous weight sum is decreased, QFQ+ may deviate from the ideal service for a time interval in the order of the time to serve one maximum-size packet for each backlogged class. The worst-case extent of the deviation exhibited by QFQ+ during this time interval [1] is basically the same as of the deviation described above (but, without this patch, QFQ+ suffers from such a deviation all the time). Finally, this patch modifies the comment to the function qfq_slot_insert, to make it coherent with the fact that the weight sum used by QFQ+ can now be lower than the maximum possible value. [1] P. Valente, "Extending WF2Q+ to support a dynamic traffic mix", Proceedings of AAA-IDEA'05, June 2005. Signed-off-by: Paolo Valente Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sched/sch_qfq.c | 85 +++++++++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 29 deletions(-) diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d51852bba01c..57922524f0c7 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -113,7 +113,6 @@ #define FRAC_BITS 30 /* fixed point arithmetic */ #define ONE_FP (1UL << FRAC_BITS) -#define IWSUM (ONE_FP/QFQ_MAX_WSUM) #define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ #define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ @@ -189,6 +188,7 @@ struct qfq_sched { struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */ u32 num_active_agg; /* Num. of active aggregates */ u32 wsum; /* weight sum */ + u32 iwsum; /* inverse weight sum */ unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ @@ -314,6 +314,7 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg, q->wsum += (int) agg->class_weight * (new_num_classes - agg->num_classes); + q->iwsum = ONE_FP / q->wsum; agg->num_classes = new_num_classes; } @@ -340,6 +341,10 @@ static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg) { if (!hlist_unhashed(&agg->nonfull_next)) hlist_del_init(&agg->nonfull_next); + q->wsum -= agg->class_weight; + if (q->wsum != 0) + q->iwsum = ONE_FP / q->wsum; + if (q->in_serv_agg == agg) q->in_serv_agg = qfq_choose_next_agg(q); kfree(agg); @@ -827,38 +832,60 @@ static void qfq_make_eligible(struct qfq_sched *q) } } - /* - * The index of the slot in which the aggregate is to be inserted must - * not be higher than QFQ_MAX_SLOTS-2. There is a '-2' and not a '-1' - * because the start time of the group may be moved backward by one - * slot after the aggregate has been inserted, and this would cause - * non-empty slots to be right-shifted by one position. + * The index of the slot in which the input aggregate agg is to be + * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2' + * and not a '-1' because the start time of the group may be moved + * backward by one slot after the aggregate has been inserted, and + * this would cause non-empty slots to be right-shifted by one + * position. * - * If the weight and lmax (max_pkt_size) of the classes do not change, - * then QFQ+ does meet the above contraint according to the current - * values of its parameters. In fact, if the weight and lmax of the - * classes do not change, then, from the theory, QFQ+ guarantees that - * the slot index is never higher than - * 2 + QFQ_MAX_AGG_CLASSES * ((1<budget -= len; - q->V += (u64)len * IWSUM; + q->V += (u64)len * q->iwsum; pr_debug("qfq dequeue: len %u F %lld now %lld\n", len, (unsigned long long) in_serv_agg->F, (unsigned long long) q->V); From 05464d21faec5e37e5b0fca58a5201408d6aab31 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 18 Jul 2013 10:55:15 +0800 Subject: [PATCH 39/80] tuntap: do not zerocopy if iov needs more pages than MAX_SKB_FRAGS [ Upstream commit 885291761dba2bfe04df4c0f7bb75e4c920ab82e ] We try to linearize part of the skb when the number of iov is greater than MAX_SKB_FRAGS. This is not enough since each single vector may occupy more than one pages, so zerocopy_sg_fromiovec() may still fail and may break the guest network. Solve this problem by calculate the pages needed for iov before trying to do zerocopy and switch to use copy instead of zerocopy if it needs more than MAX_SKB_FRAGS. This is done through introducing a new helper to count the pages for iov, and call uarg->callback() manually when switching from zerocopy to copy to notify vhost. We can do further optimization on top. The bug were introduced from commit 0690899b4d4501b3505be069b9a687e68ccbe15b (tun: experimental zero copy tx support) Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 62 +++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 24 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index c3cb60b1d1a2..2491eb28a8d2 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1037,6 +1037,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, return 0; } +static unsigned long iov_pages(const struct iovec *iv, int offset, + unsigned long nr_segs) +{ + unsigned long seg, base; + int pages = 0, len, size; + + while (nr_segs && (offset >= iv->iov_len)) { + offset -= iv->iov_len; + ++iv; + --nr_segs; + } + + for (seg = 0; seg < nr_segs; seg++) { + base = (unsigned long)iv[seg].iov_base + offset; + len = iv[seg].iov_len - offset; + size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; + pages += size; + offset = 0; + } + + return pages; +} + /* Get packet from user space buffer */ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, void *msg_control, const struct iovec *iv, @@ -1084,32 +1107,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, return -EINVAL; } - if (msg_control) - zerocopy = true; - - if (zerocopy) { - /* Userspace may produce vectors with count greater than - * MAX_SKB_FRAGS, so we need to linearize parts of the skb - * to let the rest of data to be fit in the frags. - */ - if (count > MAX_SKB_FRAGS) { - copylen = iov_length(iv, count - MAX_SKB_FRAGS); - if (copylen < offset) - copylen = 0; - else - copylen -= offset; - } else - copylen = 0; - /* There are 256 bytes to be copied in skb, so there is enough - * room for skb expand head in case it is used. + if (msg_control) { + /* There are 256 bytes to be copied in skb, so there is + * enough room for skb expand head in case it is used. * The rest of the buffer is mapped from userspace. */ - if (copylen < gso.hdr_len) - copylen = gso.hdr_len; - if (!copylen) - copylen = GOODCOPY_LEN; + copylen = gso.hdr_len ? gso.hdr_len : GOODCOPY_LEN; linear = copylen; - } else { + if (iov_pages(iv, offset + copylen, count) <= MAX_SKB_FRAGS) + zerocopy = true; + } + + if (!zerocopy) { copylen = len; linear = gso.hdr_len; } @@ -1123,8 +1132,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (zerocopy) err = zerocopy_sg_from_iovec(skb, iv, offset, count); - else + else { err = skb_copy_datagram_from_iovec(skb, 0, iv, offset, len); + if (!err && msg_control) { + struct ubuf_info *uarg = msg_control; + uarg->callback(uarg, false); + } + } if (err) { tun->dev->stats.rx_dropped++; From 7d9e6dd88cdea6c3d62a795ef69e6cc47b6dca2d Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Thu, 18 Jul 2013 10:55:16 +0800 Subject: [PATCH 40/80] macvtap: do not zerocopy if iov needs more pages than MAX_SKB_FRAGS [ Upstream commit ece793fcfc417b3925844be88a6a6dc82ae8f7c6 ] We try to linearize part of the skb when the number of iov is greater than MAX_SKB_FRAGS. This is not enough since each single vector may occupy more than one pages, so zerocopy_sg_fromiovec() may still fail and may break the guest network. Solve this problem by calculate the pages needed for iov before trying to do zerocopy and switch to use copy instead of zerocopy if it needs more than MAX_SKB_FRAGS. This is done through introducing a new helper to count the pages for iov, and call uarg->callback() manually when switching from zerocopy to copy to notify vhost. We can do further optimization on top. This bug were introduced from b92946e2919134ebe2a4083e4302236295ea2a73 (macvtap: zerocopy: validate vectors before building skb). Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/macvtap.c | 62 ++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 25 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 502d9486395f..523d6b2426a8 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -633,6 +633,28 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, return 0; } +static unsigned long iov_pages(const struct iovec *iv, int offset, + unsigned long nr_segs) +{ + unsigned long seg, base; + int pages = 0, len, size; + + while (nr_segs && (offset >= iv->iov_len)) { + offset -= iv->iov_len; + ++iv; + --nr_segs; + } + + for (seg = 0; seg < nr_segs; seg++) { + base = (unsigned long)iv[seg].iov_base + offset; + len = iv[seg].iov_len - offset; + size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; + pages += size; + offset = 0; + } + + return pages; +} /* Get packet from user space buffer */ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, @@ -679,31 +701,15 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (unlikely(count > UIO_MAXIOV)) goto err; - if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) - zerocopy = true; - - if (zerocopy) { - /* Userspace may produce vectors with count greater than - * MAX_SKB_FRAGS, so we need to linearize parts of the skb - * to let the rest of data to be fit in the frags. - */ - if (count > MAX_SKB_FRAGS) { - copylen = iov_length(iv, count - MAX_SKB_FRAGS); - if (copylen < vnet_hdr_len) - copylen = 0; - else - copylen -= vnet_hdr_len; - } - /* There are 256 bytes to be copied in skb, so there is enough - * room for skb expand head in case it is used. - * The rest buffer is mapped from userspace. - */ - if (copylen < vnet_hdr.hdr_len) - copylen = vnet_hdr.hdr_len; - if (!copylen) - copylen = GOODCOPY_LEN; + if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { + copylen = vnet_hdr.hdr_len ? vnet_hdr.hdr_len : GOODCOPY_LEN; linear = copylen; - } else { + if (iov_pages(iv, vnet_hdr_len + copylen, count) + <= MAX_SKB_FRAGS) + zerocopy = true; + } + + if (!zerocopy) { copylen = len; linear = vnet_hdr.hdr_len; } @@ -715,9 +721,15 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, if (zerocopy) err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count); - else + else { err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len); + if (!err && m && m->msg_control) { + struct ubuf_info *uarg = m->msg_control; + uarg->callback(uarg, false); + } + } + if (err) goto err_kfree; From 37b25f3f99d52710de567c5ff880824bb576121c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jul 2013 07:19:26 -0700 Subject: [PATCH 41/80] vlan: mask vlan prio bits [ Upstream commit d4b812dea4a236f729526facf97df1a9d18e191c ] In commit 48cc32d38a52d0b68f91a171a8d00531edc6a46e ("vlan: don't deliver frames for unknown vlans to protocols") Florian made sure we set pkt_type to PACKET_OTHERHOST if the vlan id is set and we could find a vlan device for this particular id. But we also have a problem if prio bits are set. Steinar reported an issue on a router receiving IPv6 frames with a vlan tag of 4000 (id 0, prio 2), and tunneled into a sit device, because skb->vlan_tci is set. Forwarded frame is completely corrupted : We can see (8100:4000) being inserted in the middle of IPv6 source address : 16:48:00.780413 IP6 2001:16d8:8100:4000:ee1c:0:9d9:bc87 > 9f94:4d95:2001:67c:29f4::: ICMP6, unknown icmp6 type (0), length 64 0x0000: 0000 0029 8000 c7c3 7103 0001 a0ae e651 0x0010: 0000 0000 ccce 0b00 0000 0000 1011 1213 0x0020: 1415 1617 1819 1a1b 1c1d 1e1f 2021 2223 0x0030: 2425 2627 2829 2a2b 2c2d 2e2f 3031 3233 It seems we are not really ready to properly cope with this right now. We can probably do better in future kernels : vlan_get_ingress_priority() should be a netdev property instead of a per vlan_dev one. For stable kernels, lets clear vlan_tci to fix the bugs. Reported-by: Steinar H. Gunderson Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/if_vlan.h | 3 +-- net/8021q/vlan_core.c | 2 +- net/core/dev.c | 11 +++++++++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 637fa71de0c7..0b3498800ba0 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -79,9 +79,8 @@ static inline int is_vlan_dev(struct net_device *dev) } #define vlan_tx_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) -#define vlan_tx_nonzero_tag_present(__skb) \ - (vlan_tx_tag_present(__skb) && ((__skb)->vlan_tci & VLAN_VID_MASK)) #define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) +#define vlan_tx_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 8a15eaadc4bd..4a78c4de9f20 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; - u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; + u16 vlan_id = vlan_tx_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; diff --git a/net/core/dev.c b/net/core/dev.c index faebb398fb46..7ddbb31b10d3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3513,8 +3513,15 @@ ncls: } } - if (vlan_tx_nonzero_tag_present(skb)) - skb->pkt_type = PACKET_OTHERHOST; + if (unlikely(vlan_tx_tag_present(skb))) { + if (vlan_tx_tag_get_id(skb)) + skb->pkt_type = PACKET_OTHERHOST; + /* Note: we might in the future use prio bits + * and set skb->priority like in vlan_do_receive() + * For the time being, just ignore Priority Code Point + */ + skb->vlan_tci = 0; + } /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; From 5110890cbfa0c783cb727d6a2fbf36ce8560f9a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Jul 2013 09:35:10 -0700 Subject: [PATCH 42/80] vlan: fix a race in egress prio management [ Upstream commit 3e3aac497513c669e1c62c71e1d552ea85c1d974 ] egress_priority_map[] hash table updates are protected by rtnl, and we never remove elements until device is dismantled. We have to make sure that before inserting an new element in hash table, all its fields are committed to memory or else another cpu could find corrupt values and crash. Signed-off-by: Eric Dumazet Cc: Patrick McHardy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/8021q/vlan_dev.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 3a8c8fd63c88..1cd3d2a406f5 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -73,6 +73,8 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) { struct vlan_priority_tci_mapping *mp; + smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */ + mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)]; while (mp) { if (mp->priority == skb->priority) { @@ -249,6 +251,11 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, np->next = mp; np->priority = skb_prio; np->vlan_qos = vlan_qos; + /* Before inserting this element in hash table, make sure all its fields + * are committed to memory. + * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask() + */ + smp_wmb(); vlan->egress_priority_map[skb_prio & 0xF] = np; if (vlan_qos) vlan->nr_egress_mappings++; From cf6a37e7da73702e865a02f03664fc0e2c4a2b3f Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 13 Jun 2013 01:29:24 +0200 Subject: [PATCH 43/80] MIPS: Oceton: Fix build error. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 39205750efa6d335fac4f9bcd32b49c7e71c12b7 upstream. If CONFIG_CAVIUM_OCTEON_LOCK_L2_TLB, CONFIG_CAVIUM_OCTEON_LOCK_L2_EXCEPTION, CONFIG_CAVIUM_OCTEON_LOCK_L2_LOW_LEVEL_INTERRUPT and CONFIG_CAVIUM_OCTEON_LOCK_L2_INTERRUPT are all undefined: arch/mips/cavium-octeon/setup.c: In function ‘prom_init’: arch/mips/cavium-octeon/setup.c:715:12: error: unused variable ‘ebase’ [-Werror=unused-variable] Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/cavium-octeon/setup.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 1e1e18c5a534..2a75ff249e71 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -7,6 +7,7 @@ * Copyright (C) 2008, 2009 Wind River Systems * written by Ralf Baechle */ +#include #include #include #include @@ -712,7 +713,7 @@ void __init prom_init(void) if (cvmx_read_csr(CVMX_L2D_FUS3) & (3ull << 34)) { pr_info("Skipping L2 locking due to reduced L2 cache size\n"); } else { - uint32_t ebase = read_c0_ebase() & 0x3ffff000; + uint32_t __maybe_unused ebase = read_c0_ebase() & 0x3ffff000; #ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2_TLB /* TLB refill */ cvmx_l2c_lock_mem_region(ebase, 0x100); From df18f5f93ce9ed27bd9a779196614afd7a0aa017 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 13 Jun 2013 02:45:53 +0200 Subject: [PATCH 44/80] RAPIDIO: IDT_GEN2: Fix build error. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 27f62b9f294b7e2019c94c385abda43a0af6bb8b upstream. CC drivers/rapidio/switches/idt_gen2.o drivers/rapidio/switches/idt_gen2.c: In function ‘idtg2_show_errlog’: drivers/rapidio/switches/idt_gen2.c:379:30: error: ‘PAGE_SIZE’ undeclared (first use in this function) drivers/rapidio/switches/idt_gen2.c:379:30: note: each undeclared identifier is reported only once for each function it appears in Signed-off-by: Ralf Baechle Acked-by: Alexandre Bounine Signed-off-by: Greg Kroah-Hartman --- drivers/rapidio/switches/idt_gen2.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rapidio/switches/idt_gen2.c b/drivers/rapidio/switches/idt_gen2.c index 809b7a3336ba..5d3b0f014d35 100644 --- a/drivers/rapidio/switches/idt_gen2.c +++ b/drivers/rapidio/switches/idt_gen2.c @@ -15,6 +15,8 @@ #include #include #include + +#include #include "../rio.h" #define LOCAL_RTE_CONF_DESTID_SEL 0x010070 From 05ac7b3a7db7823aecde55df6a5f8d6bb45693f4 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Wed, 17 Jul 2013 14:53:53 +0200 Subject: [PATCH 45/80] fuse: readdirplus: fix dentry leak commit 53ce9a3364de0723b27d861de93bfc882f7db050 upstream. In case d_lookup() returns a dentry with d_inode == NULL, the dentry is not returned with dput(). This results in triggering a BUG() in shrink_dcache_for_umount_subtree(): BUG: Dentry ...{i=0,n=...} still in use (1) [unmount of fuse fuse] [SzM: need to d_drop() as well] Reported-by: Justin Clift Signed-off-by: Niels de Vos Signed-off-by: Miklos Szeredi Tested-by: Brian Foster Tested-by: Niels de Vos Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index f3f783dc4f75..5532bf416f9b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1229,9 +1229,15 @@ static int fuse_direntplus_link(struct file *file, name.hash = full_name_hash(name.name, name.len); dentry = d_lookup(parent, &name); - if (dentry && dentry->d_inode) { + if (dentry) { inode = dentry->d_inode; - if (get_node_id(inode) == o->nodeid) { + if (!inode) { + d_drop(dentry); + } else if (get_node_id(inode) != o->nodeid) { + err = d_invalidate(dentry); + if (err) + goto out; + } else { struct fuse_inode *fi; fi = get_fuse_inode(inode); spin_lock(&fc->lock); @@ -1244,9 +1250,6 @@ static int fuse_direntplus_link(struct file *file, */ goto found; } - err = d_invalidate(dentry); - if (err) - goto out; dput(dentry); dentry = NULL; } From dc2a6c2d3e9a2529b3ffc70c165cbb10c85768f5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 17 Jul 2013 14:53:53 +0200 Subject: [PATCH 46/80] fuse: readdirplus: fix instantiate commit 2914941e3178d84a216fc4eb85292dfef3b6d628 upstream. Fuse does instantiation slightly differently from NFS/CIFS which use d_materialise_unique(). Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 5532bf416f9b..a71df0206f6f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1264,10 +1264,19 @@ static int fuse_direntplus_link(struct file *file, if (!inode) goto out; - alias = d_materialise_unique(dentry, inode); - err = PTR_ERR(alias); - if (IS_ERR(alias)) - goto out; + if (S_ISDIR(inode->i_mode)) { + mutex_lock(&fc->inst_mutex); + alias = fuse_d_add_directory(dentry, inode); + mutex_unlock(&fc->inst_mutex); + err = PTR_ERR(alias); + if (IS_ERR(alias)) { + iput(inode); + goto out; + } + } else { + alias = d_splice_alias(inode, dentry); + } + if (alias) { dput(dentry); dentry = alias; From 223828d8a6ef53f7671351fe5d08f18dca714830 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 17 Jul 2013 14:53:53 +0200 Subject: [PATCH 47/80] fuse: readdirplus: sanity checks commit a28ef45cbb1e7fadd5159deb17b02de15c6e4aaf upstream. Add sanity checks before adding or updating an entry with data received from readdirplus. Signed-off-by: Miklos Szeredi Signed-off-by: Greg Kroah-Hartman --- fs/fuse/dir.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index a71df0206f6f..5b1274699b08 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1225,6 +1225,12 @@ static int fuse_direntplus_link(struct file *file, if (name.name[1] == '.' && name.len == 2) return 0; } + + if (invalid_nodeid(o->nodeid)) + return -EIO; + if (!fuse_valid_type(o->attr.mode)) + return -EIO; + fc = get_fuse_conn(dir); name.hash = full_name_hash(name.name, name.len); @@ -1233,10 +1239,14 @@ static int fuse_direntplus_link(struct file *file, inode = dentry->d_inode; if (!inode) { d_drop(dentry); - } else if (get_node_id(inode) != o->nodeid) { + } else if (get_node_id(inode) != o->nodeid || + ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { err = d_invalidate(dentry); if (err) goto out; + } else if (is_bad_inode(inode)) { + err = -EIO; + goto out; } else { struct fuse_inode *fi; fi = get_fuse_inode(inode); From c0d8f455600820eb05208b57c50d4e90a58c4d96 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 18:04:21 -0700 Subject: [PATCH 48/80] bcache: Fix a dumb race commit 6aa8f1a6ca41c49721d2de4e048d3da8d06411f9 upstream. In the far-too-complicated closure code - closures can have destructors, for probably dubious reasons; they get run after the closure is no longer waiting on anything but before dropping the parent ref, intended just for freeing whatever memory the closure is embedded in. Trouble is, when remaining goes to 0 and we've got nothing more to run - we also have to unlock the closure, setting remaining to -1. If there's a destructor, that unlock isn't doing anything - nobody could be trying to lock it if we're about to free it - but if the unlock _is needed... that check for a destructor was racy. Argh. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/closure.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c index bd05a9a8c7cf..9aba2017f0d1 100644 --- a/drivers/md/bcache/closure.c +++ b/drivers/md/bcache/closure.c @@ -66,16 +66,18 @@ static inline void closure_put_after_sub(struct closure *cl, int flags) } else { struct closure *parent = cl->parent; struct closure_waitlist *wait = closure_waitlist(cl); + closure_fn *destructor = cl->fn; closure_debug_destroy(cl); + smp_mb(); atomic_set(&cl->remaining, -1); if (wait) closure_wake_up(wait); - if (cl->fn) - cl->fn(cl); + if (destructor) + destructor(cl); if (parent) closure_put(parent); From 3fcbc17636c83da9d85e2604de4af56c215e6e3c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 18:44:40 -0700 Subject: [PATCH 49/80] bcache: Advertise that flushes are supported commit 54d12f2b4fd0f218590d1490b41a18d0e2328a9a upstream. Whoops - bcache's flush/FUA was mostly correct, but flushes get filtered out unless we say we support them... Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/request.c | 8 +++++++- drivers/md/bcache/super.c | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index e5ff12e52d5b..2f36743ce708 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -489,6 +489,12 @@ static void bch_insert_data_loop(struct closure *cl) bch_queue_gc(op->c); } + /* + * Journal writes are marked REQ_FLUSH; if the original write was a + * flush, it'll wait on the journal write. + */ + bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA); + do { unsigned i; struct bkey *k; @@ -716,7 +722,7 @@ static struct search *search_alloc(struct bio *bio, struct bcache_device *d) s->task = current; s->orig_bio = bio; s->write = (bio->bi_rw & REQ_WRITE) != 0; - s->op.flush_journal = (bio->bi_rw & REQ_FLUSH) != 0; + s->op.flush_journal = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0; s->op.skip = (bio->bi_rw & REQ_DISCARD) != 0; s->recoverable = 1; s->start_time = jiffies; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index f88e2b653a3f..a7d9828b01d1 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -781,6 +781,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size) set_bit(QUEUE_FLAG_NONROT, &d->disk->queue->queue_flags); set_bit(QUEUE_FLAG_DISCARD, &d->disk->queue->queue_flags); + blk_queue_flush(q, REQ_FLUSH|REQ_FUA); + return 0; } From 63a53870bb23c2d991f378ae9137d775978d5ae9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 21:03:25 -0700 Subject: [PATCH 50/80] bcache: Shutdown fix commit 5caa52afc5abd1396e4af720469abb5843a71eb8 upstream. Stopping a cache set is supposed to make it stop attached backing devices, but somewhere along the way that code got lost. Fixing this mainly has the effect of fixing our reboot notifier. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/super.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index a7d9828b01d1..88113fe57fed 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1305,18 +1305,22 @@ static void cache_set_flush(struct closure *cl) static void __cache_set_unregister(struct closure *cl) { struct cache_set *c = container_of(cl, struct cache_set, caching); - struct cached_dev *dc, *t; + struct cached_dev *dc; size_t i; mutex_lock(&bch_register_lock); - if (test_bit(CACHE_SET_UNREGISTERING, &c->flags)) - list_for_each_entry_safe(dc, t, &c->cached_devs, list) - bch_cached_dev_detach(dc); - for (i = 0; i < c->nr_uuids; i++) - if (c->devices[i] && UUID_FLASH_ONLY(&c->uuids[i])) - bcache_device_stop(c->devices[i]); + if (c->devices[i]) { + if (!UUID_FLASH_ONLY(&c->uuids[i]) && + test_bit(CACHE_SET_UNREGISTERING, &c->flags)) { + dc = container_of(c->devices[i], + struct cached_dev, disk); + bch_cached_dev_detach(dc); + } else { + bcache_device_stop(c->devices[i]); + } + } mutex_unlock(&bch_register_lock); From fe1b710530e60c8fbb07a73ad27e9f65305ce06a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 21:25:02 -0700 Subject: [PATCH 51/80] bcache: Fix a sysfs splat on shutdown commit c9502ea4424b31728703d113fc6b30bfead14633 upstream. If we stopped a bcache device when we were already detaching (or something like that), bcache_device_unlink() would try to remove a symlink from sysfs that was already gone because the bcache dev kobject had already been removed from sysfs. So keep track of whether we've removed stuff from sysfs. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/super.c | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d3e15b42a4ab..c42b14b2304c 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -437,6 +437,7 @@ struct bcache_device { /* If nonzero, we're detaching/unregistering from cache set */ atomic_t detaching; + int flush_done; atomic_long_t sectors_dirty; unsigned long sectors_dirty_gc; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 88113fe57fed..b4713cea1913 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -704,7 +704,8 @@ static void bcache_device_detach(struct bcache_device *d) atomic_set(&d->detaching, 0); } - bcache_device_unlink(d); + if (!d->flush_done) + bcache_device_unlink(d); d->c->devices[d->id] = NULL; closure_put(&d->c->caching); @@ -1016,6 +1017,14 @@ static void cached_dev_flush(struct closure *cl) struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); struct bcache_device *d = &dc->disk; + mutex_lock(&bch_register_lock); + d->flush_done = 1; + + if (d->c) + bcache_device_unlink(d); + + mutex_unlock(&bch_register_lock); + bch_cache_accounting_destroy(&dc->accounting); kobject_del(&d->kobj); From 99e56bf5abce5131746397a2ce551fd92ca8f342 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 11 Jul 2013 19:43:21 -0700 Subject: [PATCH 52/80] bcache: Fix GC_SECTORS_USED() calculation commit 29ebf465b9050f241c4433a796a32e6c896a9dcd upstream. Part of the job of garbage collection is to add up however many sectors of live data it finds in each bucket, but that doesn't work very well if it doesn't reset GC_SECTORS_USED() when it starts. Whoops. This wouldn't have broken anything horribly, but allocation tries to preferentially reclaim buckets that are mostly empty and that's not gonna work with an incorrect GC_SECTORS_USED() value. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/btree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 7a5658f04e62..7b687a6f3dec 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1419,8 +1419,10 @@ static void btree_gc_start(struct cache_set *c) for_each_cache(ca, c, i) for_each_bucket(b, ca) { b->gc_gen = b->gen; - if (!atomic_read(&b->pin)) + if (!atomic_read(&b->pin)) { SET_GC_MARK(b, GC_MARK_RECLAIMABLE); + SET_GC_SECTORS_USED(b, 0); + } } for (d = c->devices; From 5e20e8b3713b8a5941891db2626c9eecd3c0888c Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 11 Jul 2013 22:42:14 -0700 Subject: [PATCH 53/80] bcache: Journal replay fix commit faa5673617656ee58369a3cfe4a312cfcdc59c81 upstream. The journal replay code starts by finding something that looks like a valid journal entry, then it does a binary search over the unchecked region of the journal for the journal entries with the highest sequence numbers. Trouble is, the logic was wrong - journal_read_bucket() returns true if it found journal entries we need, but if the range of journal entries we're looking for loops around the end of the journal - in that case journal_read_bucket() could return true when it hadn't found the highest sequence number we'd seen yet, and in that case the binary search did the wrong thing. Whoops. Signed-off-by: Kent Overstreet Signed-off-by: Greg Kroah-Hartman --- drivers/md/bcache/journal.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 8c8dfdcd9d4c..8a54d3b4f517 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -182,9 +182,14 @@ bsearch: pr_debug("starting binary search, l %u r %u", l, r); while (l + 1 < r) { - m = (l + r) >> 1; + seq = list_entry(list->prev, struct journal_replay, + list)->j.seq; - if (read_bucket(m)) + m = (l + r) >> 1; + read_bucket(m); + + if (seq != list_entry(list->prev, struct journal_replay, + list)->j.seq) l = m; else r = m; From f46ef77da1b61d05dc5f7cbb111df53d92b887a2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 19 Jul 2013 12:28:25 +0200 Subject: [PATCH 54/80] EDAC: Fix lockdep splat commit 88d84ac97378c2f1d5fec9af1e8b7d9a662d6b00 upstream. Fix the following: BUG: key ffff88043bdd0330 not in .data! ------------[ cut here ]------------ WARNING: at kernel/lockdep.c:2987 lockdep_init_map+0x565/0x5a0() DEBUG_LOCKS_WARN_ON(1) Modules linked in: glue_helper sb_edac(+) edac_core snd acpi_cpufreq lrw gf128mul ablk_helper iTCO_wdt evdev i2c_i801 dcdbas button cryptd pcspkr iTCO_vendor_support usb_common lpc_ich mfd_core soundcore mperf processor microcode CPU: 2 PID: 599 Comm: modprobe Not tainted 3.10.0 #1 Hardware name: Dell Inc. Precision T3600/0PTTT9, BIOS A08 01/24/2013 0000000000000009 ffff880439a1d920 ffffffff8160a9a9 ffff880439a1d958 ffffffff8103d9e0 ffff88043af4a510 ffffffff81a16e11 0000000000000000 ffff88043bdd0330 0000000000000000 ffff880439a1d9b8 ffffffff8103dacc Call Trace: dump_stack warn_slowpath_common warn_slowpath_fmt lockdep_init_map ? trace_hardirqs_on_caller ? trace_hardirqs_on debug_mutex_init __mutex_init bus_register edac_create_sysfs_mci_device edac_mc_add_mc sbridge_probe pci_device_probe driver_probe_device __driver_attach ? driver_probe_device bus_for_each_dev driver_attach bus_add_driver driver_register __pci_register_driver ? 0xffffffffa0010fff sbridge_init ? 0xffffffffa0010fff do_one_initcall load_module ? unset_module_init_ro_nx SyS_init_module tracesys ---[ end trace d24a70b0d3ddf733 ]--- EDAC MC0: Giving out device to 'sbridge_edac.c' 'Sandy Bridge Socket#0': DEV 0000:3f:0e.0 EDAC sbridge: Driver loaded. What happens is that bus_register needs a statically allocated lock_key because the last is handed in to lockdep. However, struct mem_ctl_info embeds struct bus_type (the whole struct, not a pointer to it) and the whole thing gets dynamically allocated. Fix this by using a statically allocated struct bus_type for the MC bus. Signed-off-by: Borislav Petkov Acked-by: Mauro Carvalho Chehab Cc: Markus Trippelsdorf Signed-off-by: Tony Luck Signed-off-by: Greg Kroah-Hartman --- drivers/edac/edac_mc.c | 9 +++++++++ drivers/edac/edac_mc_sysfs.c | 28 +++++++++++++++------------- drivers/edac/i5100_edac.c | 2 +- include/linux/edac.h | 7 ++++++- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 27e86d938262..89e109022d78 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -48,6 +48,8 @@ static LIST_HEAD(mc_devices); */ static void const *edac_mc_owner; +static struct bus_type mc_bus[EDAC_MAX_MCS]; + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -723,6 +725,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) int ret = -EINVAL; edac_dbg(0, "\n"); + if (mci->mc_idx >= EDAC_MAX_MCS) { + pr_warn_once("Too many memory controllers: %d\n", mci->mc_idx); + return -ENODEV; + } + #ifdef CONFIG_EDAC_DEBUG if (edac_debug_level >= 3) edac_mc_dump_mci(mci); @@ -762,6 +769,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) /* set load time so that error rate can be tracked */ mci->start_time = jiffies; + mci->bus = &mc_bus[mci->mc_idx]; + if (edac_create_sysfs_mci_device(mci)) { edac_mc_printk(mci, KERN_WARNING, "failed to create sysfs device\n"); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 67610a6ebf87..c4d700a577d2 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -370,7 +370,7 @@ static int edac_create_csrow_object(struct mem_ctl_info *mci, return -ENODEV; csrow->dev.type = &csrow_attr_type; - csrow->dev.bus = &mci->bus; + csrow->dev.bus = mci->bus; device_initialize(&csrow->dev); csrow->dev.parent = &mci->dev; csrow->mci = mci; @@ -605,7 +605,7 @@ static int edac_create_dimm_object(struct mem_ctl_info *mci, dimm->mci = mci; dimm->dev.type = &dimm_attr_type; - dimm->dev.bus = &mci->bus; + dimm->dev.bus = mci->bus; device_initialize(&dimm->dev); dimm->dev.parent = &mci->dev; @@ -975,11 +975,13 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) * The memory controller needs its own bus, in order to avoid * namespace conflicts at /sys/bus/edac. */ - mci->bus.name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); - if (!mci->bus.name) + mci->bus->name = kasprintf(GFP_KERNEL, "mc%d", mci->mc_idx); + if (!mci->bus->name) return -ENOMEM; - edac_dbg(0, "creating bus %s\n", mci->bus.name); - err = bus_register(&mci->bus); + + edac_dbg(0, "creating bus %s\n", mci->bus->name); + + err = bus_register(mci->bus); if (err < 0) return err; @@ -988,7 +990,7 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) device_initialize(&mci->dev); mci->dev.parent = mci_pdev; - mci->dev.bus = &mci->bus; + mci->dev.bus = mci->bus; dev_set_name(&mci->dev, "mc%d", mci->mc_idx); dev_set_drvdata(&mci->dev, mci); pm_runtime_forbid(&mci->dev); @@ -997,8 +999,8 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) err = device_add(&mci->dev); if (err < 0) { edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } @@ -1064,8 +1066,8 @@ fail: } fail2: device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); return err; } @@ -1098,8 +1100,8 @@ void edac_unregister_sysfs(struct mem_ctl_info *mci) { edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev)); device_unregister(&mci->dev); - bus_unregister(&mci->bus); - kfree(mci->bus.name); + bus_unregister(mci->bus); + kfree(mci->bus->name); } static void mc_attr_release(struct device *dev) diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index 1b635178cc44..157b934e8ce3 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -974,7 +974,7 @@ static int i5100_setup_debugfs(struct mem_ctl_info *mci) if (!i5100_debugfs) return -ENODEV; - priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs); + priv->debugfs = debugfs_create_dir(mci->bus->name, i5100_debugfs); if (!priv->debugfs) return -ENOMEM; diff --git a/include/linux/edac.h b/include/linux/edac.h index 0b763276f619..5c6d7fbaf89e 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -622,7 +622,7 @@ struct edac_raw_error_desc { */ struct mem_ctl_info { struct device dev; - struct bus_type bus; + struct bus_type *bus; struct list_head link; /* for global list of mem_ctl_info structs */ @@ -742,4 +742,9 @@ struct mem_ctl_info { #endif }; +/* + * Maximum number of memory controllers in the coherent fabric. + */ +#define EDAC_MAX_MCS 16 + #endif From 72ffadbae16297ac86e31c759450e0e395209903 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Sat, 29 Jun 2013 03:53:02 +0530 Subject: [PATCH 55/80] SCSI: mpt3sas: Infinite loops can occur if MPI2_IOCSTATUS_CONFIG_INVALID_PAGE is not returned commit 14be49ac965ebd3f8561d57e01ddb22f93f9b454 upstream. Infinite loop can occur if IOCStatus is not equal to MPI2_IOCSTATUS_CONFIG_INVALID_PAGE value in the while loops in functions _scsih_search_responding_sas_devices, _scsih_search_responding_raid_devices and _scsih_search_responding_expanders So, Instead of checking for MPI2_IOCSTATUS_CONFIG_INVALID_PAGE value, in this patch code is modified to check for IOCStatus not equals to MPI2_IOCSTATUS_SUCCESS to break the while loop. Signed-off-by: Sreekanth Reddy Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index dcbf7c880cb2..1fb5c6bcb786 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -6392,7 +6392,7 @@ _scsih_search_responding_sas_devices(struct MPT3SAS_ADAPTER *ioc) handle))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) + if (ioc_status != MPI2_IOCSTATUS_SUCCESS) break; handle = le16_to_cpu(sas_device_pg0.DevHandle); device_info = le32_to_cpu(sas_device_pg0.DeviceInfo); @@ -6494,7 +6494,7 @@ _scsih_search_responding_raid_devices(struct MPT3SAS_ADAPTER *ioc) &volume_pg1, MPI2_RAID_VOLUME_PGAD_FORM_GET_NEXT_HANDLE, handle))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) + if (ioc_status != MPI2_IOCSTATUS_SUCCESS) break; handle = le16_to_cpu(volume_pg1.DevHandle); @@ -6518,7 +6518,7 @@ _scsih_search_responding_raid_devices(struct MPT3SAS_ADAPTER *ioc) phys_disk_num))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) + if (ioc_status != MPI2_IOCSTATUS_SUCCESS) break; phys_disk_num = pd_pg0.PhysDiskNum; handle = le16_to_cpu(pd_pg0.DevHandle); @@ -6597,7 +6597,7 @@ _scsih_search_responding_expanders(struct MPT3SAS_ADAPTER *ioc) ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) + if (ioc_status != MPI2_IOCSTATUS_SUCCESS) break; handle = le16_to_cpu(expander_pg0.DevHandle); @@ -6742,8 +6742,6 @@ _scsih_scan_for_devices_after_reset(struct MPT3SAS_ADAPTER *ioc) MPI2_SAS_EXPAND_PGAD_FORM_GET_NEXT_HNDL, handle))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) - break; if (ioc_status != MPI2_IOCSTATUS_SUCCESS) { pr_info(MPT3SAS_FMT "\tbreak from expander scan: " \ "ioc_status(0x%04x), loginfo(0x%08x)\n", @@ -6787,8 +6785,6 @@ _scsih_scan_for_devices_after_reset(struct MPT3SAS_ADAPTER *ioc) phys_disk_num))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) - break; if (ioc_status != MPI2_IOCSTATUS_SUCCESS) { pr_info(MPT3SAS_FMT "\tbreak from phys disk scan: "\ "ioc_status(0x%04x), loginfo(0x%08x)\n", @@ -6854,8 +6850,6 @@ _scsih_scan_for_devices_after_reset(struct MPT3SAS_ADAPTER *ioc) &volume_pg1, MPI2_RAID_VOLUME_PGAD_FORM_GET_NEXT_HANDLE, handle))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) - break; if (ioc_status != MPI2_IOCSTATUS_SUCCESS) { pr_info(MPT3SAS_FMT "\tbreak from volume scan: " \ "ioc_status(0x%04x), loginfo(0x%08x)\n", @@ -6914,8 +6908,6 @@ _scsih_scan_for_devices_after_reset(struct MPT3SAS_ADAPTER *ioc) handle))) { ioc_status = le16_to_cpu(mpi_reply.IOCStatus) & MPI2_IOCSTATUS_MASK; - if (ioc_status == MPI2_IOCSTATUS_CONFIG_INVALID_PAGE) - break; if (ioc_status != MPI2_IOCSTATUS_SUCCESS) { pr_info(MPT3SAS_FMT "\tbreak from end device scan:"\ " ioc_status(0x%04x), loginfo(0x%08x)\n", From e90c1dc2c6298f7d369804aa7873d1c4c434f9d3 Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Sat, 29 Jun 2013 03:52:03 +0530 Subject: [PATCH 56/80] SCSI: mpt3sas: fix for kernel panic when driver loads with HBA conected to non LUN 0 configured expander commit b65cfedf4560af65305bd7b3b9f26c02c6fb3660 upstream. With some enclosures when LUN 0 is not created but LUN 1 or LUN X is created then SCSI scan procedure calls target_alloc, slave_alloc call back functions for LUN 0 and slave_destory() for same LUN 0. In these kind of cases within slave_destroy, pointer to scsi_target in _sas_device structure is set to NULL, following which when slave_alloc for LUN 1 is called then starget would not be set properly for this LUN. So, scsi_target pointer pointing to NULL value would lead to a crash later in the discovery procedure. To solve this issue set the sas_device's scsi_target pointer to scsi_device's scsi_target if it is NULL earlier in slave_alloc callback function. Signed-off-by: Sreekanth Reddy Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 1fb5c6bcb786..f8c4b8564251 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1273,6 +1273,7 @@ _scsih_slave_alloc(struct scsi_device *sdev) struct MPT3SAS_DEVICE *sas_device_priv_data; struct scsi_target *starget; struct _raid_device *raid_device; + struct _sas_device *sas_device; unsigned long flags; sas_device_priv_data = kzalloc(sizeof(struct scsi_device), GFP_KERNEL); @@ -1301,6 +1302,19 @@ _scsih_slave_alloc(struct scsi_device *sdev) spin_unlock_irqrestore(&ioc->raid_device_lock, flags); } + if (!(sas_target_priv_data->flags & MPT_TARGET_FLAGS_VOLUME)) { + spin_lock_irqsave(&ioc->sas_device_lock, flags); + sas_device = mpt3sas_scsih_sas_device_find_by_sas_address(ioc, + sas_target_priv_data->sas_address); + if (sas_device && (sas_device->starget == NULL)) { + sdev_printk(KERN_INFO, sdev, + "%s : sas_device->starget set to starget @ %d\n", + __func__, __LINE__); + sas_device->starget = starget; + } + spin_unlock_irqrestore(&ioc->sas_device_lock, flags); + } + return 0; } From 4ac697c53bf856ef1c8d32f8fd903987e0494311 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 21 Nov 2012 09:54:48 +0100 Subject: [PATCH 57/80] SCSI: megaraid_sas: fix memory leak if SGL has zero length entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7a6a731bd00ca90d0e250867c3b9c05b5ff0fa49 upstream. commit 98cb7e44 ([SCSI] megaraid_sas: Sanity check user supplied length before passing it to dma_alloc_coherent()) introduced a memory leak. Memory allocated for entries following zero length SGL entries will not be freed. Reference: http://bugs.debian.org/688198 Signed-off-by: Bjørn Mork Acked-by: Adam Radford Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/megaraid/megaraid_sas_base.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 3a9ddae86f1f..89178b833b52 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -4852,10 +4852,12 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance, sense, sense_handle); } - for (i = 0; i < ioc->sge_count && kbuff_arr[i]; i++) { - dma_free_coherent(&instance->pdev->dev, - kern_sge32[i].length, - kbuff_arr[i], kern_sge32[i].phys_addr); + for (i = 0; i < ioc->sge_count; i++) { + if (kbuff_arr[i]) + dma_free_coherent(&instance->pdev->dev, + kern_sge32[i].length, + kbuff_arr[i], + kern_sge32[i].phys_addr); } megasas_return_cmd(instance, cmd); From 3c19c4f9b4eeabe75eddbec1a0e8599c940e8ec8 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Tue, 11 Jun 2013 08:49:50 +0000 Subject: [PATCH 58/80] lib/Kconfig.debug: Restrict FRAME_POINTER for MIPS commit 25c87eae1725ed77a8b44d782a86abdc279b4ede upstream. FAULT_INJECTION_STACKTRACE_FILTER selects FRAME_POINTER but that symbol is not available for MIPS. Fixes the following problem on a randconfig: warning: (LOCKDEP && FAULT_INJECTION_STACKTRACE_FILTER && LATENCYTOP && KMEMCHECK) selects FRAME_POINTER which has unmet direct dependencies (DEBUG_KERNEL && (CRIS || M68K || FRV || UML || AVR32 || SUPERH || BLACKFIN || MN10300 || METAG) || ARCH_WANT_FRAME_POINTERS) Signed-off-by: Markos Chandras Acked-by: Steven J. Hill Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/5441/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 566cf2bc08ea..74fdc5cf4adc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1272,7 +1272,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND help Provide stacktrace filter for fault-injection capabilities From d547d18d1920026dd08d9c19e8da403cbea62319 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Sat, 29 Jun 2013 15:33:35 +0200 Subject: [PATCH 59/80] usb: serial: option: blacklist ONDA MT689DC QMI interface commit 3d1a69e726406ab662ab88fa30a3a05ed404334d upstream. Prevent the option driver from binding itself to the QMI/WWAN interface, making it unusable by the proper driver. Signed-off-by: enrico Mioso Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 5dd857de05b0..1fc01efca909 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -817,7 +817,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0017, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0018, 0xff, 0xff, 0xff) }, - { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0019, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&net_intf3_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0020, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0021, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, From 6fd55cb457cbcc24d8283ae87c9345c72645c93f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Fri, 28 Jun 2013 17:15:25 +0200 Subject: [PATCH 60/80] usb: option: add TP-LINK MA260 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 94190301ffa059c2d127b3a67ec5d161d5c62681 upstream. Signed-off-by: Bjørn Mork Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1fc01efca909..62c87efe703b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1333,6 +1333,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T) }, { USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, + { USB_DEVICE(TPLINK_VENDOR_ID, 0x9000), /* TP-Link MA260 */ + .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(CHANGHONG_VENDOR_ID, CHANGHONG_PRODUCT_CH690) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x02, 0x01) }, /* D-Link DWM-156 (variant) */ { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d01, 0xff, 0x00, 0x00) }, /* D-Link DWM-156 (variant) */ From 177f82a9d23d9ba1a7e8179cca296fd17ebfd9d0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 10 Jul 2013 12:25:02 -0500 Subject: [PATCH 61/80] usb: serial: option: add Olivetti Olicard 200 commit 4cf76df06ecc852633ed927d91e01c83c33bc331 upstream. Speaks AT on interfaces 5 (command & PPP) and 3 (secondary), other interface protocols are unknown. Signed-off-by: Dan Williams Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 62c87efe703b..d68f89eda805 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -341,6 +341,7 @@ static void option_instat_callback(struct urb *urb); #define OLIVETTI_VENDOR_ID 0x0b3c #define OLIVETTI_PRODUCT_OLICARD100 0xc000 #define OLIVETTI_PRODUCT_OLICARD145 0xc003 +#define OLIVETTI_PRODUCT_OLICARD200 0xc005 /* Celot products */ #define CELOT_VENDOR_ID 0x211f @@ -1257,6 +1258,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD100) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) }, + { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200) }, { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */ { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */ { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/ From 5ac2d863c312ec42bb6778988577ada67f09ba85 Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Sat, 13 Jul 2013 18:54:14 +0200 Subject: [PATCH 62/80] usb: serial: option.c: remove ONDA MT825UP product ID fromdriver commit 878c69aae986ae97084458c0183a8c0a059865b1 upstream. Some (very few) early devices like mine, where not exposting a proper CDC descriptor. This was fixed with an immediate firmware update from the vendor, and pre-installed on newer devices. So actual devices can be driven by cdc_acm.c + cdc_ether.c. Signed-off-by: Enrico Mioso Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index d68f89eda805..b2eba3edf34d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -347,12 +347,6 @@ static void option_instat_callback(struct urb *urb); #define CELOT_VENDOR_ID 0x211f #define CELOT_PRODUCT_CT680M 0x6801 -/* ONDA Communication vendor id */ -#define ONDA_VENDOR_ID 0x1ee8 - -/* ONDA MT825UP HSDPA 14.2 modem */ -#define ONDA_MT825UP 0x000b - /* Samsung products */ #define SAMSUNG_VENDOR_ID 0x04e8 #define SAMSUNG_PRODUCT_GT_B3730 0x6889 @@ -1260,7 +1254,6 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD145) }, { USB_DEVICE(OLIVETTI_VENDOR_ID, OLIVETTI_PRODUCT_OLICARD200) }, { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */ - { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */ { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/ { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) }, { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) }, From 490217ce2c9ab2318d9a4613547943d98706034b Mon Sep 17 00:00:00 2001 From: Daniil Bolsun Date: Fri, 19 Jul 2013 10:21:23 +0300 Subject: [PATCH 63/80] USB: option: append Petatel NP10T device to GSM modems list commit c38e83b6cc2adf80e3f091fd92cfbeacc9748347 upstream. This patch was tested on 3.10.1 kernel. Same models of Petatel NP10T modems have different device IDs. Unfortunately they have no additional revision information on a board which may treat them as different devices. Currently I've seen only two NP10T devices with various IDs. Possibly Petatel NP10T list will be appended upon devices with new IDs will appear. Signed-off-by: Daniil Bolsun Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index b2eba3edf34d..884c1ea1d3a4 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -439,7 +439,8 @@ static void option_instat_callback(struct urb *urb); /* Hyundai Petatel Inc. products */ #define PETATEL_VENDOR_ID 0x1ff4 -#define PETATEL_PRODUCT_NP10T 0x600e +#define PETATEL_PRODUCT_NP10T_600A 0x600a +#define PETATEL_PRODUCT_NP10T_600E 0x600e /* TP-LINK Incorporated products */ #define TPLINK_VENDOR_ID 0x2357 @@ -1325,7 +1326,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(MEDIATEK_VENDOR_ID, MEDIATEK_PRODUCT_DC_4COM2, 0xff, 0x00, 0x00) }, { USB_DEVICE(CELLIENT_VENDOR_ID, CELLIENT_PRODUCT_MEN200) }, - { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T) }, + { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600A) }, + { USB_DEVICE(PETATEL_VENDOR_ID, PETATEL_PRODUCT_NP10T_600E) }, { USB_DEVICE(TPLINK_VENDOR_ID, TPLINK_PRODUCT_MA180), .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE(TPLINK_VENDOR_ID, 0x9000), /* TP-Link MA260 */ From 9cca0118ba973e04573cb3221c246422e917ebe6 Mon Sep 17 00:00:00 2001 From: "Alexandr \\\\\\\"Sky\\\\\\\" Ivanov" Date: Tue, 23 Jul 2013 17:46:40 +0400 Subject: [PATCH 64/80] USB: option: add D-Link DWM-152/C1 and DWM-156/C1 commit ca24763588844b14f019ffc45c7df6d9e8f932c5 upstream. Adding support for D-Link DWM-152/C1 and DWM-156/C1 devices. DWM-152/C1: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=07d1 ProdID=3e01 Rev= 0.00 S: Product=USB Configuration S: SerialNumber=1234567890ABCDEF C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms DWM-156/C1: T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 8 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=07d1 ProdID=3e02 Rev= 0.00 S: Product=DataCard Device S: SerialNumber=1234567890ABCDEF C:* #Ifs= 5 Cfg#= 1 Atr=e0 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=81(I) Atr=03(Int.) MxPS= 64 Ivl=2ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=4ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms Signed-off-by: Alexandr Ivanov Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 884c1ea1d3a4..254e2d0498e9 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1339,6 +1339,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d02, 0xff, 0x00, 0x00) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x02, 0x01) }, { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x7d03, 0xff, 0x00, 0x00) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); From bb2177cd04ac81f2922cdd614f79b586556f92ad Mon Sep 17 00:00:00 2001 From: Enrico Mioso Date: Thu, 25 Jul 2013 02:01:39 +0200 Subject: [PATCH 65/80] usb: serial: option: Add ONYX 3G device support commit 63b5df963f52ccbab6fabedf05b7ac6b465789a4 upstream. This patch adds support for the ONYX 3G device (version 1) from ALFA NETWORK. Signed-off-by: Enrico Mioso Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 254e2d0498e9..1cf6f125f5f0 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -778,6 +778,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC650) }, { USB_DEVICE(KYOCERA_VENDOR_ID, KYOCERA_PRODUCT_KPC680) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6280) }, /* BP3-USB & BP3-EXT HSDPA */ { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6008) }, From ee2f769fb4a5a317b1558d0bbcccb89b8c2697a6 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Wed, 24 Jul 2013 13:23:51 +0900 Subject: [PATCH 66/80] ARM: S3C24XX: Add missing clkdev entries for s3c2440 UART commit d817468c4b2892b9468e2a0c92116e38a3a61370 upstream. This patch restores serial port operation which has been broken since commit 60e93575476f ("serial: samsung: enable clock before clearing pending interrupts during init") That commit only uncovered the real issue which was missing clkdev entries for the "uart" clocks on S3C2440. It went unnoticed so far because return value of clk API calls were not being checked at all in the samsung serial port driver. This patch should be backported to at least 3.10 stable kernel, since the serial port has not been working on s3c2440 since 3.10-rc5. Signed-off-by: Sylwester Nawrocki Cc: Chander Kashyap [on S3C2440 SoC based Mini2440 board] Tested-by: Sylwester Nawrocki Reviewed-by: Tomasz Figa Tested-by: Juergen Beisert Signed-off-by: Kukjin Kim Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-s3c24xx/clock-s3c2410.c | 161 +++++++++++++-------- arch/arm/mach-s3c24xx/clock-s3c2440.c | 3 + arch/arm/plat-samsung/include/plat/clock.h | 5 + 3 files changed, 106 insertions(+), 63 deletions(-) diff --git a/arch/arm/mach-s3c24xx/clock-s3c2410.c b/arch/arm/mach-s3c24xx/clock-s3c2410.c index 34fffdf6fc1d..564553694b54 100644 --- a/arch/arm/mach-s3c24xx/clock-s3c2410.c +++ b/arch/arm/mach-s3c24xx/clock-s3c2410.c @@ -119,66 +119,101 @@ static struct clk init_clocks_off[] = { } }; -static struct clk init_clocks[] = { - { - .name = "lcd", - .parent = &clk_h, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_LCDC, - }, { - .name = "gpio", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_GPIO, - }, { - .name = "usb-host", - .parent = &clk_h, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_USBH, - }, { - .name = "usb-device", - .parent = &clk_h, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_USBD, - }, { - .name = "timers", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_PWMT, - }, { - .name = "uart", - .devname = "s3c2410-uart.0", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_UART0, - }, { - .name = "uart", - .devname = "s3c2410-uart.1", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_UART1, - }, { - .name = "uart", - .devname = "s3c2410-uart.2", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_UART2, - }, { - .name = "rtc", - .parent = &clk_p, - .enable = s3c2410_clkcon_enable, - .ctrlbit = S3C2410_CLKCON_RTC, - }, { - .name = "watchdog", - .parent = &clk_p, - .ctrlbit = 0, - }, { - .name = "usb-bus-host", - .parent = &clk_usb_bus, - }, { - .name = "usb-bus-gadget", - .parent = &clk_usb_bus, - }, +static struct clk clk_lcd = { + .name = "lcd", + .parent = &clk_h, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_LCDC, +}; + +static struct clk clk_gpio = { + .name = "gpio", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_GPIO, +}; + +static struct clk clk_usb_host = { + .name = "usb-host", + .parent = &clk_h, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_USBH, +}; + +static struct clk clk_usb_device = { + .name = "usb-device", + .parent = &clk_h, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_USBD, +}; + +static struct clk clk_timers = { + .name = "timers", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_PWMT, +}; + +struct clk s3c24xx_clk_uart0 = { + .name = "uart", + .devname = "s3c2410-uart.0", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_UART0, +}; + +struct clk s3c24xx_clk_uart1 = { + .name = "uart", + .devname = "s3c2410-uart.1", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_UART1, +}; + +struct clk s3c24xx_clk_uart2 = { + .name = "uart", + .devname = "s3c2410-uart.2", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_UART2, +}; + +static struct clk clk_rtc = { + .name = "rtc", + .parent = &clk_p, + .enable = s3c2410_clkcon_enable, + .ctrlbit = S3C2410_CLKCON_RTC, +}; + +static struct clk clk_watchdog = { + .name = "watchdog", + .parent = &clk_p, + .ctrlbit = 0, +}; + +static struct clk clk_usb_bus_host = { + .name = "usb-bus-host", + .parent = &clk_usb_bus, +}; + +static struct clk clk_usb_bus_gadget = { + .name = "usb-bus-gadget", + .parent = &clk_usb_bus, +}; + +static struct clk *init_clocks[] = { + &clk_lcd, + &clk_gpio, + &clk_usb_host, + &clk_usb_device, + &clk_timers, + &s3c24xx_clk_uart0, + &s3c24xx_clk_uart1, + &s3c24xx_clk_uart2, + &clk_rtc, + &clk_watchdog, + &clk_usb_bus_host, + &clk_usb_bus_gadget, }; /* s3c2410_baseclk_add() @@ -195,7 +230,6 @@ int __init s3c2410_baseclk_add(void) { unsigned long clkslow = __raw_readl(S3C2410_CLKSLOW); unsigned long clkcon = __raw_readl(S3C2410_CLKCON); - struct clk *clkp; struct clk *xtal; int ret; int ptr; @@ -207,8 +241,9 @@ int __init s3c2410_baseclk_add(void) /* register clocks from clock array */ - clkp = init_clocks; - for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++, clkp++) { + for (ptr = 0; ptr < ARRAY_SIZE(init_clocks); ptr++) { + struct clk *clkp = init_clocks[ptr]; + /* ensure that we note the clock state */ clkp->usage = clkcon & clkp->ctrlbit ? 1 : 0; diff --git a/arch/arm/mach-s3c24xx/clock-s3c2440.c b/arch/arm/mach-s3c24xx/clock-s3c2440.c index 1069b5680826..aaf006d1d6dc 100644 --- a/arch/arm/mach-s3c24xx/clock-s3c2440.c +++ b/arch/arm/mach-s3c24xx/clock-s3c2440.c @@ -166,6 +166,9 @@ static struct clk_lookup s3c2440_clk_lookup[] = { CLKDEV_INIT(NULL, "clk_uart_baud1", &s3c24xx_uclk), CLKDEV_INIT(NULL, "clk_uart_baud2", &clk_p), CLKDEV_INIT(NULL, "clk_uart_baud3", &s3c2440_clk_fclk_n), + CLKDEV_INIT("s3c2440-uart.0", "uart", &s3c24xx_clk_uart0), + CLKDEV_INIT("s3c2440-uart.1", "uart", &s3c24xx_clk_uart1), + CLKDEV_INIT("s3c2440-uart.2", "uart", &s3c24xx_clk_uart2), CLKDEV_INIT("s3c2440-camif", "camera", &s3c2440_clk_cam_upll), }; diff --git a/arch/arm/plat-samsung/include/plat/clock.h b/arch/arm/plat-samsung/include/plat/clock.h index a62753dc15ba..df45d6edc98d 100644 --- a/arch/arm/plat-samsung/include/plat/clock.h +++ b/arch/arm/plat-samsung/include/plat/clock.h @@ -83,6 +83,11 @@ extern struct clk clk_ext; extern struct clksrc_clk clk_epllref; extern struct clksrc_clk clk_esysclk; +/* S3C24XX UART clocks */ +extern struct clk s3c24xx_clk_uart0; +extern struct clk s3c24xx_clk_uart1; +extern struct clk s3c24xx_clk_uart2; + /* S3C64XX specific clocks */ extern struct clk clk_h2; extern struct clk clk_27m; From 1b1eeeb02ec556c6a31a8383f86f1a7bf5419111 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Thu, 27 Jun 2013 22:42:36 -0400 Subject: [PATCH 67/80] ARM: footbridge: fix overlapping PCI mappings commit 6287e7319870ec949fb809e4eb4154c2b05b221f upstream. Commit 8ef6e6201b26cb9fde79c1baa08145af6aca2815 (ARM: footbridge: use fixed PCI i/o mapping) broke booting on my netwinder. Before that, everything boots fine. Since then, it crashes on boot. With earlyprintk, I see it BUG-ing like so: kernel BUG at lib/ioremap.c:27! Internal error: Oops - BUG: 0 [#1] ARM ... [] (ioremap_page_range+0x128/0x154) from [] (dc21285_setup+0xd0/0x114) [] (dc21285_setup+0xd0/0x114) from [] (pci_common_init+0xa0/0x298) [] (pci_common_init+0xa0/0x298) from [] (netwinder_pci_init+0xc/0x18) [] (netwinder_pci_init+0xc/0x18) from [] (do_one_initcall+0xb4/0x180) ... Russell points out it's because of overlapping PCI mappings that was added with the aforementioned commit. Rob thought the code would re-use the static mapping, but that turns out to not be the case and instead hits the BUG further down. After deleting this hunk as suggested by Russel, the system boots up fine again and all my PCI devices work (IDE, ethernet, the DC21285). Signed-off-by: Mike Frysinger Acked-by: Rob Herring Signed-off-by: Olof Johansson Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-footbridge/dc21285.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm/mach-footbridge/dc21285.c b/arch/arm/mach-footbridge/dc21285.c index a7cd2cf5e08d..3490a24f969e 100644 --- a/arch/arm/mach-footbridge/dc21285.c +++ b/arch/arm/mach-footbridge/dc21285.c @@ -276,8 +276,6 @@ int __init dc21285_setup(int nr, struct pci_sys_data *sys) sys->mem_offset = DC21285_PCI_MEM; - pci_ioremap_io(0, DC21285_PCI_IO); - pci_add_resource_offset(&sys->resources, &res[0], sys->mem_offset); pci_add_resource_offset(&sys->resources, &res[1], sys->mem_offset); From 29ab402957ba9f329b901c69cb4179fafbfd0cf4 Mon Sep 17 00:00:00 2001 From: Luiz Angelo Daros de Luca Date: Mon, 1 Jul 2013 23:56:25 -0300 Subject: [PATCH 68/80] usb: serial: cp210x: Add USB ID for Netgear Switches embedded serial adapter commit 90625070c4253377025878c4e82feed8b35c7116 upstream. This adds NetGear Managed Switch M4100 series, M5300 series, M7100 series USB ID (0846:0110) to the cp210x driver. Without this, the serial adapter is not recognized in Linux. Description was obtained from an Netgear Eng. Signed-off-by: Luiz Angelo Daros de Luca Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 2c659553c07c..de63ed19a806 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -53,6 +53,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0489, 0xE003) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */ + { USB_DEVICE(0x0846, 0x1100) }, /* NetGear Managed Switch M4100 series, M5300 series, M7100 series */ { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */ { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */ { USB_DEVICE(0x0BED, 0x1100) }, /* MEI (TM) Cashflow-SC Bill/Voucher Acceptor */ From 35976473f0c19ec3e3b88b0d2ba312def35536a7 Mon Sep 17 00:00:00 2001 From: Sami Rahman Date: Mon, 8 Jul 2013 14:28:55 -0400 Subject: [PATCH 69/80] USB: cp210x: add MMB and PI ZigBee USB Device Support commit 7681156982026ebf7eafd7301eb0374d7648d068 upstream. Added support for MMB Networks and Planet Innovation Ingeni ZigBee USB devices using customized Silicon Labs' CP210x.c USB to UART bridge drivers with PIDs: 88A4, 88A5. Signed-off-by: Sami Rahman Tested-by: Sami Rahman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index de63ed19a806..dcaa724b4be9 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -119,6 +119,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */ { USB_DEVICE(0x10C4, 0x8664) }, /* AC-Services CAN-IF */ { USB_DEVICE(0x10C4, 0x8665) }, /* AC-Services OBD-IF */ + { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ + { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */ { USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */ From 95405bdbff81ce7f491145b2ea0d4091c71c2af7 Mon Sep 17 00:00:00 2001 From: Barry Grussling Date: Fri, 19 Jul 2013 14:46:12 -0700 Subject: [PATCH 70/80] usb: cp210x support SEL C662 Vendor/Device commit b579fa52f6be0b4157ca9cc5e94d44a2c89a7e95 upstream. This patch adds support for the Schweitzer Engineering Laboratories C662 USB cable based off the CP210x driver. Signed-off-by: Barry Grussling Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index dcaa724b4be9..c90d960e091b 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -151,6 +151,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */ + { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ { USB_DEVICE(0x1BE3, 0x07A6) }, /* WAGO 750-923 USB Service Cable */ { USB_DEVICE(0x1E29, 0x0102) }, /* Festo CPX-USB */ { USB_DEVICE(0x1E29, 0x0501) }, /* Festo CMSP */ From c07ae685f761d7b6850f502a34964005428c181a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 15 Jul 2013 00:09:19 -0400 Subject: [PATCH 71/80] ext4: fix error handling in ext4_ext_truncate() commit 8acd5e9b1217e58a57124d9e225afa12efeae20d upstream. Previously ext4_ext_truncate() was ignoring potential error returns from ext4_es_remove_extent() and ext4_ext_remove_space(). This can lead to the on-diks extent tree and the extent status tree cache getting out of sync, which is particuarlly bad, and can lead to file system corruption and potential data loss. Signed-off-by: "Theodore Ts'o" Signed-off-by: Greg Kroah-Hartman --- fs/ext4/extents.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e49da585739f..fddf3d957004 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4386,9 +4386,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); +retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); + if (err == ENOMEM) { + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + if (err) { + ext4_std_error(inode->i_sb, err); + return; + } err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + ext4_std_error(inode->i_sb, err); } static void ext4_falloc_update_inode(struct inode *inode, From 7b4fc5f531f64ce52e5f421c9bc285ccf30ccac8 Mon Sep 17 00:00:00 2001 From: Liu ShuoX Date: Thu, 11 Jul 2013 16:03:45 +0800 Subject: [PATCH 72/80] PM / Sleep: avoid 'autosleep' in shutdown progress commit e5248a111bf4048a9f3fab1a9c94c4630a10592a upstream. Prevent automatic system suspend from happening during system shutdown by making try_to_suspend() check system_state and return immediately if it is not SYSTEM_RUNNING. This prevents the following breakage from happening (scenario from Zhang Yanmin): Kernel starts shutdown and calls all device driver's shutdown callback. When a driver's shutdown is called, the last wakelock is released and suspend-to-ram starts. However, as some driver's shut down callbacks already shut down devices and disabled runtime pm, the suspend-to-ram calls driver's suspend callback without noticing that device is already off and causes crash. [rjw: Changelog] Signed-off-by: Liu ShuoX Signed-off-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- kernel/power/autosleep.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/power/autosleep.c b/kernel/power/autosleep.c index c6422ffeda9a..9012ecf7b814 100644 --- a/kernel/power/autosleep.c +++ b/kernel/power/autosleep.c @@ -32,7 +32,8 @@ static void try_to_suspend(struct work_struct *work) mutex_lock(&autosleep_lock); - if (!pm_save_wakeup_count(initial_count)) { + if (!pm_save_wakeup_count(initial_count) || + system_state != SYSTEM_RUNNING) { mutex_unlock(&autosleep_lock); goto out; } From b534f8f9f56d5e800d4529f54e38be960fd064a2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 11 Jul 2013 18:00:59 +0200 Subject: [PATCH 73/80] media: saa7134: Fix unlocked snd_pcm_stop() call commit e6355ad7b1c6f70e2f48ae159f5658b441ccff95 upstream. snd_pcm_stop() must be called in the PCM substream lock context. Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/saa7134/saa7134-alsa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/media/pci/saa7134/saa7134-alsa.c b/drivers/media/pci/saa7134/saa7134-alsa.c index 10460fd3ce39..dbcdfbf8aed0 100644 --- a/drivers/media/pci/saa7134/saa7134-alsa.c +++ b/drivers/media/pci/saa7134/saa7134-alsa.c @@ -172,7 +172,9 @@ static void saa7134_irq_alsa_done(struct saa7134_dev *dev, dprintk("irq: overrun [full=%d/%d] - Blocks in %d\n",dev->dmasound.read_count, dev->dmasound.bufsize, dev->dmasound.blocks); spin_unlock(&dev->slock); + snd_pcm_stream_lock(dev->dmasound.substream); snd_pcm_stop(dev->dmasound.substream,SNDRV_PCM_STATE_XRUN); + snd_pcm_stream_unlock(dev->dmasound.substream); return; } From 61eeaa1b66e47443f27f4cf0809940fbe08142f8 Mon Sep 17 00:00:00 2001 From: Soeren Moch Date: Wed, 5 Jun 2013 21:26:23 -0300 Subject: [PATCH 74/80] media: dmxdev: remove dvb_ringbuffer_flush() on writer side commit 414abbd2cd4c2618895f02ed3a76ec6647281436 upstream. In dvb_ringbuffer lock-less synchronizationof reader and writer threads is done with separateread and write pointers. Sincedvb_ringbuffer_flush() modifies the read pointer, this function must not be called from the writer thread. This patch removes the dvb_ringbuffer_flush() calls in the dmxdev ringbuffer write functions, this fixes Oopses "Unable to handle kernel paging request" I could observe for the call chaindvb_demux_read ->dvb_dmxdev_buffer_read -> dvb_ringbuffer_read_user -> __copy_to_user (the reader side of the ringbuffer). The flush calls at the write side are not necessary anyway since ringbuffer_flush is also called in dvb_dmxdev_buffer_read() when an error condition is set in the ringbuffer. This patch should also be applied to stable kernels. Signed-off-by: Soeren Moch Reviewed-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/dvb-core/dmxdev.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index a1a3a5159d71..0b4616b87195 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -377,10 +377,8 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, ret = dvb_dmxdev_buffer_write(&dmxdevfilter->buffer, buffer2, buffer2_len); } - if (ret < 0) { - dvb_ringbuffer_flush(&dmxdevfilter->buffer); + if (ret < 0) dmxdevfilter->buffer.error = ret; - } if (dmxdevfilter->params.sec.flags & DMX_ONESHOT) dmxdevfilter->state = DMXDEV_STATE_DONE; spin_unlock(&dmxdevfilter->dev->lock); @@ -416,10 +414,8 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, ret = dvb_dmxdev_buffer_write(buffer, buffer1, buffer1_len); if (ret == buffer1_len) ret = dvb_dmxdev_buffer_write(buffer, buffer2, buffer2_len); - if (ret < 0) { - dvb_ringbuffer_flush(buffer); + if (ret < 0) buffer->error = ret; - } spin_unlock(&dmxdevfilter->dev->lock); wake_up(&buffer->queue); return 0; From c4d98e2535e0269fcd513cbb79e937ef3ccc3d01 Mon Sep 17 00:00:00 2001 From: David Jeffery Date: Wed, 10 Jul 2013 13:19:50 -0400 Subject: [PATCH 75/80] lockd: protect nlm_blocked access in nlmsvc_retry_blocked commit 1c327d962fc420aea046c16215a552710bde8231 upstream. In nlmsvc_retry_blocked, the check that the list is non-empty and acquiring the pointer of the first entry is unprotected by any lock. This allows a rare race condition when there is only one entry on the list. A function such as nlmsvc_grant_callback() can be called, which will temporarily remove the entry from the list. Between the list_empty() and list_entry(),the list may become empty, causing an invalid pointer to be used as an nlm_block, leading to a possible crash. This patch adds the nlm_block_lock around these calls to prevent concurrent use of the nlm_blocked list. This was a regression introduced by f904be9cc77f361d37d71468b13ff3d1a1823dea "lockd: Mostly remove BKL from the server". Signed-off-by: David Jeffery Cc: Bryan Schumaker Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/lockd/svclock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e703318c41df..8ebd3f551e0c 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -939,6 +939,7 @@ nlmsvc_retry_blocked(void) unsigned long timeout = MAX_SCHEDULE_TIMEOUT; struct nlm_block *block; + spin_lock(&nlm_blocked_lock); while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { block = list_entry(nlm_blocked.next, struct nlm_block, b_list); @@ -948,6 +949,7 @@ nlmsvc_retry_blocked(void) timeout = block->b_when - jiffies; break; } + spin_unlock(&nlm_blocked_lock); dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", block, block->b_when); @@ -957,7 +959,9 @@ nlmsvc_retry_blocked(void) retry_deferred_block(block); } else nlmsvc_grant_blocked(block); + spin_lock(&nlm_blocked_lock); } + spin_unlock(&nlm_blocked_lock); return timeout; } From 2d06fa0f825ec1d7943a3bcceecbd85aa82ef27d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 5 Jul 2013 12:09:18 +0200 Subject: [PATCH 76/80] hrtimers: Move SMP function call to thread context commit 5ec2481b7b47a4005bb446d176e5d0257400c77d upstream. smp_call_function_* must not be called from softirq context. But clock_was_set() which calls on_each_cpu() is called from softirq context to implement a delayed clock_was_set() for the timer interrupt handler. Though that almost never gets invoked. A recent change in the resume code uses the softirq based delayed clock_was_set to support Xens resume mechanism. linux-next contains a new warning which warns if smp_call_function_* is called from softirq context which gets triggered by that Xen change. Fix this by moving the delayed clock_was_set() call to a work context. Reported-and-tested-by: Artem Savkov Reported-by: Sasha Levin Cc: David Vrabel Cc: Ingo Molnar Cc: H. Peter Anvin , Cc: Konrad Wilk Cc: John Stultz Cc: xen-devel@lists.xen.org Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- kernel/hrtimer.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index fd4b13b131f8..2288fbdada16 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -721,17 +721,20 @@ static int hrtimer_switch_to_hres(void) return 1; } +static void clock_was_set_work(struct work_struct *work) +{ + clock_was_set(); +} + +static DECLARE_WORK(hrtimer_work, clock_was_set_work); + /* - * Called from timekeeping code to reprogramm the hrtimer interrupt - * device. If called from the timer interrupt context we defer it to - * softirq context. + * Called from timekeeping and resume code to reprogramm the hrtimer + * interrupt device on all cpus. */ void clock_was_set_delayed(void) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - cpu_base->clock_was_set = 1; - __raise_softirq_irqoff(HRTIMER_SOFTIRQ); + schedule_work(&hrtimer_work); } #else @@ -780,8 +783,10 @@ void hrtimers_resume(void) WARN_ONCE(!irqs_disabled(), KERN_INFO "hrtimers_resume() called with IRQs enabled!"); + /* Retrigger on the local CPU */ retrigger_next_event(NULL); - timerfd_clock_was_set(); + /* And schedule a retrigger for all others */ + clock_was_set_delayed(); } static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) @@ -1432,13 +1437,6 @@ void hrtimer_peek_ahead_timers(void) static void run_hrtimer_softirq(struct softirq_action *h) { - struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); - - if (cpu_base->clock_was_set) { - cpu_base->clock_was_set = 0; - clock_was_set(); - } - hrtimer_peek_ahead_timers(); } From 0d9230a39086b5f183fda1091eb078a67be01f05 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 19 Jul 2013 08:02:25 +0200 Subject: [PATCH 77/80] ALSA: hda - Remove NO_PRESENCE bit override for Dell 1420n Laptop commit f3e351eef3a7fd1e36a3e18d4f2f069b00deb23c upstream. The quirk for Dell laptops with STAC9228 overrides the pin default config of NID 0x0f to the value with AC_DEFCFG_MISC_NO_PRESENCE bit on. I'm not quite sure why this was done so, but can guess that this was introduced for avoiding this to be muted by another headphone plug. Now, after transition to the generic parser, this workaround rather causes a problem (notably as unexpected speaker mutes) because the pin is seen as if it's always plugged in. Since the generic parser can handle multiple headphone plugging gracefully, we can get rid of this override now. Reported-and-tested-by: Eric Shattow Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_sigmatel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 1d9d6427e0bf..486df8a867c8 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -3227,7 +3227,7 @@ static const struct hda_fixup stac927x_fixups[] = { /* configure the analog microphone on some laptops */ { 0x0c, 0x90a79130 }, /* correct the front output jack as a hp out */ - { 0x0f, 0x0227011f }, + { 0x0f, 0x0221101f }, /* correct the front input jack as a mic */ { 0x0e, 0x02a79130 }, {} From 5a0ecab6b7f55d7b2162e95c396ce778f2f2154c Mon Sep 17 00:00:00 2001 From: Eldad Zack Date: Fri, 19 Jul 2013 18:26:53 +0200 Subject: [PATCH 78/80] ALSA: usb-audio: 6fire: return correct XRUN indication commit be2f93a4c4981b3646b6f98f477154411b8516cb upstream. Return SNDRV_PCM_POS_XRUN (snd_pcm_uframes_t) instead of SNDRV_PCM_STATE_XRUN (snd_pcm_state_t) from the pointer function of 6fire, as expected by snd_pcm_update_hw_ptr0(). Caught by sparse. Signed-off-by: Eldad Zack Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/6fire/pcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/usb/6fire/pcm.c b/sound/usb/6fire/pcm.c index 8221ff2f209f..074aaf7a36db 100644 --- a/sound/usb/6fire/pcm.c +++ b/sound/usb/6fire/pcm.c @@ -543,7 +543,7 @@ static snd_pcm_uframes_t usb6fire_pcm_pointer( snd_pcm_uframes_t ret; if (rt->panic || !sub) - return SNDRV_PCM_STATE_XRUN; + return SNDRV_PCM_POS_XRUN; spin_lock_irqsave(&sub->lock, flags); ret = sub->dma_off; From 9c563a54c85bcd531cc671d9575d4dff64852124 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 19 Jul 2013 07:58:02 +0200 Subject: [PATCH 79/80] ALSA: hda - Fix EAPD GPIO control for Sigmatel codecs commit 1ea9a69d1a36a5b62bf281ba8bb304fcac656dad upstream. The EAPD GPIO is dynamically turned on/off for some machines with Sigmatel codecs, but this didn't work as expected, and it resulted in spontaneous lost of speaker outputs per HP plugging or power-saving. This patch fixes the bug by simply including spec->eapd_mask into spec->gpio_mask and spec->gpio_data bits. Reported-and-tested-by: Eric Shattow Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_sigmatel.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 486df8a867c8..e849e1e0d7d9 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -417,9 +417,11 @@ static void stac_update_outputs(struct hda_codec *codec) val &= ~spec->eapd_mask; else val |= spec->eapd_mask; - if (spec->gpio_data != val) + if (spec->gpio_data != val) { + spec->gpio_data = val; stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, val); + } } } @@ -3608,20 +3610,18 @@ static int stac_parse_auto_config(struct hda_codec *codec) static int stac_init(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; - unsigned int gpio; int i; /* override some hints */ stac_store_hints(codec); /* set up GPIO */ - gpio = spec->gpio_data; /* turn on EAPD statically when spec->eapd_switch isn't set. * otherwise, unsol event will turn it on/off dynamically */ if (!spec->eapd_switch) - gpio |= spec->eapd_mask; - stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, gpio); + spec->gpio_data |= spec->eapd_mask; + stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, spec->gpio_data); snd_hda_gen_init(codec); @@ -3921,6 +3921,7 @@ static void stac_setup_gpio(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; + spec->gpio_mask |= spec->eapd_mask; if (spec->gpio_led) { if (!spec->vref_mute_led_nid) { spec->gpio_mask |= spec->gpio_led; From 6f7bb6bafb3153e9b9383f3f44a0f23fc00318d9 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 28 Jul 2013 16:30:49 -0700 Subject: [PATCH 80/80] Linux 3.10.4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index b5485529b6bf..b4df9b20c49f 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 3 +SUBLEVEL = 4 EXTRAVERSION = NAME = Unicycling Gorilla