Skip to content

Commit 59e0cd1

Browse files
Jon Paul Maloy authored and gregkh committed
tipc: fix socket timer deadlock
commit f1d048f24e66ba85d3dabf3d076cefa5f2b546b0 upstream.

We sometimes observe a 'deadly embrace' type deadlock occurring between mutually connected sockets on the same node. This happens when the one-hour peer supervision timers happen to expire simultaneously in both sockets. The scenario is as follows:

CPU 1:                              CPU 2:
--------                            --------
tipc_sk_timeout(sk1)                tipc_sk_timeout(sk2)
  lock(sk1.slock)                     lock(sk2.slock)
  msg_create(probe)                   msg_create(probe)
  unlock(sk1.slock)                   unlock(sk2.slock)
  tipc_node_xmit_skb()                tipc_node_xmit_skb()
    tipc_node_xmit()                    tipc_node_xmit()
      tipc_sk_rcv(sk2)                    tipc_sk_rcv(sk1)
        lock(sk2.slock)                     lock(sk1.slock)
        filter_rcv()                        filter_rcv()
          tipc_sk_proto_rcv()                 tipc_sk_proto_rcv()
            msg_create(probe_rsp)               msg_create(probe_rsp)
            tipc_sk_respond()                   tipc_sk_respond()
              tipc_node_xmit_skb()                tipc_node_xmit_skb()
                tipc_node_xmit()                    tipc_node_xmit()
                  tipc_sk_rcv(sk1)                    tipc_sk_rcv(sk2)
                    lock(sk1.slock)                     lock(sk2.slock)
                    ===> DEADLOCK                       ===> DEADLOCK

Further analysis reveals that there are three different locations in the socket code where tipc_sk_respond() is called within the context of the socket lock, with ensuing risk of similar deadlocks.

We now solve this by passing a buffer queue along with all upcalls where sk_lock.slock may potentially be held. Response or rejected message buffers are accumulated into this queue instead of being sent out directly, and only sent once we know we are safely outside the slock context.

Reported-by: GUNA <gbalasun@gmail.com>
Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent abc025d commit 59e0cd1

1 file changed

Lines changed: 42 additions & 12 deletions

File tree

net/tipc/socket.c

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -777,9 +777,11 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
777777
* @tsk: receiving socket
778778
* @skb: pointer to message buffer.
779779
*/
780-
static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
780+
static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
781+
struct sk_buff_head *xmitq)
781782
{
782783
struct sock *sk = &tsk->sk;
784+
u32 onode = tsk_own_node(tsk);
783785
struct tipc_msg *hdr = buf_msg(skb);
784786
int mtyp = msg_type(hdr);
785787
int conn_cong;
@@ -792,7 +794,8 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
792794

793795
if (mtyp == CONN_PROBE) {
794796
msg_set_type(hdr, CONN_PROBE_REPLY);
795-
tipc_sk_respond(sk, skb, TIPC_OK);
797+
if (tipc_msg_reverse(onode, &skb, TIPC_OK))
798+
__skb_queue_tail(xmitq, skb);
796799
return;
797800
} else if (mtyp == CONN_ACK) {
798801
conn_cong = tsk_conn_cong(tsk);
@@ -1647,7 +1650,8 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
16471650
*
16481651
* Returns true if message was added to socket receive queue, otherwise false
16491652
*/
1650-
static bool filter_rcv(struct sock *sk, struct sk_buff *skb)
1653+
static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
1654+
struct sk_buff_head *xmitq)
16511655
{
16521656
struct socket *sock = sk->sk_socket;
16531657
struct tipc_sock *tsk = tipc_sk(sk);
@@ -1657,7 +1661,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb)
16571661
int usr = msg_user(hdr);
16581662

16591663
if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
1660-
tipc_sk_proto_rcv(tsk, skb);
1664+
tipc_sk_proto_rcv(tsk, skb, xmitq);
16611665
return false;
16621666
}
16631667

@@ -1700,7 +1704,8 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb)
17001704
return true;
17011705

17021706
reject:
1703-
tipc_sk_respond(sk, skb, err);
1707+
if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
1708+
__skb_queue_tail(xmitq, skb);
17041709
return false;
17051710
}
17061711

@@ -1716,9 +1721,24 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb)
17161721
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
17171722
{
17181723
unsigned int truesize = skb->truesize;
1724+
struct sk_buff_head xmitq;
1725+
u32 dnode, selector;
17191726

1720-
if (likely(filter_rcv(sk, skb)))
1727+
__skb_queue_head_init(&xmitq);
1728+
1729+
if (likely(filter_rcv(sk, skb, &xmitq))) {
17211730
atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
1731+
return 0;
1732+
}
1733+
1734+
if (skb_queue_empty(&xmitq))
1735+
return 0;
1736+
1737+
/* Send response/rejected message */
1738+
skb = __skb_dequeue(&xmitq);
1739+
dnode = msg_destnode(buf_msg(skb));
1740+
selector = msg_origport(buf_msg(skb));
1741+
tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
17221742
return 0;
17231743
}
17241744

@@ -1732,12 +1752,13 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
17321752
* Caller must hold socket lock
17331753
*/
17341754
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
1735-
u32 dport)
1755+
u32 dport, struct sk_buff_head *xmitq)
17361756
{
1757+
unsigned long time_limit = jiffies + 2;
1758+
struct sk_buff *skb;
17371759
unsigned int lim;
17381760
atomic_t *dcnt;
1739-
struct sk_buff *skb;
1740-
unsigned long time_limit = jiffies + 2;
1761+
u32 onode;
17411762

17421763
while (skb_queue_len(inputq)) {
17431764
if (unlikely(time_after_eq(jiffies, time_limit)))
@@ -1749,7 +1770,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
17491770

17501771
/* Add message directly to receive queue if possible */
17511772
if (!sock_owned_by_user(sk)) {
1752-
filter_rcv(sk, skb);
1773+
filter_rcv(sk, skb, xmitq);
17531774
continue;
17541775
}
17551776

@@ -1762,7 +1783,9 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
17621783
continue;
17631784

17641785
/* Overload => reject message back to sender */
1765-
tipc_sk_respond(sk, skb, TIPC_ERR_OVERLOAD);
1786+
onode = tipc_own_addr(sock_net(sk));
1787+
if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD))
1788+
__skb_queue_tail(xmitq, skb);
17661789
break;
17671790
}
17681791
}
@@ -1775,22 +1798,29 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
17751798
*/
17761799
void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
17771800
{
1801+
struct sk_buff_head xmitq;
17781802
u32 dnode, dport = 0;
17791803
int err;
17801804
struct tipc_sock *tsk;
17811805
struct sock *sk;
17821806
struct sk_buff *skb;
17831807

1808+
__skb_queue_head_init(&xmitq);
17841809
while (skb_queue_len(inputq)) {
17851810
dport = tipc_skb_peek_port(inputq, dport);
17861811
tsk = tipc_sk_lookup(net, dport);
17871812

17881813
if (likely(tsk)) {
17891814
sk = &tsk->sk;
17901815
if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
1791-
tipc_sk_enqueue(inputq, sk, dport);
1816+
tipc_sk_enqueue(inputq, sk, dport, &xmitq);
17921817
spin_unlock_bh(&sk->sk_lock.slock);
17931818
}
1819+
/* Send pending response/rejected messages, if any */
1820+
while ((skb = __skb_dequeue(&xmitq))) {
1821+
dnode = msg_destnode(buf_msg(skb));
1822+
tipc_node_xmit_skb(net, skb, dnode, dport);
1823+
}
17941824
sock_put(sk);
17951825
continue;
17961826
}

0 commit comments

Comments (0)