6edafaaf6f
If the following packet flow happens, the kernel will panic. MachineA MachineB SYN ----------------------> SYN+ACK <---------------------- ACK(bad seq) ----------------------> When a bad seq ACK is received, tcp_v4_md5_do_lookup(skb->sk, ip_hdr(skb)->daddr) is finally called by tcp_v4_reqsk_send_ack(), but the first parameter (skb->sk) is NULL at that moment, so the kernel panics. This patch fixes this bug. OOPS output is as follows: [ 302.812793] IP: [<c05cfaa6>] tcp_v4_md5_do_lookup+0x12/0x42 [ 302.817075] Oops: 0000 [#1] SMP [ 302.819815] Modules linked in: ipv6 loop dm_multipath rtc_cmos rtc_core rtc_lib pcspkr pcnet32 mii i2c_piix4 parport_pc i2c_core parport ac button ata_piix libata dm_mod mptspi mptscsih mptbase scsi_transport_spi sd_mod scsi_mod crc_t10dif ext3 jbd mbcache uhci_hcd ohci_hcd ehci_hcd [last unloaded: scsi_wait_scan] [ 302.849946] [ 302.851198] Pid: 0, comm: swapper Not tainted (2.6.27-rc1-guijf #5) [ 302.855184] EIP: 0060:[<c05cfaa6>] EFLAGS: 00010296 CPU: 0 [ 302.858296] EIP is at tcp_v4_md5_do_lookup+0x12/0x42 [ 302.861027] EAX: 0000001e EBX: 00000000 ECX: 00000046 EDX: 00000046 [ 302.864867] ESI: ceb69e00 EDI: 1467a8c0 EBP: cf75f180 ESP: c0792e54 [ 302.868333] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 [ 302.871287] Process swapper (pid: 0, ti=c0792000 task=c0712340 task.ti=c0746000) [ 302.875592] Stack: c06f413a 00000000 cf75f180 ceb69e00 00000000 c05d0d86 000016d0 ceac5400 [ 302.883275] c05d28f8 000016d0 ceb69e00 ceb69e20 681bf6e3 00001000 00000000 0a67a8c0 [ 302.890971] ceac5400 c04250a3 c06f413a c0792eb0 c0792edc cf59a620 cf59a620 cf59a634 [ 302.900140] Call Trace: [ 302.902392] [<c05d0d86>] tcp_v4_reqsk_send_ack+0x17/0x35 [ 302.907060] [<c05d28f8>] tcp_check_req+0x156/0x372 [ 302.910082] [<c04250a3>] printk+0x14/0x18 [ 302.912868] [<c05d0aa1>] tcp_v4_do_rcv+0x1d3/0x2bf [ 302.917423] [<c05d26be>] tcp_v4_rcv+0x563/0x5b9 [ 302.920453] [<c05bb20f>] ip_local_deliver_finish+0xe8/0x183 [ 302.923865] [<c05bb10a>] ip_rcv_finish+0x286/0x2a3 
[ 302.928569] [<c059e438>] dev_alloc_skb+0x11/0x25 [ 302.931563] [<c05a211f>] netif_receive_skb+0x2d6/0x33a [ 302.934914] [<d0917941>] pcnet32_poll+0x333/0x680 [pcnet32] [ 302.938735] [<c05a3b48>] net_rx_action+0x5c/0xfe [ 302.941792] [<c042856b>] __do_softirq+0x5d/0xc1 [ 302.944788] [<c042850e>] __do_softirq+0x0/0xc1 [ 302.948999] [<c040564b>] do_softirq+0x55/0x88 [ 302.951870] [<c04501b1>] handle_fasteoi_irq+0x0/0xa4 [ 302.954986] [<c04284da>] irq_exit+0x35/0x69 [ 302.959081] [<c0405717>] do_IRQ+0x99/0xae [ 302.961896] [<c040422b>] common_interrupt+0x23/0x28 [ 302.966279] [<c040819d>] default_idle+0x2a/0x3d [ 302.969212] [<c0402552>] cpu_idle+0xb2/0xd2 [ 302.972169] ======================= [ 302.974274] Code: fc ff 84 d2 0f 84 df fd ff ff e9 34 fe ff ff 83 c4 0c 5b 5e 5f 5d c3 90 90 57 89 d7 56 53 89 c3 50 68 3a 41 6f c0 e8 e9 55 e5 ff <8b> 93 9c 04 00 00 58 85 d2 59 74 1e 8b 72 10 31 db 31 c9 85 f6 [ 303.011610] EIP: [<c05cfaa6>] tcp_v4_md5_do_lookup+0x12/0x42 SS:ESP 0068:c0792e54 [ 303.018360] Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Gui Jianfeng <guijianfeng@cn.fujitsu.com> Signed-off-by: David S. Miller <davem@davemloft.net>
248 lines
6.4 KiB
C
248 lines
6.4 KiB
C
/*
 * NET Generic infrastructure for Network protocols.
 *
 * Definitions for request_sock
 *
 * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 * From code originally in include/net/tcp.h
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
|
|
#ifndef _REQUEST_SOCK_H
|
|
#define _REQUEST_SOCK_H
|
|
|
|
#include <linux/slab.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/types.h>
|
|
#include <linux/bug.h>
|
|
|
|
#include <net/sock.h>
|
|
|
|
struct request_sock;
|
|
struct sk_buff;
|
|
struct dst_entry;
|
|
struct proto;
|
|
|
|
/*
 * struct request_sock_ops - per-protocol operations for request socks
 *
 * One table of this type is attached to every request_sock through its
 * rsk_ops pointer (set by reqsk_alloc()).
 *
 * family, obj_size: not used by the inline helpers in this header;
 *                   presumably the address family and the size of the
 *                   protocol's request sock object - confirm with the users.
 * slab:             cache that reqsk_alloc() allocates from and
 *                   __reqsk_free() returns objects to.
 * rtx_syn_ack:      callback taking the sock and the request.
 * send_ack:         callback taking the sock, an skb and the request.
 * send_reset:       callback taking the sock and an skb.
 * destructor:       invoked by reqsk_free() right before the object is
 *                   handed back to the slab.
 */
struct request_sock_ops {
	int			family;
	int			obj_size;
	struct kmem_cache	*slab;
	int	(*rtx_syn_ack)(struct sock *sk, struct request_sock *req);
	void	(*send_ack)(struct sock *sk, struct sk_buff *skb,
			    struct request_sock *req);
	void	(*send_reset)(struct sock *sk, struct sk_buff *skb);
	void	(*destructor)(struct request_sock *req);
};
|
|
|
|
/* struct request_sock - mini sock to represent a connection request
|
|
*/
|
|
struct request_sock {
|
|
struct request_sock *dl_next; /* Must be first member! */
|
|
u16 mss;
|
|
u8 retrans;
|
|
u8 cookie_ts; /* syncookie: encode tcpopts in timestamp */
|
|
/* The following two fields can be easily recomputed I think -AK */
|
|
u32 window_clamp; /* window clamp at creation time */
|
|
u32 rcv_wnd; /* rcv_wnd offered first time */
|
|
u32 ts_recent;
|
|
unsigned long expires;
|
|
const struct request_sock_ops *rsk_ops;
|
|
struct sock *sk;
|
|
u32 secid;
|
|
u32 peer_secid;
|
|
};
|
|
|
|
static inline struct request_sock *reqsk_alloc(const struct request_sock_ops *ops)
|
|
{
|
|
struct request_sock *req = kmem_cache_alloc(ops->slab, GFP_ATOMIC);
|
|
|
|
if (req != NULL)
|
|
req->rsk_ops = ops;
|
|
|
|
return req;
|
|
}
|
|
|
|
static inline void __reqsk_free(struct request_sock *req)
|
|
{
|
|
kmem_cache_free(req->rsk_ops->slab, req);
|
|
}
|
|
|
|
static inline void reqsk_free(struct request_sock *req)
|
|
{
|
|
req->rsk_ops->destructor(req);
|
|
__reqsk_free(req);
|
|
}
|
|
|
|
/*
 * Declared here, defined outside this header.
 * NOTE(review): presumably the sysctl-tunable limit on queued SYN requests -
 * confirm at the definition.
 */
extern int sysctl_max_syn_backlog;
|
|
|
|
/** struct listen_sock - listen state
|
|
*
|
|
* @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs
|
|
*/
|
|
struct listen_sock {
|
|
u8 max_qlen_log;
|
|
/* 3 bytes hole, try to use */
|
|
int qlen;
|
|
int qlen_young;
|
|
int clock_hand;
|
|
u32 hash_rnd;
|
|
u32 nr_table_entries;
|
|
struct request_sock *syn_table[0];
|
|
};
|
|
|
|
/** struct request_sock_queue - queue of request_socks
|
|
*
|
|
* @rskq_accept_head - FIFO head of established children
|
|
* @rskq_accept_tail - FIFO tail of established children
|
|
* @rskq_defer_accept - User waits for some data after accept()
|
|
* @syn_wait_lock - serializer
|
|
*
|
|
* %syn_wait_lock is necessary only to avoid proc interface having to grab the main
|
|
* lock sock while browsing the listening hash (otherwise it's deadlock prone).
|
|
*
|
|
* This lock is acquired in read mode only from listening_get_next() seq_file
|
|
* op and it's acquired in write mode _only_ from code that is actively
|
|
* changing rskq_accept_head. All readers that are holding the master sock lock
|
|
* don't need to grab this lock in read mode too as rskq_accept_head. writes
|
|
* are always protected from the main sock lock.
|
|
*/
|
|
struct request_sock_queue {
|
|
struct request_sock *rskq_accept_head;
|
|
struct request_sock *rskq_accept_tail;
|
|
rwlock_t syn_wait_lock;
|
|
u8 rskq_defer_accept;
|
|
/* 3 bytes hole, try to pack */
|
|
struct listen_sock *listen_opt;
|
|
};
|
|
|
|
/*
 * Out-of-line queue management routines; their definitions are not part of
 * this header. NOTE(review): going by the names, reqsk_queue_alloc() sets up
 * a queue for nr_table_entries SYN table buckets and the two destroy
 * variants tear it down - confirm the exact contract at the definitions.
 */
int reqsk_queue_alloc(struct request_sock_queue *queue,
		      unsigned int nr_table_entries);

void __reqsk_queue_destroy(struct request_sock_queue *queue);
void reqsk_queue_destroy(struct request_sock_queue *queue);
|
|
|
|
static inline struct request_sock *
|
|
reqsk_queue_yank_acceptq(struct request_sock_queue *queue)
|
|
{
|
|
struct request_sock *req = queue->rskq_accept_head;
|
|
|
|
queue->rskq_accept_head = NULL;
|
|
return req;
|
|
}
|
|
|
|
static inline int reqsk_queue_empty(struct request_sock_queue *queue)
|
|
{
|
|
return queue->rskq_accept_head == NULL;
|
|
}
|
|
|
|
static inline void reqsk_queue_unlink(struct request_sock_queue *queue,
|
|
struct request_sock *req,
|
|
struct request_sock **prev_req)
|
|
{
|
|
write_lock(&queue->syn_wait_lock);
|
|
*prev_req = req->dl_next;
|
|
write_unlock(&queue->syn_wait_lock);
|
|
}
|
|
|
|
static inline void reqsk_queue_add(struct request_sock_queue *queue,
|
|
struct request_sock *req,
|
|
struct sock *parent,
|
|
struct sock *child)
|
|
{
|
|
req->sk = child;
|
|
sk_acceptq_added(parent);
|
|
|
|
if (queue->rskq_accept_head == NULL)
|
|
queue->rskq_accept_head = req;
|
|
else
|
|
queue->rskq_accept_tail->dl_next = req;
|
|
|
|
queue->rskq_accept_tail = req;
|
|
req->dl_next = NULL;
|
|
}
|
|
|
|
static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue)
|
|
{
|
|
struct request_sock *req = queue->rskq_accept_head;
|
|
|
|
WARN_ON(req == NULL);
|
|
|
|
queue->rskq_accept_head = req->dl_next;
|
|
if (queue->rskq_accept_head == NULL)
|
|
queue->rskq_accept_tail = NULL;
|
|
|
|
return req;
|
|
}
|
|
|
|
static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue,
|
|
struct sock *parent)
|
|
{
|
|
struct request_sock *req = reqsk_queue_remove(queue);
|
|
struct sock *child = req->sk;
|
|
|
|
WARN_ON(child == NULL);
|
|
|
|
sk_acceptq_removed(parent);
|
|
__reqsk_free(req);
|
|
return child;
|
|
}
|
|
|
|
static inline int reqsk_queue_removed(struct request_sock_queue *queue,
|
|
struct request_sock *req)
|
|
{
|
|
struct listen_sock *lopt = queue->listen_opt;
|
|
|
|
if (req->retrans == 0)
|
|
--lopt->qlen_young;
|
|
|
|
return --lopt->qlen;
|
|
}
|
|
|
|
static inline int reqsk_queue_added(struct request_sock_queue *queue)
|
|
{
|
|
struct listen_sock *lopt = queue->listen_opt;
|
|
const int prev_qlen = lopt->qlen;
|
|
|
|
lopt->qlen_young++;
|
|
lopt->qlen++;
|
|
return prev_qlen;
|
|
}
|
|
|
|
static inline int reqsk_queue_len(const struct request_sock_queue *queue)
|
|
{
|
|
return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0;
|
|
}
|
|
|
|
static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
|
|
{
|
|
return queue->listen_opt->qlen_young;
|
|
}
|
|
|
|
static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
|
|
{
|
|
return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
|
|
}
|
|
|
|
static inline void reqsk_queue_hash_req(struct request_sock_queue *queue,
|
|
u32 hash, struct request_sock *req,
|
|
unsigned long timeout)
|
|
{
|
|
struct listen_sock *lopt = queue->listen_opt;
|
|
|
|
req->expires = jiffies + timeout;
|
|
req->retrans = 0;
|
|
req->sk = NULL;
|
|
req->dl_next = lopt->syn_table[hash];
|
|
|
|
write_lock(&queue->syn_wait_lock);
|
|
lopt->syn_table[hash] = req;
|
|
write_unlock(&queue->syn_wait_lock);
|
|
}
|
|
|
|
#endif /* _REQUEST_SOCK_H */
|