X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=net%2Fdccp%2Finput.c;h=decf2f21149b5bbef51ca126357c1c19e6933442;hb=b84a2189c4e1835c51fd6b974a0497be9bc4ba87;hp=da6ec185ed5b5826e6df5b6d58f7870e26330cb9;hpb=b643b0fdbc59cf6bbb086974b29d2571e9e9f646;p=powerpc.git diff --git a/net/dccp/input.c b/net/dccp/input.c index da6ec185ed..decf2f2114 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -19,6 +19,9 @@ #include "ccid.h" #include "dccp.h" +/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */ +int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8; + static void dccp_fin(struct sock *sk, struct sk_buff *skb) { sk->sk_shutdown |= RCV_SHUTDOWN; @@ -29,16 +32,56 @@ static void dccp_fin(struct sock *sk, struct sk_buff *skb) sk->sk_data_ready(sk, 0); } -static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) +static int dccp_rcv_close(struct sock *sk, struct sk_buff *skb) { - dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); - dccp_fin(sk, skb); - dccp_set_state(sk, DCCP_CLOSED); - sk_wake_async(sk, 1, POLL_HUP); + int queued = 0; + + switch (sk->sk_state) { + /* + * We ignore Close when received in one of the following states: + * - CLOSED (may be a late or duplicate packet) + * - PASSIVE_CLOSEREQ (the peer has sent a CloseReq earlier) + * - RESPOND (already handled by dccp_check_req) + */ + case DCCP_CLOSING: + /* + * Simultaneous-close: receiving a Close after sending one. This + * can happen if both client and server perform active-close and + * will result in an endless ping-pong of crossing and retrans- + * mitted Close packets, which only terminates when one of the + * nodes times out (min. 64 seconds). Quicker convergence can be + * achieved when one of the nodes acts as tie-breaker. + * This is ok as both ends are done with data transfer and each + * end is just waiting for the other to acknowledge termination. + */ + if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) + break; + /* fall through */ + case DCCP_REQUESTING: + case DCCP_ACTIVE_CLOSEREQ: + dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); + dccp_done(sk); + break; + case DCCP_OPEN: + case DCCP_PARTOPEN: + /* Give waiting application a chance to read pending data */ + queued = 1; + dccp_fin(sk, skb); + dccp_set_state(sk, DCCP_PASSIVE_CLOSE); + /* fall through */ + case DCCP_PASSIVE_CLOSE: + /* + * Retransmitted Close: we have already enqueued the first one. 
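+		 * Nothing more to do than repeat the wakeup below; queued stays 0,
+		 * so the caller simply discards the duplicate skb.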
+ */ + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); + } + return queued; } -static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) +static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) { + int queued = 0; + /* * Step 7: Check for unexpected packet types * If (S.is_server and P.type == CloseReq) @@ -47,12 +90,62 @@ static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) */ if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); - return; + return queued; } - if (sk->sk_state != DCCP_CLOSING) + /* Step 13: process relevant Client states < CLOSEREQ */ + switch (sk->sk_state) { + case DCCP_REQUESTING: + dccp_send_close(sk, 0); dccp_set_state(sk, DCCP_CLOSING); - dccp_send_close(sk, 0); + break; + case DCCP_OPEN: + case DCCP_PARTOPEN: + /* Give waiting application a chance to read pending data */ + queued = 1; + dccp_fin(sk, skb); + dccp_set_state(sk, DCCP_PASSIVE_CLOSEREQ); + /* fall through */ + case DCCP_PASSIVE_CLOSEREQ: + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); + } + return queued; +} + +static u8 dccp_reset_code_convert(const u8 code) +{ + const u8 error_code[] = { + [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */ + [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */ + [DCCP_RESET_CODE_ABORTED] = ECONNRESET, + + [DCCP_RESET_CODE_NO_CONNECTION] = ECONNREFUSED, + [DCCP_RESET_CODE_CONNECTION_REFUSED] = ECONNREFUSED, + [DCCP_RESET_CODE_TOO_BUSY] = EUSERS, + [DCCP_RESET_CODE_AGGRESSION_PENALTY] = EDQUOT, + + [DCCP_RESET_CODE_PACKET_ERROR] = ENOMSG, + [DCCP_RESET_CODE_BAD_INIT_COOKIE] = EBADR, + [DCCP_RESET_CODE_BAD_SERVICE_CODE] = EBADRQC, + [DCCP_RESET_CODE_OPTION_ERROR] = EILSEQ, + [DCCP_RESET_CODE_MANDATORY_ERROR] = EOPNOTSUPP, + }; + + return code >= DCCP_MAX_RESET_CODES ? 0 : error_code[code]; +} + +static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) +{ + u8 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code); + + sk->sk_err = err; + + /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */ + dccp_fin(sk, skb); + + if (err && !sock_flag(sk, SOCK_DEAD)) + sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); + dccp_time_wait(sk, DCCP_TIME_WAIT, 0); } static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) @@ -64,11 +157,27 @@ static void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_ack_seq); } +static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_sock *dp = dccp_sk(sk); + + /* Don't deliver to RX CCID when node has shut down read end. */ + if (!(sk->sk_shutdown & RCV_SHUTDOWN)) + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + /* + * Until the TX queue has been drained, we can not honour SHUT_WR, since + * we need received feedback as input to adjust congestion control. 
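+	 * Once the queue has drained and SHUT_WR is set, incoming feedback is
+	 * no longer passed to the TX CCID (see the condition below).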
+ */ + if (sk->sk_write_queue.qlen > 0 || !(sk->sk_shutdown & SEND_SHUTDOWN)) + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); +} + static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); struct dccp_sock *dp = dccp_sk(sk); - u64 lswl, lawl; + u64 lswl, lawl, seqno = DCCP_SKB_CB(skb)->dccpd_seq, + ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; /* * Step 5: Prepare sequence numbers for Sync @@ -84,11 +193,9 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) */ if (dh->dccph_type == DCCP_PKT_SYNC || dh->dccph_type == DCCP_PKT_SYNCACK) { - if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, - dp->dccps_awl, dp->dccps_awh) && - dccp_delta_seqno(dp->dccps_swl, - DCCP_SKB_CB(skb)->dccpd_seq) >= 0) - dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + if (between48(ackno, dp->dccps_awl, dp->dccps_awh) && + dccp_delta_seqno(dp->dccps_swl, seqno) >= 0) + dccp_update_gsr(sk, seqno); else return -1; } @@ -103,9 +210,6 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) * Update S.GSR, S.SWL, S.SWH * If P.type != Sync, * Update S.GAR - * Otherwise, - * Send Sync packet acknowledging P.seqno - * Drop packet and return */ lswl = dp->dccps_swl; lawl = dp->dccps_awl; @@ -113,35 +217,52 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) if (dh->dccph_type == DCCP_PKT_CLOSEREQ || dh->dccph_type == DCCP_PKT_CLOSE || dh->dccph_type == DCCP_PKT_RESET) { - lswl = dp->dccps_gsr; - dccp_inc_seqno(&lswl); + lswl = ADD48(dp->dccps_gsr, 1); lawl = dp->dccps_gar; } - if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) && - (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ || - between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, - lawl, dp->dccps_awh))) { - dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + if (between48(seqno, lswl, dp->dccps_swh) && + (ackno == DCCP_PKT_WITHOUT_ACK_SEQ || + between48(ackno, lawl, dp->dccps_awh))) { + dccp_update_gsr(sk, seqno); if (dh->dccph_type != DCCP_PKT_SYNC && - (DCCP_SKB_CB(skb)->dccpd_ack_seq != - DCCP_PKT_WITHOUT_ACK_SEQ)) - dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; + (ackno != DCCP_PKT_WITHOUT_ACK_SEQ)) + dp->dccps_gar = ackno; } else { + unsigned long now = jiffies; + /* + * Step 6: Check sequence numbers + * Otherwise, + * If P.type == Reset, + * Send Sync packet acknowledging S.GSR + * Otherwise, + * Send Sync packet acknowledging P.seqno + * Drop packet and return + * + * These Syncs are rate-limited as per RFC 4340, 7.5.4: + * at most 1 / (dccp_sync_rate_limit * HZ) Syncs per second. + */ + if (time_before(now, (dp->dccps_rate_last + + sysctl_dccp_sync_ratelimit))) + return 0; + DCCP_WARN("DCCP: Step 6 failed for %s packet, " "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " "sending SYNC...\n", dccp_packet_name(dh->dccph_type), - (unsigned long long) lswl, - (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq, + (unsigned long long) lswl, (unsigned long long) seqno, (unsigned long long) dp->dccps_swh, - (DCCP_SKB_CB(skb)->dccpd_ack_seq == - DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist" : "exists", - (unsigned long long) lawl, - (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq, + (ackno == DCCP_PKT_WITHOUT_ACK_SEQ) ? 
"doesn't exist" + : "exists", + (unsigned long long) lawl, (unsigned long long) ackno, (unsigned long long) dp->dccps_awh); - dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC); + + dp->dccps_rate_last = now; + + if (dh->dccph_type == DCCP_PKT_RESET) + seqno = dp->dccps_gsr; + dccp_send_sync(sk, seqno, DCCP_PKT_SYNC); return -1; } @@ -157,8 +278,9 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, case DCCP_PKT_DATAACK: case DCCP_PKT_DATA: /* - * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED - * option if it is. + * FIXME: schedule DATA_DROPPED (RFC 4340, 11.7.2) if and when + * - sk_shutdown == RCV_SHUTDOWN, use Code 1, "Not Listening" + * - sk_receive_queue is full, use Code 2, "Receive Buffer" */ __skb_pull(skb, dh->dccph_doff * 4); __skb_queue_tail(&sk->sk_receive_queue, skb); @@ -175,16 +297,17 @@ static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb, * S.state := TIMEWAIT * Set TIMEWAIT timer * Drop packet and return - */ - dccp_fin(sk, skb); - dccp_time_wait(sk, DCCP_TIME_WAIT, 0); + */ + dccp_rcv_reset(sk, skb); return 0; case DCCP_PKT_CLOSEREQ: - dccp_rcv_closereq(sk, skb); + if (dccp_rcv_closereq(sk, skb)) + return 0; goto discard; case DCCP_PKT_CLOSE: - dccp_rcv_close(sk, skb); - return 0; + if (dccp_rcv_close(sk, skb)) + return 0; + goto discard; case DCCP_PKT_REQUEST: /* Step 7 * or (S.is_server and P.type == Response) @@ -249,9 +372,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKVEC_STATE_RECEIVED)) goto discard; - - ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + dccp_deliver_input_to_ccids(sk, skb); return __dccp_rcv_established(sk, skb, dh, len); discard: @@ -280,6 +401,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (dh->dccph_type == DCCP_PKT_RESPONSE) { const struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); + long tstamp = dccp_timestamp(); /* Stop the REQUEST timer */ inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); @@ -300,13 +422,10 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (dccp_parse_options(sk, skb)) goto out_invalid_packet; - /* Obtain RTT sample from SYN exchange (used by CCID 3) */ - if (dp->dccps_options_received.dccpor_timestamp_echo) { - struct timeval now; - - dccp_timestamp(sk, &now); - dp->dccps_syn_rtt = dccp_sample_rtt(sk, &now, NULL); - } + /* Obtain usec RTT sample from SYN exchange (used by CCID 3) */ + if (likely(dp->dccps_options_received.dccpor_timestamp_echo)) + dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp - + dp->dccps_options_received.dccpor_timestamp_echo)); if (dccp_msk(sk)->dccpms_send_ack_vector && dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, @@ -353,7 +472,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); - sk_wake_async(sk, 0, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); } if (sk->sk_write_pending || icsk->icsk_ack.pingpong || @@ -494,8 +613,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, DCCP_ACKVEC_STATE_RECEIVED)) goto discard; - ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + dccp_deliver_input_to_ccids(sk, skb); } /* @@ -507,25 +625,18 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * Drop packet and return */ if (dh->dccph_type == DCCP_PKT_RESET) { - /* 
- * Queue the equivalent of TCP fin so that dccp_recvmsg - * exits the loop - */ - dccp_fin(sk, skb); - dccp_time_wait(sk, DCCP_TIME_WAIT, 0); + dccp_rcv_reset(sk, skb); return 0; /* * Step 7: Check for unexpected packet types - * If (S.is_server and P.type == CloseReq) - * or (S.is_server and P.type == Response) + * If (S.is_server and P.type == Response) * or (S.is_client and P.type == Request) * or (S.state == RESPOND and P.type == Data), * Send Sync packet acknowledging P.seqno * Drop packet and return */ } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && - (dh->dccph_type == DCCP_PKT_RESPONSE || - dh->dccph_type == DCCP_PKT_CLOSEREQ)) || + dh->dccph_type == DCCP_PKT_RESPONSE) || (dp->dccps_role == DCCP_ROLE_CLIENT && dh->dccph_type == DCCP_PKT_REQUEST) || (sk->sk_state == DCCP_RESPOND && @@ -533,15 +644,12 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); goto discard; } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { - dccp_rcv_closereq(sk, skb); + if (dccp_rcv_closereq(sk, skb)) + return 0; goto discard; } else if (dh->dccph_type == DCCP_PKT_CLOSE) { - dccp_rcv_close(sk, skb); - return 0; - } - - if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { - dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); + if (dccp_rcv_close(sk, skb)) + return 0; goto discard; } @@ -572,9 +680,12 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, switch (old_state) { case DCCP_PARTOPEN: sk->sk_state_change(sk); - sk_wake_async(sk, 0, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); break; } + } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { + dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); + goto discard; } if (!queued) { @@ -587,37 +698,22 @@ discard: EXPORT_SYMBOL_GPL(dccp_rcv_state_process); /** - * dccp_sample_rtt - Sample RTT from packet exchange - * - * @sk: connected dccp_sock - * @t_recv: receive timestamp of packet with timestamp echo - * @t_hist: packet history timestamp or NULL + * dccp_sample_rtt - Validate and finalise computation of RTT sample + * @delta: number of microseconds between packet and acknowledgment + * The routine is kept generic to work in different contexts. It should be + * called immediately when the ACK used for the RTT sample arrives. */ -u32 dccp_sample_rtt(struct sock *sk, struct timeval *t_recv, - struct timeval *t_hist) +u32 dccp_sample_rtt(struct sock *sk, long delta) { - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_options_received *or = &dp->dccps_options_received; - suseconds_t delta; - - if (t_hist == NULL) { - if (!or->dccpor_timestamp_echo) { - DCCP_WARN("packet without timestamp echo\n"); - return DCCP_SANE_RTT_MAX; - } - timeval_sub_usecs(t_recv, or->dccpor_timestamp_echo * 10); - delta = timeval_usecs(t_recv); - } else - delta = timeval_delta(t_recv, t_hist); - - delta -= or->dccpor_elapsed_time * 10; /* either set or 0 */ + /* dccpor_elapsed_time is either zeroed out or set and > 0 */ + delta -= dccp_sk(sk)->dccps_options_received.dccpor_elapsed_time * 10; if (unlikely(delta <= 0)) { - DCCP_WARN("unusable RTT sample %ld, using min\n", (long)delta); + DCCP_WARN("unusable RTT sample %ld, using min\n", delta); return DCCP_SANE_RTT_MIN; } - if (unlikely(delta - (suseconds_t)DCCP_SANE_RTT_MAX > 0)) { - DCCP_WARN("RTT sample %ld too large, using max\n", (long)delta); + if (unlikely(delta > DCCP_SANE_RTT_MAX)) { + DCCP_WARN("RTT sample %ld too large, using max\n", delta); return DCCP_SANE_RTT_MAX; }
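
For reference, the RTT-handling change above boils down to: the caller now passes a ready-made delta in microseconds (DCCP timestamps and the Elapsed Time option are carried in units of 10 microseconds, hence the factor of 10), and dccp_sample_rtt() only subtracts the elapsed time and clamps the result to a sane range. The standalone sketch below mirrors that logic outside the kernel; the two bound constants are placeholder assumptions for illustration, not the kernel's DCCP_SANE_RTT_MIN/MAX definitions.

#include <stdio.h>

#define SANE_RTT_MIN      100L	/* assumed lower bound, microseconds */
#define SANE_RTT_MAX  3000000L	/* assumed upper bound, microseconds */

/* Validate and clamp an RTT sample, as in the reworked dccp_sample_rtt() */
static long sample_rtt(long delta_us, unsigned int elapsed_10us)
{
	/* the DCCP Elapsed Time option is expressed in 10-microsecond units */
	delta_us -= (long)elapsed_10us * 10;

	if (delta_us <= 0)
		return SANE_RTT_MIN;	/* unusable sample, fall back to minimum */
	if (delta_us > SANE_RTT_MAX)
		return SANE_RTT_MAX;	/* implausibly large, cap at maximum */
	return delta_us;
}

int main(void)
{
	/* timestamp and its echo are in 10us units, as in the SYN-RTT path */
	long tstamp = 123456, echo = 123000;

	printf("SYN RTT: %ld us\n", sample_rtt(10 * (tstamp - echo), 40));
	return 0;
}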