de6f2b59e5
This fixes a subtle bug in the calculation of the inter-packet gap and shows that t_delta, as it is currently used, is not needed and is hence replaced.

The algorithm from RFC 3448, 4.6 below continually computes a send time t_nom, which is initialised with the current time t_now; t_gran = 1E6 / HZ specifies the scheduling granularity, s the packet size, and X the sending rate:

    t_distance = t_nom - t_now;          // in microseconds
    t_delta    = min(t_ipi, t_gran) / 2; // `delta' parameter in microseconds

    if (t_distance >= t_delta) {
        reschedule after (t_distance / 1000) milliseconds;
    } else {
        t_ipi  = s / X;                  // inter-packet interval in usec
        t_nom += t_ipi;                  // compute the next send time
        send packet now;
    }

1) Description of the bug
-------------------------
Rescheduling requires a conversion into milliseconds, due to this call chain:

 * ccid3_hc_tx_send_packet() returns a timeout in milliseconds,
 * this value is converted by msecs_to_jiffies() in dccp_write_xmit(),
 * and finally used as jiffy-expires-value for sk_reset_timer().

The highest jiffy resolution with HZ=1000 is 1 millisecond, so using a higher granularity does not make much sense here. As a consequence, values of t_distance < 1000 are truncated to 0. This issue has so far been resolved by using instead

    if (t_distance >= t_delta + 1000)
        reschedule after (t_distance / 1000) milliseconds;

The bug is in artificially inflating t_delta to t_delta' = t_delta + 1000. This is unnecessarily large; a more adequate value is t_delta' = max(t_delta, 1000).

2) Consequences of using the corrected t_delta'
-----------------------------------------------
Since t_delta <= t_gran/2 = 10^6/(2*HZ), we have t_delta <= 1000 as long as HZ >= 500. This means that t_delta' = max(1000, t_delta) is constant at 1000.

On the other hand, when using a coarse HZ value of HZ < 500, we have three sub-cases that can all be reduced to using another constant of t_gran/2.

 (a) The first case arises when t_ipi > t_gran. Here t_delta' is the constant t_delta' = max(1000, t_gran/2) = t_gran/2.
 (b) If t_ipi <= 2000 < t_gran = 10^6/HZ usec, then t_delta = t_ipi/2 <= 1000, so that t_delta' = max(1000, t_delta) = 1000 < t_gran/2.
 (c) If 2000 < t_ipi <= t_gran, we have t_delta' = max(t_delta, 1000) = t_ipi/2.

In the second and third cases we have delay values less than t_gran/2, which is in the order of less than or equal to half a jiffy. How these are treated depends on how fractions of a jiffy are handled: they are either always rounded down to 0, or always rounded up to 1 jiffy (assuming non-zero values). In both cases the error is on average in the order of 50%. Thus we are not increasing the error when in the second/third case we replace a value less than t_gran/2 with 0, by setting t_delta' to the constant t_gran/2.

3) Summary
----------
Fixing (1) and considering (2), the patch replaces t_delta with a constant, whose value depends on CONFIG_HZ, changing the above algorithm to:

    if (t_distance >= t_delta')
        reschedule after (t_distance / 1000) milliseconds;

where t_delta' = 10^6/(2*HZ) if HZ < 500, and t_delta' = 1000 otherwise.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
180 lines
5.9 KiB
C
180 lines
5.9 KiB
C
/*
|
|
* net/dccp/ccids/ccid3.h
|
|
*
|
|
* Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
|
|
* Copyright (c) 2007 The University of Aberdeen, Scotland, UK
|
|
*
|
|
* An implementation of the DCCP protocol
|
|
*
|
|
* This code has been developed by the University of Waikato WAND
|
|
* research group. For further information please see http://www.wand.net.nz/
|
|
* or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
|
|
*
|
|
* This code also uses code from Lulea University, rereleased as GPL by its
|
|
* authors:
|
|
* Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
|
|
*
|
|
* Changes to meet Linux coding standards, to make it meet latest ccid3 draft
|
|
* and to make it work as a loadable module in the DCCP stack written by
|
|
* Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
|
|
*
|
|
* Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*/
|
|
#ifndef _DCCP_CCID3_H_
|
|
#define _DCCP_CCID3_H_
|
|
|
|
#include <linux/ktime.h>
|
|
#include <linux/list.h>
|
|
#include <linux/types.h>
|
|
#include <linux/tfrc.h>
|
|
#include "lib/tfrc.h"
|
|
#include "../ccid.h"
|
|
|
|
/*
 * Initial timeout: two seconds, as per RFC 3448, 4.2.
 * Expressed as a multiple of USEC_PER_SEC.
 */
#define TFRC_INITIAL_TIMEOUT	(USEC_PER_SEC * 2)

/* Backoff interval t_mbi from [RFC 3448, 4.3], given in seconds */
#define TFRC_T_MBI		64
|
|
|
|
/*
 * TFRC_T_DELTA - constant replacement for the t_delta parameter of
 * RFC 3448, 4.6.
 *
 * sk_reset_timer() is used with millisecond granularity here, so every delay
 * shorter than %USEC_PER_MSEC ends up rounded down to 0:
 *
 *  - HZ >= 500: the constant t_delta = %USEC_PER_MSEC can be used.
 *  - HZ <  500: the resolution is coarse; with the constant
 *               t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ)
 *               the error stays below one timer tick (t_gran).
 */
#if (HZ < 500)
# define TFRC_T_DELTA	(USEC_PER_SEC / (2 * HZ))
#warning Coarse CONFIG_HZ resolution -- higher value recommended for TFRC.
#else
# define TFRC_T_DELTA	USEC_PER_MSEC
#endif
|
|
|
|
/*
 * Identifiers of the TFRC options handled by CCID3.
 * NOTE(review): the numeric values look like the option numbers assigned in
 * RFC 4342, section 8 -- confirm against the RFC before changing any of them.
 */
enum ccid3_options {
	TFRC_OPT_LOSS_EVENT_RATE = 192,
	TFRC_OPT_LOSS_INTERVALS  = 193,
	TFRC_OPT_RECEIVE_RATE    = 194,
};
|
|
|
|
struct ccid3_options_received {
|
|
u64 ccid3or_seqno:48,
|
|
ccid3or_loss_intervals_idx:16;
|
|
u16 ccid3or_loss_intervals_len;
|
|
u32 ccid3or_loss_event_rate;
|
|
u32 ccid3or_receive_rate;
|
|
};
|
|
|
|
/*
 * States of the TFRC sender. Numbering starts at 1, so the value 0 does not
 * correspond to any state listed here.
 */
enum ccid3_hc_tx_states {
	TFRC_SSTATE_NO_SENT  = 1,
	TFRC_SSTATE_NO_FBACK = 2,
	TFRC_SSTATE_FBACK    = 3,
	TFRC_SSTATE_TERM     = 4,
};
|
|
|
|
/** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
|
|
*
|
|
* @x - Current sending rate in 64 * bytes per second
|
|
* @x_recv - Receive rate in 64 * bytes per second
|
|
* @x_calc - Calculated rate in bytes per second
|
|
* @rtt - Estimate of current round trip time in usecs
|
|
* @p - Current loss event rate (0-1) scaled by 1000000
|
|
* @s - Packet size in bytes
|
|
* @t_rto - Nofeedback Timer setting in usecs
|
|
* @t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
|
|
* @state - Sender state, one of %ccid3_hc_tx_states
|
|
* @last_win_count - Last window counter sent
|
|
* @t_last_win_count - Timestamp of earliest packet with
|
|
* last_win_count value sent
|
|
* @no_feedback_timer - Handle to no feedback timer
|
|
* @t_ld - Time last doubled during slow start
|
|
* @t_nom - Nominal send time of next packet
|
|
* @hist - Packet history
|
|
* @options_received - Parsed set of retrieved options
|
|
*/
|
|
struct ccid3_hc_tx_sock {
|
|
u64 x;
|
|
u64 x_recv;
|
|
u32 x_calc;
|
|
u32 rtt;
|
|
u32 p;
|
|
u32 t_rto;
|
|
u32 t_ipi;
|
|
u16 s;
|
|
enum ccid3_hc_tx_states state:8;
|
|
u8 last_win_count;
|
|
ktime_t t_last_win_count;
|
|
struct timer_list no_feedback_timer;
|
|
ktime_t t_ld;
|
|
ktime_t t_nom;
|
|
struct tfrc_tx_hist_entry *hist;
|
|
struct ccid3_options_received options_received;
|
|
};
|
|
|
|
static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
|
|
{
|
|
struct ccid3_hc_tx_sock *hctx = ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid);
|
|
BUG_ON(hctx == NULL);
|
|
return hctx;
|
|
}
|
|
|
|
/*
 * States of the TFRC receiver. Numbering starts at 1; the terminal state is
 * pinned to 127, apart from the two consecutive data states.
 */
enum ccid3_hc_rx_states {
	TFRC_RSTATE_NO_DATA = 1,
	TFRC_RSTATE_DATA    = 2,
	TFRC_RSTATE_TERM    = 127,
};
|
|
|
|
/** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
|
|
*
|
|
* @x_recv - Receiver estimate of send rate (RFC 3448 4.3)
|
|
* @rtt - Receiver estimate of rtt (non-standard)
|
|
* @p - Current loss event rate (RFC 3448 5.4)
|
|
* @last_counter - Tracks window counter (RFC 4342, 8.1)
|
|
* @state - Receiver state, one of %ccid3_hc_rx_states
|
|
* @bytes_recv - Total sum of DCCP payload bytes
|
|
* @x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3)
|
|
* @rtt - Receiver estimate of RTT
|
|
* @tstamp_last_feedback - Time at which last feedback was sent
|
|
* @tstamp_last_ack - Time at which last feedback was sent
|
|
* @hist - Packet history (loss detection + RTT sampling)
|
|
* @li_hist - Loss Interval database
|
|
* @s - Received packet size in bytes
|
|
* @p_inverse - Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
|
|
*/
|
|
struct ccid3_hc_rx_sock {
|
|
u8 last_counter:4;
|
|
enum ccid3_hc_rx_states state:8;
|
|
u32 bytes_recv;
|
|
u32 x_recv;
|
|
u32 rtt;
|
|
ktime_t tstamp_last_feedback;
|
|
struct tfrc_rx_hist hist;
|
|
struct tfrc_loss_hist li_hist;
|
|
u16 s;
|
|
#define p_inverse li_hist.i_mean
|
|
};
|
|
|
|
static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
|
|
{
|
|
struct ccid3_hc_rx_sock *hcrx = ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid);
|
|
BUG_ON(hcrx == NULL);
|
|
return hcrx;
|
|
}
|
|
|
|
#endif /* _DCCP_CCID3_H_ */
|