qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC Patch v2 04/10] virtio-net rsc: Detailed IPv4 and


From: Jason Wang
Subject: Re: [Qemu-devel] [RFC Patch v2 04/10] virtio-net rsc: Detailed IPv4 and General TCP data coalescing
Date: Mon, 1 Feb 2016 14:21:05 +0800
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.5.1


On 02/01/2016 02:13 AM, address@hidden wrote:
> From: Wei Xu <address@hidden>
>
> Since this feature also needs to support IPv6, and there are
> some protocol specific differences difference for IPv4/6 in the header,
> so try to make the interface to be general.
>
> IPv4/6 should set up both the new and old IP/TCP header before invoking
> TCP coalescing, and should also tell the real payload.
>
> The main handler of TCP includes TCP window update, duplicated ACK check
> and the real data coalescing if the new segment passed invalid filter
> and is identified as an expected one.
>
> An expected segment means:
> 1. Segment is within current window and the sequence is the expected one.
> 2. ACK of the segment is in the valid window.
> 3. If the ACK in the segment is a duplicated one, then it must less than 2,
>    this is to notify upper layer TCP starting retransmission due to the spec.
>
> Signed-off-by: Wei Xu <address@hidden>
> ---
>  hw/net/virtio-net.c | 127 
> ++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 124 insertions(+), 3 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index cfbac6d..4f77fbe 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -41,6 +41,10 @@
>  
>  #define VIRTIO_HEADER   12    /* Virtio net header size */
>  #define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header))
> +#define TCP_WINDOW      65535

The name is confusing, how about TCP_MAX_WINDOW_SIZE ?

> +
> +/* IPv4 max payload, 16 bits in the header */
> +#define MAX_IP4_PAYLOAD  (65535 - sizeof(struct ip_header))
>  
>  #define MAX_VIRTIO_IP_PAYLOAD  (65535 + IP_OFFSET)
>  
> @@ -1670,13 +1674,130 @@ out:
>      return 0;
>  }
>  
> +static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg,
> +                                 const uint8_t *buf, struct tcp_header 
> *n_tcp,
> +                                 struct tcp_header *o_tcp)
> +{
> +    uint32_t nack, oack;
> +    uint16_t nwin, owin;
> +
> +    nack = htonl(n_tcp->th_ack);
> +    nwin = htons(n_tcp->th_win);
> +    oack = htonl(o_tcp->th_ack);
> +    owin = htons(o_tcp->th_win);
> +
> +    if ((nack - oack) >= TCP_WINDOW) {
> +        return RSC_FINAL;
> +    } else if (nack == oack) {
> +        /* duplicated ack or window probe */
> +        if (nwin == owin) {
> +            /* duplicated ack, add dup ack count due to whql test up to 1 */
> +
> +            if (seg->dup_ack_count == 0) {
> +                seg->dup_ack_count++;
> +                return RSC_COALESCE;
> +            } else {
> +                /* Spec says should send it directly */
> +                return RSC_FINAL;
> +            }
> +        } else {
> +            /* Coalesce window update */

Need we flush this immediately consider it was a window update?

> +            o_tcp->th_win = n_tcp->th_win;
> +            return RSC_COALESCE;
> +        }
> +    } else {

What if nack < oack here?

> +        /* pure ack, update ack */
> +        o_tcp->th_ack = n_tcp->th_ack;
> +        return RSC_COALESCE;
> +    }
> +}
> +
> +static int32_t virtio_net_rsc_coalesce_tcp(NetRscChain *chain, NetRscSeg 
> *seg,
> +               const uint8_t *buf, struct tcp_header *n_tcp, uint16_t 
> n_tcp_len,
> +               uint16_t n_data, struct tcp_header *o_tcp, uint16_t o_tcp_len,
> +               uint16_t o_data, uint16_t *p_ip_len, uint16_t max_data)
> +{
> +    void *data;
> +    uint16_t o_ip_len;
> +    uint32_t nseq, oseq;
> +
> +    o_ip_len = htons(*p_ip_len);
> +    nseq = htonl(n_tcp->th_seq);
> +    oseq = htonl(o_tcp->th_seq);
> +

Need to the tcp header check here. And looks like we need also check more:

- Flags
- Data offset
- URG pointer

> +    /* Ignore packet with more/larger tcp options */
> +    if (n_tcp_len > o_tcp_len) {

What if n_tcp_len < o_tcp_len ?

> +        return RSC_FINAL;
> +    }
> +
> +    /* out of order or retransmitted. */
> +    if ((nseq - oseq) > TCP_WINDOW) {
> +        return RSC_FINAL;
> +    }
> +
> +    data = ((uint8_t *)n_tcp) + n_tcp_len;
> +    if (nseq == oseq) {
> +        if ((0 == o_data) && n_data) {
> +            /* From no payload to payload, normal case, not a dup ack or etc 
> */
> +            goto coalesce;
> +        } else {
> +            return virtio_net_rsc_handle_ack(chain, seg, buf, n_tcp, o_tcp);
> +        }
> +    } else if ((nseq - oseq) != o_data) {
> +        /* Not a consistent packet, out of order */
> +        return RSC_FINAL;
> +    } else {
> +coalesce:
> +        if ((o_ip_len + n_data) > max_data) {
> +            return RSC_FINAL;
> +        }
> +
> +        /* Here comes the right data, the payload lengh in v4/v6 is 
> different,
> +           so use the field value to update */
> +        *p_ip_len = htons(o_ip_len + n_data); /* Update new data len */
> +        o_tcp->th_offset_flags = n_tcp->th_offset_flags; /* Bring 'PUSH' big 
> */

Is it correct? How about URG pointer?

> +        o_tcp->th_ack = n_tcp->th_ack;
> +        o_tcp->th_win = n_tcp->th_win;
> +
> +        memmove(seg->buf + seg->size, data, n_data);
> +        seg->size += n_data;
> +        return RSC_COALESCE;
> +    }
> +}
>  
>  static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
>                         NetRscSeg *seg, const uint8_t *buf, size_t size)
>  {
> -    /* This real part of this function will be introduced in next patch, just
> -    *  return a 'final' to feed the compilation. */
> -    return RSC_FINAL;
> +    uint16_t o_ip_len, n_ip_len;    /* len in ip header field */
> +    uint16_t n_ip_hdrlen, o_ip_hdrlen;  /* ipv4 header len */
> +    uint16_t n_tcp_len, o_tcp_len;  /* tcp header len */
> +    uint16_t o_data, n_data;          /* payload without virtio/eth/ip/tcp */
> +    struct ip_header *n_ip, *o_ip;
> +    struct tcp_header *n_tcp, *o_tcp;
> +
> +    n_ip = (struct ip_header *)(buf + IP_OFFSET);
> +    n_ip_hdrlen = ((0xF & n_ip->ip_ver_len) << 2);
> +    n_ip_len = htons(n_ip->ip_len);
> +    n_tcp = (struct tcp_header *)(((uint8_t *)n_ip) + n_ip_hdrlen);
> +    n_tcp_len = (htons(n_tcp->th_offset_flags) & 0xF000) >> 10;
> +    n_data = n_ip_len - n_ip_hdrlen - n_tcp_len;
> +
> +    o_ip = (struct ip_header *)(seg->buf + IP_OFFSET);
> +    o_ip_hdrlen = ((0xF & o_ip->ip_ver_len) << 2);
> +    o_ip_len = htons(o_ip->ip_len);
> +    o_tcp = (struct tcp_header *)(((uint8_t *)o_ip) + o_ip_hdrlen);
> +    o_tcp_len = (htons(o_tcp->th_offset_flags) & 0xF000) >> 10;
> +    o_data = o_ip_len - o_ip_hdrlen - o_tcp_len;

Any chance to eliminate the above codes duplication into a helper? To
simplify the codes, you may want just store two pointers in seg (one is
ip header another is tcp header).

> +
> +    if ((n_ip->ip_src ^ o_ip->ip_src) || (n_ip->ip_dst ^ o_ip->ip_dst)
> +        || (n_tcp->th_sport ^ o_tcp->th_sport)
> +        || (n_tcp->th_dport ^ o_tcp->th_dport)) {

Lots of other fields need to be checked here:

- header length
- TOS
- Flag
- identification

I think we could not try to merge the packet with if any of the above
fields do not match. Since you split the coalescing function into
different layers, tcp header check should be postponed.

> +        return RSC_NO_MATCH;
> +    }
> +
> +    return virtio_net_rsc_coalesce_tcp(chain, seg, buf,
> +                                    n_tcp, n_tcp_len, n_data, o_tcp, 
> o_tcp_len,
> +                                    o_data, &o_ip->ip_len, MAX_IP4_PAYLOAD);

Since you've passed seg and buf, do we really need such a huge parameter
list? Looks like some of the header parsing has already been done in
virtio_net_rsc_coalesce_tcp().

>  }
>  
>  static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,




reply via email to

[Prev in Thread] Current Thread [Next in Thread]