qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC Patch v2 04/10] virtio-net rsc: Detailed IPv4 and Gene


From: wexu
Subject: [Qemu-devel] [RFC Patch v2 04/10] virtio-net rsc: Detailed IPv4 and General TCP data coalescing
Date: Mon, 1 Feb 2016 02:13:23 +0800

From: Wei Xu <address@hidden>

Since this feature also needs to support IPv6, and there are
some protocol specific differences difference for IPv4/6 in the header,
so try to make the interface to be general.

IPv4/6 should set up both the new and old IP/TCP header before invoking
TCP coalescing, and should also tell the real payload.

The main handler of TCP includes TCP window update, duplicated ACK check
and the real data coalescing if the new segment passed invalid filter
and is identified as an expected one.

An expected segment means:
1. Segment is within current window and the sequence is the expected one.
2. ACK of the segment is in the valid window.
3. If the ACK in the segment is a duplicated one, then it must less than 2,
   this is to notify upper layer TCP starting retransmission due to the spec.

Signed-off-by: Wei Xu <address@hidden>
---
 hw/net/virtio-net.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 124 insertions(+), 3 deletions(-)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index cfbac6d..4f77fbe 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -41,6 +41,10 @@
 
 #define VIRTIO_HEADER   12    /* Virtio net header size */
 #define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header))
+#define TCP_WINDOW      65535
+
+/* IPv4 max payload, 16 bits in the header */
+#define MAX_IP4_PAYLOAD  (65535 - sizeof(struct ip_header))
 
 #define MAX_VIRTIO_IP_PAYLOAD  (65535 + IP_OFFSET)
 
@@ -1670,13 +1674,130 @@ out:
     return 0;
 }
 
+static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg,
+                                 const uint8_t *buf, struct tcp_header *n_tcp,
+                                 struct tcp_header *o_tcp)
+{
+    uint32_t nack, oack;
+    uint16_t nwin, owin;
+
+    nack = htonl(n_tcp->th_ack);
+    nwin = htons(n_tcp->th_win);
+    oack = htonl(o_tcp->th_ack);
+    owin = htons(o_tcp->th_win);
+
+    if ((nack - oack) >= TCP_WINDOW) {
+        return RSC_FINAL;
+    } else if (nack == oack) {
+        /* duplicated ack or window probe */
+        if (nwin == owin) {
+            /* duplicated ack, add dup ack count due to whql test up to 1 */
+
+            if (seg->dup_ack_count == 0) {
+                seg->dup_ack_count++;
+                return RSC_COALESCE;
+            } else {
+                /* Spec says should send it directly */
+                return RSC_FINAL;
+            }
+        } else {
+            /* Coalesce window update */
+            o_tcp->th_win = n_tcp->th_win;
+            return RSC_COALESCE;
+        }
+    } else {
+        /* pure ack, update ack */
+        o_tcp->th_ack = n_tcp->th_ack;
+        return RSC_COALESCE;
+    }
+}
+
+static int32_t virtio_net_rsc_coalesce_tcp(NetRscChain *chain, NetRscSeg *seg,
+               const uint8_t *buf, struct tcp_header *n_tcp, uint16_t 
n_tcp_len,
+               uint16_t n_data, struct tcp_header *o_tcp, uint16_t o_tcp_len,
+               uint16_t o_data, uint16_t *p_ip_len, uint16_t max_data)
+{
+    void *data;
+    uint16_t o_ip_len;
+    uint32_t nseq, oseq;
+
+    o_ip_len = htons(*p_ip_len);
+    nseq = htonl(n_tcp->th_seq);
+    oseq = htonl(o_tcp->th_seq);
+
+    /* Ignore packet with more/larger tcp options */
+    if (n_tcp_len > o_tcp_len) {
+        return RSC_FINAL;
+    }
+
+    /* out of order or retransmitted. */
+    if ((nseq - oseq) > TCP_WINDOW) {
+        return RSC_FINAL;
+    }
+
+    data = ((uint8_t *)n_tcp) + n_tcp_len;
+    if (nseq == oseq) {
+        if ((0 == o_data) && n_data) {
+            /* From no payload to payload, normal case, not a dup ack or etc */
+            goto coalesce;
+        } else {
+            return virtio_net_rsc_handle_ack(chain, seg, buf, n_tcp, o_tcp);
+        }
+    } else if ((nseq - oseq) != o_data) {
+        /* Not a consistent packet, out of order */
+        return RSC_FINAL;
+    } else {
+coalesce:
+        if ((o_ip_len + n_data) > max_data) {
+            return RSC_FINAL;
+        }
+
+        /* Here comes the right data, the payload lengh in v4/v6 is different,
+           so use the field value to update */
+        *p_ip_len = htons(o_ip_len + n_data); /* Update new data len */
+        o_tcp->th_offset_flags = n_tcp->th_offset_flags; /* Bring 'PUSH' big */
+        o_tcp->th_ack = n_tcp->th_ack;
+        o_tcp->th_win = n_tcp->th_win;
+
+        memmove(seg->buf + seg->size, data, n_data);
+        seg->size += n_data;
+        return RSC_COALESCE;
+    }
+}
 
 static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
                        NetRscSeg *seg, const uint8_t *buf, size_t size)
 {
-    /* This real part of this function will be introduced in next patch, just
-    *  return a 'final' to feed the compilation. */
-    return RSC_FINAL;
+    uint16_t o_ip_len, n_ip_len;    /* len in ip header field */
+    uint16_t n_ip_hdrlen, o_ip_hdrlen;  /* ipv4 header len */
+    uint16_t n_tcp_len, o_tcp_len;  /* tcp header len */
+    uint16_t o_data, n_data;          /* payload without virtio/eth/ip/tcp */
+    struct ip_header *n_ip, *o_ip;
+    struct tcp_header *n_tcp, *o_tcp;
+
+    n_ip = (struct ip_header *)(buf + IP_OFFSET);
+    n_ip_hdrlen = ((0xF & n_ip->ip_ver_len) << 2);
+    n_ip_len = htons(n_ip->ip_len);
+    n_tcp = (struct tcp_header *)(((uint8_t *)n_ip) + n_ip_hdrlen);
+    n_tcp_len = (htons(n_tcp->th_offset_flags) & 0xF000) >> 10;
+    n_data = n_ip_len - n_ip_hdrlen - n_tcp_len;
+
+    o_ip = (struct ip_header *)(seg->buf + IP_OFFSET);
+    o_ip_hdrlen = ((0xF & o_ip->ip_ver_len) << 2);
+    o_ip_len = htons(o_ip->ip_len);
+    o_tcp = (struct tcp_header *)(((uint8_t *)o_ip) + o_ip_hdrlen);
+    o_tcp_len = (htons(o_tcp->th_offset_flags) & 0xF000) >> 10;
+    o_data = o_ip_len - o_ip_hdrlen - o_tcp_len;
+
+    if ((n_ip->ip_src ^ o_ip->ip_src) || (n_ip->ip_dst ^ o_ip->ip_dst)
+        || (n_tcp->th_sport ^ o_tcp->th_sport)
+        || (n_tcp->th_dport ^ o_tcp->th_dport)) {
+        return RSC_NO_MATCH;
+    }
+
+    return virtio_net_rsc_coalesce_tcp(chain, seg, buf,
+                                    n_tcp, n_tcp_len, n_data, o_tcp, o_tcp_len,
+                                    o_data, &o_ip->ip_len, MAX_IP4_PAYLOAD);
 }
 
 static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
-- 
2.4.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]