[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [ Patch 2/2] virtio-net rsc: support coalescing ipv6 tcp traffic
From: |
Jason Wang |
Subject: |
Re: [Qemu-devel] [ Patch 2/2] virtio-net rsc: support coalescing ipv6 tcp traffic |
Date: |
Thu, 17 Mar 2016 16:50:58 +0800 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.5.1 |
On 03/15/2016 05:17 PM, address@hidden wrote:
> From: Wei Xu <address@hidden>
>
> Most things are like ipv4, except for one significant difference between ipv4
> and ipv6: the length field in the ipv4 header includes the header itself, while
> in ipv6 it does not, which means ipv6 can carry a real '65535' unit.
>
> Signed-off-by: Wei Xu <address@hidden>
> ---
> hw/net/virtio-net.c | 146
> ++++++++++++++++++++++++++++++++++++++++-----
> include/hw/virtio/virtio.h | 5 +-
> 2 files changed, 135 insertions(+), 16 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index c23b45f..ef61b74 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -52,9 +52,14 @@
> #define MAX_IP4_PAYLOAD (65535 - IP4_HDR_SZ)
> #define MAX_TCP_PAYLOAD 65535
>
> -/* max payload with virtio header */
> +#define IP6_HDR_SZ (sizeof(struct ip6_header))
> +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ)
> +#define IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
> +#define MAX_IP6_PAYLOAD MAX_TCP_PAYLOAD
> +
> +/* ip6 max payload, payload in ipv6 don't include the header */
> #define MAX_VIRTIO_PAYLOAD (sizeof(struct virtio_net_hdr_mrg_rxbuf) \
> - + ETH_HDR_SZ + MAX_TCP_PAYLOAD)
> + + ETH_IP6_HDR_SZ + MAX_IP6_PAYLOAD)
>
> #define IP4_HEADER_LEN 5 /* header lenght value in ip header without option
> */
>
> @@ -1722,14 +1727,27 @@ static void virtio_net_rsc_extract_unit4(NetRscChain
> *chain,
> {
> uint16_t ip_hdrlen;
>
> - unit->ip = (struct ip_header *)(buf + chain->hdr_size + ETH_HDR_SZ);
> - ip_hdrlen = ((0xF & unit->ip->ip_ver_len) << 2);
> - unit->ip_plen = &unit->ip->ip_len;
> - unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
> + unit->u_ip.ip = (struct ip_header *)(buf + chain->hdr_size + ETH_HDR_SZ);
> + ip_hdrlen = ((0xF & unit->u_ip.ip->ip_ver_len) << 2);
> + unit->ip_plen = &unit->u_ip.ip->ip_len;
> + unit->tcp = (struct tcp_header *)(((uint8_t *)unit->u_ip.ip) +
> ip_hdrlen);
> unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
> unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
> }
>
> +static void virtio_net_rsc_extract_unit6(NetRscChain *chain,
> + const uint8_t *buf, NetRscUnit*
> unit)
> +{
> + unit->u_ip.ip6 = (struct ip6_header *)(buf + chain->hdr_size +
> ETH_HDR_SZ);
The u_ip seems a little bit redundant. How about using a simple void * and
casting it to ipv4/ipv6 in the protocol-specific callbacks?
Introducing u_ip leads to unnecessary changes in the ipv4 code for the ipv6
coalescing implementation.
> + unit->ip_plen = &(unit->u_ip.ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
> + unit->tcp = (struct tcp_header *)(((uint8_t *)unit->u_ip.ip6)\
> + + IP6_HDR_SZ);
> + unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
> + /* There is a difference between payload lenght in ipv4 and v6,
> + ip header is excluded in ipv6 */
> + unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
> +}
> +
> static void virtio_net_rsc_ipv4_checksum(struct ip_header *ip)
> {
> uint32_t sum;
> @@ -1743,7 +1761,10 @@ static size_t virtio_net_rsc_drain_seg(NetRscChain
> *chain, NetRscSeg *seg)
> {
> int ret;
>
> - virtio_net_rsc_ipv4_checksum(seg->unit.ip);
> + if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
> + virtio_net_rsc_ipv4_checksum(seg->unit.u_ip.ip);
> + }
> +
> ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
> QTAILQ_REMOVE(&chain->buffers, seg, next);
> g_free(seg->buf);
> @@ -1807,7 +1828,11 @@ static void virtio_net_rsc_cache_buf(NetRscChain
> *chain, NetClientState *nc,
> QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
> chain->stat.cache++;
>
> - virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
> + if (chain->proto == ETH_P_IP) {
> + virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
> + } else {
A switch with a g_assert_not_reached() would be better than this.
> + virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
> + }
> }
>
> static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg,
> @@ -1930,8 +1955,8 @@ coalesce:
> static int32_t virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg,
> const uint8_t *buf, size_t size, NetRscUnit *unit)
> {
> - if ((unit->ip->ip_src ^ seg->unit.ip->ip_src)
> - || (unit->ip->ip_dst ^ seg->unit.ip->ip_dst)
> + if ((unit->u_ip.ip->ip_src ^ seg->unit.u_ip.ip->ip_src)
> + || (unit->u_ip.ip->ip_dst ^ seg->unit.u_ip.ip->ip_dst)
> || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
> || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
> chain->stat.no_match++;
> @@ -1941,6 +1966,22 @@ static int32_t virtio_net_rsc_coalesce4(NetRscChain
> *chain, NetRscSeg *seg,
> return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
> }
>
> +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg *seg,
> + const uint8_t *buf, size_t size, NetRscUnit *unit)
> +{
> + if (memcmp(&unit->u_ip.ip6->ip6_src, &seg->unit.u_ip.ip6->ip6_src,
> + sizeof(struct in6_address))
> + || memcmp(&unit->u_ip.ip6->ip6_dst, &seg->unit.u_ip.ip6->ip6_dst,
> + sizeof(struct in6_address))
> + || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
> + || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
> + chain->stat.no_match++;
> + return RSC_NO_MATCH;
> + }
> +
> + return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
> +}
> +
> /* Pakcets with 'SYN' should bypass, other flag should be sent after drain
> * to prevent out of order */
> static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain,
> @@ -1983,7 +2024,11 @@ static size_t virtio_net_rsc_do_coalesce(NetRscChain
> *chain, NetClientState *nc,
> NetRscSeg *seg, *nseg;
>
> QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
> - ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
> + if (chain->proto == ETH_P_IP) {
> + ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
> + } else {
> + ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
> + }
>
> if (ret == RSC_FINAL) {
> if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
> @@ -2082,7 +2127,8 @@ static size_t virtio_net_rsc_receive4(void *opq,
> NetClientState* nc,
>
> chain = (NetRscChain *)opq;
> virtio_net_rsc_extract_unit4(chain, buf, &unit);
> - if (RSC_WANT != virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size))
> {
> + if (RSC_WANT != virtio_net_rsc_sanity_check4(chain,
> + unit.u_ip.ip, buf, size)) {
> return virtio_net_do_receive(nc, buf, size);
> }
>
> @@ -2102,13 +2148,74 @@ static size_t virtio_net_rsc_receive4(void *opq,
> NetClientState* nc,
> return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
> }
>
> +static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain,
> + struct ip6_header *ip, const uint8_t *buf, size_t
> size)
Indentation is wrong here.
> +{
> + uint16_t ip_len;
> +
> + if (size < (chain->hdr_size + ETH_IP6_HDR_SZ + TCP_HDR_SZ)) {
> + return RSC_BYPASS;
> + }
> +
> + if (((0xF0 & ip->ip6_ctlun.ip6_un1.ip6_un1_flow) >> 4)
> + != IP_HEADER_VERSION_6) {
> + return RSC_BYPASS;
> + }
> +
> + /* Both option and protocol is checked in this */
> + if (ip->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
> + chain->stat.bypass_not_tcp++;
> + return RSC_BYPASS;
> + }
> +
> + /* Sanity check */
The comment is useless.
> + ip_len = htons(ip->ip6_ctlun.ip6_un1.ip6_un1_plen);
> + if (ip_len < TCP_HDR_SZ
> + || ip_len > (size - chain->hdr_size - ETH_IP6_HDR_SZ)) {
> + chain->stat.ip_hacked++;
> + return RSC_BYPASS;
> + }
> +
> + return RSC_WANT;
> +}
> +
> +static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc,
> + const uint8_t *buf, size_t size)
> +{
This is rather similar to the ipv4 version; the code needs to be unified.
> + int32_t ret;
> + NetRscChain *chain;
> + NetRscUnit unit;
> +
> + chain = (NetRscChain *)opq;
> + virtio_net_rsc_extract_unit6(chain, buf, &unit);
> + if (RSC_WANT != virtio_net_rsc_sanity_check6(chain,
> + unit.u_ip.ip6, buf, size)) {
> + return virtio_net_do_receive(nc, buf, size);
> + }
> +
> + ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
> + if (ret == RSC_BYPASS) {
> + return virtio_net_do_receive(nc, buf, size);
> + } else if (ret == RSC_FINAL) {
> + return virtio_net_rsc_drain_flow(chain, nc, buf, size,
> + ((chain->hdr_size + ETH_HDR_SZ) + 8), IP6_ADDR_SIZE,
> + (chain->hdr_size + ETH_IP6_HDR_SZ), TCP_PORT_SIZE);
> + }
> +
> + if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) {
> + return size;
> + }
> +
> + return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
> +}
> +
> static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n,
> NetClientState *nc, uint16_t
> proto)
> {
> NetRscChain *chain;
>
> /* Only handle IPv4/6 */
> - if (proto != (uint16_t)ETH_P_IP) {
> + if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
> return NULL;
> }
>
> @@ -2121,7 +2228,11 @@ static NetRscChain
> *virtio_net_rsc_lookup_chain(VirtIONet * n,
> chain = g_malloc(sizeof(*chain));
> chain->hdr_size = n->guest_hdr_len;
> chain->proto = proto;
> - chain->max_payload = MAX_IP4_PAYLOAD;
> + if (proto == (uint16_t)ETH_P_IP) {
> + chain->max_payload = MAX_IP4_PAYLOAD;
> + } else {
> + chain->max_payload = MAX_IP6_PAYLOAD;
> + }
> chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
> virtio_net_rsc_purge, chain);
> memset(&chain->stat, 0, sizeof(chain->stat));
> @@ -2153,7 +2264,12 @@ static ssize_t virtio_net_rsc_receive(NetClientState
> *nc,
> return virtio_net_do_receive(nc, buf, size);
> } else {
> chain->stat.received++;
> - return virtio_net_rsc_receive4(chain, nc, buf, size);
> +
> + if (proto == (uint16_t)ETH_P_IP) {
> + return virtio_net_rsc_receive4(chain, nc, buf, size);
> + } else {
> + return virtio_net_rsc_receive6(chain, nc, buf, size);
> + }
> }
> }
>
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 3b1dfa8..13d20a4 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -170,7 +170,10 @@ typedef struct NetRscStat {
>
> /* Rsc unit general info used to checking if can coalescing */
> typedef struct NetRscUnit {
> - struct ip_header *ip; /* ip header */
> + union {
> + struct ip_header *ip; /* ip header */
> + struct ip6_header *ip6; /* ip6 header */
> + } u_ip;
> uint16_t *ip_plen; /* data len pointer in ip header field */
> struct tcp_header *tcp; /* tcp header */
> uint16_t tcp_hdrlen; /* tcp header len */
- [Qemu-devel] [ Patch 0/2] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest, wexu, 2016/03/15
- [Qemu-devel] [ Patch 2/2] virtio-net rsc: support coalescing ipv6 tcp traffic, wexu, 2016/03/15
- Re: [Qemu-devel] [ Patch 2/2] virtio-net rsc: support coalescing ipv6 tcp traffic,
Jason Wang <=
- [Qemu-devel] [ Patch 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic, wexu, 2016/03/15
- Re: [Qemu-devel] [ Patch 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic, Michael S. Tsirkin, 2016/03/15
- Re: [Qemu-devel] [ Patch 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic, Jason Wang, 2016/03/17
- Re: [Qemu-devel] [ Patch 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic, Wei Xu, 2016/03/17
- Re: [Qemu-devel] [ Patch 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic, Jason Wang, 2016/03/17
- Re: [Qemu-devel] [ Patch 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic, Wei Xu, 2016/03/18
- Re: [Qemu-devel] [ Patch 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic, Jason Wang, 2016/03/18
- Re: [Qemu-devel] [ Patch 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic, Wei Xu, 2016/03/18