|
From: | Peter Lieven |
Subject: | Re: [Qemu-devel] [PATCH 2/3] ui/vnc: optimize dirty bitmap tracking |
Date: | Tue, 19 Nov 2013 14:48:48 +0100 |
User-agent: | Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.1.0 |
On 18.11.2013 17:27, Anthony Liguori
wrote:
I made some aritificial analysis of vnc_update_client with the attached test code. $ gcc -O2 -o vnc_perf vnc_perf.c $ ./vnc_perf All bits clean - vnc_update_client_new: 0.07 secs vnc_update_client_old: 10.82 secs All bits dirty - vnc_update_client_new: 9.81 secs vnc_update_client_old: 20.00 secs Few bits dirty - vnc_update_client_new: 0.08 secs vnc_update_client_old: 10.62 secs find_and_clear_dirty_height() is still very slow, but I will look at this separately. Peter --- #define ITERATIONS 16*1024 #define VNC_MAX_WIDTH 2560 #define VNC_MAX_HEIGHT 2048 #define VNC_DIRTY_PIXELS_PER_BIT 16 /* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */ #define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT) /* VNC_DIRTY_BITS_PER_LINE might be greater than VNC_DIRTY_BITS due to alignment */ #define VNC_DIRTY_BITS_PER_LINE(x) (sizeof(x) / VNC_MAX_HEIGHT * BITS_PER_BYTE) #define BITS_PER_BYTE 8 #define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE) #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #define BIT(nr) (1UL << (nr)) #define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / BITS_PER_LONG) #define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) #define DECLARE_BITMAP(name,bits) \ unsigned long name[BITS_TO_LONGS(bits)] #define ctzl ctz64 #include <stdio.h> #include <stdint.h> #include <string.h> #include "time.h" static inline int ctz64(uint64_t val) { return val ? __builtin_ctzll(val) : 64; } DECLARE_BITMAP(dirty[VNC_MAX_HEIGHT], VNC_DIRTY_BITS); unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { const unsigned long *p = addr + BITOP_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); unsigned long tmp; if (offset >= size) { return size; } size -= result; offset %= BITS_PER_LONG; if (offset) { tmp = *(p++); tmp &= (~0UL << offset); if (size < BITS_PER_LONG) { goto found_first; } if (tmp) { goto found_middle; } size -= BITS_PER_LONG; result += BITS_PER_LONG; } while (size >= 4*BITS_PER_LONG) { unsigned long d1, d2, d3; tmp = *p; d1 = *(p+1); d2 = *(p+2); d3 = *(p+3); if (tmp) { goto found_middle; } if (d1 | d2 | d3) { break; } p += 4; result += 4*BITS_PER_LONG; size -= 4*BITS_PER_LONG; } while (size >= BITS_PER_LONG) { if ((tmp = *(p++))) { goto found_middle; } result += BITS_PER_LONG; size -= BITS_PER_LONG; } if (!size) { return result; } tmp = *p; found_first: tmp &= (~0UL >> (BITS_PER_LONG - size)); if (tmp == 0UL) { /* Are any bits set? */ return result + size; /* Nope. */ } found_middle: return result + ctzl(tmp); } static inline int test_bit(int nr, const unsigned long *addr) { return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } static inline void clear_bit(int nr, unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = addr + BIT_WORD(nr); *p &= ~mask; } static inline void set_bit(int nr, unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = addr + BIT_WORD(nr); *p |= mask; } static inline int test_and_clear_bit(int nr, unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = addr + BIT_WORD(nr); unsigned long old = *p; *p = old & ~mask; return (old & mask) != 0; } static int find_and_clear_dirty_height(int y, int last_x, int x, int height) { int h; for (h = 1; h < (height - y); h++) { int tmp_x; if (!test_bit(last_x, dirty[y + h])) { break; } for (tmp_x = last_x; tmp_x < x; tmp_x++) { clear_bit(tmp_x, dirty[y + h]); } } return h; } #define VNC_CLIENT_UPDATE_RECT() \ if (last_x != -1) {\ int h = find_and_clear_dirty_height(y, last_x, x, height);\ } void vnc_update_client_new() { int width = VNC_MAX_WIDTH; int height = VNC_MAX_HEIGHT; int y = 0; for (;;) { int x; int last_x = -1; unsigned long offset = find_next_bit((unsigned long *) &dirty, height * VNC_DIRTY_BITS_PER_LINE(dirty), y * VNC_DIRTY_BITS_PER_LINE(dirty)); if (offset == height * VNC_DIRTY_BITS_PER_LINE(dirty)) { /* no more dirty bits */ break; } y = offset / VNC_DIRTY_BITS_PER_LINE(dirty); for (x = offset % VNC_DIRTY_BITS_PER_LINE(dirty); x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) { if (test_and_clear_bit(x, dirty[y])) { if (last_x == -1) { last_x = x; } } else { VNC_CLIENT_UPDATE_RECT(); last_x = -1; } } VNC_CLIENT_UPDATE_RECT(); y++; }} void vnc_update_client_old() { int width = VNC_MAX_WIDTH; int height = VNC_MAX_HEIGHT; int y; for (y = 0; y < height; y++) { int x; int last_x = -1; for (x = 0; x < width / 16; x++) { if (test_and_clear_bit(x, dirty[y])) { if (last_x == -1) { last_x = x; } } else { VNC_CLIENT_UPDATE_RECT(); last_x = -1; } } VNC_CLIENT_UPDATE_RECT(); } } void main() { int i; clock_t start, end; start = clock(); for (i = 0; i < ITERATIONS; i++) { memset(dirty, 0x00, sizeof(dirty)); vnc_update_client_new(); } end = clock(); printf("All bits clean - vnc_update_client_new: %.2f secs\n", (double) (end-start)/CLOCKS_PER_SEC); start = clock(); for (i = 0; i < ITERATIONS; i++) { memset(dirty, 0x00, sizeof(dirty)); vnc_update_client_old(); } end = clock(); printf(" vnc_update_client_old: %.2f secs\n\n", (double) (end-start)/CLOCKS_PER_SEC); start = clock(); for (i = 0; i < ITERATIONS; i++) { memset(dirty, 0xff, sizeof(dirty)); vnc_update_client_new(); } end = clock(); printf("All bits dirty - vnc_update_client_new: %.2f secs\n", (double) (end-start)/CLOCKS_PER_SEC); start = clock(); for (i = 0; i < ITERATIONS; i++) { memset(dirty, 0xff, sizeof(dirty)); vnc_update_client_old(); } end = clock(); printf(" vnc_update_client_old: %.2f secs\n\n", (double) (end-start)/CLOCKS_PER_SEC); start = clock(); for (i = 0; i < ITERATIONS; i++) { int y; memset(dirty, 0x00, sizeof(dirty)); for (y = VNC_MAX_HEIGHT/2-8; y < VNC_MAX_HEIGHT/2+8; y++) { set_bit(VNC_DIRTY_BITS/2,dirty[y]); } vnc_update_client_new(); } end = clock(); printf("Few bits dirty - vnc_update_client_new: %.2f secs\n", (double) (end-start)/CLOCKS_PER_SEC); start = clock(); for (i = 0; i < ITERATIONS; i++) { int y; memset(dirty, 0x00, sizeof(dirty)); for (y = VNC_MAX_HEIGHT/2-8; y < VNC_MAX_HEIGHT/2+8; y++) { set_bit(VNC_DIRTY_BITS/2,dirty[y]); } vnc_update_client_old(); } end = clock(); printf(" vnc_update_client_old: %.2f secs\n\n", (double) (end-start)/CLOCKS_PER_SEC); return; } |
[Prev in Thread] | Current Thread | [Next in Thread] |