On 19.11.2013 14:48, Peter Lieven
wrote:
On 18.11.2013 17:27, Anthony Liguori
wrote:
On Nov 18, 2013 12:20 AM, "Peter Lieven" <address@hidden>
wrote:
>
> vnc_update_client currently scans the dirty bitmap of
each client
> bitwise which is a very costly operation if only few bits
are dirty.
> vnc_refresh_server_surface does almost the same.
> this patch optimizes both by utilizing the heavily
optimized
> function find_next_bit to find the offset of the next
dirty
> bit in the dirty bitmaps.
>
> Signed-off-by: Peter Lieven <address@hidden>
Can you include performance data?
I made some aritificial analysis of vnc_update_client with the
attached test code.
$ gcc -O2 -o vnc_perf vnc_perf.c
$ ./vnc_perf
All bits clean - vnc_update_client_new: 0.07 secs
vnc_update_client_old: 10.82 secs
All bits dirty - vnc_update_client_new: 9.81 secs
vnc_update_client_old: 20.00 secs
Few bits dirty - vnc_update_client_new: 0.08 secs
vnc_update_client_old: 10.62 secs
find_and_clear_dirty_height() is still very slow, but I will look
at this separately.
quite easy, but great effect:
replacing:
for (tmp_x = last_x; tmp_x < x; tmp_x++) {
clear_bit(tmp_x, dirty[y + h]);
}
with:
bitmap_clear(dirty[y + h], last_x, x - last_x);
in find_and_clear_dirty_height(), yields the following performance
;-)
All bits clean - vnc_update_client_new: 0.07 secs
vnc_update_client_old: 10.65 secs
All bits dirty - vnc_update_client_new: 0.69 secs
vnc_update_client_old: 19.86 secs
Few bits dirty - vnc_update_client_new: 0.07 secs
vnc_update_client_old: 10.69 secs
Peter
---
#define ITERATIONS 16*1024
#define VNC_MAX_WIDTH 2560
#define VNC_MAX_HEIGHT 2048
#define VNC_DIRTY_PIXELS_PER_BIT 16
/* VNC_DIRTY_BITS is the number of bits in the dirty bitmap. */
#define VNC_DIRTY_BITS (VNC_MAX_WIDTH / VNC_DIRTY_PIXELS_PER_BIT)
/* VNC_DIRTY_BITS_PER_LINE might be greater than VNC_DIRTY_BITS
due to alignment */
#define VNC_DIRTY_BITS_PER_LINE(x) (sizeof(x) / VNC_MAX_HEIGHT *
BITS_PER_BYTE)
#define BITS_PER_BYTE 8
#define BITS_PER_LONG (sizeof (unsigned long) *
BITS_PER_BYTE)
#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
#define BIT(nr) (1UL << (nr))
#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE *
sizeof(long))
#define DECLARE_BITMAP(name,bits) \
unsigned long name[BITS_TO_LONGS(bits)]
#define ctzl ctz64
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include "time.h"
static inline int ctz64(uint64_t val)
{
return val ? __builtin_ctzll(val) : 64;
}
DECLARE_BITMAP(dirty[VNC_MAX_HEIGHT], VNC_DIRTY_BITS);
unsigned long find_next_bit(const unsigned long *addr, unsigned
long size,
unsigned long offset)
{
const unsigned long *p = addr + BITOP_WORD(offset);
unsigned long result = offset & ~(BITS_PER_LONG-1);
unsigned long tmp;
if (offset >= size) {
return size;
}
size -= result;
offset %= BITS_PER_LONG;
if (offset) {
tmp = *(p++);
tmp &= (~0UL << offset);
if (size < BITS_PER_LONG) {
goto found_first;
}
if (tmp) {
goto found_middle;
}
size -= BITS_PER_LONG;
result += BITS_PER_LONG;
}
while (size >= 4*BITS_PER_LONG) {
unsigned long d1, d2, d3;
tmp = *p;
d1 = *(p+1);
d2 = *(p+2);
d3 = *(p+3);
if (tmp) {
goto found_middle;
}
if (d1 | d2 | d3) {
break;
}
p += 4;
result += 4*BITS_PER_LONG;
size -= 4*BITS_PER_LONG;
}
while (size >= BITS_PER_LONG) {
if ((tmp = *(p++))) {
goto found_middle;
}
result += BITS_PER_LONG;
size -= BITS_PER_LONG;
}
if (!size) {
return result;
}
tmp = *p;
found_first:
tmp &= (~0UL >> (BITS_PER_LONG - size));
if (tmp == 0UL) { /* Are any bits set? */
return result + size; /* Nope. */
}
found_middle:
return result + ctzl(tmp);
}
static inline int test_bit(int nr, const unsigned long *addr)
{
return 1UL & (addr[BIT_WORD(nr)] >> (nr &
(BITS_PER_LONG-1)));
}
static inline void clear_bit(int nr, unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = addr + BIT_WORD(nr);
*p &= ~mask;
}
static inline void set_bit(int nr, unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = addr + BIT_WORD(nr);
*p |= mask;
}
static inline int test_and_clear_bit(int nr, unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = addr + BIT_WORD(nr);
unsigned long old = *p;
*p = old & ~mask;
return (old & mask) != 0;
}
static int find_and_clear_dirty_height(int y, int last_x, int x,
int height)
{
int h;
for (h = 1; h < (height - y); h++) {
int tmp_x;
if (!test_bit(last_x, dirty[y + h])) {
break;
}
for (tmp_x = last_x; tmp_x < x; tmp_x++) {
clear_bit(tmp_x, dirty[y + h]);
}
}
return h;
}
#define VNC_CLIENT_UPDATE_RECT() \
if (last_x != -1) {\
int h = find_and_clear_dirty_height(y, last_x, x,
height);\
}
void vnc_update_client_new() {
int width = VNC_MAX_WIDTH;
int height = VNC_MAX_HEIGHT;
int y = 0;
for (;;) {
int x;
int last_x = -1;
unsigned long offset = find_next_bit((unsigned long *)
&dirty,
height *
VNC_DIRTY_BITS_PER_LINE(dirty),
y *
VNC_DIRTY_BITS_PER_LINE(dirty));
if (offset == height * VNC_DIRTY_BITS_PER_LINE(dirty))
{
/* no more dirty bits */
break;
}
y = offset / VNC_DIRTY_BITS_PER_LINE(dirty);
for (x = offset % VNC_DIRTY_BITS_PER_LINE(dirty);
x < width / VNC_DIRTY_PIXELS_PER_BIT; x++) {
if (test_and_clear_bit(x, dirty[y])) {
if (last_x == -1) {
last_x = x;
}
} else {
VNC_CLIENT_UPDATE_RECT();
last_x = -1;
}
}
VNC_CLIENT_UPDATE_RECT();
y++;
}}
void vnc_update_client_old() {
int width = VNC_MAX_WIDTH;
int height = VNC_MAX_HEIGHT;
int y;
for (y = 0; y < height; y++) {
int x;
int last_x = -1;
for (x = 0; x < width / 16; x++) {
if (test_and_clear_bit(x, dirty[y])) {
if (last_x == -1) {
last_x = x;
}
} else {
VNC_CLIENT_UPDATE_RECT();
last_x = -1;
}
}
VNC_CLIENT_UPDATE_RECT();
}
}
void main() {
int i;
clock_t start, end;
start = clock();
for (i = 0; i < ITERATIONS; i++) {
memset(dirty, 0x00, sizeof(dirty));
vnc_update_client_new();
}
end = clock();
printf("All bits clean - vnc_update_client_new: %.2f secs\n",
(double) (end-start)/CLOCKS_PER_SEC);
start = clock();
for (i = 0; i < ITERATIONS; i++) {
memset(dirty, 0x00, sizeof(dirty));
vnc_update_client_old();
}
end = clock();
printf(" vnc_update_client_old: %.2f
secs\n\n", (double) (end-start)/CLOCKS_PER_SEC);
start = clock();
for (i = 0; i < ITERATIONS; i++) {
memset(dirty, 0xff, sizeof(dirty));
vnc_update_client_new();
}
end = clock();
printf("All bits dirty - vnc_update_client_new: %.2f secs\n",
(double) (end-start)/CLOCKS_PER_SEC);
start = clock();
for (i = 0; i < ITERATIONS; i++) {
memset(dirty, 0xff, sizeof(dirty));
vnc_update_client_old();
}
end = clock();
printf(" vnc_update_client_old: %.2f
secs\n\n", (double) (end-start)/CLOCKS_PER_SEC);
start = clock();
for (i = 0; i < ITERATIONS; i++) {
int y;
memset(dirty, 0x00, sizeof(dirty));
for (y = VNC_MAX_HEIGHT/2-8; y < VNC_MAX_HEIGHT/2+8;
y++) {
set_bit(VNC_DIRTY_BITS/2,dirty[y]);
}
vnc_update_client_new();
}
end = clock();
printf("Few bits dirty - vnc_update_client_new: %.2f secs\n",
(double) (end-start)/CLOCKS_PER_SEC);
start = clock();
for (i = 0; i < ITERATIONS; i++) {
int y;
memset(dirty, 0x00, sizeof(dirty));
for (y = VNC_MAX_HEIGHT/2-8; y < VNC_MAX_HEIGHT/2+8;
y++) {
set_bit(VNC_DIRTY_BITS/2,dirty[y]);
}
vnc_update_client_old();
}
end = clock();
printf(" vnc_update_client_old: %.2f
secs\n\n", (double) (end-start)/CLOCKS_PER_SEC);
return;
}
--
Mit freundlichen Grüßen
Peter Lieven
...........................................................
KAMP Netzwerkdienste GmbH
Vestische Str. 89-91 | 46117 Oberhausen
Tel: +49 (0) 208.89 402-50 | Fax: +49 (0) 208.89 402-40
address@hidden | http://www.kamp.de
Geschäftsführer: Heiner Lante | Michael Lante
Amtsgericht Duisburg | HRB Nr. 12154
USt-Id-Nr.: DE 120607556
...........................................................
|