qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] console: make QMP screendump use coroutine


From: Markus Armbruster
Subject: Re: [PATCH] console: make QMP screendump use coroutine
Date: Thu, 20 Feb 2020 17:01:17 +0100
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/26.3 (gnu/linux)

Cc: David for questions regarding the HMP core.  David, please look for
"Is HMP blocking the main loop a problem?"

Marc-André Lureau <address@hidden> writes:

> Hi
>
> On Thu, Feb 20, 2020 at 8:49 AM Markus Armbruster <address@hidden> wrote:
>>
>> Marc-André Lureau <address@hidden> writes:
>>
>> > Thanks to the QMP coroutine support, the screendump handler can
>> > trigger a graphic_hw_update(), yield and let the main loop run until
>> > update is done. Then the handler is resumed, and the ppm_save() will
>> > write the screen image to disk in the coroutine context (thus
>> > non-blocking).
>> >
>> > For now, HMP doesn't have coroutine support, so it remains potentially
>> > outdated or glitched.
>> >
>> > Fixes:
>> > https://bugzilla.redhat.com/show_bug.cgi?id=1230527
>> >
>> > Based-on: <address@hidden>
>> >
>> > Cc: Kevin Wolf <address@hidden>
>> > Signed-off-by: Marc-André Lureau <address@hidden>
>> > ---
>> >  qapi/ui.json    |  3 ++-
>> >  ui/console.c    | 35 +++++++++++++++++++++++++++--------
>> >  ui/trace-events |  2 +-
>> >  3 files changed, 30 insertions(+), 10 deletions(-)
>> >
>> > diff --git a/qapi/ui.json b/qapi/ui.json
>> > index e04525d8b4..d941202f34 100644
>> > --- a/qapi/ui.json
>> > +++ b/qapi/ui.json
>> > @@ -96,7 +96,8 @@
>> >  #
>> >  ##
>> >  { 'command': 'screendump',
>> > -  'data': {'filename': 'str', '*device': 'str', '*head': 'int'} }
>> > +  'data': {'filename': 'str', '*device': 'str', '*head': 'int'},
>> > +  'coroutine': true }
>> >
>> >  ##
>> >  # == Spice
>> > diff --git a/ui/console.c b/ui/console.c
>> > index ac79d679f5..db184b473f 100644
>> > --- a/ui/console.c
>> > +++ b/ui/console.c
>> > @@ -167,6 +167,7 @@ struct QemuConsole {
>> >      QEMUFIFO out_fifo;
>> >      uint8_t out_fifo_buf[16];
>> >      QEMUTimer *kbd_timer;
>> > +    Coroutine *screendump_co;
>> >
>> >      QTAILQ_ENTRY(QemuConsole) next;
>> >  };
>> > @@ -194,7 +195,6 @@ static void dpy_refresh(DisplayState *s);
>> >  static DisplayState *get_alloc_displaystate(void);
>> >  static void text_console_update_cursor_timer(void);
>> >  static void text_console_update_cursor(void *opaque);
>> > -static bool ppm_save(int fd, DisplaySurface *ds, Error **errp);
>> >
>> >  static void gui_update(void *opaque)
>> >  {
>> > @@ -263,6 +263,9 @@ static void gui_setup_refresh(DisplayState *ds)
>> >
>> >  void graphic_hw_update_done(QemuConsole *con)
>> >  {
>> > +    if (con && con->screendump_co) {
>>
>> How can !con happen?
>
> I don't think it can happen anymore (the patch evolved over several
> years, this is probably a left-over). In any case, it doesn't hurt.

I hate such dead checks, because they make me assume they can actually
happen.  Incorrect assumptions breed bugs.

But I'm willing to defer to the maintainer here.  Gerd?

>> > +        aio_co_wake(con->screendump_co);
>> > +    }
>> >  }
>> >
>> >  void graphic_hw_update(QemuConsole *con)
>> > @@ -310,16 +313,16 @@ void graphic_hw_invalidate(QemuConsole *con)
>> >      }
>> >  }
>> >
>> > -static bool ppm_save(int fd, DisplaySurface *ds, Error **errp)
>> > +static bool ppm_save(int fd, pixman_image_t *image, Error **errp)
>> >  {
>> > -    int width = pixman_image_get_width(ds->image);
>> > -    int height = pixman_image_get_height(ds->image);
>> > +    int width = pixman_image_get_width(image);
>> > +    int height = pixman_image_get_height(image);
>> >      g_autoptr(Object) ioc = OBJECT(qio_channel_file_new_fd(fd));
>> >      g_autofree char *header = NULL;
>> >      g_autoptr(pixman_image_t) linebuf = NULL;
>> >      int y;
>> >
>> > -    trace_ppm_save(fd, ds);
>> > +    trace_ppm_save(fd, image);
>> >
>> >      header = g_strdup_printf("P6\n%d %d\n%d\n", width, height, 255);
>> >      if (qio_channel_write_all(QIO_CHANNEL(ioc),
>> > @@ -329,7 +332,7 @@ static bool ppm_save(int fd, DisplaySurface *ds, Error 
>> > **errp)
>> >
>> >      linebuf = qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width);
>> >      for (y = 0; y < height; y++) {
>> > -        qemu_pixman_linebuf_fill(linebuf, ds->image, width, 0, y);
>> > +        qemu_pixman_linebuf_fill(linebuf, image, width, 0, y);
>> >          if (qio_channel_write_all(QIO_CHANNEL(ioc),
>> >                                    (char *)pixman_image_get_data(linebuf),
>> >                                    pixman_image_get_stride(linebuf), errp) 
>> > < 0) {
>>
>> Looks like an unrelated optimization / simplification.  If I was
>> maintainer, I'd ask for a separate patch.
>
> I can be split, but it's related. We should pass a reference to
> pixman_image_t, rather than a pointer to DisplaySurface, as the
> underlying image may change over time, and would result in corrupted
> coroutine save or worse.

Work that into your commit message, please.  Might be easier if you
split, but that's between you and the maintainer :)

>> > @@ -340,11 +343,18 @@ static bool ppm_save(int fd, DisplaySurface *ds, 
>> > Error **errp)
>> >      return true;
>> >  }
>> >
>> > +static void graphic_hw_update_bh(void *con)
>> > +{
>> > +    graphic_hw_update(con);
>> > +}
>> > +
>> > +/* may be called in coroutine context or not */
>>
>> Hmm.
>>
>> Even though the QMP core always calls in coroutine context, the comment
>> is correct: hmp_screendump() calls it outside coroutine context.
>> Because of that...
>>
>> >  void qmp_screendump(const char *filename, bool has_device, const char 
>> > *device,
>> >                      bool has_head, int64_t head, Error **errp)
>> >  {
>> >      QemuConsole *con;
>> >      DisplaySurface *surface;
>> > +    g_autoptr(pixman_image_t) image = NULL;
>> >      int fd;
>> >
>> >      if (has_device) {
>> > @@ -365,7 +375,15 @@ void qmp_screendump(const char *filename, bool 
>> > has_device, const char *device,
>> >          }
>> >      }
>> >
>> > -    graphic_hw_update(con);
>> > +    if (qemu_in_coroutine()) {
>> > +        assert(!con->screendump_co);
>>
>> What if multiple QMP monitors simultaneously screendump?  Hmm, it works
>> because all execute one after another in the same coroutine
>> qmp_dispatcher_co.  Implicit mutual exclusion.
>>
>> Executing them one after another is bad, because it lets an ill-behaved
>> QMP command starve *all* QMP monitors.  We do it only out of
>> (reasonable!) fear of implicit mutual exclusion requirements like the
>> one you add.
>>
>> Let's not add more if we can help it.
>
> The situation is not worse than the current blocking handling.

Really?

What makes executing multiple qmp_screendump() concurrently (in separate
threads) or interleaved (in separate coroutines in the same thread)
unsafe before this patch?

>> Your screendump_co is per QemuConsole instead of per QMP monitor only
>> because you need to find the coroutine in graphic_hw_update_done().  Can
>> we somehow pass it via function arguments?
>
> I think it could be done later, so I suggest a TODO.

We should avoid making our dependence on implicit mutual exclusion
worse.  When we do it anyway, a big, fat, ugly comment is definitely
called for.

>> In case avoiding the mutual exclusion is impractical: please explain it
>> in a comment to make it somewhat less implicit.

It is anything but: see appended patch.

>> > +        con->screendump_co = qemu_coroutine_self();
>> > +        aio_bh_schedule_oneshot(qemu_get_aio_context(),
>> > +                                graphic_hw_update_bh, con);
>> > +        qemu_coroutine_yield();
>> > +        con->screendump_co = NULL;
>> > +    }
>> > +
>>
>> ... the command handler needs extra code to cope with either.  Is this
>> really what we want for coroutine QMP command handlers?  We'll acquire
>> more of them, and I'd hate to make each one run both in and outside
>> coroutine context.  Shouldn't we let the HMP core take care of this?  Or
>> at least have some common infrastructure these handlers can use?
>
> We have several functions that have this dual support, for ex QIO.
>
> Changing both QMP & HMP commands to run in coroutine is likely
> additional work that we may not care at this point.

If it wasn't for non-QMP calls (typically HMP, but also CLI), then
handlers for QMP commands with 'coroutine': true could be coroutine_fn.

So far, coroutine_fn is merely documentation.  Perhaps it can guide a
checker for "do coroutine stuff only in coroutines" some day.  Would be
nice, because the coroutine calls are often buried deep, and far away
from the code that ensures they run in a coroutine.

My point is: marking functions coroutine_fn is good.  We should do it
more.  We should try to avoid stuff that hinders doing it more.

> I propose to leave a TODO, once we have several similar QMP & HMP mix
> cases we can try to find a common HMP solution to make the code
> simpler in QMP handler.

Collecting several users before building infrastructure makes sense when
the design of the infrastructure isn't obvious, or when the need for it
is in doubt.

Neither is the case for running QMP handlers in a coroutine: QMP
commands blocking the main loop is without doubt a problem we need to
solve, and the way to solve it was obvious enough for Kevin to do it
with one user: block_resize.  A second one quickly followed: screendump.

The only part that's different for HMP, I think, is "need".

Is HMP blocking the main loop a problem?

If yes, is it serious enough to justify solving it?

If yes, then putting workarounds into QMP handlers now so we can put off
solving it some more is taking on technical debt.

> I don't know if this is going to be a common pattern, we may end up
> with conversions that can run both without explicit handling (like the
> ppm_save() function, thanks to QIO).

Yes, such handlers may exist.  Running them out of coroutine context
would throw away their capability not to block the event loop, though,
wouldn't it?

>> Why is it okay not to call graphic_hw_update() anymore when
>> !qemu_in_coroutine()?
>
> You could call it, but then you should wait for completion by
> reentering the main loop (that was the point of my earlier qapi-async
> series)

Possibly stupid question: why is it necessary before this patch
(assuming it is, since we call it), and why is it no longer necessary
after?

Oh, see below.

>>
>> If qemu_in_coroutine(), we now run graphic_hw_update() in a bottom half,
>> then yield until the update completes (see graphic_hw_update_done()
>> above).  Can you explain the need for the bottom half?
>
> At least spice rendering is done in a separate thread, completion is async.

When I ask a stupid question like this one, I'm really after the "for
dummies" explanation.  I may be able to figure it out myself with some
effort, but having to put that kind of effort into patch review makes me
grumpy, and once I'm sufficiently grumpy, I don't want to review patches
anymore, let alone merge them.

Oh well, let me try.  We're in the main loop.  We want to trigger a
"graphics update" (whatever that is, doesn't matter) and wait for it to
complete without blocking the main loop.

"Without blocking the main loop" means the QMP coroutine yields.  I'd
naively expect

    QMP coroutine: schedule the job; yield
    whatever gets scheduled: complete the job; wake up QMP coroutine

Now let's examine the "graphics update" interface.

GraphicHwOps callback gfx_update() comes in two flavours:

* synchronous: complete the job, return

* asynchronous: start the job, return immediately,
  graphic_hw_update_done() will get called on job completion

graphic_hw_update() partly hides the difference:

* synchronous: complete the job, call graphic_hw_update_done()

* asynchronous: start the job, return immediately,
  graphic_hw_update_done() will get called on job completion

This lets you treat the synchronous case more like the asynchronous
case.

You use graphic_hw_update_done() to wake up the QMP coroutine.

I think I can now answer my question "why is it okay not to call
graphic_hw_update() anymore when !qemu_in_coroutine()?"

Before the patch, both QMP and HMP:

* with synchronous gfx_update(): we update before we write out the
  screendump.  The screendump is up-to-date.  Both update and write out
  block the main loop.

* with asynchronous gfx_update(): we start updating, but don't wait for
  it to complete before we write out.  This is wrong.  Write out blocks
  the main loop, but update does not.

After the patch:

* QMP with either gfx_update(): we update before we write out the
  screendump.  The screendump is up-to-date.  Neither update nor write
  out block the main loop.  Improvement.

* HMP with either gfx_update(): we don't update before we write out.
  Similarly wrong for asynchronous gfx_update(), regression for
  synchronous gfx_update().  Write out blocks the main loop as before.

Why is the regression okay?

Back to the bottom half.  The way graphic_hw_update() works, the QMP
coroutine can't schedule then yield.  It *has* to yield before
graphic_hw_update() runs.  That means we need a bottom half.

Alright, I'm officially grumpy now.

Please explain the need for a bottom half in a comment.

>> >      surface = qemu_console_surface(con);
>> >      if (!surface) {
>> >          error_setg(errp, "no surface");
>> > @@ -379,7 +397,8 @@ void qmp_screendump(const char *filename, bool 
>> > has_device, const char *device,
>> >          return;
>> >      }
>> >
>> > -    if (!ppm_save(fd, surface, errp)) {
>> > +    image = pixman_image_ref(surface->image);
>> > +    if (!ppm_save(fd, image, errp)) {
>> >          qemu_unlink(filename);
>> >      }
>> >  }
>> > diff --git a/ui/trace-events b/ui/trace-events
>> > index 0dcda393c1..e8726fc969 100644
>> > --- a/ui/trace-events
>> > +++ b/ui/trace-events
>> > @@ -15,7 +15,7 @@ displaysurface_create_pixman(void *display_surface) 
>> > "surface=%p"
>> >  displaysurface_free(void *display_surface) "surface=%p"
>> >  displaychangelistener_register(void *dcl, const char *name) "%p [ %s ]"
>> >  displaychangelistener_unregister(void *dcl, const char *name) "%p [ %s ]"
>> > -ppm_save(int fd, void *display_surface) "fd=%d surface=%p"
>> > +ppm_save(int fd, void *image) "fd=%d image=%p"
>> >
>> >  # gtk.c
>> >  # gtk-gl-area.c


diff --git a/ui/console.c b/ui/console.c
index 57df3a5439..c5aabf5a5f 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -167,7 +167,6 @@ struct QemuConsole {
     QEMUFIFO out_fifo;
     uint8_t out_fifo_buf[16];
     QEMUTimer *kbd_timer;
-    Coroutine *screendump_co;
 
     QTAILQ_ENTRY(QemuConsole) next;
 };
@@ -261,14 +260,20 @@ static void gui_setup_refresh(DisplayState *ds)
     ds->have_text = have_text;
 }
 
-void graphic_hw_update_done(QemuConsole *con)
+static void graphic_hw_update_done_with_kick(QemuConsole *con,
+                                             Coroutine *kick_me)
 {
-    if (con && con->screendump_co) {
-        aio_co_wake(con->screendump_co);
+    if (kick_me) {
+        aio_co_wake(kick_me);
     }
 }
 
-void graphic_hw_update(QemuConsole *con)
+void graphic_hw_update_done(QemuConsole *con)
+{
+    graphic_hw_update_done_with_kick(con, NULL);
+}
+
+static void graphic_hw_update_with_kick(QemuConsole *con, Coroutine *kick_me)
 {
     bool async = false;
     if (!con) {
@@ -279,10 +284,15 @@ void graphic_hw_update(QemuConsole *con)
         async = con->hw_ops->gfx_update_async;
     }
     if (!async) {
-        graphic_hw_update_done(con);
+        graphic_hw_update_done_with_kick(con, kick_me);
     }
 }
 
+void graphic_hw_update(QemuConsole *con)
+{
+    graphic_hw_update_with_kick(con, NULL);
+}
+
 void graphic_hw_gl_block(QemuConsole *con, bool block)
 {
     assert(con != NULL);
@@ -343,9 +353,16 @@ static bool ppm_save(int fd, pixman_image_t *image, Error 
**errp)
     return true;
 }
 
-static void graphic_hw_update_bh(void *con)
+typedef struct {
+    QemuConsole *con;
+    Coroutine *kick_me;
+} UpdateBHargs;
+
+static void graphic_hw_update_bh(void *p)
 {
-    graphic_hw_update(con);
+    UpdateBHargs *args = p;
+
+    graphic_hw_update_with_kick(args->con, args->kick_me);
 }
 
 /* may be called in coroutine context or not */
@@ -376,12 +393,13 @@ void qmp_screendump(const char *filename, bool 
has_device, const char *device,
     }
 
     if (qemu_in_coroutine()) {
-        assert(!con->screendump_co);
-        con->screendump_co = qemu_coroutine_self();
+        UpdateBHargs args = {
+            .con = con,
+            .kick_me = qemu_coroutine_self(),
+        };
         aio_bh_schedule_oneshot(qemu_get_aio_context(),
-                                graphic_hw_update_bh, con);
+                                graphic_hw_update_bh, &args);
         qemu_coroutine_yield();
-        con->screendump_co = NULL;
     }
 
     surface = qemu_console_surface(con);




reply via email to

[Prev in Thread] Current Thread [Next in Thread]