/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2011 IBM Corp.
 * Copyright (c) 2012 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qjson.h"
#include "qemu/coroutine.h"
#include "qemu/id.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"

/* Right now, this mutex is only needed to synchronize accesses to job->busy
 * and job->sleep_timer, such as concurrent calls to block_job_do_yield and
 * block_job_enter. */
static QemuMutex block_job_mutex;

static void block_job_lock(void)
{
    qemu_mutex_lock(&block_job_mutex);
}

static void block_job_unlock(void)
{
    qemu_mutex_unlock(&block_job_mutex);
}

static void __attribute__((__constructor__)) block_job_init(void)
{
    qemu_mutex_init(&block_job_mutex);
}
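
/*
 * Illustrative sketch, not part of the original file: code outside the
 * job coroutine that inspects job->busy or job->sleep_timer is expected
 * to do so inside this critical section, along the lines of:
 *
 *     block_job_lock();
 *     bool busy = job->busy;
 *     block_job_unlock();
 */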

static void block_job_event_cancelled(BlockJob *job);
static void block_job_event_completed(BlockJob *job, const char *msg);
static void block_job_enter_cond(BlockJob *job, bool(*fn)(BlockJob *job));

/* Transactional group of block jobs */
struct BlockJobTxn {

    /* Is this txn being cancelled? */
    bool aborting;

    /* List of jobs */
    QLIST_HEAD(, BlockJob) jobs;

    /* Reference count */
    int refcnt;
};

static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);

/*
 * The block job API is composed of two categories of functions.
 *
 * The first includes functions used by the monitor.  The monitor is
 * peculiar in that it accesses the block job list with block_job_get, and
 * therefore needs consistency across block_job_get and the actual operation
 * (e.g. block_job_set_speed).  The consistency is achieved with
 * aio_context_acquire/release.  These functions are declared in blockjob.h.
 *
 * The second includes functions used by the block job drivers and sometimes
 * by the core block layer.  These do not care about locking, because the
 * whole coroutine runs under the AioContext lock, and are declared in
 * blockjob_int.h.
 */
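
/*
 * Illustrative sketch of the monitor-side pattern described above (the
 * variable names are illustrative, not from this file):
 *
 *     BlockJob *job = block_job_get(id);
 *     AioContext *ctx = blk_get_aio_context(job->blk);
 *     aio_context_acquire(ctx);
 *     block_job_set_speed(job, speed, errp);
 *     aio_context_release(ctx);
 */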

BlockJob *block_job_next(BlockJob *job)
{
    if (!job) {
        return QLIST_FIRST(&block_jobs);
    }
    return QLIST_NEXT(job, job_list);
}

BlockJob *block_job_get(const char *id)
{
    BlockJob *job;

    QLIST_FOREACH(job, &block_jobs, job_list) {
        if (job->id && !strcmp(id, job->id)) {
            return job;
        }
    }

    return NULL;
}

BlockJobTxn *block_job_txn_new(void)
{
    BlockJobTxn *txn = g_new0(BlockJobTxn, 1);
    QLIST_INIT(&txn->jobs);
    txn->refcnt = 1;
    return txn;
}

static void block_job_txn_ref(BlockJobTxn *txn)
{
    txn->refcnt++;
}

void block_job_txn_unref(BlockJobTxn *txn)
{
    if (txn && --txn->refcnt == 0) {
        g_free(txn);
    }
}

void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job)
{
    if (!txn) {
        return;
    }

    assert(!job->txn);
    job->txn = txn;

    QLIST_INSERT_HEAD(&txn->jobs, job, txn_list);
    block_job_txn_ref(txn);
}
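
/*
 * Illustrative use of the transaction API (a sketch only; job1/job2 are
 * hypothetical jobs created elsewhere):
 *
 *     BlockJobTxn *txn = block_job_txn_new();
 *     block_job_txn_add_job(txn, job1);   // each add takes a txn reference
 *     block_job_txn_add_job(txn, job2);
 *     block_job_txn_unref(txn);           // drop the creator's reference
 */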

static void block_job_pause(BlockJob *job)
{
    job->pause_count++;
}

static void block_job_resume(BlockJob *job)
{
    assert(job->pause_count > 0);
    job->pause_count--;
    if (job->pause_count) {
        return;
    }
    block_job_enter(job);
}
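
/*
 * Illustrative only: pauses nest, so the job runs again only when every
 * block_job_pause() has been matched by a block_job_resume():
 *
 *     block_job_pause(job);    // pause_count 0 -> 1, job will pause
 *     block_job_pause(job);    // pause_count 1 -> 2
 *     block_job_resume(job);   // pause_count 2 -> 1, still paused
 *     block_job_resume(job);   // pause_count 1 -> 0, block_job_enter()
 */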

void block_job_ref(BlockJob *job)
{
    ++job->refcnt;
}

static void block_job_attached_aio_context(AioContext *new_context,
                                           void *opaque);
static void block_job_detach_aio_context(void *opaque);

void block_job_unref(BlockJob *job)
{
    if (--job->refcnt == 0) {
        BlockDriverState *bs = blk_bs(job->blk);
        QLIST_REMOVE(job, job_list);
        bs->job = NULL;
        block_job_remove_all_bdrv(job);
        blk_remove_aio_context_notifier(job->blk,
                                        block_job_attached_aio_context,
                                        block_job_detach_aio_context, job);
        blk_unref(job->blk);
        error_free(job->blocker);
        g_free(job->id);
        assert(!timer_pending(&job->sleep_timer));
        g_free(job);
    }
}

static void block_job_attached_aio_context(AioContext *new_context,
                                           void *opaque)
{
    BlockJob *job = opaque;

    if (job->driver->attached_aio_context) {
        job->driver->attached_aio_context(job, new_context);
    }

    block_job_resume(job);
}

static void block_job_drain(BlockJob *job)
{
    /* If the job is not busy (!job->busy), this kicks it into the next
     * pause point. */
    block_job_enter(job);

    blk_drain(job->blk);
    if (job->driver->drain) {
        job->driver->drain(job);
    }
}

static void block_job_detach_aio_context(void *opaque)
{
    BlockJob *job = opaque;

    /* In case the job terminates during aio_poll()... */
    block_job_ref(job);

    block_job_pause(job);

    while (!job->paused && !job->completed) {
        block_job_drain(job);
    }

    block_job_unref(job);
}

static char *child_job_get_parent_desc(BdrvChild *c)
{
    BlockJob *job = c->opaque;
    return g_strdup_printf("%s job '%s'",
                           BlockJobType_str(job->driver->job_type),
                           job->id);
}

static void child_job_drained_begin(BdrvChild *c)
{
    BlockJob *job = c->opaque;
    block_job_pause(job);
}

static void child_job_drained_end(BdrvChild *c)
{
    BlockJob *job = c->opaque;
    block_job_resume(job);
}

static const BdrvChildRole child_job = {
    .get_parent_desc    = child_job_get_parent_desc,
    .drained_begin      = child_job_drained_begin,
    .drained_end        = child_job_drained_end,
    .stay_at_node       = true,
};

void block_job_remove_all_bdrv(BlockJob *job)
{
    GSList *l;
    for (l = job->nodes; l; l = l->next) {
        BdrvChild *c = l->data;
        bdrv_op_unblock_all(c->bs, job->blocker);
        bdrv_root_unref_child(c);
    }
    g_slist_free(job->nodes);
    job->nodes = NULL;
}

int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
                       uint64_t perm, uint64_t shared_perm, Error **errp)
{
    BdrvChild *c;

    c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
                               job, errp);
    if (c == NULL) {
        return -EPERM;
    }

    job->nodes = g_slist_prepend(job->nodes, c);
    bdrv_ref(bs);
    bdrv_op_block_all(bs, job->blocker);

    return 0;
}
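
/*
 * Illustrative sketch: a driver that also writes to a second node (the
 * "target" name and target_bs are hypothetical) would typically add it
 * with the permissions it needs:
 *
 *     if (block_job_add_bdrv(job, "target", target_bs,
 *                            BLK_PERM_WRITE, BLK_PERM_ALL, errp) < 0) {
 *         ... fail job creation ...
 *     }
 */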

bool block_job_is_internal(BlockJob *job)
{
    return (job->id == NULL);
}

static bool block_job_started(BlockJob *job)
{
    return job->co;
}

/**
 * All jobs must allow a pause point before entering their job proper. This
 * ensures that jobs can be paused prior to being started, then resumed later.
 */
static void coroutine_fn block_job_co_entry(void *opaque)
{
    BlockJob *job = opaque;

    assert(job && job->driver && job->driver->start);
    block_job_pause_point(job);
    job->driver->start(job);
}

static void block_job_sleep_timer_cb(void *opaque)
{
    BlockJob *job = opaque;

    block_job_enter(job);
}

void block_job_start(BlockJob *job)
{
    assert(job && !block_job_started(job) && job->paused &&
           job->driver && job->driver->start);
    job->co = qemu_coroutine_create(block_job_co_entry, job);
    job->pause_count--;
    job->busy = true;
    job->paused = false;
    bdrv_coroutine_enter(blk_bs(job->blk), job->co);
}
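
/*
 * Illustrative driver-side contract (my_job_start and my_job_completed_cb
 * are hypothetical, not from this file): .start runs as a coroutine and
 * should hit a pause/cancellation point on every iteration:
 *
 *     static void coroutine_fn my_job_start(BlockJob *job)
 *     {
 *         while (!block_job_is_cancelled(job) && more_work(job)) {
 *             ... do one unit of work ...
 *             block_job_sleep_ns(job, delay_ns);  // pause point + throttle
 *         }
 *         block_job_defer_to_main_loop(job, my_job_completed_cb, NULL);
 *     }
 */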

static void block_job_completed_single(BlockJob *job)
{
    assert(job->completed);

    if (!job->ret) {
        if (job->driver->commit) {
            job->driver->commit(job);
        }
    } else {
        if (job->driver->abort) {
            job->driver->abort(job);
        }
    }
    if (job->driver->clean) {
        job->driver->clean(job);
    }

    if (job->cb) {
        job->cb(job->opaque, job->ret);
    }

    /* Emit events only if we actually started */
    if (block_job_started(job)) {
        if (block_job_is_cancelled(job)) {
            block_job_event_cancelled(job);
        } else {
            const char *msg = NULL;
            if (job->ret < 0) {
                msg = strerror(-job->ret);
            }
            block_job_event_completed(job, msg);
        }
    }

    if (job->txn) {
        QLIST_REMOVE(job, txn_list);
        block_job_txn_unref(job->txn);
    }
    block_job_unref(job);
}

static void block_job_cancel_async(BlockJob *job)
{
    if (job->iostatus != BLOCK_DEVICE_IO_STATUS_OK) {
        block_job_iostatus_reset(job);
    }
    if (job->user_paused) {
        /* Do not call block_job_enter here, the caller will handle it.  */
        job->user_paused = false;
        job->pause_count--;
    }
    job->cancelled = true;
}

static int block_job_finish_sync(BlockJob *job,
                                 void (*finish)(BlockJob *, Error **errp),
                                 Error **errp)
{
    Error *local_err = NULL;
    int ret;

    assert(blk_bs(job->blk)->job == job);

    block_job_ref(job);

    if (finish) {
        finish(job, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        block_job_unref(job);
        return -EBUSY;
    }
    /* block_job_drain calls block_job_enter, and it should be enough to
     * induce progress until the job completes or moves to the main thread. */
    while (!job->deferred_to_main_loop && !job->completed) {
        block_job_drain(job);
    }
    while (!job->completed) {
        aio_poll(qemu_get_aio_context(), true);
    }
    ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
    block_job_unref(job);
    return ret;
}

static void block_job_completed_txn_abort(BlockJob *job)
{
    AioContext *ctx;
    BlockJobTxn *txn = job->txn;
    BlockJob *other_job;

    if (txn->aborting) {
        /*
         * We are cancelled by another job, which will handle everything.
         */
        return;
    }
    txn->aborting = true;
    block_job_txn_ref(txn);

    /* We are the first failed job. Cancel other jobs. */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        ctx = blk_get_aio_context(other_job->blk);
        aio_context_acquire(ctx);
    }

    /* Other jobs are effectively cancelled by us, set the status for
     * them; this job, however, may or may not be cancelled, depending
     * on the caller, so leave it. */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (other_job != job) {
            block_job_cancel_async(other_job);
        }
    }
    while (!QLIST_EMPTY(&txn->jobs)) {
        other_job = QLIST_FIRST(&txn->jobs);
        ctx = blk_get_aio_context(other_job->blk);
        if (!other_job->completed) {
            assert(other_job->cancelled);
            block_job_finish_sync(other_job, NULL, NULL);
        }
        block_job_completed_single(other_job);
        aio_context_release(ctx);
    }

    block_job_txn_unref(txn);
}

static void block_job_completed_txn_success(BlockJob *job)
{
    AioContext *ctx;
    BlockJobTxn *txn = job->txn;
    BlockJob *other_job, *next;
    /*
     * Successful completion, see if there are other running jobs in this
     * txn.
     */
    QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
        if (!other_job->completed) {
            return;
        }
    }
    /* We are the last completed job, commit the transaction. */
    QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
        ctx = blk_get_aio_context(other_job->blk);
        aio_context_acquire(ctx);
        assert(other_job->ret == 0);
        block_job_completed_single(other_job);
        aio_context_release(ctx);
    }
}

/* Assumes the block_job_mutex is held */
static bool block_job_timer_pending(BlockJob *job)
{
    return timer_pending(&job->sleep_timer);
}

void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    Error *local_err = NULL;
    int64_t old_speed = job->speed;

    if (!job->driver->set_speed) {
        error_setg(errp, QERR_UNSUPPORTED);
        return;
    }
    job->driver->set_speed(job, speed, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    job->speed = speed;
    if (speed <= old_speed) {
        return;
    }

    /* kick only if a timer is pending */
    block_job_enter_cond(job, block_job_timer_pending);
}

void block_job_complete(BlockJob *job, Error **errp)
{
    /* Should not be reachable via external interface for internal jobs */
    assert(job->id);
    if (job->pause_count || job->cancelled ||
        !block_job_started(job) || !job->driver->complete) {
        error_setg(errp, "The active block job '%s' cannot be completed",
                   job->id);
        return;
    }

    job->driver->complete(job, errp);
}

void block_job_user_pause(BlockJob *job)
{
    job->user_paused = true;
    block_job_pause(job);
}

bool block_job_user_paused(BlockJob *job)
{
    return job->user_paused;
}

void block_job_user_resume(BlockJob *job)
{
    if (job && job->user_paused && job->pause_count > 0) {
        block_job_iostatus_reset(job);
        job->user_paused = false;
        block_job_resume(job);
    }
}

void block_job_cancel(BlockJob *job)
{
    if (block_job_started(job)) {
        block_job_cancel_async(job);
        block_job_enter(job);
    } else {
        block_job_completed(job, -ECANCELED);
    }
}

/* A wrapper around block_job_cancel() taking an Error ** parameter so it may be
 * used with block_job_finish_sync() without the need for (rather nasty)
 * function pointer casts there. */
static void block_job_cancel_err(BlockJob *job, Error **errp)
{
    block_job_cancel(job);
}

int block_job_cancel_sync(BlockJob *job)
{
    return block_job_finish_sync(job, &block_job_cancel_err, NULL);
}

void block_job_cancel_sync_all(void)
{
    BlockJob *job;
    AioContext *aio_context;

    while ((job = QLIST_FIRST(&block_jobs))) {
        aio_context = blk_get_aio_context(job->blk);
        aio_context_acquire(aio_context);
        block_job_cancel_sync(job);
        aio_context_release(aio_context);
    }
}

int block_job_complete_sync(BlockJob *job, Error **errp)
{
    return block_job_finish_sync(job, &block_job_complete, errp);
}

BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
{
    BlockJobInfo *info;

    if (block_job_is_internal(job)) {
        error_setg(errp, "Cannot query QEMU internal jobs");
        return NULL;
    }
    info = g_new0(BlockJobInfo, 1);
    info->type      = g_strdup(BlockJobType_str(job->driver->job_type));
    info->device    = g_strdup(job->id);
    info->len       = job->len;
    info->busy      = atomic_read(&job->busy);
    info->paused    = job->pause_count > 0;
    info->offset    = job->offset;
    info->speed     = job->speed;
    info->io_status = job->iostatus;
    info->ready     = job->ready;
    return info;
}

static void block_job_iostatus_set_err(BlockJob *job, int error)
{
    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                          BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}

static void block_job_event_cancelled(BlockJob *job)
{
    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_cancelled(job->driver->job_type,
                                        job->id,
                                        job->len,
                                        job->offset,
                                        job->speed,
                                        &error_abort);
}

static void block_job_event_completed(BlockJob *job, const char *msg)
{
    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_completed(job->driver->job_type,
                                        job->id,
                                        job->len,
                                        job->offset,
                                        job->speed,
                                        !!msg,
                                        msg,
                                        &error_abort);
}

/*
 * API for block job drivers and the block layer.  These functions are
 * declared in blockjob_int.h.
 */

void *block_job_create(const char *job_id, const BlockJobDriver *driver,
                       BlockDriverState *bs, uint64_t perm,
                       uint64_t shared_perm, int64_t speed, int flags,
                       BlockCompletionFunc *cb, void *opaque, Error **errp)
{
    BlockBackend *blk;
    BlockJob *job;
    int ret;

    if (bs->job) {
        error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
        return NULL;
    }

    if (job_id == NULL && !(flags & BLOCK_JOB_INTERNAL)) {
        job_id = bdrv_get_device_name(bs);
        if (!*job_id) {
            error_setg(errp, "An explicit job ID is required for this node");
            return NULL;
        }
    }

    if (job_id) {
        if (flags & BLOCK_JOB_INTERNAL) {
            error_setg(errp, "Cannot specify job ID for internal block job");
            return NULL;
        }

        if (!id_wellformed(job_id)) {
            error_setg(errp, "Invalid job ID '%s'", job_id);
            return NULL;
        }

        if (block_job_get(job_id)) {
            error_setg(errp, "Job ID '%s' already in use", job_id);
            return NULL;
        }
    }

    blk = blk_new(perm, shared_perm);
    ret = blk_insert_bs(blk, bs, errp);
    if (ret < 0) {
        blk_unref(blk);
        return NULL;
    }

    job = g_malloc0(driver->instance_size);
    job->driver        = driver;
    job->id            = g_strdup(job_id);
    job->blk           = blk;
    job->cb            = cb;
    job->opaque        = opaque;
    job->busy          = false;
    job->paused        = true;
    job->pause_count   = 1;
    job->refcnt        = 1;
    aio_timer_init(qemu_get_aio_context(), &job->sleep_timer,
                   QEMU_CLOCK_REALTIME, SCALE_NS,
                   block_job_sleep_timer_cb, job);

    error_setg(&job->blocker, "block device is in use by block job: %s",
               BlockJobType_str(driver->job_type));
    block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
    bs->job = job;

    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);

    QLIST_INSERT_HEAD(&block_jobs, job, job_list);

    blk_add_aio_context_notifier(blk, block_job_attached_aio_context,
                                 block_job_detach_aio_context, job);

    /* Only set speed when necessary to avoid NotSupported error */
    if (speed != 0) {
        Error *local_err = NULL;

        block_job_set_speed(job, speed, &local_err);
        if (local_err) {
            block_job_unref(job);
            error_propagate(errp, local_err);
            return NULL;
        }
    }
    return job;
}
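
/*
 * Illustrative creation sequence (a sketch; MyJob and my_job_driver are
 * hypothetical, with BlockJob embedded as the "common" field):
 *
 *     MyJob *s = block_job_create("job0", &my_job_driver, bs,
 *                                 BLK_PERM_CONSISTENT_READ, BLK_PERM_ALL,
 *                                 0, BLOCK_JOB_DEFAULT, cb, opaque, errp);
 *     if (s) {
 *         block_job_start(&s->common);  // job stays paused until started
 *     }
 */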

void block_job_pause_all(void)
{
    BlockJob *job = NULL;
    while ((job = block_job_next(job))) {
        AioContext *aio_context = blk_get_aio_context(job->blk);

        aio_context_acquire(aio_context);
        block_job_ref(job);
        block_job_pause(job);
        aio_context_release(aio_context);
    }
}

void block_job_early_fail(BlockJob *job)
{
    block_job_unref(job);
}

void block_job_completed(BlockJob *job, int ret)
{
    assert(blk_bs(job->blk)->job == job);
    assert(!job->completed);
    job->completed = true;
    job->ret = ret;
    if (!job->txn) {
        block_job_completed_single(job);
    } else if (ret < 0 || block_job_is_cancelled(job)) {
        block_job_completed_txn_abort(job);
    } else {
        block_job_completed_txn_success(job);
    }
}

static bool block_job_should_pause(BlockJob *job)
{
    return job->pause_count > 0;
}

/* Yield, and schedule a timer to reenter the coroutine after @ns nanoseconds.
 * Reentering the job coroutine with block_job_enter() before the timer has
 * expired is allowed and cancels the timer.
 *
 * If @ns is (uint64_t) -1, no timer is scheduled and block_job_enter() must be
 * called explicitly. */
static void block_job_do_yield(BlockJob *job, uint64_t ns)
{
    block_job_lock();
    if (ns != -1) {
        timer_mod(&job->sleep_timer, ns);
    }
    job->busy = false;
    block_job_unlock();
    qemu_coroutine_yield();

    /* Set by block_job_enter before re-entering the coroutine.  */
    assert(job->busy);
}

void coroutine_fn block_job_pause_point(BlockJob *job)
{
    assert(job && block_job_started(job));

    if (!block_job_should_pause(job)) {
        return;
    }
    if (block_job_is_cancelled(job)) {
        return;
    }

    if (job->driver->pause) {
        job->driver->pause(job);
    }

    if (block_job_should_pause(job) && !block_job_is_cancelled(job)) {
        job->paused = true;
        block_job_do_yield(job, -1);
        job->paused = false;
    }

    if (job->driver->resume) {
        job->driver->resume(job);
    }
}

void block_job_resume_all(void)
{
    BlockJob *job, *next;

    QLIST_FOREACH_SAFE(job, &block_jobs, job_list, next) {
        AioContext *aio_context = blk_get_aio_context(job->blk);

        aio_context_acquire(aio_context);
        block_job_resume(job);
        block_job_unref(job);
        aio_context_release(aio_context);
    }
}

/*
 * Conditionally enter a block_job: fn(), if non-NULL, is evaluated under
 * the block_job_lock critical section, and the job is entered only when
 * fn() returns true (or when fn is NULL).
 */
static void block_job_enter_cond(BlockJob *job, bool(*fn)(BlockJob *job))
{
    if (!block_job_started(job)) {
        return;
    }
    if (job->deferred_to_main_loop) {
        return;
    }

    block_job_lock();
    if (job->busy) {
        block_job_unlock();
        return;
    }

    if (fn && !fn(job)) {
        block_job_unlock();
        return;
    }

    assert(!job->deferred_to_main_loop);
    timer_del(&job->sleep_timer);
    job->busy = true;
    block_job_unlock();
    aio_co_wake(job->co);
}

void block_job_enter(BlockJob *job)
{
    block_job_enter_cond(job, NULL);
}

bool block_job_is_cancelled(BlockJob *job)
{
    return job->cancelled;
}

void block_job_sleep_ns(BlockJob *job, int64_t ns)
{
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too!  */
    if (block_job_is_cancelled(job)) {
        return;
    }

    if (!block_job_should_pause(job)) {
        block_job_do_yield(job, qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + ns);
    }

    block_job_pause_point(job);
}

void block_job_yield(BlockJob *job)
{
    assert(job->busy);

    /* Check cancellation *before* setting busy = false, too!  */
    if (block_job_is_cancelled(job)) {
        return;
    }

    if (!block_job_should_pause(job)) {
        block_job_do_yield(job, -1);
    }

    block_job_pause_point(job);
}

void block_job_iostatus_reset(BlockJob *job)
{
    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        return;
    }
    assert(job->user_paused && job->pause_count > 0);
    job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

void block_job_event_ready(BlockJob *job)
{
    job->ready = true;

    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_ready(job->driver->job_type,
                                    job->id,
                                    job->len,
                                    job->offset,
                                    job->speed, &error_abort);
}

BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
                                        int is_read, int error)
{
    BlockErrorAction action;

    switch (on_err) {
    case BLOCKDEV_ON_ERROR_ENOSPC:
    case BLOCKDEV_ON_ERROR_AUTO:
        action = (error == ENOSPC) ?
                 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
        break;
    case BLOCKDEV_ON_ERROR_STOP:
        action = BLOCK_ERROR_ACTION_STOP;
        break;
    case BLOCKDEV_ON_ERROR_REPORT:
        action = BLOCK_ERROR_ACTION_REPORT;
        break;
    case BLOCKDEV_ON_ERROR_IGNORE:
        action = BLOCK_ERROR_ACTION_IGNORE;
        break;
    default:
        abort();
    }
    if (!block_job_is_internal(job)) {
        qapi_event_send_block_job_error(job->id,
                                        is_read ? IO_OPERATION_TYPE_READ :
                                        IO_OPERATION_TYPE_WRITE,
                                        action, &error_abort);
    }
    if (action == BLOCK_ERROR_ACTION_STOP) {
        /* make the pause user visible, which will be resumed from QMP. */
        block_job_user_pause(job);
        block_job_iostatus_set_err(job, error);
    }
    return action;
}
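
/*
 * Illustrative driver-side error handling (a sketch; on_err and ret come
 * from the hypothetical driver's context):
 *
 *     BlockErrorAction action =
 *         block_job_error_action(job, on_err, is_read, -ret);
 *     if (action == BLOCK_ERROR_ACTION_REPORT) {
 *         ... fail the job with ret ...
 *     }
 *     // with BLOCK_ERROR_ACTION_STOP the job is already user-paused here
 */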

typedef struct {
    BlockJob *job;
    AioContext *aio_context;
    BlockJobDeferToMainLoopFn *fn;
    void *opaque;
} BlockJobDeferToMainLoopData;

static void block_job_defer_to_main_loop_bh(void *opaque)
{
    BlockJobDeferToMainLoopData *data = opaque;
    AioContext *aio_context;

    /* Prevent race with block_job_defer_to_main_loop() */
    aio_context_acquire(data->aio_context);

    /* Fetch BDS AioContext again, in case it has changed */
    aio_context = blk_get_aio_context(data->job->blk);
    if (aio_context != data->aio_context) {
        aio_context_acquire(aio_context);
    }

    data->fn(data->job, data->opaque);

    if (aio_context != data->aio_context) {
        aio_context_release(aio_context);
    }

    aio_context_release(data->aio_context);

    g_free(data);
}

void block_job_defer_to_main_loop(BlockJob *job,
                                  BlockJobDeferToMainLoopFn *fn,
                                  void *opaque)
{
    BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
    data->job = job;
    data->aio_context = blk_get_aio_context(job->blk);
    data->fn = fn;
    data->opaque = opaque;
    job->deferred_to_main_loop = true;

    aio_bh_schedule_oneshot(qemu_get_aio_context(),
                            block_job_defer_to_main_loop_bh, data);
}
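
/*
 * Illustrative completion path (my_job_completed_cb and data are
 * hypothetical): a job whose coroutine runs in an IOThread defers the
 * final callback so that block_job_completed() executes in the main loop:
 *
 *     static void my_job_completed_cb(BlockJob *job, void *opaque)
 *     {
 *         int ret = *(int *)opaque;
 *         block_job_completed(job, ret);
 *     }
 *     ...
 *     block_job_defer_to_main_loop(job, my_job_completed_cb, &data->ret);
 */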