/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Needed early for CONFIG_BSD etc. */
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"
#include "exec/exec-all.h"

#include "qemu/thread.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "qemu/main-loop.h"
#include "qemu/bitmap.h"
#include "qemu/seqlock.h"
#include "tcg.h"
#include "qapi-event.h"
#include "hw/nmi.h"
#include "sysemu/replay.h"
#include "hw/boards.h"

#ifdef CONFIG_LINUX

#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33
#endif

#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET 1
#endif

#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY 1
#endif

#endif /* CONFIG_LINUX */

int64_t max_delay;
int64_t max_advance;

/* vcpu throttling controls */
static QEMUTimer *throttle_timer;
static unsigned int throttle_percentage;

#define CPU_THROTTLE_PCT_MIN 1
#define CPU_THROTTLE_PCT_MAX 99
#define CPU_THROTTLE_TIMESLICE_NS 10000000

bool cpu_is_stopped(CPUState *cpu)
{
    return cpu->stopped || !runstate_is_running();
}

static bool cpu_thread_is_idle(CPUState *cpu)
{
    if (cpu->stop || cpu->queued_work_first) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return true;
    }
    if (!cpu->halted || cpu_has_work(cpu) ||
        kvm_halt_in_kernel()) {
        return false;
    }
    return true;
}

static bool all_cpu_threads_idle(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (!cpu_thread_is_idle(cpu)) {
            return false;
        }
    }
    return true;
}

/***********************************************************/
/* guest cycle counter */

/* Protected by TimersState seqlock */

static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks.  */
static int icount_time_shift;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

static QEMUTimer *icount_rt_timer;
static QEMUTimer *icount_vm_timer;
static QEMUTimer *icount_warp_timer;

typedef struct TimersState {
    /* Protected by BQL.  */
    int64_t cpu_ticks_prev;
    int64_t cpu_ticks_offset;

    /* cpu_clock_offset can be read out of BQL, so protect it with
     * this lock.
     */
    QemuSeqLock vm_clock_seqlock;
    int64_t cpu_clock_offset;
    int32_t cpu_ticks_enabled;
    int64_t dummy;

    /* Compensate for varying guest execution speed.  */
    int64_t qemu_icount_bias;
    /* Only written by TCG thread */
    int64_t qemu_icount;
} TimersState;

static TimersState timers_state;
bool mttcg_enabled;

/*
 * We default to false if we know other options have been enabled
 * which are currently incompatible with MTTCG. Otherwise when each
 * guest (target) has been updated to support:
 *   - atomic instructions
 *   - memory ordering primitives (barriers)
 * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
 *
 * Once a guest architecture has been converted to the new primitives
 * there are two remaining limitations to check.
 *
 * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
 * - The host must have a stronger memory order than the guest
 *
 * It may be possible in future to support strong guests on weak hosts
 * but that will require tagging all load/stores in a guest with their
 * implicit memory order requirements which would likely slow things
 * down a lot.
 */

static bool check_tcg_memory_orders_compatible(void)
{
#if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
    return (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
#else
    return false;
#endif
}

static bool default_mttcg_enabled(void)
{
    if (use_icount || TCG_OVERSIZED_GUEST) {
        return false;
    } else {
#ifdef TARGET_SUPPORTS_MTTCG
        return check_tcg_memory_orders_compatible();
#else
        return false;
#endif
    }
}

void qemu_tcg_configure(QemuOpts *opts, Error **errp)
{
    const char *t = qemu_opt_get(opts, "thread");
    if (t) {
        if (strcmp(t, "multi") == 0) {
            if (TCG_OVERSIZED_GUEST) {
                error_setg(errp, "No MTTCG when guest word size > hosts");
            } else if (use_icount) {
                error_setg(errp, "No MTTCG when icount is enabled");
            } else {
#ifndef TARGET_SUPPORTS_MTTCG
                error_report("Guest not yet converted to MTTCG - "
                             "you may get unexpected results");
#endif
                if (!check_tcg_memory_orders_compatible()) {
                    error_report("Guest expects a stronger memory ordering "
                                 "than the host provides");
                    error_printf("This may cause strange/hard to debug errors\n");
                }
                mttcg_enabled = true;
            }
        } else if (strcmp(t, "single") == 0) {
            mttcg_enabled = false;
        } else {
            error_setg(errp, "Invalid 'thread' setting %s", t);
        }
    } else {
        mttcg_enabled = default_mttcg_enabled();
    }
}

/* The current number of executed instructions is based on what we
 * originally budgeted minus the current state of the decrementing
 * icount counters in extra/u16.low.
 */
static int64_t cpu_get_icount_executed(CPUState *cpu)
{
    return cpu->icount_budget - (cpu->icount_decr.u16.low + cpu->icount_extra);
}

/*
 * Update the global shared timer_state.qemu_icount to take into
 * account executed instructions. This is done by the TCG vCPU
 * thread so the main-loop can see time has moved forward.
 */
void cpu_update_icount(CPUState *cpu)
{
    int64_t executed = cpu_get_icount_executed(cpu);
    cpu->icount_budget -= executed;

#ifdef CONFIG_ATOMIC64
    atomic_set__nocheck(&timers_state.qemu_icount,
                        atomic_read__nocheck(&timers_state.qemu_icount) +
                        executed);
#else /* FIXME: we need 64bit atomics to do this safely */
    timers_state.qemu_icount += executed;
#endif
}

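/* Return the raw instruction counter, folding in any instructions that the
 * currently running vCPU has executed but not yet accounted for.
 */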
int64_t cpu_get_icount_raw(void)
{
    CPUState *cpu = current_cpu;

    if (cpu && cpu->running) {
        if (!cpu->can_do_io) {
            fprintf(stderr, "Bad icount read\n");
            exit(1);
        }
        /* Take into account what has run */
        cpu_update_icount(cpu);
    }
#ifdef CONFIG_ATOMIC64
    return atomic_read__nocheck(&timers_state.qemu_icount);
#else /* FIXME: we need 64bit atomics to do this safely */
    return timers_state.qemu_icount;
#endif
}

/* Return the virtual CPU time, based on the instruction counter.  */
static int64_t cpu_get_icount_locked(void)
{
    int64_t icount = cpu_get_icount_raw();
    return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}

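/* Seqlock-protected wrapper around cpu_get_icount_locked() so that the
 * icount-based virtual clock can be read consistently outside the BQL.
 */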
int64_t cpu_get_icount(void)
{
    int64_t icount;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        icount = cpu_get_icount_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return icount;
}

int64_t cpu_icount_to_ns(int64_t icount)
{
    return icount << icount_time_shift;
}

/* return the time elapsed in VM between vm_start and vm_stop.  Unless
 * icount is active, cpu_get_ticks() uses units of the host CPU cycle
 * counter.
 *
 * Caller must hold the BQL
 */
int64_t cpu_get_ticks(void)
{
    int64_t ticks;

    if (use_icount) {
        return cpu_get_icount();
    }

    ticks = timers_state.cpu_ticks_offset;
    if (timers_state.cpu_ticks_enabled) {
        ticks += cpu_get_host_ticks();
    }

    if (timers_state.cpu_ticks_prev > ticks) {
        /* Note: non-increasing ticks may happen if the host uses
           software suspend */
        timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
        ticks = timers_state.cpu_ticks_prev;
    }

    timers_state.cpu_ticks_prev = ticks;
    return ticks;
}

static int64_t cpu_get_clock_locked(void)
{
    int64_t time;

    time = timers_state.cpu_clock_offset;
    if (timers_state.cpu_ticks_enabled) {
        time += get_clock();
    }

    return time;
}

/* Return the monotonic time elapsed in VM, i.e.,
 * the time between vm_start and vm_stop
 */
int64_t cpu_get_clock(void)
{
    int64_t ti;
    unsigned start;

    do {
        start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        ti = cpu_get_clock_locked();
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));

    return ti;
}

/* enable cpu_get_ticks()
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_enable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (!timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
        timers_state.cpu_clock_offset -= get_clock();
        timers_state.cpu_ticks_enabled = 1;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* disable cpu_get_ticks() : the clock is stopped. You must not call
 * cpu_get_ticks() after that.
 * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
 */
void cpu_disable_ticks(void)
{
    /* Here, the thing really protected by the seqlock is cpu_clock_offset. */
    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (timers_state.cpu_ticks_enabled) {
        timers_state.cpu_ticks_offset += cpu_get_host_ticks();
        timers_state.cpu_clock_offset = cpu_get_clock_locked();
        timers_state.cpu_ticks_enabled = 0;
    }
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

/* Correlation between real and virtual time is always going to be
   fairly approximate, so ignore small variation.
   When the guest is idle real and virtual time will be aligned in
   the IO wait loop.  */
#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)

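/* Nudge icount_time_shift so that the icount-based virtual clock tracks
 * real time: slow the virtual clock down when the guest runs ahead of
 * real time and speed it up when it falls behind.
 */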
static void icount_adjust(void)
{
    int64_t cur_time;
    int64_t cur_icount;
    int64_t delta;

    /* Protected by TimersState mutex.  */
    static int64_t last_delta;

    /* If the VM is not running, then do nothing.  */
    if (!runstate_is_running()) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    cur_time = cpu_get_clock_locked();
    cur_icount = cpu_get_icount_locked();

    delta = cur_icount - cur_time;
    /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */
    if (delta > 0
        && last_delta + ICOUNT_WOBBLE < delta * 2
        && icount_time_shift > 0) {
        /* The guest is getting too far ahead.  Slow time down.  */
        icount_time_shift--;
    }
    if (delta < 0
        && last_delta - ICOUNT_WOBBLE > delta * 2
        && icount_time_shift < MAX_ICOUNT_SHIFT) {
        /* The guest is getting too far behind.  Speed time up.  */
        icount_time_shift++;
    }
    last_delta = delta;
    timers_state.qemu_icount_bias = cur_icount
                              - (timers_state.qemu_icount << icount_time_shift);
    seqlock_write_end(&timers_state.vm_clock_seqlock);
}

static void icount_adjust_rt(void *opaque)
{
    timer_mod(icount_rt_timer,
              qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_adjust();
}

static void icount_adjust_vm(void *opaque)
{
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
    icount_adjust();
}

static int64_t qemu_icount_round(int64_t count)
{
    return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}

static void icount_warp_rt(void)
{
    unsigned seq;
    int64_t warp_start;

    /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
     * changes from -1 to another value, so the race here is okay.
     */
    do {
        seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
        warp_start = vm_clock_warp_start;
    } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));

    if (warp_start == -1) {
        return;
    }

    seqlock_write_begin(&timers_state.vm_clock_seqlock);
    if (runstate_is_running()) {
        int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
                                     cpu_get_clock_locked());
        int64_t warp_delta;

        warp_delta = clock - vm_clock_warp_start;
        if (use_icount == 2) {
            /*
             * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
             * far ahead of real time.
             */
            int64_t cur_icount = cpu_get_icount_locked();
            int64_t delta = clock - cur_icount;
            warp_delta = MIN(warp_delta, delta);
        }
        timers_state.qemu_icount_bias += warp_delta;
    }
    vm_clock_warp_start = -1;
    seqlock_write_end(&timers_state.vm_clock_seqlock);

    if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void icount_timer_cb(void *opaque)
{
    /* No need for a checkpoint because the timer already synchronizes
     * with CHECKPOINT_CLOCK_VIRTUAL_RT.
     */
    icount_warp_rt();
}

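/* Warp QEMU_CLOCK_VIRTUAL forward to 'dest', firing any timers that become
 * due along the way.  Only used by qtest.
 */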
void qtest_clock_warp(int64_t dest)
{
    int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    AioContext *aio_context;
    assert(qtest_enabled());
    aio_context = qemu_get_aio_context();
    while (clock < dest) {
        int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
        int64_t warp = qemu_soonest_timeout(dest - clock, deadline);

        seqlock_write_begin(&timers_state.vm_clock_seqlock);
        timers_state.qemu_icount_bias += warp;
        seqlock_write_end(&timers_state.vm_clock_seqlock);

        qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
        clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
    }
    qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}

void qemu_start_warp_timer(void)
{
    int64_t clock;
    int64_t deadline;

    if (!use_icount) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
        return;
    }

    if (!all_cpu_threads_idle()) {
        return;
    }

    if (qtest_enabled()) {
        /* When testing, qtest commands advance icount.  */
        return;
    }

    /* We want to use the earliest deadline from ALL vm_clocks */
    clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
    deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
    if (deadline < 0) {
        static bool notified;
        if (!icount_sleep && !notified) {
            error_report("WARNING: icount sleep disabled and no active timers");
            notified = true;
        }
        return;
    }

    if (deadline > 0) {
        /*
         * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
         * sleep.  Otherwise, the CPU might be waiting for a future timer
         * interrupt to wake it up, but the interrupt never comes because
         * the vCPU isn't running any insns and thus doesn't advance the
         * QEMU_CLOCK_VIRTUAL.
         */
        if (!icount_sleep) {
            /*
             * We never let VCPUs sleep in no sleep icount mode.
             * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
             * to the next QEMU_CLOCK_VIRTUAL event and notify it.
             * It is useful when we want a deterministic execution time,
             * isolated from host latencies.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            timers_state.qemu_icount_bias += deadline;
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
        } else {
            /*
             * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
             * "real" time, (related to the time left until the next event) has
             * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
             * This keeps the warps from being visible externally; for example,
             * you will not be sending network packets continuously instead of
             * every 100ms.
             */
            seqlock_write_begin(&timers_state.vm_clock_seqlock);
            if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
                vm_clock_warp_start = clock;
            }
            seqlock_write_end(&timers_state.vm_clock_seqlock);
            timer_mod_anticipate(icount_warp_timer, clock + deadline);
        }
    } else if (deadline == 0) {
        qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
    }
}

static void qemu_account_warp_timer(void)
{
    if (!use_icount || !icount_sleep) {
        return;
    }

    /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
     * do not fire, so computing the deadline does not make sense.
     */
    if (!runstate_is_running()) {
        return;
    }

    /* warp clock deterministically in record/replay mode */
    if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
        return;
    }

    timer_del(icount_warp_timer);
    icount_warp_rt();
}

static bool icount_state_needed(void *opaque)
{
    return use_icount;
}

/*
 * This is a subsection for icount migration.
 */
static const VMStateDescription icount_vmstate_timers = {
    .name = "timer/icount",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = icount_state_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(qemu_icount_bias, TimersState),
        VMSTATE_INT64(qemu_icount, TimersState),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_timers = {
    .name = "timer",
    .version_id = 2,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_INT64(cpu_ticks_offset, TimersState),
        VMSTATE_INT64(dummy, TimersState),
        VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &icount_vmstate_timers,
        NULL
    }
};

static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
{
    double pct;
    double throttle_ratio;
    long sleeptime_ns;

    if (!cpu_throttle_get_percentage()) {
        return;
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    throttle_ratio = pct / (1 - pct);
    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);

    qemu_mutex_unlock_iothread();
    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
    qemu_mutex_lock_iothread();
    atomic_set(&cpu->throttle_thread_scheduled, 0);
}

static void cpu_throttle_timer_tick(void *opaque)
{
    CPUState *cpu;
    double pct;

    /* Stop the timer if needed */
    if (!cpu_throttle_get_percentage()) {
        return;
    }
    CPU_FOREACH(cpu) {
        if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
            async_run_on_cpu(cpu, cpu_throttle_thread,
                             RUN_ON_CPU_NULL);
        }
    }

    pct = (double)cpu_throttle_get_percentage()/100;
    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                   CPU_THROTTLE_TIMESLICE_NS / (1-pct));
}

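/* Set the vCPU throttling percentage (clamped to the valid range) and
 * (re)arm the throttle timer.
 */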
void cpu_throttle_set(int new_throttle_pct)
{
    /* Ensure throttle percentage is within valid range */
    new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
    new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);

    atomic_set(&throttle_percentage, new_throttle_pct);

    timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
                                       CPU_THROTTLE_TIMESLICE_NS);
}

void cpu_throttle_stop(void)
{
    atomic_set(&throttle_percentage, 0);
}

bool cpu_throttle_active(void)
{
    return (cpu_throttle_get_percentage() != 0);
}

int cpu_throttle_get_percentage(void)
{
    return atomic_read(&throttle_percentage);
}

void cpu_ticks_init(void)
{
    seqlock_init(&timers_state.vm_clock_seqlock);
    vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
    throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                           cpu_throttle_timer_tick, NULL);
}

void configure_icount(QemuOpts *opts, Error **errp)
{
    const char *option;
    char *rem_str = NULL;

    option = qemu_opt_get(opts, "shift");
    if (!option) {
        if (qemu_opt_get(opts, "align") != NULL) {
            error_setg(errp, "Please specify shift option when using align");
        }
        return;
    }

    icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
    if (icount_sleep) {
        icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                         icount_timer_cb, NULL);
    }

    icount_align_option = qemu_opt_get_bool(opts, "align", false);

    if (icount_align_option && !icount_sleep) {
        error_setg(errp, "align=on and sleep=off are incompatible");
    }
    if (strcmp(option, "auto") != 0) {
        errno = 0;
        icount_time_shift = strtol(option, &rem_str, 0);
        if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
            error_setg(errp, "icount: Invalid shift value");
        }
        use_icount = 1;
        return;
    } else if (icount_align_option) {
        error_setg(errp, "shift=auto and align=on are incompatible");
    } else if (!icount_sleep) {
        error_setg(errp, "shift=auto and sleep=off are incompatible");
    }

    use_icount = 2;

    /* 125MIPS seems a reasonable initial guess at the guest speed.
       It will be corrected fairly quickly anyway.  */
    icount_time_shift = 3;

    /* Have both realtime and virtual time triggers for speed adjustment.
       The realtime trigger catches emulated time passing too slowly,
       the virtual time trigger catches emulated time passing too fast.
       Realtime triggers occur even when idle, so use them less frequently
       than VM triggers.  */
    icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
                                   icount_adjust_rt, NULL);
    timer_mod(icount_rt_timer,
                   qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
    icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                        icount_adjust_vm, NULL);
    timer_mod(icount_vm_timer,
                   qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                   NANOSECONDS_PER_SECOND / 10);
}

/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running a
 * timer event with force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed while all vCPUs are idle and restarted once
 * any vCPU becomes runnable again.
 */

static QEMUTimer *tcg_kick_vcpu_timer;
static CPUState *tcg_current_rr_cpu;

#define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)

static inline int64_t qemu_tcg_next_kick(void)
{
    return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
}

/* Kick the currently round-robin scheduled vCPU */
static void qemu_cpu_kick_rr_cpu(void)
{
    CPUState *cpu;
    do {
        cpu = atomic_mb_read(&tcg_current_rr_cpu);
        if (cpu) {
            cpu_exit(cpu);
        }
    } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
}

static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
{
}

void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
{
    if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
        qemu_notify_event();
        return;
    }

    if (!qemu_in_vcpu_thread() && first_cpu) {
        /* qemu_cpu_kick is not enough to kick a halted CPU out of
         * qemu_tcg_wait_io_event.  async_run_on_cpu, instead,
         * causes cpu_thread_is_idle to return false.  This way,
         * handle_icount_deadline can run.
         */
        async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
    }
}

static void kick_tcg_thread(void *opaque)
{
    timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    qemu_cpu_kick_rr_cpu();
}

static void start_tcg_kick_timer(void)
{
    if (!mttcg_enabled && !tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
        tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                           kick_tcg_thread, NULL);
        timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
    }
}

static void stop_tcg_kick_timer(void)
{
    if (tcg_kick_vcpu_timer) {
        timer_del(tcg_kick_vcpu_timer);
        tcg_kick_vcpu_timer = NULL;
    }
}

/***********************************************************/
void hw_error(const char *fmt, ...)
{
    va_list ap;
    CPUState *cpu;

    va_start(ap, fmt);
    fprintf(stderr, "qemu: hardware error: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    CPU_FOREACH(cpu) {
        fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU);
    }
    va_end(ap);
    abort();
}

void cpu_synchronize_all_states(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_state(cpu);
    }
}

void cpu_synchronize_all_post_reset(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_reset(cpu);
    }
}

void cpu_synchronize_all_post_init(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_post_init(cpu);
    }
}

void cpu_synchronize_all_pre_loadvm(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        cpu_synchronize_pre_loadvm(cpu);
    }
}

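/* Stop the clocks, pause all vCPUs, switch to the requested run state and
 * flush all block devices.  Returns the result of bdrv_flush_all().
 */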
static int do_vm_stop(RunState state)
{
    int ret = 0;

    if (runstate_is_running()) {
        cpu_disable_ticks();
        pause_all_vcpus();
        runstate_set(state);
        vm_state_notify(0, state);
        qapi_event_send_stop(&error_abort);
    }

    bdrv_drain_all();
    replay_disable_events();
    ret = bdrv_flush_all();

    return ret;
}

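/* A vCPU may execute guest code only if it has not been asked to stop and
 * the VM as a whole is running.
 */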
static bool cpu_can_run(CPUState *cpu)
{
    if (cpu->stop) {
        return false;
    }
    if (cpu_is_stopped(cpu)) {
        return false;
    }
    return true;
}

static void cpu_handle_guest_debug(CPUState *cpu)
{
    gdb_set_stop_cpu(cpu);
    qemu_system_debug_request();
    cpu->stopped = true;
}

#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
    sigset_t set;
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    if (!sigaction(SIGBUS, &action, NULL)) {
        raise(SIGBUS);
        sigemptyset(&set);
        sigaddset(&set, SIGBUS);
        pthread_sigmask(SIG_UNBLOCK, &set, NULL);
    }
    perror("Failed to re-raise SIGBUS!\n");
    abort();
}

static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
{
    if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
        sigbus_reraise();
    }

    if (current_cpu) {
        /* Called asynchronously in VCPU thread.  */
        if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    } else {
        /* Called synchronously (via signalfd) in main thread.  */
        if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
            sigbus_reraise();
        }
    }
}

static void qemu_init_sigbus(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_flags = SA_SIGINFO;
    action.sa_sigaction = sigbus_handler;
    sigaction(SIGBUS, &action, NULL);

    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
}
#else /* !CONFIG_LINUX */
static void qemu_init_sigbus(void)
{
}
#endif /* !CONFIG_LINUX */

static QemuMutex qemu_global_mutex;

static QemuThread io_thread;

/* cpu creation */
static QemuCond qemu_cpu_cond;
/* system init */
static QemuCond qemu_pause_cond;

void qemu_init_cpu_loop(void)
{
    qemu_init_sigbus();
    qemu_cond_init(&qemu_cpu_cond);
    qemu_cond_init(&qemu_pause_cond);
    qemu_mutex_init(&qemu_global_mutex);

    qemu_thread_get_self(&io_thread);
}

void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
{
    do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
}

static void qemu_kvm_destroy_vcpu(CPUState *cpu)
{
    if (kvm_destroy_vcpu(cpu) < 0) {
        error_report("kvm_destroy_vcpu failed");
        exit(EXIT_FAILURE);
    }
}

static void qemu_tcg_destroy_vcpu(CPUState *cpu)
{
}

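/* Common bookkeeping after a vCPU thread wakes up: acknowledge any stop
 * request and run the work queued for this vCPU.
 */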
static void qemu_wait_io_event_common(CPUState *cpu)
{
    atomic_mb_set(&cpu->thread_kicked, false);
    if (cpu->stop) {
        cpu->stop = false;
        cpu->stopped = true;
        qemu_cond_broadcast(&qemu_pause_cond);
    }
    process_queued_cpu_work(cpu);
}

static bool qemu_tcg_should_sleep(CPUState *cpu)
{
    if (mttcg_enabled) {
        return cpu_thread_is_idle(cpu);
    } else {
        return all_cpu_threads_idle();
    }
}

static void qemu_tcg_wait_io_event(CPUState *cpu)
{
    while (qemu_tcg_should_sleep(cpu)) {
        stop_tcg_kick_timer();
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    start_tcg_kick_timer();

    qemu_wait_io_event_common(cpu);
}

static void qemu_kvm_wait_io_event(CPUState *cpu)
{
    while (cpu_thread_is_idle(cpu)) {
        qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
    }

    qemu_wait_io_event_common(cpu);
}

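/* Per-vCPU thread for KVM: initialise the vCPU, then loop running guest
 * code and servicing I/O events until the vCPU is unplugged.
 */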
static void *qemu_kvm_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    r = kvm_init_vcpu(cpu);
    if (r < 0) {
        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
        exit(1);
    }

    kvm_init_cpu_signals(cpu);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    do {
        if (cpu_can_run(cpu)) {
            r = kvm_cpu_exec(cpu);
            if (r == EXCP_DEBUG) {
                cpu_handle_guest_debug(cpu);
            }
        }
        qemu_kvm_wait_io_event(cpu);
    } while (!cpu->unplug || cpu_can_run(cpu));

    qemu_kvm_destroy_vcpu(cpu);
    cpu->created = false;
    qemu_cond_signal(&qemu_cpu_cond);
    qemu_mutex_unlock_iothread();
    return NULL;
}

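/* vCPU thread used when no guest code is executed (qtest): it only waits
 * for SIG_IPI and processes queued work.
 */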
static void *qemu_dummy_cpu_thread_fn(void *arg)
{
#ifdef _WIN32
    fprintf(stderr, "qtest is not supported under Windows\n");
    exit(1);
#else
    CPUState *cpu = arg;
    sigset_t waitset;
    int r;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);
    cpu->thread_id = qemu_get_thread_id();
    cpu->can_do_io = 1;
    current_cpu = cpu;

    sigemptyset(&waitset);
    sigaddset(&waitset, SIG_IPI);

    /* signal CPU creation */
    cpu->created = true;
    qemu_cond_signal(&qemu_cpu_cond);

    while (1) {
        qemu_mutex_unlock_iothread();
        do {
            int sig;
            r = sigwait(&waitset, &sig);
        } while (r == -1 && (errno == EAGAIN || errno == EINTR));
        if (r == -1) {
            perror("sigwait");
            exit(1);
        }
        qemu_mutex_lock_iothread();
        qemu_wait_io_event_common(cpu);
    }

    return NULL;
#endif
}

static int64_t tcg_get_icount_limit(void)
{
    int64_t deadline;

    if (replay_mode != REPLAY_MODE_PLAY) {
        deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        /* Maintain prior (possibly buggy) behaviour where if no deadline
         * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
         * INT32_MAX nanoseconds ahead, we still use INT32_MAX
         * nanoseconds.
         */
        if ((deadline < 0) || (deadline > INT32_MAX)) {
            deadline = INT32_MAX;
        }

        return qemu_icount_round(deadline);
    } else {
        return replay_get_instructions();
    }
}

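/* In icount mode, run QEMU_CLOCK_VIRTUAL timers that have just become due
 * and wake up any other AioContexts waiting on that clock.
 */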
static void handle_icount_deadline(void)
{
    assert(qemu_in_vcpu_thread());
    if (use_icount) {
        int64_t deadline =
            qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);

        if (deadline == 0) {
            /* Wake up other AioContexts.  */
            qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
            qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
        }
    }
}

static void prepare_icount_for_run(CPUState *cpu)
{
    if (use_icount) {
        int insns_left;

        /* These should always be cleared by process_icount_data after
         * each vCPU execution. However u16.high can be raised
         * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
         */
        g_assert(cpu->icount_decr.u16.low == 0);
        g_assert(cpu->icount_extra == 0);

        cpu->icount_budget = tcg_get_icount_limit();
        insns_left = MIN(0xffff, cpu->icount_budget);
        cpu->icount_decr.u16.low = insns_left;
        cpu->icount_extra = cpu->icount_budget - insns_left;
    }
}

static void process_icount_data(CPUState *cpu)
{
    if (use_icount) {
        /* Account for executed instructions */
        cpu_update_icount(cpu);

        /* Reset the counters */
        cpu->icount_decr.u16.low = 0;
        cpu->icount_extra = 0;
        cpu->icount_budget = 0;

        replay_account_executed_instructions();
    }
}


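/* Execute one slice of guest code on a vCPU under TCG, dropping the BQL
 * around cpu_exec() and charging the time to the TCG profiler when
 * profiling is enabled.
 */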
static int tcg_cpu_exec(CPUState *cpu)
{
    int ret;
#ifdef CONFIG_PROFILER
    int64_t ti;
#endif

#ifdef CONFIG_PROFILER
    ti = profile_getclock();
#endif
    qemu_mutex_unlock_iothread();
    cpu_exec_start(cpu);
    ret = cpu_exec(cpu);
    cpu_exec_end(cpu);
    qemu_mutex_lock_iothread();
#ifdef CONFIG_PROFILER
    tcg_time += profile_getclock() - ti;
#endif
    return ret;
}

/* Destroy any remaining vCPUs which have been unplugged and have
 * finished running
 */
static void deal_with_unplugged_cpus(void)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->unplug && !cpu_can_run(cpu)) {
            qemu_tcg_destroy_vcpu(cpu);
            cpu->created = false;
            qemu_cond_signal(&qemu_cpu_cond);
            break;
        }
    }
}

/* Single-threaded TCG
 *
 * In the single-threaded case each vCPU is simulated in turn. If
 * there is more than a single vCPU we create a simple timer to kick
 * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
 * This is done explicitly rather than relying on side-effects
 * elsewhere.
 */

static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
{
    CPUState *cpu = arg;

    rcu_register_thread();

    qemu_mutex_lock_iothread();
    qemu_thread_get_self(cpu->thread);

    CPU_FOREACH(cpu) {
        cpu->thread_id = qemu_get_thread_id();
        cpu->created = true;
        cpu->can_do_io = 1;
    }
    qemu_cond_signal(&qemu_cpu_cond);

    /* wait for initial kick-off after machine start */
    while (first_cpu->stopped) {
        qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);

        /* process any pending work */
        CPU_FOREACH(cpu) {
            current_cpu = cpu;
            qemu_wait_io_event_common(cpu);
        }
    }

    start_tcg_kick_timer();

    cpu = first_cpu;

    /* process any pending work */
    cpu->exit_request = 1;

    while (1) {
        /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
        qemu_account_warp_timer();

        /* Run the timers here.  This is much more efficient than
         * waking up the I/O thread and waiting for completion.
         */
        handle_icount_deadline();

        if (!cpu) {
            cpu = first_cpu;
        }

        while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

            atomic_mb_set(&tcg_current_rr_cpu, cpu);
            current_cpu = cpu;

            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);

            if (cpu_can_run(cpu)) {
                int r;

                prepare_icount_for_run(cpu);