/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "qapi/error.h"
#ifndef _WIN32
#endif

#include "qemu/cutils.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg.h"
#include "hw/qdev-core.h"
#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#include "hw/xen/xen.h"
#endif
#include "sysemu/kvm.h"
#include "sysemu/sysemu.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#else /* !CONFIG_USER_ONLY */
#include "hw/hw.h"
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
#include "qemu/main-loop.h"
#include "translate-all.h"
#include "sysemu/replay.h"

#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "exec/log.h"

#include "migration/vmstate.h"

#include "qemu/range.h"
#ifndef _WIN32
#include "qemu/mmap-alloc.h"
#endif

//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
 * are protected by the ramlist lock.
 */
RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;

MemoryRegion io_mem_rom, io_mem_notdirty;
static MemoryRegion io_mem_unassigned;

/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
#define RAM_PREALLOC   (1 << 0)

/* RAM is mmap-ed with MAP_SHARED */
#define RAM_SHARED     (1 << 1)

/* Only a portion of RAM (used_length) is actually used, and migrated.
 * This used_length size can change across reboots.
 */
#define RAM_RESIZEABLE (1 << 2)

#endif

struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
__thread CPUState *current_cpu;
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

#if !defined(CONFIG_USER_ONLY)

typedef struct PhysPageEntry PhysPageEntry;

struct PhysPageEntry {
    /* How many levels to skip to reach the next node; each level covers
     * P_L2_BITS bits of the index.  0 for a leaf. */
    uint32_t skip : 6;
     /* index into phys_sections (!skip) or phys_map_nodes (skip) */
    uint32_t ptr : 26;
};

#define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)

/* Size of the L2 (and L3, etc) page tables.  */
#define ADDR_SPACE_BITS 64

#define P_L2_BITS 9
#define P_L2_SIZE (1 << P_L2_BITS)

#define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
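/* For example, with ADDR_SPACE_BITS == 64, P_L2_BITS == 9 and 4 KiB target
 * pages (TARGET_PAGE_BITS == 12), P_L2_LEVELS is ((64 - 12 - 1) / 9) + 1 = 6,
 * i.e. six levels of 512-entry tables covering the 52-bit page index.
 */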

typedef PhysPageEntry Node[P_L2_SIZE];

typedef struct PhysPageMap {
    struct rcu_head rcu;

    unsigned sections_nb;
    unsigned sections_nb_alloc;
    unsigned nodes_nb;
    unsigned nodes_nb_alloc;
    Node *nodes;
    MemoryRegionSection *sections;
} PhysPageMap;

struct AddressSpaceDispatch {
    struct rcu_head rcu;

    MemoryRegionSection *mru_section;
    /* This is a multi-level map on the physical address space.
     * The bottom level has pointers to MemoryRegionSections.
     */
    PhysPageEntry phys_map;
    PhysPageMap map;
    AddressSpace *as;
};

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    AddressSpace *as;
    hwaddr base;
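    /* One section index per byte offset within the page. */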
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

#define PHYS_SECTION_UNASSIGNED 0
#define PHYS_SECTION_NOTDIRTY 1
#define PHYS_SECTION_ROM 2
#define PHYS_SECTION_WATCH 3

static void io_mem_init(void);
static void memory_map_init(void);
static void tcg_commit(MemoryListener *listener);

static MemoryRegion io_mem_watch;

/**
 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
 * @cpu: the CPU whose AddressSpace this is
 * @as: the AddressSpace itself
 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
 * @tcg_as_listener: listener for tracking changes to the AddressSpace
 */
struct CPUAddressSpace {
    CPUState *cpu;
    AddressSpace *as;
    struct AddressSpaceDispatch *memory_dispatch;
    MemoryListener tcg_as_listener;
};

#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
{
    if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
        map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
        map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
    }
}

static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
{
    unsigned i;
    uint32_t ret;
    PhysPageEntry e;
    PhysPageEntry *p;

    ret = map->nodes_nb++;
    p = map->nodes[ret];
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != map->nodes_nb_alloc);

    e.skip = leaf ? 0 : 1;
    e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
    for (i = 0; i < P_L2_SIZE; ++i) {
        memcpy(&p[i], &e, sizeof(e));
    }
    return ret;
}

static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
                                hwaddr *index, hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
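    /* Fill [*index, *index + *nb) with @leaf.  A chunk that is aligned to
     * this level's step and at least step pages long becomes a leaf entry
     * right here; anything smaller recurses into the next level down.
     */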

    if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc(map, level == 0);
    }
    p = map->nodes[lp->ptr];
    lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];

    while (*nb && lp < &p[P_L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->skip = 0;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(map, lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);

    phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

/* Compact a non-leaf page entry. Simply detect that the entry has a single child,
 * and update our entry so we can skip it and go directly to the destination.
 */
static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
{
    unsigned valid_ptr = P_L2_SIZE;
    int valid = 0;
    PhysPageEntry *p;
    int i;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = nodes[lp->ptr];
    for (i = 0; i < P_L2_SIZE; i++) {
        if (p[i].ptr == PHYS_MAP_NODE_NIL) {
            continue;
        }

        valid_ptr = i;
        valid++;
        if (p[i].skip) {
            phys_page_compact(&p[i], nodes, compacted);
        }
    }

    /* We can only compress if there's only one child. */
    if (valid != 1) {
        return;
    }

    assert(valid_ptr < P_L2_SIZE);

    /* Don't compress if it won't fit in the # of bits we have. */
    if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
        return;
    }

    lp->ptr = p[valid_ptr].ptr;
    if (!p[valid_ptr].skip) {
        /* If our only child is a leaf, make this a leaf. */
        /* By design, we should have made this node a leaf to begin with so we
         * should never reach here.
         * But since it's so simple to handle this, let's do it just in case we
         * change this rule.
         */
        lp->skip = 0;
    } else {
        lp->skip += p[valid_ptr].skip;
    }
}

static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
{
    DECLARE_BITMAP(compacted, nodes_nb);

    if (d->phys_map.skip) {
        phys_page_compact(&d->phys_map, d->map.nodes, compacted);
    }
}

static inline bool section_covers_addr(const MemoryRegionSection *section,
                                       hwaddr addr)
{
    /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
     * the section must cover the entire address space.
     */
    return section->size.hi ||
           range_covers_byte(section->offset_within_address_space,
                             section->size.lo, addr);
}

static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
                                           Node *nodes, MemoryRegionSection *sections)
{
    PhysPageEntry *p;
    hwaddr index = addr >> TARGET_PAGE_BITS;
    int i;

    for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            return &sections[PHYS_SECTION_UNASSIGNED];
        }
        p = nodes[lp.ptr];
        lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
    }

    if (section_covers_addr(&sections[lp.ptr], addr)) {
        return &sections[lp.ptr];
    } else {
        return &sections[PHYS_SECTION_UNASSIGNED];
    }
}

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}

/* Called from RCU critical section */
static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                        hwaddr addr,
                                                        bool resolve_subpage)
{
    MemoryRegionSection *section = atomic_read(&d->mru_section);
    subpage_t *subpage;
    bool update;
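    /* Fast path: reuse the cached most-recently-used section if it still
     * covers addr; otherwise do a full phys_page_find() lookup and cache
     * the result for the next access.
     */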

    if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
        section_covers_addr(section, addr)) {
        update = false;
    } else {
        section = phys_page_find(d->phys_map, addr, d->map.nodes,
                                 d->map.sections);
        update = true;
    }
    if (resolve_subpage && section->mr->subpage) {
        subpage = container_of(section->mr, subpage_t, iomem);
        section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
    }
    if (update) {
        atomic_set(&d->mru_section, section);
    }
    return section;
}

/* Called from RCU critical section */
static MemoryRegionSection *
address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                 hwaddr *plen, bool resolve_subpage)
{
    MemoryRegionSection *section;
    MemoryRegion *mr;
    Int128 diff;

    section = address_space_lookup_region(d, addr, resolve_subpage);
    /* Compute offset within MemoryRegionSection */
    addr -= section->offset_within_address_space;

    /* Compute offset within MemoryRegion */
    *xlat = addr + section->offset_within_region;

    mr = section->mr;

    /* MMIO registers can be expected to perform full-width accesses based only
     * on their address, without considering adjacent registers that could
     * decode to completely different MemoryRegions.  When such registers
     * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
     * regions overlap wildly.  For this reason we cannot clamp the accesses
     * here.
     *
     * If the length is small (as is the case for address_space_ldl/stl),
     * everything works fine.  If the incoming length is large, however,
     * the caller really has to do the clamping through memory_access_size.
     */
    if (memory_region_is_ram(mr)) {
        diff = int128_sub(section->size, int128_make64(addr));
        *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
    }
    return section;
}

/* Called from RCU critical section */
MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                      hwaddr *xlat, hwaddr *plen,
                                      bool is_write)
{
    IOMMUTLBEntry iotlb;
    MemoryRegionSection *section;
    MemoryRegion *mr;
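    /* Walk any IOMMUs on the path: each pass translates addr through one
     * IOMMU region and continues the lookup in that IOMMU's target address
     * space, until a non-IOMMU region is reached or the access is refused.
     */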

    for (;;) {
        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
        section = address_space_translate_internal(d, addr, &addr, plen, true);
        mr = section->mr;

        if (!mr->iommu_ops) {
            break;
        }

        iotlb = mr->iommu_ops->translate(mr, addr, is_write);
        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
                | (addr & iotlb.addr_mask));
        *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
        if (!(iotlb.perm & (1 << is_write))) {
            mr = &io_mem_unassigned;
            break;
        }

        as = iotlb.target_as;
    }

    if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
        hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
        *plen = MIN(page, *plen);
    }

    *xlat = addr;
    return mr;
}

/* Called from RCU critical section */
MemoryRegionSection *
address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
                                  hwaddr *xlat, hwaddr *plen)
{
    MemoryRegionSection *section;
    AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;

    section = address_space_translate_internal(d, addr, xlat, plen, false);

    assert(!section->mr->iommu_ops);
    return section;
}
#endif

#if !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUState *cpu = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    cpu->interrupt_request &= ~0x01;
    tlb_flush(cpu, 1);

    return 0;
}

static int cpu_common_pre_load(void *opaque)
{
    CPUState *cpu = opaque;

    cpu->exception_index = -1;

    return 0;
}

static bool cpu_common_exception_index_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return tcg_enabled() && cpu->exception_index != -1;
}

static const VMStateDescription vmstate_cpu_common_exception_index = {
    .name = "cpu_common/exception_index",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_exception_index_needed,
    .fields = (VMStateField[]) {
        VMSTATE_INT32(exception_index, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

static bool cpu_common_crash_occurred_needed(void *opaque)
{
    CPUState *cpu = opaque;

    return cpu->crash_occurred;
}

static const VMStateDescription vmstate_cpu_common_crash_occurred = {
    .name = "cpu_common/crash_occurred",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = cpu_common_crash_occurred_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(crash_occurred, CPUState),
        VMSTATE_END_OF_LIST()
    }
};

const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .pre_load = cpu_common_pre_load,
    .post_load = cpu_common_post_load,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(halted, CPUState),
        VMSTATE_UINT32(interrupt_request, CPUState),
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_cpu_common_exception_index,
        &vmstate_cpu_common_crash_occurred,
        NULL
    }
};

#endif

CPUState *qemu_get_cpu(int index)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu->cpu_index == index) {
            return cpu;
        }
    }

    return NULL;
}

#if !defined(CONFIG_USER_ONLY)
void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
{
    CPUAddressSpace *newas;

    /* Target code should have set num_ases before calling us */
    assert(asidx < cpu->num_ases);

    if (asidx == 0) {
        /* address space 0 gets the convenience alias */
        cpu->as = as;
    }

    /* KVM cannot currently support multiple address spaces. */
    assert(asidx == 0 || !kvm_enabled());

    if (!cpu->cpu_ases) {
        cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
    }

    newas = &cpu->cpu_ases[asidx];
    newas->cpu = cpu;
    newas->as = as;
    if (tcg_enabled()) {
        newas->tcg_as_listener.commit = tcg_commit;
        memory_listener_register(&newas->tcg_as_listener, as);
    }
}

AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
{
    /* Return the AddressSpace corresponding to the specified index */
    return cpu->cpu_ases[asidx].as;
}
#endif

#ifndef CONFIG_USER_ONLY
static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
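/* cpu_index allocation for softmmu: hand out the lowest clear bit in the
 * map and fail once MAX_CPUMASK_BITS CPUs exist.
 */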

static int cpu_get_free_index(Error **errp)
{
    int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);

    if (cpu >= MAX_CPUMASK_BITS) {
        error_setg(errp, "Trying to use more CPUs than max of %d",
                   MAX_CPUMASK_BITS);
        return -1;
    }

    bitmap_set(cpu_index_map, cpu, 1);
    return cpu;
}

static void cpu_release_index(CPUState *cpu)
{
    bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
}
#else

static int cpu_get_free_index(Error **errp)
{
    CPUState *some_cpu;
    int cpu_index = 0;

    CPU_FOREACH(some_cpu) {
        cpu_index++;
    }
    return cpu_index;
}

static void cpu_release_index(CPUState *cpu)
{
    return;
}
#endif

void cpu_exec_exit(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    if (cpu->cpu_index == -1) {
        /* cpu_index was never allocated by this @cpu or was already freed. */
#if defined(CONFIG_USER_ONLY)
        cpu_list_unlock();
#endif
        return;
    }

    QTAILQ_REMOVE(&cpus, cpu, node);
    cpu_release_index(cpu);
    cpu->cpu_index = -1;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif

    if (cc->vmsd != NULL) {
        vmstate_unregister(NULL, cc->vmsd, cpu);
    }
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
        vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
    }
667 668
}

669
void cpu_exec_init(CPUState *cpu, Error **errp)
bellard's avatar
bellard committed
670
{
671
    CPUClass *cc = CPU_GET_CLASS(cpu);
672
    Error *local_err = NULL;
673

674
    cpu->as = NULL;
675
    cpu->num_ases = 0;
676

677 678
#ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();
679 680 681 682 683 684 685 686 687 688 689 690 691 692

    /* This is a softmmu CPU object, so create a property for it
     * so users can wire up its memory. (This can't go in qom/cpu.c
     * because that file is compiled only once for both user-mode
     * and system builds.) The default if no link is set up is to use
     * the system address space.
     */
    object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
                             (Object **)&cpu->memory,
                             qdev_prop_allow_set_link_before_realize,
                             OBJ_PROP_LINK_UNREF_ON_RELEASE,
                             &error_abort);
    cpu->memory = system_memory;
    object_ref(OBJECT(cpu->memory));
693 694
#endif

695 696 697
#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
698
    cpu->cpu_index = cpu_get_free_index(&local_err);
699 700 701 702 703 704
    if (local_err) {
        error_propagate(errp, local_err);
#if defined(CONFIG_USER_ONLY)
        cpu_list_unlock();
#endif
        return;
705
    }
706
    QTAILQ_INSERT_TAIL(&cpus, cpu, node);
707
#if defined(CONFIG_USER_ONLY)
708
    (void) cc;
709
    cpu_list_unlock();
710
#else
711
    if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
712
        vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
713
    }
714
    if (cc->vmsd != NULL) {
715
        vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
716
    }
717
#endif
bellard's avatar
bellard committed
718 719
}

720
#if defined(CONFIG_USER_ONLY)
721
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
722 723 724 725
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
726
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
727
{
728 729 730
    MemTxAttrs attrs;
    hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
    int asidx = cpu_asidx_from_attrs(cpu, attrs);
731
    if (phys != -1) {
732
        tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
733
                                phys | (pc & ~TARGET_PAGE_MASK));
734
    }
735
}
bellard's avatar
bellard committed
736
#endif
737

738
#if defined(CONFIG_USER_ONLY)
739
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
740 741 742 743

{
}

744 745 746 747 748 749 750 751 752 753
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    return -ENOSYS;
}

void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
}

754
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
755 756 757 758 759
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
                          int flags, CPUWatchpoint **watchpoint)
{
    CPUWatchpoint *wp;

    /* forbid ranges which are empty or run off the end of the address space */
    if (len == 0 || (addr + len - 1) < addr) {
        error_report("tried to set invalid watchpoint at %"
                     VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len = len;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
    }

    tlb_flush_page(cpu, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
                          int flags)
{
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (addr == wp->vaddr && len == wp->len
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);

    tlb_flush_page(cpu, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
        if (wp->flags & mask) {
            cpu_watchpoint_remove_by_ref(cpu, wp);
        }
    }
}

/* Return true if this watchpoint address matches the specified
 * access (ie the address range covered by the watchpoint overlaps
 * partially or completely with the address range covered by the
 * access).
 */
static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
                                                  vaddr addr,
                                                  vaddr len)
{
    /* We know the lengths are non-zero, but a little caution is
     * required to avoid errors in the case where the range ends
     * exactly at the top of the address space and so addr + len
     * wraps round to zero.
     */
    vaddr wpend = wp->vaddr + wp->len - 1;
    vaddr addrend = addr + len - 1;
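    /* The two inclusive ranges overlap unless one ends before the other
     * begins.
     */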

    return !(addr > wpend || wp->vaddr > addrend);
}

#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
                          CPUBreakpoint **breakpoint)
{
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB) {
        QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
    } else {
        QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
    }

    breakpoint_invalidate(cpu, pc);

    if (breakpoint) {
        *breakpoint = bp;
    }
    return 0;
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
{
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
{
    QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);

    breakpoint_invalidate(cpu, breakpoint->pc);

    g_free(breakpoint);
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
{
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
        if (bp->flags & mask) {
            cpu_breakpoint_remove_by_ref(cpu, bp);
        }
    }
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUState *cpu, int enabled)
{
    if (cpu->singlestep_enabled != enabled) {
        cpu->singlestep_enabled = enabled;
        if (kvm_enabled()) {
            kvm_update_guest_debug(cpu, 0);
        } else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(cpu);
        }
    }
}

void cpu_abort(CPUState *cpu, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_separate()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
    replay_finish();
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

#if !defined(CONFIG_USER_ONLY)
/* Called from RCU critical section */
static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
{
    RAMBlock *block;

    block = atomic_rcu_read(&ram_list.mru_block);
    if (block && addr - block->offset < block->max_length) {
        return block;
    }
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->max_length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    /* It is safe to write mru_block outside the iothread lock.  This
     * is what happens:
     *
     *     mru_block = xxx
     *     rcu_read_unlock()
     *                                        xxx removed from list
     *                  rcu_read_lock()
     *                  read mru_block
     *                                        mru_block = NULL;
     *                                        call_rcu(reclaim_ramblock, xxx);
     *                  rcu_read_unlock()
     *
     * atomic_rcu_set is not needed here.  The block was already published
     * when it was placed into the list.  Here we're just making an extra
     * copy of the pointer.
     */
    ram_list.mru_block = block;
    return block;
}

static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
{
    CPUState *cpu;
    ram_addr_t start1;
    RAMBlock *block;
    ram_addr_t end;

    end = TARGET_PAGE_ALIGN(start + length);
    start &= TARGET_PAGE_MASK;

    rcu_read_lock();
    block = qemu_get_ram_block(start);
    assert(block == qemu_get_ram_block(end - 1));
    start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
    CPU_FOREACH(cpu) {
        tlb_reset_dirty(cpu, start1, length);
    }
    rcu_read_unlock();
}

/* Note: start and end must be within the same ram block.  */
bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    bool dirty = false;

    if (length == 0) {
        return false;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    rcu_read_lock();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
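    /* The dirty bitmap is sharded into DIRTY_MEMORY_BLOCK_SIZE-page blocks;
     * the range may straddle several of them, so clear each piece atomically
     * and accumulate whether any page in it was dirty.
     */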

    while (page < end) {
        unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);

        dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
                                              offset, num);
        page += num;
    }

    rcu_read_unlock();

    if (dirty && tcg_enabled()) {
        tlb_reset_dirty_range_all(start, length);
    }

    return dirty;
}

/* Called from RCU critical section */
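/* Compute the value stored in the TLB's iotlb entry for this page: for RAM
 * it is the ram address of the page OR'd with a special section index
 * (PHYS_SECTION_NOTDIRTY or PHYS_SECTION_ROM); for MMIO it is the index of
 * the MemoryRegionSection in the dispatch table plus the offset (xlat)
 * within it.
 */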
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr, hwaddr xlat,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        if (!section->readonly) {
            iotlb |= PHYS_SECTION_NOTDIRTY;
        } else {
            iotlb |= PHYS_SECTION_ROM;
        }
    } else {
        AddressSpaceDispatch *d;

        d = atomic_rcu_read(&section->address_space->dispatch);
        iotlb = section - d->map.sections;
        iotlb += xlat;
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
        if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = PHYS_SECTION_WATCH + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(AddressSpace *as, hwaddr base);

static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
                               qemu_anon_ram_alloc;

/*
 * Set a custom physical guest memory allocator.
 * Accelerators with unusual needs may need this.  Hopefully, we can
 * get rid of it eventually.
 */
void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
{
    phys_mem_alloc = alloc;
}

static uint16_t phys_section_add(PhysPageMap *map,
                                 MemoryRegionSection *section)