diff options
author | Geoff Levand <geoff@infradead.org> | 2023-06-26 08:17:16 -0500 |
---|---|---|
committer | Geoff Levand <geoff@infradead.org> | 2023-06-26 08:17:16 -0500 |
commit | 98ec4e7cee0f26a1af070ffc697d82c0f2714848 (patch) | |
tree | ae7176bc5d6ad1cc65476e71e98142b91450956d /virt | |
parent | 917ca4e843ee2239821d891825ec4f121f97caa4 (diff) | |
parent | 84506709c1c3fcb2422eeee9e99fc93dffbaf289 (diff) | |
download | ps3-linux-master.tar.gz |
Signed-off-by: Geoff Levand <geoff@infradead.org>
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/kvm_main.c | 112 |
1 files changed, 73 insertions, 39 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b1679d08a21609..65f94f592ff883 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -62,11 +62,14 @@ #include "kvm_mm.h" #include "vfio.h" +#include <trace/events/ipi.h> + #define CREATE_TRACE_POINTS #include <trace/events/kvm.h> #include <linux/kvm_dirty_ring.h> + /* Worst case buffer size needed for holding an integer. */ #define ITOA_MAX_LEN 12 @@ -683,6 +686,24 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn return __kvm_handle_hva_range(kvm, &range); } + +static bool kvm_change_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) +{ + /* + * Skipping invalid memslots is correct if and only change_pte() is + * surrounded by invalidate_range_{start,end}(), which is currently + * guaranteed by the primary MMU. If that ever changes, KVM needs to + * unmap the memslot instead of skipping the memslot to ensure that KVM + * doesn't hold references to the old PFN. + */ + WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); + + if (range->slot->flags & KVM_MEMSLOT_INVALID) + return false; + + return kvm_set_spte_gfn(kvm, range); +} + static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long address, @@ -704,7 +725,7 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, if (!READ_ONCE(kvm->mmu_invalidate_in_progress)) return; - kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn); + kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn); } void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, @@ -1298,7 +1319,7 @@ static void kvm_destroy_vm(struct kvm *kvm) * At this point, pending calls to invalidate_range_start() * have completed but no more MMU notifiers will run, so * mn_active_invalidate_count may remain unbalanced. - * No threads can be waiting in install_new_memslots as the + * No threads can be waiting in kvm_swap_active_memslots() as the * last reference on KVM has been dropped, but freeing * memslots would deadlock without this manual intervention. */ @@ -1742,13 +1763,13 @@ static void kvm_invalidate_memslot(struct kvm *kvm, kvm_arch_flush_shadow_memslot(kvm, old); kvm_arch_guest_memory_reclaimed(kvm); - /* Was released by kvm_swap_active_memslots, reacquire. */ + /* Was released by kvm_swap_active_memslots(), reacquire. */ mutex_lock(&kvm->slots_arch_lock); /* * Copy the arch-specific field of the newly-installed slot back to the * old slot as the arch data could have changed between releasing - * slots_arch_lock in install_new_memslots() and re-acquiring the lock + * slots_arch_lock in kvm_swap_active_memslots() and re-acquiring the lock * above. Writers are required to retrieve memslots *after* acquiring * slots_arch_lock, thus the active slot's data is guaranteed to be fresh. */ @@ -1810,11 +1831,11 @@ static int kvm_set_memslot(struct kvm *kvm, int r; /* - * Released in kvm_swap_active_memslots. + * Released in kvm_swap_active_memslots(). * - * Must be held from before the current memslots are copied until - * after the new memslots are installed with rcu_assign_pointer, - * then released before the synchronize srcu in kvm_swap_active_memslots. + * Must be held from before the current memslots are copied until after + * the new memslots are installed with rcu_assign_pointer, then + * released before the synchronize srcu in kvm_swap_active_memslots(). * * When modifying memslots outside of the slots_lock, must be held * before reading the pointer to the current memslots until after all @@ -3866,7 +3887,7 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS static int vcpu_get_pid(void *data, u64 *val) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data; + struct kvm_vcpu *vcpu = data; *val = pid_nr(rcu_access_pointer(vcpu->pid)); return 0; } @@ -3959,18 +3980,19 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) } vcpu->vcpu_idx = atomic_read(&kvm->online_vcpus); - r = xa_insert(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, GFP_KERNEL_ACCOUNT); - BUG_ON(r == -EBUSY); + r = xa_reserve(&kvm->vcpu_array, vcpu->vcpu_idx, GFP_KERNEL_ACCOUNT); if (r) goto unlock_vcpu_destroy; /* Now it's all set up, let userspace reach it */ kvm_get_kvm(kvm); r = create_vcpu_fd(vcpu); - if (r < 0) { - xa_erase(&kvm->vcpu_array, vcpu->vcpu_idx); - kvm_put_kvm_no_destroy(kvm); - goto unlock_vcpu_destroy; + if (r < 0) + goto kvm_put_xa_release; + + if (KVM_BUG_ON(!!xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) { + r = -EINVAL; + goto kvm_put_xa_release; } /* @@ -3985,6 +4007,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) kvm_create_vcpu_debugfs(vcpu); return r; +kvm_put_xa_release: + kvm_put_kvm_no_destroy(kvm); + xa_release(&kvm->vcpu_array, vcpu->vcpu_idx); unlock_vcpu_destroy: mutex_unlock(&kvm->lock); kvm_dirty_ring_free(&vcpu->dirty_ring); @@ -4467,7 +4492,7 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return 0; } -static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) +static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) { switch (arg) { case KVM_CAP_USER_MEMORY: @@ -5044,7 +5069,7 @@ put_fd: static long kvm_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - long r = -EINVAL; + int r = -EINVAL; switch (ioctl) { case KVM_GET_API_VERSION: @@ -5181,7 +5206,20 @@ static void hardware_disable_all(void) static int hardware_enable_all(void) { atomic_t failed = ATOMIC_INIT(0); - int r = 0; + int r; + + /* + * Do not enable hardware virtualization if the system is going down. + * If userspace initiated a forced reboot, e.g. reboot -f, then it's + * possible for an in-flight KVM_CREATE_VM to trigger hardware enabling + * after kvm_reboot() is called. Note, this relies on system_state + * being set _before_ kvm_reboot(), which is why KVM uses a syscore ops + * hook instead of registering a dedicated reboot notifier (the latter + * runs before system_state is updated). + */ + if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF || + system_state == SYSTEM_RESTART) + return -EBUSY; /* * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu() @@ -5194,6 +5232,8 @@ static int hardware_enable_all(void) cpus_read_lock(); mutex_lock(&kvm_lock); + r = 0; + kvm_usage_count++; if (kvm_usage_count == 1) { on_each_cpu(hardware_enable_nolock, &failed, 1); @@ -5210,26 +5250,24 @@ static int hardware_enable_all(void) return r; } -static int kvm_reboot(struct notifier_block *notifier, unsigned long val, - void *v) +static void kvm_shutdown(void) { /* - * Some (well, at least mine) BIOSes hang on reboot if - * in vmx root mode. - * - * And Intel TXT required VMX off for all cpu when system shutdown. + * Disable hardware virtualization and set kvm_rebooting to indicate + * that KVM has asynchronously disabled hardware virtualization, i.e. + * that relevant errors and exceptions aren't entirely unexpected. + * Some flavors of hardware virtualization need to be disabled before + * transferring control to firmware (to perform shutdown/reboot), e.g. + * on x86, virtualization can block INIT interrupts, which are used by + * firmware to pull APs back under firmware control. Note, this path + * is used for both shutdown and reboot scenarios, i.e. neither name is + * 100% comprehensive. */ pr_info("kvm: exiting hardware virtualization\n"); kvm_rebooting = true; on_each_cpu(hardware_disable_nolock, NULL, 1); - return NOTIFY_OK; } -static struct notifier_block kvm_reboot_notifier = { - .notifier_call = kvm_reboot, - .priority = 0, -}; - static int kvm_suspend(void) { /* @@ -5260,6 +5298,7 @@ static void kvm_resume(void) static struct syscore_ops kvm_syscore_ops = { .suspend = kvm_suspend, .resume = kvm_resume, + .shutdown = kvm_shutdown, }; #else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ static int hardware_enable_all(void) @@ -5571,8 +5610,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, const char *fmt) { int ret; - struct kvm_stat_data *stat_data = (struct kvm_stat_data *) - inode->i_private; + struct kvm_stat_data *stat_data = inode->i_private; /* * The debugfs files are a reference to the kvm struct which @@ -5593,8 +5631,7 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, static int kvm_debugfs_release(struct inode *inode, struct file *file) { - struct kvm_stat_data *stat_data = (struct kvm_stat_data *) - inode->i_private; + struct kvm_stat_data *stat_data = inode->i_private; simple_attr_release(inode, file); kvm_put_kvm(stat_data->kvm); @@ -5643,7 +5680,7 @@ static int kvm_clear_stat_per_vcpu(struct kvm *kvm, size_t offset) static int kvm_stat_data_get(void *data, u64 *val) { int r = -EFAULT; - struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; + struct kvm_stat_data *stat_data = data; switch (stat_data->kind) { case KVM_STAT_VM: @@ -5662,7 +5699,7 @@ static int kvm_stat_data_get(void *data, u64 *val) static int kvm_stat_data_clear(void *data, u64 val) { int r = -EFAULT; - struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data; + struct kvm_stat_data *stat_data = data; if (val) return -EINVAL; @@ -5966,7 +6003,6 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) if (r) return r; - register_reboot_notifier(&kvm_reboot_notifier); register_syscore_ops(&kvm_syscore_ops); #endif @@ -6038,7 +6074,6 @@ err_cpu_kick_mask: err_vcpu_cache: #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING unregister_syscore_ops(&kvm_syscore_ops); - unregister_reboot_notifier(&kvm_reboot_notifier); cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); #endif return r; @@ -6064,7 +6099,6 @@ void kvm_exit(void) kvm_async_pf_deinit(); #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING unregister_syscore_ops(&kvm_syscore_ops); - unregister_reboot_notifier(&kvm_reboot_notifier); cpuhp_remove_state_nocalls(CPUHP_AP_KVM_ONLINE); #endif kvm_irqfd_exit(); |