Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
[linux] / arch / s390 / kvm / kvm-s390.c
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/random.h>
27 #include <linux/slab.h>
28 #include <linux/timer.h>
29 #include <linux/vmalloc.h>
30 #include <linux/bitmap.h>
31 #include <asm/asm-offsets.h>
32 #include <asm/lowcore.h>
33 #include <asm/stp.h>
34 #include <asm/pgtable.h>
35 #include <asm/gmap.h>
36 #include <asm/nmi.h>
37 #include <asm/switch_to.h>
38 #include <asm/isc.h>
39 #include <asm/sclp.h>
40 #include <asm/cpacf.h>
41 #include <asm/timex.h>
42 #include "kvm-s390.h"
43 #include "gaccess.h"
44
45 #define KMSG_COMPONENT "kvm-s390"
46 #undef pr_fmt
47 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
48
49 #define CREATE_TRACE_POINTS
50 #include "trace.h"
51 #include "trace-s390.h"
52
53 #define MEM_OP_MAX_SIZE 65536   /* Maximum transfer size for KVM_S390_MEM_OP */
54 #define LOCAL_IRQS 32
55 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
56                            (KVM_MAX_VCPUS + LOCAL_IRQS))
57
58 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
59
60 struct kvm_stats_debugfs_item debugfs_entries[] = {
61         { "userspace_handled", VCPU_STAT(exit_userspace) },
62         { "exit_null", VCPU_STAT(exit_null) },
63         { "exit_validity", VCPU_STAT(exit_validity) },
64         { "exit_stop_request", VCPU_STAT(exit_stop_request) },
65         { "exit_external_request", VCPU_STAT(exit_external_request) },
66         { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
67         { "exit_instruction", VCPU_STAT(exit_instruction) },
68         { "exit_pei", VCPU_STAT(exit_pei) },
69         { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
70         { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
71         { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
72         { "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
73         { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
74         { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
75         { "halt_wakeup", VCPU_STAT(halt_wakeup) },
76         { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
77         { "instruction_lctl", VCPU_STAT(instruction_lctl) },
78         { "instruction_stctl", VCPU_STAT(instruction_stctl) },
79         { "instruction_stctg", VCPU_STAT(instruction_stctg) },
80         { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
81         { "deliver_external_call", VCPU_STAT(deliver_external_call) },
82         { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
83         { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
84         { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
85         { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
86         { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
87         { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
88         { "exit_wait_state", VCPU_STAT(exit_wait_state) },
89         { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
90         { "instruction_stidp", VCPU_STAT(instruction_stidp) },
91         { "instruction_spx", VCPU_STAT(instruction_spx) },
92         { "instruction_stpx", VCPU_STAT(instruction_stpx) },
93         { "instruction_stap", VCPU_STAT(instruction_stap) },
94         { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
95         { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
96         { "instruction_stsch", VCPU_STAT(instruction_stsch) },
97         { "instruction_chsc", VCPU_STAT(instruction_chsc) },
98         { "instruction_essa", VCPU_STAT(instruction_essa) },
99         { "instruction_stsi", VCPU_STAT(instruction_stsi) },
100         { "instruction_stfl", VCPU_STAT(instruction_stfl) },
101         { "instruction_tprot", VCPU_STAT(instruction_tprot) },
102         { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
103         { "instruction_sie", VCPU_STAT(instruction_sie) },
104         { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
105         { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
106         { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
107         { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
108         { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
109         { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
110         { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
111         { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
112         { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
113         { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
114         { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
115         { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
116         { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
117         { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
118         { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
119         { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
120         { "diagnose_10", VCPU_STAT(diagnose_10) },
121         { "diagnose_44", VCPU_STAT(diagnose_44) },
122         { "diagnose_9c", VCPU_STAT(diagnose_9c) },
123         { "diagnose_258", VCPU_STAT(diagnose_258) },
124         { "diagnose_308", VCPU_STAT(diagnose_308) },
125         { "diagnose_500", VCPU_STAT(diagnose_500) },
126         { NULL }
127 };
128
129 /* allow nested virtualization in KVM (if enabled by user space) */
130 static int nested;
131 module_param(nested, int, S_IRUGO);
132 MODULE_PARM_DESC(nested, "Nested virtualization support");
133
134 /* upper facilities limit for kvm */
135 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
136
137 unsigned long kvm_s390_fac_list_mask_size(void)
138 {
139         BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
140         return ARRAY_SIZE(kvm_s390_fac_list_mask);
141 }
142
143 /* available cpu features supported by kvm */
144 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
145 /* available subfunctions indicated via query / "test bit" */
146 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
147
148 static struct gmap_notifier gmap_notifier;
149 static struct gmap_notifier vsie_gmap_notifier;
150 debug_info_t *kvm_s390_dbf;
151
/* Section: not file related */

/*
 * Nothing has to be switched on: every s390 machine is virtualization
 * enabled, so this always reports success (0).
 */
int kvm_arch_hardware_enable(void)
{
	return 0;
}
158
159 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
160                               unsigned long end);
161
162 /*
163  * This callback is executed during stop_machine(). All CPUs are therefore
164  * temporarily stopped. In order not to change guest behavior, we have to
165  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
166  * so a CPU won't be stopped while calculating with the epoch.
167  */
168 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
169                           void *v)
170 {
171         struct kvm *kvm;
172         struct kvm_vcpu *vcpu;
173         int i;
174         unsigned long long *delta = v;
175
176         list_for_each_entry(kvm, &vm_list, vm_list) {
177                 kvm->arch.epoch -= *delta;
178                 kvm_for_each_vcpu(i, vcpu, kvm) {
179                         vcpu->arch.sie_block->epoch -= *delta;
180                         if (vcpu->arch.cputm_enabled)
181                                 vcpu->arch.cputm_start += *delta;
182                         if (vcpu->arch.vsie_block)
183                                 vcpu->arch.vsie_block->epoch -= *delta;
184                 }
185         }
186         return NOTIFY_OK;
187 }
188
189 static struct notifier_block kvm_clock_notifier = {
190         .notifier_call = kvm_clock_sync,
191 };
192
193 int kvm_arch_hardware_setup(void)
194 {
195         gmap_notifier.notifier_call = kvm_gmap_notifier;
196         gmap_register_pte_notifier(&gmap_notifier);
197         vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
198         gmap_register_pte_notifier(&vsie_gmap_notifier);
199         atomic_notifier_chain_register(&s390_epoch_delta_notifier,
200                                        &kvm_clock_notifier);
201         return 0;
202 }
203
204 void kvm_arch_hardware_unsetup(void)
205 {
206         gmap_unregister_pte_notifier(&gmap_notifier);
207         gmap_unregister_pte_notifier(&vsie_gmap_notifier);
208         atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
209                                          &kvm_clock_notifier);
210 }
211
212 static void allow_cpu_feat(unsigned long nr)
213 {
214         set_bit_inv(nr, kvm_s390_available_cpu_feat);
215 }
216
217 static inline int plo_test_bit(unsigned char nr)
218 {
219         register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
220         int cc = 3; /* subfunction not available */
221
222         asm volatile(
223                 /* Parameter registers are ignored for "test bit" */
224                 "       plo     0,0,0,0(0)\n"
225                 "       ipm     %0\n"
226                 "       srl     %0,28\n"
227                 : "=d" (cc)
228                 : "d" (r0)
229                 : "cc");
230         return cc == 0;
231 }
232
233 static void kvm_s390_cpu_feat_init(void)
234 {
235         int i;
236
237         for (i = 0; i < 256; ++i) {
238                 if (plo_test_bit(i))
239                         kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
240         }
241
242         if (test_facility(28)) /* TOD-clock steering */
243                 ptff(kvm_s390_available_subfunc.ptff,
244                      sizeof(kvm_s390_available_subfunc.ptff),
245                      PTFF_QAF);
246
247         if (test_facility(17)) { /* MSA */
248                 __cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
249                               kvm_s390_available_subfunc.kmac);
250                 __cpacf_query(CPACF_KMC, (cpacf_mask_t *)
251                               kvm_s390_available_subfunc.kmc);
252                 __cpacf_query(CPACF_KM, (cpacf_mask_t *)
253                               kvm_s390_available_subfunc.km);
254                 __cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
255                               kvm_s390_available_subfunc.kimd);
256                 __cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
257                               kvm_s390_available_subfunc.klmd);
258         }
259         if (test_facility(76)) /* MSA3 */
260                 __cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
261                               kvm_s390_available_subfunc.pckmo);
262         if (test_facility(77)) { /* MSA4 */
263                 __cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
264                               kvm_s390_available_subfunc.kmctr);
265                 __cpacf_query(CPACF_KMF, (cpacf_mask_t *)
266                               kvm_s390_available_subfunc.kmf);
267                 __cpacf_query(CPACF_KMO, (cpacf_mask_t *)
268                               kvm_s390_available_subfunc.kmo);
269                 __cpacf_query(CPACF_PCC, (cpacf_mask_t *)
270                               kvm_s390_available_subfunc.pcc);
271         }
272         if (test_facility(57)) /* MSA5 */
273                 __cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
274                               kvm_s390_available_subfunc.ppno);
275
276         if (MACHINE_HAS_ESOP)
277                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
278         /*
279          * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
280          * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
281          */
282         if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
283             !test_facility(3) || !nested)
284                 return;
285         allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
286         if (sclp.has_64bscao)
287                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
288         if (sclp.has_siif)
289                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
290         if (sclp.has_gpere)
291                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
292         if (sclp.has_gsls)
293                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
294         if (sclp.has_ib)
295                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
296         if (sclp.has_cei)
297                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
298         if (sclp.has_ibs)
299                 allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
300         /*
301          * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
302          * all skey handling functions read/set the skey from the PGSTE
303          * instead of the real storage key.
304          *
305          * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
306          * pages being detected as preserved although they are resident.
307          *
308          * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
309          * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
310          *
311          * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
312          * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
313          * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
314          *
315          * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
316          * cannot easily shadow the SCA because of the ipte lock.
317          */
318 }
319
320 int kvm_arch_init(void *opaque)
321 {
322         kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
323         if (!kvm_s390_dbf)
324                 return -ENOMEM;
325
326         if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
327                 debug_unregister(kvm_s390_dbf);
328                 return -ENOMEM;
329         }
330
331         kvm_s390_cpu_feat_init();
332
333         /* Register floating interrupt controller interface. */
334         return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
335 }
336
337 void kvm_arch_exit(void)
338 {
339         debug_unregister(kvm_s390_dbf);
340 }
341
342 /* Section: device related */
343 long kvm_arch_dev_ioctl(struct file *filp,
344                         unsigned int ioctl, unsigned long arg)
345 {
346         if (ioctl == KVM_S390_ENABLE_SIE)
347                 return s390_enable_sie();
348         return -EINVAL;
349 }
350
351 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
352 {
353         int r;
354
355         switch (ext) {
356         case KVM_CAP_S390_PSW:
357         case KVM_CAP_S390_GMAP:
358         case KVM_CAP_SYNC_MMU:
359 #ifdef CONFIG_KVM_S390_UCONTROL
360         case KVM_CAP_S390_UCONTROL:
361 #endif
362         case KVM_CAP_ASYNC_PF:
363         case KVM_CAP_SYNC_REGS:
364         case KVM_CAP_ONE_REG:
365         case KVM_CAP_ENABLE_CAP:
366         case KVM_CAP_S390_CSS_SUPPORT:
367         case KVM_CAP_IOEVENTFD:
368         case KVM_CAP_DEVICE_CTRL:
369         case KVM_CAP_ENABLE_CAP_VM:
370         case KVM_CAP_S390_IRQCHIP:
371         case KVM_CAP_VM_ATTRIBUTES:
372         case KVM_CAP_MP_STATE:
373         case KVM_CAP_S390_INJECT_IRQ:
374         case KVM_CAP_S390_USER_SIGP:
375         case KVM_CAP_S390_USER_STSI:
376         case KVM_CAP_S390_SKEYS:
377         case KVM_CAP_S390_IRQ_STATE:
378         case KVM_CAP_S390_USER_INSTR0:
379                 r = 1;
380                 break;
381         case KVM_CAP_S390_MEM_OP:
382                 r = MEM_OP_MAX_SIZE;
383                 break;
384         case KVM_CAP_NR_VCPUS:
385         case KVM_CAP_MAX_VCPUS:
386                 r = KVM_S390_BSCA_CPU_SLOTS;
387                 if (sclp.has_esca && sclp.has_64bscao)
388                         r = KVM_S390_ESCA_CPU_SLOTS;
389                 break;
390         case KVM_CAP_NR_MEMSLOTS:
391                 r = KVM_USER_MEM_SLOTS;
392                 break;
393         case KVM_CAP_S390_COW:
394                 r = MACHINE_HAS_ESOP;
395                 break;
396         case KVM_CAP_S390_VECTOR_REGISTERS:
397                 r = MACHINE_HAS_VX;
398                 break;
399         case KVM_CAP_S390_RI:
400                 r = test_facility(64);
401                 break;
402         default:
403                 r = 0;
404         }
405         return r;
406 }
407
408 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
409                                         struct kvm_memory_slot *memslot)
410 {
411         gfn_t cur_gfn, last_gfn;
412         unsigned long address;
413         struct gmap *gmap = kvm->arch.gmap;
414
415         /* Loop over all guest pages */
416         last_gfn = memslot->base_gfn + memslot->npages;
417         for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
418                 address = gfn_to_hva_memslot(memslot, cur_gfn);
419
420                 if (test_and_clear_guest_dirty(gmap->mm, address))
421                         mark_page_dirty(kvm, cur_gfn);
422                 if (fatal_signal_pending(current))
423                         return;
424                 cond_resched();
425         }
426 }
427
428 /* Section: vm related */
429 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
430
431 /*
432  * Get (and clear) the dirty memory log for a memory slot.
433  */
434 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
435                                struct kvm_dirty_log *log)
436 {
437         int r;
438         unsigned long n;
439         struct kvm_memslots *slots;
440         struct kvm_memory_slot *memslot;
441         int is_dirty = 0;
442
443         mutex_lock(&kvm->slots_lock);
444
445         r = -EINVAL;
446         if (log->slot >= KVM_USER_MEM_SLOTS)
447                 goto out;
448
449         slots = kvm_memslots(kvm);
450         memslot = id_to_memslot(slots, log->slot);
451         r = -ENOENT;
452         if (!memslot->dirty_bitmap)
453                 goto out;
454
455         kvm_s390_sync_dirty_log(kvm, memslot);
456         r = kvm_get_dirty_log(kvm, log, &is_dirty);
457         if (r)
458                 goto out;
459
460         /* Clear the dirty log */
461         if (is_dirty) {
462                 n = kvm_dirty_bitmap_bytes(memslot);
463                 memset(memslot->dirty_bitmap, 0, n);
464         }
465         r = 0;
466 out:
467         mutex_unlock(&kvm->slots_lock);
468         return r;
469 }
470
471 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
472 {
473         unsigned int i;
474         struct kvm_vcpu *vcpu;
475
476         kvm_for_each_vcpu(i, vcpu, kvm) {
477                 kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
478         }
479 }
480
481 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
482 {
483         int r;
484
485         if (cap->flags)
486                 return -EINVAL;
487
488         switch (cap->cap) {
489         case KVM_CAP_S390_IRQCHIP:
490                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
491                 kvm->arch.use_irqchip = 1;
492                 r = 0;
493                 break;
494         case KVM_CAP_S390_USER_SIGP:
495                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
496                 kvm->arch.user_sigp = 1;
497                 r = 0;
498                 break;
499         case KVM_CAP_S390_VECTOR_REGISTERS:
500                 mutex_lock(&kvm->lock);
501                 if (kvm->created_vcpus) {
502                         r = -EBUSY;
503                 } else if (MACHINE_HAS_VX) {
504                         set_kvm_facility(kvm->arch.model.fac_mask, 129);
505                         set_kvm_facility(kvm->arch.model.fac_list, 129);
506                         r = 0;
507                 } else
508                         r = -EINVAL;
509                 mutex_unlock(&kvm->lock);
510                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
511                          r ? "(not available)" : "(success)");
512                 break;
513         case KVM_CAP_S390_RI:
514                 r = -EINVAL;
515                 mutex_lock(&kvm->lock);
516                 if (kvm->created_vcpus) {
517                         r = -EBUSY;
518                 } else if (test_facility(64)) {
519                         set_kvm_facility(kvm->arch.model.fac_mask, 64);
520                         set_kvm_facility(kvm->arch.model.fac_list, 64);
521                         r = 0;
522                 }
523                 mutex_unlock(&kvm->lock);
524                 VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
525                          r ? "(not available)" : "(success)");
526                 break;
527         case KVM_CAP_S390_USER_STSI:
528                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
529                 kvm->arch.user_stsi = 1;
530                 r = 0;
531                 break;
532         case KVM_CAP_S390_USER_INSTR0:
533                 VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
534                 kvm->arch.user_instr0 = 1;
535                 icpt_operexc_on_all_vcpus(kvm);
536                 r = 0;
537                 break;
538         default:
539                 r = -EINVAL;
540                 break;
541         }
542         return r;
543 }
544
545 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
546 {
547         int ret;
548
549         switch (attr->attr) {
550         case KVM_S390_VM_MEM_LIMIT_SIZE:
551                 ret = 0;
552                 VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
553                          kvm->arch.mem_limit);
554                 if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
555                         ret = -EFAULT;
556                 break;
557         default:
558                 ret = -ENXIO;
559                 break;
560         }
561         return ret;
562 }
563
564 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
565 {
566         int ret;
567         unsigned int idx;
568         switch (attr->attr) {
569         case KVM_S390_VM_MEM_ENABLE_CMMA:
570                 ret = -ENXIO;
571                 if (!sclp.has_cmma)
572                         break;
573
574                 ret = -EBUSY;
575                 VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
576                 mutex_lock(&kvm->lock);
577                 if (!kvm->created_vcpus) {
578                         kvm->arch.use_cmma = 1;
579                         ret = 0;
580                 }
581                 mutex_unlock(&kvm->lock);
582                 break;
583         case KVM_S390_VM_MEM_CLR_CMMA:
584                 ret = -ENXIO;
585                 if (!sclp.has_cmma)
586                         break;
587                 ret = -EINVAL;
588                 if (!kvm->arch.use_cmma)
589                         break;
590
591                 VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
592                 mutex_lock(&kvm->lock);
593                 idx = srcu_read_lock(&kvm->srcu);
594                 s390_reset_cmma(kvm->arch.gmap->mm);
595                 srcu_read_unlock(&kvm->srcu, idx);
596                 mutex_unlock(&kvm->lock);
597                 ret = 0;
598                 break;
599         case KVM_S390_VM_MEM_LIMIT_SIZE: {
600                 unsigned long new_limit;
601
602                 if (kvm_is_ucontrol(kvm))
603                         return -EINVAL;
604
605                 if (get_user(new_limit, (u64 __user *)attr->addr))
606                         return -EFAULT;
607
608                 if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
609                     new_limit > kvm->arch.mem_limit)
610                         return -E2BIG;
611
612                 if (!new_limit)
613                         return -EINVAL;
614
615                 /* gmap_create takes last usable address */
616                 if (new_limit != KVM_S390_NO_MEM_LIMIT)
617                         new_limit -= 1;
618
619                 ret = -EBUSY;
620                 mutex_lock(&kvm->lock);
621                 if (!kvm->created_vcpus) {
622                         /* gmap_create will round the limit up */
623                         struct gmap *new = gmap_create(current->mm, new_limit);
624
625                         if (!new) {
626                                 ret = -ENOMEM;
627                         } else {
628                                 gmap_remove(kvm->arch.gmap);
629                                 new->private = kvm;
630                                 kvm->arch.gmap = new;
631                                 ret = 0;
632                         }
633                 }
634                 mutex_unlock(&kvm->lock);
635                 VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
636                 VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
637                          (void *) kvm->arch.gmap->asce);
638                 break;
639         }
640         default:
641                 ret = -ENXIO;
642                 break;
643         }
644         return ret;
645 }
646
647 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
648
649 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
650 {
651         struct kvm_vcpu *vcpu;
652         int i;
653
654         if (!test_kvm_facility(kvm, 76))
655                 return -EINVAL;
656
657         mutex_lock(&kvm->lock);
658         switch (attr->attr) {
659         case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
660                 get_random_bytes(
661                         kvm->arch.crypto.crycb->aes_wrapping_key_mask,
662                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
663                 kvm->arch.crypto.aes_kw = 1;
664                 VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
665                 break;
666         case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
667                 get_random_bytes(
668                         kvm->arch.crypto.crycb->dea_wrapping_key_mask,
669                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
670                 kvm->arch.crypto.dea_kw = 1;
671                 VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
672                 break;
673         case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
674                 kvm->arch.crypto.aes_kw = 0;
675                 memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
676                         sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
677                 VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
678                 break;
679         case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
680                 kvm->arch.crypto.dea_kw = 0;
681                 memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
682                         sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
683                 VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
684                 break;
685         default:
686                 mutex_unlock(&kvm->lock);
687                 return -ENXIO;
688         }
689
690         kvm_for_each_vcpu(i, vcpu, kvm) {
691                 kvm_s390_vcpu_crypto_setup(vcpu);
692                 exit_sie(vcpu);
693         }
694         mutex_unlock(&kvm->lock);
695         return 0;
696 }
697
698 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
699 {
700         u8 gtod_high;
701
702         if (copy_from_user(&gtod_high, (void __user *)attr->addr,
703                                            sizeof(gtod_high)))
704                 return -EFAULT;
705
706         if (gtod_high != 0)
707                 return -EINVAL;
708         VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
709
710         return 0;
711 }
712
713 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
714 {
715         u64 gtod;
716
717         if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
718                 return -EFAULT;
719
720         kvm_s390_set_tod_clock(kvm, gtod);
721         VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
722         return 0;
723 }
724
725 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
726 {
727         int ret;
728
729         if (attr->flags)
730                 return -EINVAL;
731
732         switch (attr->attr) {
733         case KVM_S390_VM_TOD_HIGH:
734                 ret = kvm_s390_set_tod_high(kvm, attr);
735                 break;
736         case KVM_S390_VM_TOD_LOW:
737                 ret = kvm_s390_set_tod_low(kvm, attr);
738                 break;
739         default:
740                 ret = -ENXIO;
741                 break;
742         }
743         return ret;
744 }
745
746 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
747 {
748         u8 gtod_high = 0;
749
750         if (copy_to_user((void __user *)attr->addr, &gtod_high,
751                                          sizeof(gtod_high)))
752                 return -EFAULT;
753         VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
754
755         return 0;
756 }
757
758 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
759 {
760         u64 gtod;
761
762         gtod = kvm_s390_get_tod_clock_fast(kvm);
763         if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
764                 return -EFAULT;
765         VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
766
767         return 0;
768 }
769
770 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
771 {
772         int ret;
773
774         if (attr->flags)
775                 return -EINVAL;
776
777         switch (attr->attr) {
778         case KVM_S390_VM_TOD_HIGH:
779                 ret = kvm_s390_get_tod_high(kvm, attr);
780                 break;
781         case KVM_S390_VM_TOD_LOW:
782                 ret = kvm_s390_get_tod_low(kvm, attr);
783                 break;
784         default:
785                 ret = -ENXIO;
786                 break;
787         }
788         return ret;
789 }
790
791 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
792 {
793         struct kvm_s390_vm_cpu_processor *proc;
794         u16 lowest_ibc, unblocked_ibc;
795         int ret = 0;
796
797         mutex_lock(&kvm->lock);
798         if (kvm->created_vcpus) {
799                 ret = -EBUSY;
800                 goto out;
801         }
802         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
803         if (!proc) {
804                 ret = -ENOMEM;
805                 goto out;
806         }
807         if (!copy_from_user(proc, (void __user *)attr->addr,
808                             sizeof(*proc))) {
809                 kvm->arch.model.cpuid = proc->cpuid;
810                 lowest_ibc = sclp.ibc >> 16 & 0xfff;
811                 unblocked_ibc = sclp.ibc & 0xfff;
812                 if (lowest_ibc && proc->ibc) {
813                         if (proc->ibc > unblocked_ibc)
814                                 kvm->arch.model.ibc = unblocked_ibc;
815                         else if (proc->ibc < lowest_ibc)
816                                 kvm->arch.model.ibc = lowest_ibc;
817                         else
818                                 kvm->arch.model.ibc = proc->ibc;
819                 }
820                 memcpy(kvm->arch.model.fac_list, proc->fac_list,
821                        S390_ARCH_FAC_LIST_SIZE_BYTE);
822         } else
823                 ret = -EFAULT;
824         kfree(proc);
825 out:
826         mutex_unlock(&kvm->lock);
827         return ret;
828 }
829
830 static int kvm_s390_set_processor_feat(struct kvm *kvm,
831                                        struct kvm_device_attr *attr)
832 {
833         struct kvm_s390_vm_cpu_feat data;
834         int ret = -EBUSY;
835
836         if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
837                 return -EFAULT;
838         if (!bitmap_subset((unsigned long *) data.feat,
839                            kvm_s390_available_cpu_feat,
840                            KVM_S390_VM_CPU_FEAT_NR_BITS))
841                 return -EINVAL;
842
843         mutex_lock(&kvm->lock);
844         if (!atomic_read(&kvm->online_vcpus)) {
845                 bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
846                             KVM_S390_VM_CPU_FEAT_NR_BITS);
847                 ret = 0;
848         }
849         mutex_unlock(&kvm->lock);
850         return ret;
851 }
852
853 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
854                                           struct kvm_device_attr *attr)
855 {
856         /*
857          * Once supported by kernel + hw, we have to store the subfunctions
858          * in kvm->arch and remember that user space configured them.
859          */
860         return -ENXIO;
861 }
862
863 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
864 {
865         int ret = -ENXIO;
866
867         switch (attr->attr) {
868         case KVM_S390_VM_CPU_PROCESSOR:
869                 ret = kvm_s390_set_processor(kvm, attr);
870                 break;
871         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
872                 ret = kvm_s390_set_processor_feat(kvm, attr);
873                 break;
874         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
875                 ret = kvm_s390_set_processor_subfunc(kvm, attr);
876                 break;
877         }
878         return ret;
879 }
880
881 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
882 {
883         struct kvm_s390_vm_cpu_processor *proc;
884         int ret = 0;
885
886         proc = kzalloc(sizeof(*proc), GFP_KERNEL);
887         if (!proc) {
888                 ret = -ENOMEM;
889                 goto out;
890         }
891         proc->cpuid = kvm->arch.model.cpuid;
892         proc->ibc = kvm->arch.model.ibc;
893         memcpy(&proc->fac_list, kvm->arch.model.fac_list,
894                S390_ARCH_FAC_LIST_SIZE_BYTE);
895         if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
896                 ret = -EFAULT;
897         kfree(proc);
898 out:
899         return ret;
900 }
901
902 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
903 {
904         struct kvm_s390_vm_cpu_machine *mach;
905         int ret = 0;
906
907         mach = kzalloc(sizeof(*mach), GFP_KERNEL);
908         if (!mach) {
909                 ret = -ENOMEM;
910                 goto out;
911         }
912         get_cpu_id((struct cpuid *) &mach->cpuid);
913         mach->ibc = sclp.ibc;
914         memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
915                S390_ARCH_FAC_LIST_SIZE_BYTE);
916         memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
917                S390_ARCH_FAC_LIST_SIZE_BYTE);
918         if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
919                 ret = -EFAULT;
920         kfree(mach);
921 out:
922         return ret;
923 }
924
925 static int kvm_s390_get_processor_feat(struct kvm *kvm,
926                                        struct kvm_device_attr *attr)
927 {
928         struct kvm_s390_vm_cpu_feat data;
929
930         bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
931                     KVM_S390_VM_CPU_FEAT_NR_BITS);
932         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
933                 return -EFAULT;
934         return 0;
935 }
936
937 static int kvm_s390_get_machine_feat(struct kvm *kvm,
938                                      struct kvm_device_attr *attr)
939 {
940         struct kvm_s390_vm_cpu_feat data;
941
942         bitmap_copy((unsigned long *) data.feat,
943                     kvm_s390_available_cpu_feat,
944                     KVM_S390_VM_CPU_FEAT_NR_BITS);
945         if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
946                 return -EFAULT;
947         return 0;
948 }
949
950 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
951                                           struct kvm_device_attr *attr)
952 {
953         /*
954          * Once we can actually configure subfunctions (kernel + hw support),
955          * we have to check if they were already set by user space, if so copy
956          * them from kvm->arch.
957          */
958         return -ENXIO;
959 }
960
961 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
962                                         struct kvm_device_attr *attr)
963 {
964         if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
965             sizeof(struct kvm_s390_vm_cpu_subfunc)))
966                 return -EFAULT;
967         return 0;
968 }
969 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
970 {
971         int ret = -ENXIO;
972
973         switch (attr->attr) {
974         case KVM_S390_VM_CPU_PROCESSOR:
975                 ret = kvm_s390_get_processor(kvm, attr);
976                 break;
977         case KVM_S390_VM_CPU_MACHINE:
978                 ret = kvm_s390_get_machine(kvm, attr);
979                 break;
980         case KVM_S390_VM_CPU_PROCESSOR_FEAT:
981                 ret = kvm_s390_get_processor_feat(kvm, attr);
982                 break;
983         case KVM_S390_VM_CPU_MACHINE_FEAT:
984                 ret = kvm_s390_get_machine_feat(kvm, attr);
985                 break;
986         case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
987                 ret = kvm_s390_get_processor_subfunc(kvm, attr);
988                 break;
989         case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
990                 ret = kvm_s390_get_machine_subfunc(kvm, attr);
991                 break;
992         }
993         return ret;
994 }
995
996 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
997 {
998         int ret;
999
1000         switch (attr->group) {
1001         case KVM_S390_VM_MEM_CTRL:
1002                 ret = kvm_s390_set_mem_control(kvm, attr);
1003                 break;
1004         case KVM_S390_VM_TOD:
1005                 ret = kvm_s390_set_tod(kvm, attr);
1006                 break;
1007         case KVM_S390_VM_CPU_MODEL:
1008                 ret = kvm_s390_set_cpu_model(kvm, attr);
1009                 break;
1010         case KVM_S390_VM_CRYPTO:
1011                 ret = kvm_s390_vm_set_crypto(kvm, attr);
1012                 break;
1013         default:
1014                 ret = -ENXIO;
1015                 break;
1016         }
1017
1018         return ret;
1019 }
1020
1021 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1022 {
1023         int ret;
1024
1025         switch (attr->group) {
1026         case KVM_S390_VM_MEM_CTRL:
1027                 ret = kvm_s390_get_mem_control(kvm, attr);
1028                 break;
1029         case KVM_S390_VM_TOD:
1030                 ret = kvm_s390_get_tod(kvm, attr);
1031                 break;
1032         case KVM_S390_VM_CPU_MODEL:
1033                 ret = kvm_s390_get_cpu_model(kvm, attr);
1034                 break;
1035         default:
1036                 ret = -ENXIO;
1037                 break;
1038         }
1039
1040         return ret;
1041 }
1042
1043 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1044 {
1045         int ret;
1046
1047         switch (attr->group) {
1048         case KVM_S390_VM_MEM_CTRL:
1049                 switch (attr->attr) {
1050                 case KVM_S390_VM_MEM_ENABLE_CMMA:
1051                 case KVM_S390_VM_MEM_CLR_CMMA:
1052                         ret = sclp.has_cmma ? 0 : -ENXIO;
1053                         break;
1054                 case KVM_S390_VM_MEM_LIMIT_SIZE:
1055                         ret = 0;
1056                         break;
1057                 default:
1058                         ret = -ENXIO;
1059                         break;
1060                 }
1061                 break;
1062         case KVM_S390_VM_TOD:
1063                 switch (attr->attr) {
1064                 case KVM_S390_VM_TOD_LOW:
1065                 case KVM_S390_VM_TOD_HIGH:
1066                         ret = 0;
1067                         break;
1068                 default:
1069                         ret = -ENXIO;
1070                         break;
1071                 }
1072                 break;
1073         case KVM_S390_VM_CPU_MODEL:
1074                 switch (attr->attr) {
1075                 case KVM_S390_VM_CPU_PROCESSOR:
1076                 case KVM_S390_VM_CPU_MACHINE:
1077                 case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1078                 case KVM_S390_VM_CPU_MACHINE_FEAT:
1079                 case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1080                         ret = 0;
1081                         break;
1082                 /* configuring subfunctions is not supported yet */
1083                 case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1084                 default:
1085                         ret = -ENXIO;
1086                         break;
1087                 }
1088                 break;
1089         case KVM_S390_VM_CRYPTO:
1090                 switch (attr->attr) {
1091                 case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1092                 case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1093                 case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1094                 case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1095                         ret = 0;
1096                         break;
1097                 default:
1098                         ret = -ENXIO;
1099                         break;
1100                 }
1101                 break;
1102         default:
1103                 ret = -ENXIO;
1104                 break;
1105         }
1106
1107         return ret;
1108 }
1109
1110 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1111 {
1112         uint8_t *keys;
1113         uint64_t hva;
1114         int i, r = 0;
1115
1116         if (args->flags != 0)
1117                 return -EINVAL;
1118
1119         /* Is this guest using storage keys? */
1120         if (!mm_use_skey(current->mm))
1121                 return KVM_S390_GET_SKEYS_NONE;
1122
1123         /* Enforce sane limit on memory allocation */
1124         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1125                 return -EINVAL;
1126
1127         keys = kmalloc_array(args->count, sizeof(uint8_t),
1128                              GFP_KERNEL | __GFP_NOWARN);
1129         if (!keys)
1130                 keys = vmalloc(sizeof(uint8_t) * args->count);
1131         if (!keys)
1132                 return -ENOMEM;
1133
1134         down_read(&current->mm->mmap_sem);
1135         for (i = 0; i < args->count; i++) {
1136                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1137                 if (kvm_is_error_hva(hva)) {
1138                         r = -EFAULT;
1139                         break;
1140                 }
1141
1142                 r = get_guest_storage_key(current->mm, hva, &keys[i]);
1143                 if (r)
1144                         break;
1145         }
1146         up_read(&current->mm->mmap_sem);
1147
1148         if (!r) {
1149                 r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1150                                  sizeof(uint8_t) * args->count);
1151                 if (r)
1152                         r = -EFAULT;
1153         }
1154
1155         kvfree(keys);
1156         return r;
1157 }
1158
1159 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1160 {
1161         uint8_t *keys;
1162         uint64_t hva;
1163         int i, r = 0;
1164
1165         if (args->flags != 0)
1166                 return -EINVAL;
1167
1168         /* Enforce sane limit on memory allocation */
1169         if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1170                 return -EINVAL;
1171
1172         keys = kmalloc_array(args->count, sizeof(uint8_t),
1173                              GFP_KERNEL | __GFP_NOWARN);
1174         if (!keys)
1175                 keys = vmalloc(sizeof(uint8_t) * args->count);
1176         if (!keys)
1177                 return -ENOMEM;
1178
1179         r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1180                            sizeof(uint8_t) * args->count);
1181         if (r) {
1182                 r = -EFAULT;
1183                 goto out;
1184         }
1185
1186         /* Enable storage key handling for the guest */
1187         r = s390_enable_skey();
1188         if (r)
1189                 goto out;
1190
1191         down_read(&current->mm->mmap_sem);
1192         for (i = 0; i < args->count; i++) {
1193                 hva = gfn_to_hva(kvm, args->start_gfn + i);
1194                 if (kvm_is_error_hva(hva)) {
1195                         r = -EFAULT;
1196                         break;
1197                 }
1198
1199                 /* Lowest order bit is reserved */
1200                 if (keys[i] & 0x01) {
1201                         r = -EINVAL;
1202                         break;
1203                 }
1204
1205                 r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1206                 if (r)
1207                         break;
1208         }
1209         up_read(&current->mm->mmap_sem);
1210 out:
1211         kvfree(keys);
1212         return r;
1213 }
1214
1215 long kvm_arch_vm_ioctl(struct file *filp,
1216                        unsigned int ioctl, unsigned long arg)
1217 {
1218         struct kvm *kvm = filp->private_data;
1219         void __user *argp = (void __user *)arg;
1220         struct kvm_device_attr attr;
1221         int r;
1222
1223         switch (ioctl) {
1224         case KVM_S390_INTERRUPT: {
1225                 struct kvm_s390_interrupt s390int;
1226
1227                 r = -EFAULT;
1228                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
1229                         break;
1230                 r = kvm_s390_inject_vm(kvm, &s390int);
1231                 break;
1232         }
1233         case KVM_ENABLE_CAP: {
1234                 struct kvm_enable_cap cap;
1235                 r = -EFAULT;
1236                 if (copy_from_user(&cap, argp, sizeof(cap)))
1237                         break;
1238                 r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1239                 break;
1240         }
1241         case KVM_CREATE_IRQCHIP: {
1242                 struct kvm_irq_routing_entry routing;
1243
1244                 r = -EINVAL;
1245                 if (kvm->arch.use_irqchip) {
1246                         /* Set up dummy routing. */
1247                         memset(&routing, 0, sizeof(routing));
1248                         r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1249                 }
1250                 break;
1251         }
1252         case KVM_SET_DEVICE_ATTR: {
1253                 r = -EFAULT;
1254                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1255                         break;
1256                 r = kvm_s390_vm_set_attr(kvm, &attr);
1257                 break;
1258         }
1259         case KVM_GET_DEVICE_ATTR: {
1260                 r = -EFAULT;
1261                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1262                         break;
1263                 r = kvm_s390_vm_get_attr(kvm, &attr);
1264                 break;
1265         }
1266         case KVM_HAS_DEVICE_ATTR: {
1267                 r = -EFAULT;
1268                 if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1269                         break;
1270                 r = kvm_s390_vm_has_attr(kvm, &attr);
1271                 break;
1272         }
1273         case KVM_S390_GET_SKEYS: {
1274                 struct kvm_s390_skeys args;
1275
1276                 r = -EFAULT;
1277                 if (copy_from_user(&args, argp,
1278                                    sizeof(struct kvm_s390_skeys)))
1279                         break;
1280                 r = kvm_s390_get_skeys(kvm, &args);
1281                 break;
1282         }
1283         case KVM_S390_SET_SKEYS: {
1284                 struct kvm_s390_skeys args;
1285
1286                 r = -EFAULT;
1287                 if (copy_from_user(&args, argp,
1288                                    sizeof(struct kvm_s390_skeys)))
1289                         break;
1290                 r = kvm_s390_set_skeys(kvm, &args);
1291                 break;
1292         }
1293         default:
1294                 r = -ENOTTY;
1295         }
1296
1297         return r;
1298 }
1299
1300 static int kvm_s390_query_ap_config(u8 *config)
1301 {
1302         u32 fcn_code = 0x04000000UL;
1303         u32 cc = 0;
1304
1305         memset(config, 0, 128);
1306         asm volatile(
1307                 "lgr 0,%1\n"
1308                 "lgr 2,%2\n"
1309                 ".long 0xb2af0000\n"            /* PQAP(QCI) */
1310                 "0: ipm %0\n"
1311                 "srl %0,28\n"
1312                 "1:\n"
1313                 EX_TABLE(0b, 1b)
1314                 : "+r" (cc)
1315                 : "r" (fcn_code), "r" (config)
1316                 : "cc", "0", "2", "memory"
1317         );
1318
1319         return cc;
1320 }
1321
1322 static int kvm_s390_apxa_installed(void)
1323 {
1324         u8 config[128];
1325         int cc;
1326
1327         if (test_facility(12)) {
1328                 cc = kvm_s390_query_ap_config(config);
1329
1330                 if (cc)
1331                         pr_err("PQAP(QCI) failed with cc=%d", cc);
1332                 else
1333                         return config[0] & 0x40;
1334         }
1335
1336         return 0;
1337 }
1338
1339 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1340 {
1341         kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1342
1343         if (kvm_s390_apxa_installed())
1344                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1345         else
1346                 kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1347 }
1348
1349 static u64 kvm_s390_get_initial_cpuid(void)
1350 {
1351         struct cpuid cpuid;
1352
1353         get_cpu_id(&cpuid);
1354         cpuid.version = 0xff;
1355         return *((u64 *) &cpuid);
1356 }
1357
1358 static void kvm_s390_crypto_init(struct kvm *kvm)
1359 {
1360         if (!test_kvm_facility(kvm, 76))
1361                 return;
1362
1363         kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1364         kvm_s390_set_crycb_format(kvm);
1365
1366         /* Enable AES/DEA protected key functions by default */
1367         kvm->arch.crypto.aes_kw = 1;
1368         kvm->arch.crypto.dea_kw = 1;
1369         get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1370                          sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1371         get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1372                          sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1373 }
1374
1375 static void sca_dispose(struct kvm *kvm)
1376 {
1377         if (kvm->arch.use_esca)
1378                 free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1379         else
1380                 free_page((unsigned long)(kvm->arch.sca));
1381         kvm->arch.sca = NULL;
1382 }
1383
1384 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1385 {
1386         gfp_t alloc_flags = GFP_KERNEL;
1387         int i, rc;
1388         char debug_name[16];
1389         static unsigned long sca_offset;
1390
1391         rc = -EINVAL;
1392 #ifdef CONFIG_KVM_S390_UCONTROL
1393         if (type & ~KVM_VM_S390_UCONTROL)
1394                 goto out_err;
1395         if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1396                 goto out_err;
1397 #else
1398         if (type)
1399                 goto out_err;
1400 #endif
1401
1402         rc = s390_enable_sie();
1403         if (rc)
1404                 goto out_err;
1405
1406         rc = -ENOMEM;
1407
1408         ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1409
1410         kvm->arch.use_esca = 0; /* start with basic SCA */
1411         if (!sclp.has_64bscao)
1412                 alloc_flags |= GFP_DMA;
1413         rwlock_init(&kvm->arch.sca_lock);
1414         kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1415         if (!kvm->arch.sca)
1416                 goto out_err;
1417         spin_lock(&kvm_lock);
1418         sca_offset += 16;
1419         if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1420                 sca_offset = 0;
1421         kvm->arch.sca = (struct bsca_block *)
1422                         ((char *) kvm->arch.sca + sca_offset);
1423         spin_unlock(&kvm_lock);
1424
1425         sprintf(debug_name, "kvm-%u", current->pid);
1426
1427         kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1428         if (!kvm->arch.dbf)
1429                 goto out_err;
1430
1431         kvm->arch.sie_page2 =
1432              (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1433         if (!kvm->arch.sie_page2)
1434                 goto out_err;
1435
1436         /* Populate the facility mask initially. */
1437         memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1438                S390_ARCH_FAC_LIST_SIZE_BYTE);
1439         for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1440                 if (i < kvm_s390_fac_list_mask_size())
1441                         kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1442                 else
1443                         kvm->arch.model.fac_mask[i] = 0UL;
1444         }
1445
1446         /* Populate the facility list initially. */
1447         kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1448         memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1449                S390_ARCH_FAC_LIST_SIZE_BYTE);
1450
1451         set_kvm_facility(kvm->arch.model.fac_mask, 74);
1452         set_kvm_facility(kvm->arch.model.fac_list, 74);
1453
1454         kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1455         kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1456
1457         kvm_s390_crypto_init(kvm);
1458
1459         spin_lock_init(&kvm->arch.float_int.lock);
1460         for (i = 0; i < FIRQ_LIST_COUNT; i++)
1461                 INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1462         init_waitqueue_head(&kvm->arch.ipte_wq);
1463         mutex_init(&kvm->arch.ipte_mutex);
1464
1465         debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1466         VM_EVENT(kvm, 3, "vm created with type %lu", type);
1467
1468         if (type & KVM_VM_S390_UCONTROL) {
1469                 kvm->arch.gmap = NULL;
1470                 kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1471         } else {
1472                 if (sclp.hamax == U64_MAX)
1473                         kvm->arch.mem_limit = TASK_MAX_SIZE;
1474                 else
1475                         kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE,
1476                                                     sclp.hamax + 1);
1477                 kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1478                 if (!kvm->arch.gmap)
1479                         goto out_err;
1480                 kvm->arch.gmap->private = kvm;
1481                 kvm->arch.gmap->pfault_enabled = 0;
1482         }
1483
1484         kvm->arch.css_support = 0;
1485         kvm->arch.use_irqchip = 0;
1486         kvm->arch.epoch = 0;
1487
1488         spin_lock_init(&kvm->arch.start_stop_lock);
1489         kvm_s390_vsie_init(kvm);
1490         KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1491
1492         return 0;
1493 out_err:
1494         free_page((unsigned long)kvm->arch.sie_page2);
1495         debug_unregister(kvm->arch.dbf);
1496         sca_dispose(kvm);
1497         KVM_EVENT(3, "creation of vm failed: %d", rc);
1498         return rc;
1499 }
1500
1501 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
1502 {
1503         VCPU_EVENT(vcpu, 3, "%s", "free cpu");
1504         trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
1505         kvm_s390_clear_local_irqs(vcpu);
1506         kvm_clear_async_pf_completion_queue(vcpu);
1507         if (!kvm_is_ucontrol(vcpu->kvm))
1508                 sca_del_vcpu(vcpu);
1509
1510         if (kvm_is_ucontrol(vcpu->kvm))
1511                 gmap_remove(vcpu->arch.gmap);
1512
1513         if (vcpu->kvm->arch.use_cmma)
1514                 kvm_s390_vcpu_unsetup_cmma(vcpu);
1515         free_page((unsigned long)(vcpu->arch.sie_block));
1516
1517         kvm_vcpu_uninit(vcpu);
1518         kmem_cache_free(kvm_vcpu_cache, vcpu);
1519 }
1520
1521 static void kvm_free_vcpus(struct kvm *kvm)
1522 {
1523         unsigned int i;
1524         struct kvm_vcpu *vcpu;
1525
1526         kvm_for_each_vcpu(i, vcpu, kvm)
1527                 kvm_arch_vcpu_destroy(vcpu);
1528
1529         mutex_lock(&kvm->lock);
1530         for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
1531                 kvm->vcpus[i] = NULL;
1532
1533         atomic_set(&kvm->online_vcpus, 0);
1534         mutex_unlock(&kvm->lock);
1535 }
1536
1537 void kvm_arch_destroy_vm(struct kvm *kvm)
1538 {
1539         kvm_free_vcpus(kvm);
1540         sca_dispose(kvm);
1541         debug_unregister(kvm->arch.dbf);
1542         free_page((unsigned long)kvm->arch.sie_page2);
1543         if (!kvm_is_ucontrol(kvm))
1544                 gmap_remove(kvm->arch.gmap);
1545         kvm_s390_destroy_adapters(kvm);
1546         kvm_s390_clear_float_irqs(kvm);
1547         kvm_s390_vsie_destroy(kvm);
1548         KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
1549 }
1550
1551 /* Section: vcpu related */
1552 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
1553 {
1554         vcpu->arch.gmap = gmap_create(current->mm, -1UL);
1555         if (!vcpu->arch.gmap)
1556                 return -ENOMEM;
1557         vcpu->arch.gmap->private = vcpu->kvm;
1558
1559         return 0;
1560 }
1561
1562 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
1563 {
1564         read_lock(&vcpu->kvm->arch.sca_lock);
1565         if (vcpu->kvm->arch.use_esca) {
1566                 struct esca_block *sca = vcpu->kvm->arch.sca;
1567
1568                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1569                 sca->cpu[vcpu->vcpu_id].sda = 0;
1570         } else {
1571                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1572
1573                 clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1574                 sca->cpu[vcpu->vcpu_id].sda = 0;
1575         }
1576         read_unlock(&vcpu->kvm->arch.sca_lock);
1577 }
1578
1579 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
1580 {
1581         read_lock(&vcpu->kvm->arch.sca_lock);
1582         if (vcpu->kvm->arch.use_esca) {
1583                 struct esca_block *sca = vcpu->kvm->arch.sca;
1584
1585                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1586                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1587                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
1588                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1589                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
1590         } else {
1591                 struct bsca_block *sca = vcpu->kvm->arch.sca;
1592
1593                 sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
1594                 vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
1595                 vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
1596                 set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
1597         }
1598         read_unlock(&vcpu->kvm->arch.sca_lock);
1599 }
1600
1601 /* Basic SCA to Extended SCA data copy routines */
1602 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
1603 {
1604         d->sda = s->sda;
1605         d->sigp_ctrl.c = s->sigp_ctrl.c;
1606         d->sigp_ctrl.scn = s->sigp_ctrl.scn;
1607 }
1608
1609 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
1610 {
1611         int i;
1612
1613         d->ipte_control = s->ipte_control;
1614         d->mcn[0] = s->mcn;
1615         for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
1616                 sca_copy_entry(&d->cpu[i], &s->cpu[i]);
1617 }
1618
1619 static int sca_switch_to_extended(struct kvm *kvm)
1620 {
1621         struct bsca_block *old_sca = kvm->arch.sca;
1622         struct esca_block *new_sca;
1623         struct kvm_vcpu *vcpu;
1624         unsigned int vcpu_idx;
1625         u32 scaol, scaoh;
1626
1627         new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
1628         if (!new_sca)
1629                 return -ENOMEM;
1630
1631         scaoh = (u32)((u64)(new_sca) >> 32);
1632         scaol = (u32)(u64)(new_sca) & ~0x3fU;
1633
1634         kvm_s390_vcpu_block_all(kvm);
1635         write_lock(&kvm->arch.sca_lock);
1636
1637         sca_copy_b_to_e(new_sca, old_sca);
1638
1639         kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
1640                 vcpu->arch.sie_block->scaoh = scaoh;
1641                 vcpu->arch.sie_block->scaol = scaol;
1642                 vcpu->arch.sie_block->ecb2 |= 0x04U;
1643         }
1644         kvm->arch.sca = new_sca;
1645         kvm->arch.use_esca = 1;
1646
1647         write_unlock(&kvm->arch.sca_lock);
1648         kvm_s390_vcpu_unblock_all(kvm);
1649
1650         free_page((unsigned long)old_sca);
1651
1652         VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
1653                  old_sca, kvm->arch.sca);
1654         return 0;
1655 }
1656
1657 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
1658 {
1659         int rc;
1660
1661         if (id < KVM_S390_BSCA_CPU_SLOTS)
1662                 return true;
1663         if (!sclp.has_esca || !sclp.has_64bscao)
1664                 return false;
1665
1666         mutex_lock(&kvm->lock);
1667         rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
1668         mutex_unlock(&kvm->lock);
1669
1670         return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
1671 }
1672
1673 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
1674 {
1675         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1676         kvm_clear_async_pf_completion_queue(vcpu);
1677         vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
1678                                     KVM_SYNC_GPRS |
1679                                     KVM_SYNC_ACRS |
1680                                     KVM_SYNC_CRS |
1681                                     KVM_SYNC_ARCH0 |
1682                                     KVM_SYNC_PFAULT;
1683         kvm_s390_set_prefix(vcpu, 0);
1684         if (test_kvm_facility(vcpu->kvm, 64))
1685                 vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
1686         /* fprs can be synchronized via vrs, even if the guest has no vx. With
1687          * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
1688          */
1689         if (MACHINE_HAS_VX)
1690                 vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
1691         else
1692                 vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
1693
1694         if (kvm_is_ucontrol(vcpu->kvm))
1695                 return __kvm_ucontrol_vcpu_init(vcpu);
1696
1697         return 0;
1698 }
1699
1700 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1701 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1702 {
1703         WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
1704         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1705         vcpu->arch.cputm_start = get_tod_clock_fast();
1706         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1707 }
1708
1709 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1710 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1711 {
1712         WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
1713         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1714         vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1715         vcpu->arch.cputm_start = 0;
1716         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1717 }
1718
1719 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1720 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1721 {
1722         WARN_ON_ONCE(vcpu->arch.cputm_enabled);
1723         vcpu->arch.cputm_enabled = true;
1724         __start_cpu_timer_accounting(vcpu);
1725 }
1726
1727 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
1728 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
1729 {
1730         WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
1731         __stop_cpu_timer_accounting(vcpu);
1732         vcpu->arch.cputm_enabled = false;
1733 }
1734
/* Preemption-safe wrapper around __enable_cpu_timer_accounting(). */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1741
/* Preemption-safe wrapper around __disable_cpu_timer_accounting(). */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
1748
1749 /* set the cpu timer - may only be called from the VCPU thread itself */
1750 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
1751 {
1752         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1753         raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
1754         if (vcpu->arch.cputm_enabled)
1755                 vcpu->arch.cputm_start = get_tod_clock_fast();
1756         vcpu->arch.sie_block->cputm = cputm;
1757         raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
1758         preempt_enable();
1759 }
1760
1761 /* update and get the cpu timer - can also be called from other VCPU threads */
1762 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
1763 {
1764         unsigned int seq;
1765         __u64 value;
1766
1767         if (unlikely(!vcpu->arch.cputm_enabled))
1768                 return vcpu->arch.sie_block->cputm;
1769
1770         preempt_disable(); /* protect from TOD sync and vcpu_load/put */
1771         do {
1772                 seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
1773                 /*
1774                  * If the writer would ever execute a read in the critical
1775                  * section, e.g. in irq context, we have a deadlock.
1776                  */
1777                 WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
1778                 value = vcpu->arch.sie_block->cputm;
1779                 /* if cputm_start is 0, accounting is being started/stopped */
1780                 if (likely(vcpu->arch.cputm_start))
1781                         value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
1782         } while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
1783         preempt_enable();
1784         return value;
1785 }
1786
1787 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1788 {
1789         /* Save host register state */
1790         save_fpu_regs();
1791         vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
1792         vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
1793
1794         if (MACHINE_HAS_VX)
1795                 current->thread.fpu.regs = vcpu->run->s.regs.vrs;
1796         else
1797                 current->thread.fpu.regs = vcpu->run->s.regs.fprs;
1798         current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
1799         if (test_fp_ctl(current->thread.fpu.fpc))
1800                 /* User space provided an invalid FPC, let's clear it */
1801                 current->thread.fpu.fpc = 0;
1802
1803         save_access_regs(vcpu->arch.host_acrs);
1804         restore_access_regs(vcpu->run->s.regs.acrs);
1805         gmap_enable(vcpu->arch.enabled_gmap);
1806         atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1807         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1808                 __start_cpu_timer_accounting(vcpu);
1809         vcpu->cpu = cpu;
1810 }
1811
1812 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
1813 {
1814         vcpu->cpu = -1;
1815         if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
1816                 __stop_cpu_timer_accounting(vcpu);
1817         atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
1818         vcpu->arch.enabled_gmap = gmap_get_enabled();
1819         gmap_disable(vcpu->arch.enabled_gmap);
1820
1821         /* Save guest register state */
1822         save_fpu_regs();
1823         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
1824
1825         /* Restore host register state */
1826         current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
1827         current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
1828
1829         save_access_regs(vcpu->run->s.regs.acrs);
1830         restore_access_regs(vcpu->arch.host_acrs);
1831 }
1832
1833 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
1834 {
1835         /* this equals initial cpu reset in pop, but we don't switch to ESA */
1836         vcpu->arch.sie_block->gpsw.mask = 0UL;
1837         vcpu->arch.sie_block->gpsw.addr = 0UL;
1838         kvm_s390_set_prefix(vcpu, 0);
1839         kvm_s390_set_cpu_timer(vcpu, 0);
1840         vcpu->arch.sie_block->ckc       = 0UL;
1841         vcpu->arch.sie_block->todpr     = 0;
1842         memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
1843         vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
1844         vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
1845         /* make sure the new fpc will be lazily loaded */
1846         save_fpu_regs();
1847         current->thread.fpu.fpc = 0;
1848         vcpu->arch.sie_block->gbea = 1;
1849         vcpu->arch.sie_block->pp = 0;
1850         vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
1851         kvm_clear_async_pf_completion_queue(vcpu);
1852         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
1853                 kvm_s390_vcpu_stop(vcpu);
1854         kvm_s390_clear_local_irqs(vcpu);
1855 }
1856
1857 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
1858 {
1859         mutex_lock(&vcpu->kvm->lock);
1860         preempt_disable();
1861         vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
1862         preempt_enable();
1863         mutex_unlock(&vcpu->kvm->lock);
1864         if (!kvm_is_ucontrol(vcpu->kvm)) {
1865                 vcpu->arch.gmap = vcpu->kvm->arch.gmap;
1866                 sca_add_vcpu(vcpu);
1867         }
1868         if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
1869                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
1870         /* make vcpu_load load the right gmap on the first trigger */
1871         vcpu->arch.enabled_gmap = vcpu->arch.gmap;
1872 }
1873
1874 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
1875 {
1876         if (!test_kvm_facility(vcpu->kvm, 76))
1877                 return;
1878
1879         vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
1880
1881         if (vcpu->kvm->arch.crypto.aes_kw)
1882                 vcpu->arch.sie_block->ecb3 |= ECB3_AES;
1883         if (vcpu->kvm->arch.crypto.dea_kw)
1884                 vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
1885
1886         vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
1887 }
1888
1889 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
1890 {
1891         free_page(vcpu->arch.sie_block->cbrlo);
1892         vcpu->arch.sie_block->cbrlo = 0;
1893 }
1894
1895 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
1896 {
1897         vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
1898         if (!vcpu->arch.sie_block->cbrlo)
1899                 return -ENOMEM;
1900
1901         vcpu->arch.sie_block->ecb2 |= 0x80;
1902         vcpu->arch.sie_block->ecb2 &= ~0x08;
1903         return 0;
1904 }
1905
1906 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
1907 {
1908         struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
1909
1910         vcpu->arch.sie_block->ibc = model->ibc;
1911         if (test_kvm_facility(vcpu->kvm, 7))
1912                 vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
1913 }
1914
1915 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
1916 {
1917         int rc = 0;
1918
1919         atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
1920                                                     CPUSTAT_SM |
1921                                                     CPUSTAT_STOPPED);
1922
1923         if (test_kvm_facility(vcpu->kvm, 78))
1924                 atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
1925         else if (test_kvm_facility(vcpu->kvm, 8))
1926                 atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
1927
1928         kvm_s390_vcpu_setup_model(vcpu);
1929
1930         /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
1931         if (MACHINE_HAS_ESOP)
1932                 vcpu->arch.sie_block->ecb |= 0x02;
1933         if (test_kvm_facility(vcpu->kvm, 9))
1934                 vcpu->arch.sie_block->ecb |= 0x04;
1935         if (test_kvm_facility(vcpu->kvm, 73))
1936                 vcpu->arch.sie_block->ecb |= 0x10;
1937
1938         if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
1939                 vcpu->arch.sie_block->ecb2 |= 0x08;
1940         vcpu->arch.sie_block->eca = 0x1002000U;
1941         if (sclp.has_cei)
1942                 vcpu->arch.sie_block->eca |= 0x80000000U;
1943         if (sclp.has_ib)
1944                 vcpu->arch.sie_block->eca |= 0x40000000U;
1945         if (sclp.has_siif)
1946                 vcpu->arch.sie_block->eca |= 1;
1947         if (sclp.has_sigpif)
1948                 vcpu->arch.sie_block->eca |= 0x10000000U;
1949         if (test_kvm_facility(vcpu->kvm, 64))
1950                 vcpu->arch.sie_block->ecb3 |= 0x01;
1951         if (test_kvm_facility(vcpu->kvm, 129)) {
1952                 vcpu->arch.sie_block->eca |= 0x00020000;
1953                 vcpu->arch.sie_block->ecd |= 0x20000000;
1954         }
1955         vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
1956         vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
1957
1958         if (vcpu->kvm->arch.use_cmma) {
1959                 rc = kvm_s390_vcpu_setup_cmma(vcpu);
1960                 if (rc)
1961                         return rc;
1962         }
1963         hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
1964         vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
1965
1966         kvm_s390_vcpu_crypto_setup(vcpu);
1967
1968         return rc;
1969 }
1970
1971 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
1972                                       unsigned int id)
1973 {
1974         struct kvm_vcpu *vcpu;
1975         struct sie_page *sie_page;
1976         int rc = -EINVAL;
1977
1978         if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
1979                 goto out;
1980
1981         rc = -ENOMEM;
1982
1983         vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1984         if (!vcpu)
1985                 goto out;
1986
1987         sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
1988         if (!sie_page)
1989                 goto out_free_cpu;
1990
1991         vcpu->arch.sie_block = &sie_page->sie_block;
1992         vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
1993
1994         /* the real guest size will always be smaller than msl */
1995         vcpu->arch.sie_block->mso = 0;
1996         vcpu->arch.sie_block->msl = sclp.hamax;
1997
1998         vcpu->arch.sie_block->icpua = id;
1999         spin_lock_init(&vcpu->arch.local_int.lock);
2000         vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2001         vcpu->arch.local_int.wq = &vcpu->wq;
2002         vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2003         seqcount_init(&vcpu->arch.cputm_seqcount);
2004
2005         rc = kvm_vcpu_init(vcpu, kvm, id);
2006         if (rc)
2007                 goto out_free_sie_block;
2008         VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2009                  vcpu->arch.sie_block);
2010         trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2011
2012         return vcpu;
2013 out_free_sie_block:
2014         free_page((unsigned long)(vcpu->arch.sie_block));
2015 out_free_cpu:
2016         kmem_cache_free(kvm_vcpu_cache, vcpu);
2017 out:
2018         return ERR_PTR(rc);
2019 }
2020
/* Report the result of kvm_s390_vcpu_has_irq(vcpu, 0) as runnable state. */
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
	int has_irq = kvm_s390_vcpu_has_irq(vcpu, 0);

	return has_irq;
}
2025
2026 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2027 {
2028         atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2029         exit_sie(vcpu);
2030 }
2031
2032 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2033 {
2034         atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2035 }
2036
2037 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2038 {
2039         atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2040         exit_sie(vcpu);
2041 }
2042
2043 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2044 {
2045         atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2046 }
2047
2048 /*
2049  * Kick a guest cpu out of SIE and wait until SIE is not running.
2050  * If the CPU is not running (e.g. waiting as idle) the function will
2051  * return immediately. */
2052 void exit_sie(struct kvm_vcpu *vcpu)
2053 {
2054         atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2055         while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2056                 cpu_relax();
2057 }
2058
/*
 * Post request @req for @vcpu and kick the guest cpu out of SIE so that the
 * request is processed synchronously.
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	/* the request must be visible before the VCPU is kicked */
	kvm_make_request(req, vcpu);
	kvm_s390_vcpu_request(vcpu);
}
2065
2066 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2067                               unsigned long end)
2068 {
2069         struct kvm *kvm = gmap->private;
2070         struct kvm_vcpu *vcpu;
2071         unsigned long prefix;
2072         int i;
2073
2074         if (gmap_is_shadow(gmap))
2075                 return;
2076         if (start >= 1UL << 31)
2077                 /* We are only interested in prefix pages */
2078                 return;
2079         kvm_for_each_vcpu(i, vcpu, kvm) {
2080                 /* match against both prefix pages */
2081                 prefix = kvm_s390_get_prefix(vcpu);
2082                 if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2083                         VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2084                                    start, end);
2085                         kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2086                 }
2087         }
2088 }
2089
/*
 * Only exists because kvm common code refers to it; it is never called
 * there, so reaching this function is a bug.
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	BUG();

	return 0;
}
2096
2097 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2098                                            struct kvm_one_reg *reg)
2099 {
2100         int r = -EINVAL;
2101
2102         switch (reg->id) {
2103         case KVM_REG_S390_TODPR:
2104                 r = put_user(vcpu->arch.sie_block->todpr,
2105                              (u32 __user *)reg->addr);
2106                 break;
2107         case KVM_REG_S390_EPOCHDIFF:
2108                 r = put_user(vcpu->arch.sie_block->epoch,
2109                              (u64 __user *)reg->addr);
2110                 break;
2111         case KVM_REG_S390_CPU_TIMER:
2112                 r = put_user(kvm_s390_get_cpu_timer(vcpu),
2113                              (u64 __user *)reg->addr);
2114                 break;
2115         case KVM_REG_S390_CLOCK_COMP:
2116                 r = put_user(vcpu->arch.sie_block->ckc,
2117                              (u64 __user *)reg->addr);
2118                 break;
2119         case KVM_REG_S390_PFTOKEN:
2120                 r = put_user(vcpu->arch.pfault_token,
2121                              (u64 __user *)reg->addr);
2122                 break;
2123         case KVM_REG_S390_PFCOMPARE:
2124                 r = put_user(vcpu->arch.pfault_compare,
2125                              (u64 __user *)reg->addr);
2126                 break;
2127         case KVM_REG_S390_PFSELECT:
2128                 r = put_user(vcpu->arch.pfault_select,
2129                              (u64 __user *)reg->addr);
2130                 break;
2131         case KVM_REG_S390_PP:
2132                 r = put_user(vcpu->arch.sie_block->pp,
2133                              (u64 __user *)reg->addr);
2134                 break;
2135         case KVM_REG_S390_GBEA:
2136                 r = put_user(vcpu->arch.sie_block->gbea,
2137                              (u64 __user *)reg->addr);
2138                 break;
2139         default:
2140                 break;
2141         }
2142
2143         return r;
2144 }
2145
2146 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2147                                            struct kvm_one_reg *reg)
2148 {
2149         int r = -EINVAL;
2150         __u64 val;
2151
2152         switch (reg->id) {
2153         case KVM_REG_S390_TODPR:
2154                 r = get_user(vcpu->arch.sie_block->todpr,
2155                              (u32 __user *)reg->addr);
2156                 break;
2157         case KVM_REG_S390_EPOCHDIFF:
2158                 r = get_user(vcpu->arch.sie_block->epoch,
2159                              (u64 __user *)reg->addr);
2160                 break;
2161         case KVM_REG_S390_CPU_TIMER:
2162                 r = get_user(val, (u64 __user *)reg->addr);
2163                 if (!r)
2164                         kvm_s390_set_cpu_timer(vcpu, val);
2165                 break;
2166         case KVM_REG_S390_CLOCK_COMP:
2167                 r = get_user(vcpu->arch.sie_block->ckc,
2168                              (u64 __user *)reg->addr);
2169                 break;
2170         case KVM_REG_S390_PFTOKEN:
2171                 r = get_user(vcpu->arch.pfault_token,
2172                              (u64 __user *)reg->addr);
2173                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2174                         kvm_clear_async_pf_completion_queue(vcpu);
2175                 break;
2176         case KVM_REG_S390_PFCOMPARE:
2177                 r = get_user(vcpu->arch.pfault_compare,
2178                              (u64 __user *)reg->addr);
2179                 break;
2180         case KVM_REG_S390_PFSELECT:
2181                 r = get_user(vcpu->arch.pfault_select,
2182                              (u64 __user *)reg->addr);
2183                 break;
2184         case KVM_REG_S390_PP:
2185                 r = get_user(vcpu->arch.sie_block->pp,
2186                              (u64 __user *)reg->addr);
2187                 break;
2188         case KVM_REG_S390_GBEA:
2189                 r = get_user(vcpu->arch.sie_block->gbea,
2190                              (u64 __user *)reg->addr);
2191                 break;
2192         default:
2193                 break;
2194         }
2195
2196         return r;
2197 }
2198
/* ioctl wrapper around kvm_s390_vcpu_initial_reset(); always returns 0. */
static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	kvm_s390_vcpu_initial_reset(vcpu);

	return 0;
}
2204
2205 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2206 {
2207         memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2208         return 0;
2209 }
2210
2211 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2212 {
2213         memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2214         return 0;
2215 }
2216
2217 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2218                                   struct kvm_sregs *sregs)
2219 {
2220         memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2221         memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2222         restore_access_regs(vcpu->run->s.regs.acrs);
2223         return 0;
2224 }
2225
2226 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2227                                   struct kvm_sregs *sregs)
2228 {
2229         memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2230         memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2231         return 0;
2232 }
2233
2234 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2235 {
2236         /* make sure the new values will be lazily loaded */
2237         save_fpu_regs();
2238         if (test_fp_ctl(fpu->fpc))
2239                 return -EINVAL;
2240         current->thread.fpu.fpc = fpu->fpc;
2241         if (MACHINE_HAS_VX)
2242                 convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2243                                  (freg_t *) fpu->fprs);
2244         else
2245                 memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2246         return 0;
2247 }
2248
2249 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2250 {
2251         /* make sure we have the latest values */
2252         save_fpu_regs();
2253         if (MACHINE_HAS_VX)
2254                 convert_vx_to_fp((freg_t *) fpu->fprs,
2255                                  (__vector128 *) vcpu->run->s.regs.vrs);
2256         else
2257                 memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2258         fpu->fpc = current->thread.fpu.fpc;
2259         return 0;
2260 }
2261
2262 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2263 {
2264         int rc = 0;
2265
2266         if (!is_vcpu_stopped(vcpu))
2267                 rc = -EBUSY;
2268         else {
2269                 vcpu->run->psw_mask = psw.mask;
2270                 vcpu->run->psw_addr = psw.addr;
2271         }
2272         return rc;
2273 }
2274
2275 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2276                                   struct kvm_translation *tr)
2277 {
2278         return -EINVAL; /* not implemented yet */
2279 }
2280
2281 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2282                               KVM_GUESTDBG_USE_HW_BP | \
2283                               KVM_GUESTDBG_ENABLE)
2284
2285 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2286                                         struct kvm_guest_debug *dbg)
2287 {
2288         int rc = 0;
2289
2290         vcpu->guest_debug = 0;
2291         kvm_s390_clear_bp_data(vcpu);
2292
2293         if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2294                 return -EINVAL;
2295         if (!sclp.has_gpere)
2296                 return -EINVAL;
2297
2298         if (dbg->control & KVM_GUESTDBG_ENABLE) {
2299                 vcpu->guest_debug = dbg->control;
2300                 /* enforce guest PER */
2301                 atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2302
2303                 if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2304                         rc = kvm_s390_import_bp_data(vcpu, dbg);
2305         } else {
2306                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2307                 vcpu->arch.guestdbg.last_bp = 0;
2308         }
2309
2310         if (rc) {
2311                 vcpu->guest_debug = 0;
2312                 kvm_s390_clear_bp_data(vcpu);
2313                 atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2314         }
2315
2316         return rc;
2317 }
2318
2319 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2320                                     struct kvm_mp_state *mp_state)
2321 {
2322         /* CHECK_STOP and LOAD are not supported yet */
2323         return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2324                                        KVM_MP_STATE_OPERATING;
2325 }
2326
2327 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2328                                     struct kvm_mp_state *mp_state)
2329 {
2330         int rc = 0;
2331
2332         /* user space knows about this interface - let it control the state */
2333         vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2334
2335         switch (mp_state->mp_state) {
2336         case KVM_MP_STATE_STOPPED:
2337                 kvm_s390_vcpu_stop(vcpu);
2338                 break;
2339         case KVM_MP_STATE_OPERATING:
2340                 kvm_s390_vcpu_start(vcpu);
2341                 break;
2342         case KVM_MP_STATE_LOAD:
2343         case KVM_MP_STATE_CHECK_STOP:
2344                 /* fall through - CHECK_STOP and LOAD are not supported yet */
2345         default:
2346                 rc = -ENXIO;
2347         }
2348
2349         return rc;
2350 }
2351
2352 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2353 {
2354         return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2355 }
2356
2357 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2358 {
2359 retry:
2360         kvm_s390_vcpu_request_handled(vcpu);
2361         if (!vcpu->requests)
2362                 return 0;
2363         /*
2364          * We use MMU_RELOAD just to re-arm the ipte notifier for the
2365          * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2366          * This ensures that the ipte instruction for this request has
2367          * already finished. We might race against a second unmapper that
2368          * wants to set the blocking bit. Lets just retry the request loop.
2369          */
2370         if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2371                 int rc;
2372                 rc = gmap_mprotect_notify(vcpu->arch.gmap,
2373                                           kvm_s390_get_prefix(vcpu),
2374                                           PAGE_SIZE * 2, PROT_WRITE);
2375                 if (rc) {
2376                         kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2377                         return rc;
2378                 }
2379                 goto retry;
2380         }
2381
2382         if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2383                 vcpu->arch.sie_block->ihcpu = 0xffff;
2384                 goto retry;
2385         }
2386
2387         if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2388                 if (!ibs_enabled(vcpu)) {
2389                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2390                         atomic_or(CPUSTAT_IBS,
2391                                         &vcpu->arch.sie_block->cpuflags);
2392                 }
2393                 goto retry;
2394         }
2395
2396         if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2397                 if (ibs_enabled(vcpu)) {
2398                         trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2399                         atomic_andnot(CPUSTAT_IBS,
2400                                           &vcpu->arch.sie_block->cpuflags);
2401                 }
2402                 goto retry;
2403         }
2404
2405         if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2406                 vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2407                 goto retry;
2408         }
2409
2410         /* nothing to do, just clear the request */
2411         clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
2412
2413         return 0;
2414 }
2415
2416 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2417 {
2418         struct kvm_vcpu *vcpu;
2419         int i;
2420
2421         mutex_lock(&kvm->lock);
2422         preempt_disable();
2423         kvm->arch.epoch = tod - get_tod_clock();
2424         kvm_s390_vcpu_block_all(kvm);
2425         kvm_for_each_vcpu(i, vcpu, kvm)
2426                 vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2427         kvm_s390_vcpu_unblock_all(kvm);
2428         preempt_enable();
2429         mutex_unlock(&kvm->lock);
2430 }
2431
2432 /**
2433  * kvm_arch_fault_in_page - fault-in guest page if necessary
2434  * @vcpu: The corresponding virtual cpu
2435  * @gpa: Guest physical address
2436  * @writable: Whether the page should be writable or not
2437  *
2438  * Make sure that a guest page has been faulted-in on the host.
2439  *
2440  * Return: Zero on success, negative error code otherwise.
2441  */
2442 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2443 {
2444         return gmap_fault(vcpu->arch.gmap, gpa,
2445                           writable ? FAULT_FLAG_WRITE : 0);
2446 }
2447
2448 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
2449                                       unsigned long token)
2450 {
2451         struct kvm_s390_interrupt inti;
2452         struct kvm_s390_irq irq;
2453
2454         if (start_token) {
2455                 irq.u.ext.ext_params2 = token;
2456                 irq.type = KVM_S390_INT_PFAULT_INIT;
2457                 WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
2458         } else {
2459                 inti.type = KVM_S390_INT_PFAULT_DONE;
2460                 inti.parm64 = token;
2461                 WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
2462         }
2463 }
2464
2465 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
2466                                      struct kvm_async_pf *work)
2467 {
2468         trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
2469         __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
2470 }
2471
2472 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2473                                  struct kvm_async_pf *work)
2474 {
2475         trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
2476         __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
2477 }
2478
/*
 * Intentionally empty: on s390 the page is always injected directly,
 * so there is nothing left to do once it is ready.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
                               struct kvm_async_pf *work)
{
}
2484
2485 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
2486 {
2487         /*
2488          * s390 will always inject the page directly,
2489          * but we still want check_async_completion to cleanup
2490          */
2491         return true;
2492 }
2493
2494 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
2495 {
2496         hva_t hva;
2497         struct kvm_arch_async_pf arch;
2498         int rc;
2499
2500         if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2501                 return 0;
2502         if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
2503             vcpu->arch.pfault_compare)
2504                 return 0;
2505         if (psw_extint_disabled(vcpu))
2506                 return 0;
2507         if (kvm_s390_vcpu_has_irq(vcpu, 0))
2508                 return 0;
2509         if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
2510                 return 0;
2511         if (!vcpu->arch.gmap->pfault_enabled)
2512                 return 0;
2513
2514         hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
2515         hva += current->thread.gmap_addr & ~PAGE_MASK;
2516         if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
2517                 return 0;
2518
2519         rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
2520         return rc;
2521 }
2522
2523 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
2524 {
2525         int rc, cpuflags;
2526
2527         /*
2528          * On s390 notifications for arriving pages will be delivered directly
2529          * to the guest but the house keeping for completed pfaults is
2530          * handled outside the worker.
2531          */
2532         kvm_check_async_pf_completion(vcpu);
2533
2534         vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
2535         vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
2536
2537         if (need_resched())
2538                 schedule();
2539
2540         if (test_cpu_flag(CIF_MCCK_PENDING))
2541                 s390_handle_mcck();
2542
2543         if (!kvm_is_ucontrol(vcpu->kvm)) {
2544                 rc = kvm_s390_deliver_pending_interrupts(vcpu);
2545                 if (rc)
2546                         return rc;
2547         }
2548
2549         rc = kvm_s390_handle_requests(vcpu);
2550         if (rc)
2551                 return rc;
2552
2553         if (guestdbg_enabled(vcpu)) {
2554                 kvm_s390_backup_guest_per_regs(vcpu);
2555                 kvm_s390_patch_guest_per_regs(vcpu);
2556         }
2557
2558         vcpu->arch.sie_block->icptcode = 0;
2559         cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
2560         VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
2561         trace_kvm_s390_sie_enter(vcpu, cpuflags);
2562
2563         return 0;
2564 }
2565
2566 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
2567 {
2568         struct kvm_s390_pgm_info pgm_info = {
2569                 .code = PGM_ADDRESSING,
2570         };
2571         u8 opcode, ilen;
2572         int rc;
2573
2574         VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
2575         trace_kvm_s390_sie_fault(vcpu);
2576
2577         /*
2578          * We want to inject an addressing exception, which is defined as a
2579          * suppressing or terminating exception. However, since we came here
2580          * by a DAT access exception, the PSW still points to the faulting
2581          * instruction since DAT exceptions are nullifying. So we've got
2582          * to look up the current opcode to get the length of the instruction
2583          * to be able to forward the PSW.
2584          */
2585         rc = read_guest_instr(vcpu, &opcode, 1);
2586         ilen = insn_length(opcode);
2587         if (rc < 0) {
2588                 return rc;
2589         } else if (rc) {
2590                 /* Instruction-Fetching Exceptions - we can't detect the ilen.
2591                  * Forward by arbitrary ilc, injection will take care of
2592                  * nullification if necessary.
2593                  */
2594                 pgm_info = vcpu->arch.pgm;
2595                 ilen = 4;
2596         }
2597         pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
2598         kvm_s390_forward_psw(vcpu, ilen);
2599         return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
2600 }
2601
2602 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
2603 {
2604         VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
2605                    vcpu->arch.sie_block->icptcode);
2606         trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
2607
2608         if (guestdbg_enabled(vcpu))
2609                 kvm_s390_restore_guest_per_regs(vcpu);
2610
2611         vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
2612         vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
2613
2614         if (vcpu->arch.sie_block->icptcode > 0) {
2615                 int rc = kvm_handle_sie_intercept(vcpu);
2616
2617                 if (rc != -EOPNOTSUPP)
2618                         return rc;
2619                 vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
2620                 vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
2621                 vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
2622                 vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
2623                 return -EREMOTE;
2624         } else if (exit_reason != -EFAULT) {
2625                 vcpu->stat.exit_null++;
2626                 return 0;
2627         } else if (kvm_is_ucontrol(vcpu->kvm)) {
2628                 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
2629                 vcpu->run->s390_ucontrol.trans_exc_code =
2630                                                 current->thread.gmap_addr;
2631                 vcpu->run->s390_ucontrol.pgm_code = 0x10;
2632                 return -EREMOTE;
2633         } else if (current->thread.gmap_pfault) {
2634                 trace_kvm_s390_major_guest_pfault(vcpu);
2635                 current->thread.gmap_pfault = 0;
2636                 if (kvm_arch_setup_async_pf(vcpu))
2637                         return 0;
2638                 return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
2639         }
2640         return vcpu_post_run_fault_in_sie(vcpu);
2641 }
2642
2643 static int __vcpu_run(struct kvm_vcpu *vcpu)
2644 {
2645         int rc, exit_reason;
2646
2647         /*
2648          * We try to hold kvm->srcu during most of vcpu_run (except when run-
2649          * ning the guest), so that memslots (and other stuff) are protected
2650          */
2651         vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2652
2653         do {
2654                 rc = vcpu_pre_run(vcpu);
2655                 if (rc)
2656                         break;
2657
2658                 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2659                 /*
2660                  * As PF_VCPU will be used in fault handler, between
2661                  * guest_enter and guest_exit should be no uaccess.
2662                  */
2663                 local_irq_disable();
2664                 guest_enter_irqoff();
2665                 __disable_cpu_timer_accounting(vcpu);
2666                 local_irq_enable();
2667                 exit_reason = sie64a(vcpu->arch.sie_block,
2668                                      vcpu->run->s.regs.gprs);
2669                 local_irq_disable();
2670                 __enable_cpu_timer_accounting(vcpu);
2671                 guest_exit_irqoff();
2672                 local_irq_enable();
2673                 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
2674
2675                 rc = vcpu_post_run(vcpu, exit_reason);
2676         } while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
2677
2678         srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
2679         return rc;
2680 }
2681
2682 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2683 {
2684         vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
2685         vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
2686         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
2687                 kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
2688         if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
2689                 memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
2690                 /* some control register changes require a tlb flush */
2691                 kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2692         }
2693         if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
2694                 kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
2695                 vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
2696                 vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
2697                 vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
2698                 vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
2699         }
2700         if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
2701                 vcpu->arch.pfault_token = kvm_run->s.regs.pft;
2702                 vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
2703                 vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
2704                 if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2705                         kvm_clear_async_pf_completion_queue(vcpu);
2706         }
2707         kvm_run->kvm_dirty_regs = 0;
2708 }
2709
2710 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2711 {
2712         kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
2713         kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
2714         kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
2715         memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
2716         kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
2717         kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
2718         kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
2719         kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
2720         kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
2721         kvm_run->s.regs.pft = vcpu->arch.pfault_token;
2722         kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
2723         kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
2724 }
2725
2726 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
2727 {
2728         int rc;
2729         sigset_t sigsaved;
2730
2731         if (guestdbg_exit_pending(vcpu)) {
2732                 kvm_s390_prepare_debug_exit(vcpu);
2733                 return 0;
2734         }
2735
2736         if (vcpu->sigset_active)
2737                 sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
2738
2739         if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
2740                 kvm_s390_vcpu_start(vcpu);
2741         } else if (is_vcpu_stopped(vcpu)) {
2742                 pr_err_ratelimited("can't run stopped vcpu %d\n",
2743                                    vcpu->vcpu_id);
2744                 return -EINVAL;
2745         }
2746
2747         sync_regs(vcpu, kvm_run);
2748         enable_cpu_timer_accounting(vcpu);
2749
2750         might_fault();
2751         rc = __vcpu_run(vcpu);
2752
2753         if (signal_pending(current) && !rc) {
2754                 kvm_run->exit_reason = KVM_EXIT_INTR;
2755                 rc = -EINTR;
2756         }
2757
2758         if (guestdbg_exit_pending(vcpu) && !rc)  {
2759                 kvm_s390_prepare_debug_exit(vcpu);
2760                 rc = 0;
2761         }
2762
2763         if (rc == -EREMOTE) {
2764                 /* userspace support is needed, kvm_run has been prepared */
2765                 rc = 0;
2766         }
2767
2768         disable_cpu_timer_accounting(vcpu);
2769         store_regs(vcpu, kvm_run);
2770
2771         if (vcpu->sigset_active)
2772                 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
2773
2774         vcpu->stat.exit_userspace++;
2775         return rc;
2776 }
2777
2778 /*
2779  * store status at address
2780  * we use have two special cases:
2781  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
2782  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
2783  */
2784 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
2785 {
2786         unsigned char archmode = 1;
2787         freg_t fprs[NUM_FPRS];
2788         unsigned int px;
2789         u64 clkcomp, cputm;
2790         int rc;
2791
2792         px = kvm_s390_get_prefix(vcpu);
2793         if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
2794                 if (write_guest_abs(vcpu, 163, &archmode, 1))
2795                         return -EFAULT;
2796                 gpa = 0;
2797         } else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
2798                 if (write_guest_real(vcpu, 163, &archmode, 1))
2799                         return -EFAULT;
2800                 gpa = px;
2801         } else
2802                 gpa -= __LC_FPREGS_SAVE_AREA;
2803
2804         /* manually convert vector registers if necessary */
2805         if (MACHINE_HAS_VX) {
2806                 convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
2807                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2808                                      fprs, 128);
2809         } else {
2810                 rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
2811                                      vcpu->run->s.regs.fprs, 128);
2812         }
2813         rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
2814                               vcpu->run->s.regs.gprs, 128);
2815         rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
2816                               &vcpu->arch.sie_block->gpsw, 16);
2817         rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
2818                               &px, 4);
2819         rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
2820                               &vcpu->run->s.regs.fpc, 4);
2821         rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
2822                               &vcpu->arch.sie_block->todpr, 4);
2823         cputm = kvm_s390_get_cpu_timer(vcpu);
2824         rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
2825                               &cputm, 8);
2826         clkcomp = vcpu->arch.sie_block->ckc >> 8;
2827         rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
2828                               &clkcomp, 8);
2829         rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
2830                               &vcpu->run->s.regs.acrs, 64);
2831         rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
2832                               &vcpu->arch.sie_block->gcr, 128);
2833         return rc ? -EFAULT : 0;
2834 }
2835
2836 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
2837 {
2838         /*
2839          * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
2840          * copying in vcpu load/put. Lets update our copies before we save
2841          * it into the save area
2842          */
2843         save_fpu_regs();
2844         vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
2845         save_access_regs(vcpu->run->s.regs.acrs);
2846
2847         return kvm_s390_store_status_unloaded(vcpu, addr);
2848 }
2849
2850 /*
2851  * store additional status at address
2852  */
2853 int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
2854                                         unsigned long gpa)
2855 {
2856         /* Only bits 0-53 are used for address formation */
2857         if (!(gpa & ~0x3ff))
2858                 return 0;
2859
2860         return write_guest_abs(vcpu, gpa & ~0x3ff,
2861                                (void *)&vcpu->run->s.regs.vrs, 512);
2862 }
2863
2864 int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
2865 {
2866         if (!test_kvm_facility(vcpu->kvm, 129))
2867                 return 0;
2868
2869         /*
2870          * The guest VXRS are in the host VXRs due to the lazy
2871          * copying in vcpu load/put. We can simply call save_fpu_regs()
2872          * to save the current register state because we are in the
2873          * middle of a load/put cycle.
2874          *
2875          * Let's update our copies before we save it into the save area.
2876          */
2877         save_fpu_regs();
2878
2879         return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
2880 }
2881
2882 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2883 {
2884         kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
2885         kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
2886 }
2887
2888 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
2889 {
2890         unsigned int i;
2891         struct kvm_vcpu *vcpu;
2892
2893         kvm_for_each_vcpu(i, vcpu, kvm) {
2894                 __disable_ibs_on_vcpu(vcpu);
2895         }
2896 }
2897
2898 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
2899 {
2900         if (!sclp.has_ibs)
2901                 return;
2902         kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
2903         kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
2904 }
2905
2906 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
2907 {
2908         int i, online_vcpus, started_vcpus = 0;
2909
2910         if (!is_vcpu_stopped(vcpu))
2911                 return;
2912
2913         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
2914         /* Only one cpu at a time may enter/leave the STOPPED state. */
2915         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2916         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2917
2918         for (i = 0; i < online_vcpus; i++) {
2919                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
2920                         started_vcpus++;
2921         }
2922
2923         if (started_vcpus == 0) {
2924                 /* we're the only active VCPU -> speed it up */
2925                 __enable_ibs_on_vcpu(vcpu);
2926         } else if (started_vcpus == 1) {
2927                 /*
2928                  * As we are starting a second VCPU, we have to disable
2929                  * the IBS facility on all VCPUs to remove potentially
2930                  * oustanding ENABLE requests.
2931                  */
2932                 __disable_ibs_on_all_vcpus(vcpu->kvm);
2933         }
2934
2935         atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2936         /*
2937          * Another VCPU might have used IBS while we were offline.
2938          * Let's play safe and flush the VCPU at startup.
2939          */
2940         kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
2941         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2942         return;
2943 }
2944
2945 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
2946 {
2947         int i, online_vcpus, started_vcpus = 0;
2948         struct kvm_vcpu *started_vcpu = NULL;
2949
2950         if (is_vcpu_stopped(vcpu))
2951                 return;
2952
2953         trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
2954         /* Only one cpu at a time may enter/leave the STOPPED state. */
2955         spin_lock(&vcpu->kvm->arch.start_stop_lock);
2956         online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
2957
2958         /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
2959         kvm_s390_clear_stop_irq(vcpu);
2960
2961         atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
2962         __disable_ibs_on_vcpu(vcpu);
2963
2964         for (i = 0; i < online_vcpus; i++) {
2965                 if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
2966                         started_vcpus++;
2967                         started_vcpu = vcpu->kvm->vcpus[i];
2968                 }
2969         }
2970
2971         if (started_vcpus == 1) {
2972                 /*
2973                  * As we only have one VCPU left, we want to enable the
2974                  * IBS facility for that VCPU to speed it up.
2975                  */
2976                 __enable_ibs_on_vcpu(started_vcpu);
2977         }
2978
2979         spin_unlock(&vcpu->kvm->arch.start_stop_lock);
2980         return;
2981 }
2982
2983 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
2984                                      struct kvm_enable_cap *cap)
2985 {
2986         int r;
2987
2988         if (cap->flags)
2989                 return -EINVAL;
2990
2991         switch (cap->cap) {
2992         case KVM_CAP_S390_CSS_SUPPORT:
2993                 if (!vcpu->kvm->arch.css_support) {
2994                         vcpu->kvm->arch.css_support = 1;
2995                         VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
2996                         trace_kvm_s390_enable_css(vcpu->kvm);
2997                 }
2998                 r = 0;
2999                 break;
3000         default:
3001                 r = -EINVAL;
3002                 break;
3003         }
3004         return r;
3005 }
3006
3007 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3008                                   struct kvm_s390_mem_op *mop)
3009 {
3010         void __user *uaddr = (void __user *)mop->buf;
3011         void *tmpbuf = NULL;
3012         int r, srcu_idx;
3013         const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3014                                     | KVM_S390_MEMOP_F_CHECK_ONLY;
3015
3016         if (mop->flags & ~supported_flags)
3017                 return -EINVAL;
3018
3019         if (mop->size > MEM_OP_MAX_SIZE)
3020                 return -E2BIG;
3021
3022         if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3023                 tmpbuf = vmalloc(mop->size);
3024                 if (!tmpbuf)
3025                         return -ENOMEM;
3026         }
3027
3028         srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3029
3030         switch (mop->op) {
3031         case KVM_S390_MEMOP_LOGICAL_READ:
3032                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3033                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3034                                             mop->size, GACC_FETCH);
3035                         break;
3036                 }
3037                 r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3038                 if (r == 0) {
3039                         if (copy_to_user(uaddr, tmpbuf, mop->size))
3040                                 r = -EFAULT;
3041                 }
3042                 break;
3043         case KVM_S390_MEMOP_LOGICAL_WRITE:
3044                 if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3045                         r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3046                                             mop->size, GACC_STORE);
3047                         break;
3048                 }
3049                 if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3050                         r = -EFAULT;
3051                         break;
3052                 }
3053                 r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3054                 break;
3055         default:
3056                 r = -EINVAL;
3057         }
3058
3059         srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3060
3061         if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3062                 kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3063
3064         vfree(tmpbuf);
3065         return r;
3066 }
3067
3068 long kvm_arch_vcpu_ioctl(struct file *filp,
3069                          unsigned int ioctl, unsigned long arg)
3070 {
3071         struct kvm_vcpu *vcpu = filp->private_data;
3072         void __user *argp = (void __user *)arg;
3073         int idx;
3074         long r;
3075
3076         switch (ioctl) {
3077         case KVM_S390_IRQ: {
3078                 struct kvm_s390_irq s390irq;
3079
3080                 r = -EFAULT;
3081                 if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3082                         break;
3083                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3084                 break;
3085         }
3086         case KVM_S390_INTERRUPT: {
3087                 struct kvm_s390_interrupt s390int;
3088                 struct kvm_s390_irq s390irq;
3089
3090                 r = -EFAULT;
3091                 if (copy_from_user(&s390int, argp, sizeof(s390int)))
3092                         break;
3093                 if (s390int_to_s390irq(&s390int, &s390irq))
3094                         return -EINVAL;
3095                 r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3096                 break;
3097         }
3098         case KVM_S390_STORE_STATUS:
3099                 idx = srcu_read_lock(&vcpu->kvm->srcu);
3100                 r = kvm_s390_vcpu_store_status(vcpu, arg);
3101                 srcu_read_unlock(&vcpu->kvm->srcu, idx);
3102                 break;
3103         case KVM_S390_SET_INITIAL_PSW: {
3104                 psw_t psw;
3105
3106                 r = -EFAULT;
3107                 if (copy_from_user(&psw, argp, sizeof(psw)))
3108                         break;
3109                 r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3110                 break;
3111         }
3112         case KVM_S390_INITIAL_RESET:
3113                 r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3114                 break;
3115         case KVM_SET_ONE_REG:
3116         case KVM_GET_ONE_REG: {
3117                 struct kvm_one_reg reg;
3118                 r = -EFAULT;
3119                 if (copy_from_user(&reg, argp, sizeof(reg)))
3120                         break;
3121                 if (ioctl == KVM_SET_ONE_REG)
3122                         r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3123                 else
3124                         r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3125                 break;
3126         }
3127 #ifdef CONFIG_KVM_S390_UCONTROL
3128         case KVM_S390_UCAS_MAP: {
3129                 struct kvm_s390_ucas_mapping ucasmap;
3130
3131                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3132                         r = -EFAULT;
3133                         break;
3134                 }
3135
3136                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3137                         r = -EINVAL;
3138                         break;
3139                 }
3140
3141                 r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3142                                      ucasmap.vcpu_addr, ucasmap.length);
3143                 break;
3144         }
3145         case KVM_S390_UCAS_UNMAP: {
3146                 struct kvm_s390_ucas_mapping ucasmap;
3147
3148                 if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3149                         r = -EFAULT;
3150                         break;
3151                 }
3152
3153                 if (!kvm_is_ucontrol(vcpu->kvm)) {
3154                         r = -EINVAL;
3155                         break;
3156                 }
3157
3158                 r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3159                         ucasmap.length);
3160                 break;
3161         }
3162 #endif
3163         case KVM_S390_VCPU_FAULT: {
3164                 r = gmap_fault(vcpu->arch.gmap, arg, 0);
3165                 break;
3166         }
3167         case KVM_ENABLE_CAP:
3168         {
3169                 struct kvm_enable_cap cap;
3170                 r = -EFAULT;
3171                 if (copy_from_user(&cap, argp, sizeof(cap)))
3172                         break;
3173                 r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3174                 break;
3175         }
3176         case KVM_S390_MEM_OP: {
3177                 struct kvm_s390_mem_op mem_op;
3178
3179                 if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3180                         r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3181                 else
3182                         r = -EFAULT;
3183                 break;
3184         }
3185         case KVM_S390_SET_IRQ_STATE: {
3186                 struct kvm_s390_irq_state irq_state;
3187
3188                 r = -EFAULT;
3189                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3190                         break;
3191                 if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3192                     irq_state.len == 0 ||
3193                     irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3194                         r = -EINVAL;
3195                         break;
3196                 }
3197                 r = kvm_s390_set_irq_state(vcpu,
3198                                            (void __user *) irq_state.buf,
3199                                            irq_state.len);
3200                 break;
3201         }
3202         case KVM_S390_GET_IRQ_STATE: {
3203                 struct kvm_s390_irq_state irq_state;
3204
3205                 r = -EFAULT;
3206                 if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3207                         break;
3208                 if (irq_state.len == 0) {
3209                         r = -EINVAL;
3210                         break;
3211                 }
3212                 r = kvm_s390_get_irq_state(vcpu,
3213                                            (__u8 __user *)  irq_state.buf,
3214                                            irq_state.len);
3215                 break;
3216         }
3217         default:
3218                 r = -ENOTTY;
3219         }
3220         return r;
3221 }
3222
3223 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3224 {
3225 #ifdef CONFIG_KVM_S390_UCONTROL
3226         if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3227                  && (kvm_is_ucontrol(vcpu->kvm))) {
3228                 vmf->page = virt_to_page(vcpu->arch.sie_block);
3229                 get_page(vmf->page);
3230                 return 0;
3231         }
3232 #endif
3233         return VM_FAULT_SIGBUS;
3234 }
3235
/* No per-memslot setup is done here; always reports success. */
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                            unsigned long npages)
{
        return 0;
}
3241
3242 /* Section: memory related */
3243 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3244                                    struct kvm_memory_slot *memslot,
3245                                    const struct kvm_userspace_memory_region *mem,
3246                                    enum kvm_mr_change change)
3247 {
3248         /* A few sanity checks. We can have memory slots which have to be
3249            located/ended at a segment boundary (1MB). The memory in userland is
3250            ok to be fragmented into various different vmas. It is okay to mmap()
3251            and munmap() stuff in this slot after doing this call at any time */
3252
3253         if (mem->userspace_addr & 0xffffful)
3254                 return -EINVAL;
3255
3256         if (mem->memory_size & 0xffffful)
3257                 return -EINVAL;
3258
3259         if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3260                 return -EINVAL;
3261
3262         return 0;
3263 }
3264
3265 void kvm_arch_commit_memory_region(struct kvm *kvm,
3266                                 const struct kvm_userspace_memory_region *mem,
3267                                 const struct kvm_memory_slot *old,
3268                                 const struct kvm_memory_slot *new,
3269                                 enum kvm_mr_change change)
3270 {
3271         int rc;
3272
3273         /* If the basics of the memslot do not change, we do not want
3274          * to update the gmap. Every update causes several unnecessary
3275          * segment translation exceptions. This is usually handled just
3276          * fine by the normal fault handler + gmap, but it will also
3277          * cause faults on the prefix page of running guest CPUs.
3278          */
3279         if (old->userspace_addr == mem->userspace_addr &&
3280             old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3281             old->npages * PAGE_SIZE == mem->memory_size)
3282                 return;
3283
3284         rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3285                 mem->guest_phys_addr, mem->memory_size);
3286         if (rc)
3287                 pr_warn("failed to commit memory region\n");
3288         return;
3289 }
3290
3291 static inline unsigned long nonhyp_mask(int i)
3292 {
3293         unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3294
3295         return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3296 }
3297
3298 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3299 {
3300         vcpu->valid_wakeup = false;
3301 }
3302
3303 static int __init kvm_s390_init(void)
3304 {
3305         int i;
3306
3307         if (!sclp.has_sief2) {
3308                 pr_info("SIE not available\n");
3309                 return -ENODEV;
3310         }
3311
3312         for (i = 0; i < 16; i++)
3313                 kvm_s390_fac_list_mask[i] |=
3314                         S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3315
3316         return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3317 }
3318
3319 static void __exit kvm_s390_exit(void)
3320 {
3321         kvm_exit();
3322 }
3323
3324 module_init(kvm_s390_init);
3325 module_exit(kvm_s390_exit);
3326
3327 /*
3328  * Enable autoloading of the kvm module.
3329  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3330  * since x86 takes a different approach.
3331  */
3332 #include <linux/miscdevice.h>
3333 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3334 MODULE_ALIAS("devname:kvm");