[SPARC64]: Fix race between MD update and dr-cpu add.
diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c
index b82c03a..ba01533 100644
--- a/arch/sparc64/kernel/ds.c
+++ b/arch/sparc64/kernel/ds.c
@@ -12,7 +12,7 @@
 #include <linux/sched.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
-#include <linux/workqueue.h>
+#include <linux/kthread.h>
 #include <linux/cpu.h>
 
 #include <asm/ldc.h>
@@ -20,8 +20,7 @@
 #include <asm/power.h>
 #include <asm/mdesc.h>
 #include <asm/head.h>
-#include <asm/io.h>
-#include <asm/hvtramp.h>
+#include <asm/irq.h>
 
 #define DRV_MODULE_NAME                "ds"
 #define PFX DRV_MODULE_NAME    ": "
@@ -148,9 +147,11 @@ static void domain_shutdown_data(struct ldc_channel *lp,
 static void domain_panic_data(struct ldc_channel *lp,
                              struct ds_cap_state *cp,
                              void *buf, int len);
+#ifdef CONFIG_HOTPLUG_CPU
 static void dr_cpu_data(struct ldc_channel *lp,
                        struct ds_cap_state *cp,
                        void *buf, int len);
+#endif
 static void ds_pri_data(struct ldc_channel *lp,
                        struct ds_cap_state *cp,
                        void *buf, int len);
@@ -171,10 +172,12 @@ struct ds_cap_state ds_states[] = {
                .service_id     = "domain-panic",
                .data           = domain_panic_data,
        },
+#ifdef CONFIG_HOTPLUG_CPU
        {
                .service_id     = "dr-cpu",
                .data           = dr_cpu_data,
        },
+#endif
        {
                .service_id     = "pri",
                .data           = ds_pri_data,
@@ -225,7 +228,7 @@ static struct ds_cap_state *find_cap_by_string(const char *name)
        return NULL;
 }
 
-static int ds_send(struct ldc_channel *lp, void *data, int len)
+static int __ds_send(struct ldc_channel *lp, void *data, int len)
 {
        int err, limit = 1000;
 
@@ -240,6 +243,18 @@ static int ds_send(struct ldc_channel *lp, void *data, int len)
        return err;
 }
 
+static int ds_send(struct ldc_channel *lp, void *data, int len)
+{
+       unsigned long flags;
+       int err;
+
+       spin_lock_irqsave(&ds_lock, flags);
+       err = __ds_send(lp, data, len);
+       spin_unlock_irqrestore(&ds_lock, flags);
+
+       return err;
+}
+
 struct ds_md_update_req {
        __u64                           req_num;
 };
@@ -264,6 +279,8 @@ static void md_update_data(struct ldc_channel *lp,
 
        printk(KERN_INFO PFX "Machine description update.\n");
 
+       mdesc_update();
+
        memset(&pkt, 0, sizeof(pkt));
        pkt.data.tag.type = DS_DATA;
        pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
@@ -272,8 +289,6 @@ static void md_update_data(struct ldc_channel *lp,
        pkt.res.result = DS_OK;
 
        ds_send(lp, &pkt, sizeof(pkt));
-
-       mdesc_update();
 }
 
 struct ds_shutdown_req {
@@ -355,6 +370,7 @@ static void domain_panic_data(struct ldc_channel *lp,
        panic("PANIC requested by LDOM manager.");
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 struct dr_cpu_tag {
        __u64                           req_num;
        __u32                           type;
@@ -387,73 +403,6 @@ struct dr_cpu_resp_entry {
        __u32                           str_off;
 };
 
-/* XXX Put this in some common place. XXX */
-static unsigned long kimage_addr_to_ra(void *p)
-{
-       unsigned long val = (unsigned long) p;
-
-       return kern_base + (val - KERNBASE);
-}
-
-void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
-{
-       extern unsigned long sparc64_ttable_tl0;
-       extern unsigned long kern_locked_tte_data;
-       extern int bigkernel;
-       struct hvtramp_descr *hdesc;
-       unsigned long trampoline_ra;
-       struct trap_per_cpu *tb;
-       u64 tte_vaddr, tte_data;
-       unsigned long hv_err;
-
-       hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL);
-       if (!hdesc) {
-               printk(KERN_ERR PFX "ldom_startcpu_cpuid: Cannot allocate "
-                      "hvtramp_descr.\n");
-               return;
-       }
-
-       hdesc->cpu = cpu;
-       hdesc->num_mappings = (bigkernel ? 2 : 1);
-
-       tb = &trap_block[cpu];
-       tb->hdesc = hdesc;
-
-       hdesc->fault_info_va = (unsigned long) &tb->fault_info;
-       hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
-
-       hdesc->thread_reg = thread_reg;
-
-       tte_vaddr = (unsigned long) KERNBASE;
-       tte_data = kern_locked_tte_data;
-
-       hdesc->maps[0].vaddr = tte_vaddr;
-       hdesc->maps[0].tte   = tte_data;
-       if (bigkernel) {
-               tte_vaddr += 0x400000;
-               tte_data  += 0x400000;
-               hdesc->maps[1].vaddr = tte_vaddr;
-               hdesc->maps[1].tte   = tte_data;
-       }
-
-       trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
-
-       hv_err = sun4v_cpu_start(cpu, trampoline_ra,
-                                kimage_addr_to_ra(&sparc64_ttable_tl0),
-                                __pa(hdesc));
-}
-
-/* DR cpu requests get queued onto the work list by the
- * dr_cpu_data() callback.  The list is protected by
- * ds_lock, and processed by dr_cpu_process() in order.
- */
-static LIST_HEAD(dr_cpu_work_list);
-
-struct dr_cpu_queue_entry {
-       struct list_head                list;
-       char                            req[0];
-};
-
 static void __dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
 {
        struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
@@ -476,7 +425,7 @@ static void __dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
 
        pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
 
-       ds_send(dp->lp, &pkt, msg_len);
+       __ds_send(dp->lp, &pkt, msg_len);
 }
 
 static void dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
@@ -586,18 +535,34 @@ static int dr_cpu_configure(struct ds_cap_state *cp, u64 req_num,
 
                printk(KERN_INFO PFX "Starting cpu %d...\n", cpu);
                err = cpu_up(cpu);
-               if (err)
-                       dr_cpu_mark(resp, cpu, ncpus,
-                                   DR_CPU_RES_FAILURE,
-                                   DR_CPU_STAT_UNCONFIGURED);
+               if (err) {
+                       __u32 res = DR_CPU_RES_FAILURE;
+                       __u32 stat = DR_CPU_STAT_UNCONFIGURED;
+
+                       if (!cpu_present(cpu)) {
+                               /* CPU not present in MD */
+                               res = DR_CPU_RES_NOT_IN_MD;
+                               stat = DR_CPU_STAT_NOT_PRESENT;
+                       } else if (err == -ENODEV) {
+                               /* CPU did not call in successfully */
+                               res = DR_CPU_RES_CPU_NOT_RESPONDING;
+                       }
+
+                       printk(KERN_INFO PFX "CPU startup failed err=%d\n",
+                              err);
+                       dr_cpu_mark(resp, cpu, ncpus, res, stat);
+               }
        }
 
        spin_lock_irqsave(&ds_lock, flags);
-       ds_send(ds_info->lp, resp, resp_len);
+       __ds_send(ds_info->lp, resp, resp_len);
        spin_unlock_irqrestore(&ds_lock, flags);
 
        kfree(resp);
 
+       /* Redistribute IRQs, taking into account the new cpus.  */
+       fixup_irqs();
+
        return 0;
 }
 
@@ -605,7 +570,8 @@ static int dr_cpu_unconfigure(struct ds_cap_state *cp, u64 req_num,
                              cpumask_t *mask)
 {
        struct ds_data *resp;
-       int resp_len, ncpus;
+       int resp_len, ncpus, cpu;
+       unsigned long flags;
 
        ncpus = cpus_weight(*mask);
        resp_len = dr_cpu_size_response(ncpus);
@@ -617,93 +583,70 @@ static int dr_cpu_unconfigure(struct ds_cap_state *cp, u64 req_num,
                             resp_len, ncpus, mask,
                             DR_CPU_STAT_UNCONFIGURED);
 
-       kfree(resp);
-
-       return -EOPNOTSUPP;
-}
-
-static void dr_cpu_process(struct work_struct *work)
-{
-       struct dr_cpu_queue_entry *qp, *tmp;
-       struct ds_cap_state *cp;
-       unsigned long flags;
-       LIST_HEAD(todo);
-       cpumask_t mask;
+       for_each_cpu_mask(cpu, *mask) {
+               int err;
 
-       cp = find_cap_by_string("dr-cpu");
+               printk(KERN_INFO PFX "CPU[%d]: Shutting down cpu %d...\n",
+                      smp_processor_id(), cpu);
+               err = cpu_down(cpu);
+               if (err)
+                       dr_cpu_mark(resp, cpu, ncpus,
+                                   DR_CPU_RES_FAILURE,
+                                   DR_CPU_STAT_CONFIGURED);
+       }
 
        spin_lock_irqsave(&ds_lock, flags);
-       list_splice(&dr_cpu_work_list, &todo);
+       __ds_send(ds_info->lp, resp, resp_len);
        spin_unlock_irqrestore(&ds_lock, flags);
 
-       list_for_each_entry_safe(qp, tmp, &todo, list) {
-               struct ds_data *data = (struct ds_data *) qp->req;
-               struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
-               u32 *cpu_list = (u32 *) (tag + 1);
-               u64 req_num = tag->req_num;
-               unsigned int i;
-               int err;
-
-               switch (tag->type) {
-               case DR_CPU_CONFIGURE:
-               case DR_CPU_UNCONFIGURE:
-               case DR_CPU_FORCE_UNCONFIGURE:
-                       break;
-
-               default:
-                       dr_cpu_send_error(cp, data);
-                       goto next;
-               }
-
-               purge_dups(cpu_list, tag->num_records);
-
-               cpus_clear(mask);
-               for (i = 0; i < tag->num_records; i++) {
-                       if (cpu_list[i] == CPU_SENTINEL)
-                               continue;
-
-                       if (cpu_list[i] < NR_CPUS)
-                               cpu_set(cpu_list[i], mask);
-               }
-
-               if (tag->type == DR_CPU_CONFIGURE)
-                       err = dr_cpu_configure(cp, req_num, &mask);
-               else
-                       err = dr_cpu_unconfigure(cp, req_num, &mask);
-
-               if (err)
-                       dr_cpu_send_error(cp, data);
+       kfree(resp);
 
-next:
-               list_del(&qp->list);
-               kfree(qp);
-       }
+       return 0;
 }
 
-static DECLARE_WORK(dr_cpu_work, dr_cpu_process);
-
 static void dr_cpu_data(struct ldc_channel *lp,
-                       struct ds_cap_state *dp,
+                       struct ds_cap_state *cp,
                        void *buf, int len)
 {
-       struct dr_cpu_queue_entry *qp;
-       struct ds_data *dpkt = buf;
-       struct dr_cpu_tag *rp;
+       struct ds_data *data = buf;
+       struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
+       u32 *cpu_list = (u32 *) (tag + 1);
+       u64 req_num = tag->req_num;
+       cpumask_t mask;
+       unsigned int i;
+       int err;
 
-       rp = (struct dr_cpu_tag *) (dpkt + 1);
+       switch (tag->type) {
+       case DR_CPU_CONFIGURE:
+       case DR_CPU_UNCONFIGURE:
+       case DR_CPU_FORCE_UNCONFIGURE:
+               break;
 
-       qp = kmalloc(sizeof(struct dr_cpu_queue_entry) + len, GFP_ATOMIC);
-       if (!qp) {
-               struct ds_cap_state *cp;
+       default:
+               dr_cpu_send_error(cp, data);
+               return;
+       }
 
-               cp = find_cap_by_string("dr-cpu");
-               __dr_cpu_send_error(cp, dpkt);
-       } else {
-               memcpy(&qp->req, buf, len);
-               list_add_tail(&qp->list, &dr_cpu_work_list);
-               schedule_work(&dr_cpu_work);
+       purge_dups(cpu_list, tag->num_records);
+
+       cpus_clear(mask);
+       for (i = 0; i < tag->num_records; i++) {
+               if (cpu_list[i] == CPU_SENTINEL)
+                       continue;
+
+               if (cpu_list[i] < NR_CPUS)
+                       cpu_set(cpu_list[i], mask);
        }
+
+       if (tag->type == DR_CPU_CONFIGURE)
+               err = dr_cpu_configure(cp, req_num, &mask);
+       else
+               err = dr_cpu_unconfigure(cp, req_num, &mask);
+
+       if (err)
+               dr_cpu_send_error(cp, data);
 }
+#endif /* CONFIG_HOTPLUG_CPU */
 
 struct ds_pri_msg {
        __u64                           req_num;
@@ -819,7 +762,7 @@ void ldom_set_var(const char *var, const char *value)
                ds_var_doorbell = 0;
                ds_var_response = -1;
 
-               ds_send(dp->lp, &pkt, msg_len);
+               __ds_send(dp->lp, &pkt, msg_len);
                spin_unlock_irqrestore(&ds_lock, flags);
 
                loops = 1000;
@@ -903,7 +846,7 @@ static int register_services(struct ds_info *dp)
                pbuf.req.minor = 0;
                strcpy(pbuf.req.svc_id, cp->service_id);
 
-               err = ds_send(lp, &pbuf, msg_len);
+               err = __ds_send(lp, &pbuf, msg_len);
                if (err > 0)
                        cp->state = CAP_STATE_REG_SENT;
        }
@@ -959,27 +902,97 @@ conn_reset:
        return -ECONNRESET;
 }
 
+static void __send_ds_nack(struct ds_info *dp, u64 handle)
+{
+       struct ds_data_nack nack = {
+               .tag = {
+                       .type = DS_NACK,
+                       .len = (sizeof(struct ds_data_nack) -
+                               sizeof(struct ds_msg_tag)),
+               },
+               .handle = handle,
+               .result = DS_INV_HDL,
+       };
+
+       __ds_send(dp->lp, &nack, sizeof(nack));
+}
+
+static LIST_HEAD(ds_work_list);
+static DECLARE_WAIT_QUEUE_HEAD(ds_wait);
+
+struct ds_queue_entry {
+       struct list_head                list;
+       int                             req_len;
+       int                             __pad;
+       u64                             req[0];
+};
+
+static void process_ds_work(void)
+{
+       struct ds_queue_entry *qp, *tmp;
+       struct ds_info *dp; /* snapshot of global ds_info; no reason to be static */
+       unsigned long flags;
+       LIST_HEAD(todo);
+
+       spin_lock_irqsave(&ds_lock, flags);
+       list_splice(&ds_work_list, &todo);
+       INIT_LIST_HEAD(&ds_work_list);
+       spin_unlock_irqrestore(&ds_lock, flags);
+
+       dp = ds_info;
+
+       list_for_each_entry_safe(qp, tmp, &todo, list) {
+               struct ds_data *dpkt = (struct ds_data *) qp->req;
+               struct ds_cap_state *cp = find_cap(dpkt->handle);
+               int req_len = qp->req_len;
+
+               if (!cp) {
+                       printk(KERN_ERR PFX "Data for unknown handle %lu\n",
+                              dpkt->handle);
+
+                       spin_lock_irqsave(&ds_lock, flags);
+                       __send_ds_nack(dp, dpkt->handle);
+                       spin_unlock_irqrestore(&ds_lock, flags);
+               } else {
+                       cp->data(dp->lp, cp, dpkt, req_len);
+               }
+
+               list_del(&qp->list);
+               kfree(qp);
+       }
+}
+
+static int ds_thread(void *__unused)
+{
+       DEFINE_WAIT(wait);
+
+       while (1) {
+               prepare_to_wait(&ds_wait, &wait, TASK_INTERRUPTIBLE);
+               if (list_empty(&ds_work_list))
+                       schedule();
+               finish_wait(&ds_wait, &wait);
+
+               if (kthread_should_stop())
+                       break;
+
+               process_ds_work();
+       }
+
+       return 0;
+}
+
 static int ds_data(struct ds_info *dp, struct ds_msg_tag *pkt, int len)
 {
        struct ds_data *dpkt = (struct ds_data *) pkt;
-       struct ds_cap_state *cp = find_cap(dpkt->handle);
-
-       if (!cp) {
-               struct ds_data_nack nack = {
-                       .tag = {
-                               .type = DS_NACK,
-                               .len = (sizeof(struct ds_data_nack) -
-                                       sizeof(struct ds_msg_tag)),
-                       },
-                       .handle = dpkt->handle,
-                       .result = DS_INV_HDL,
-               };
-
-               printk(KERN_ERR PFX "Data for unknown handle %lu\n",
-                      dpkt->handle);
-               ds_send(dp->lp, &nack, sizeof(nack));
+       struct ds_queue_entry *qp;
+
+       qp = kmalloc(sizeof(struct ds_queue_entry) + len, GFP_ATOMIC);
+       if (!qp) {
+               __send_ds_nack(dp, dpkt->handle);
        } else {
-               cp->data(dp->lp, cp, dpkt, len);
+               memcpy(&qp->req, pkt, len);
+               list_add_tail(&qp->list, &ds_work_list);
+               wake_up(&ds_wait);
        }
        return 0;
 }
@@ -995,7 +1008,7 @@ static void ds_up(struct ds_info *dp)
        req.ver.major = 1;
        req.ver.minor = 0;
 
-       err = ds_send(lp, &req, sizeof(req));
+       err = __ds_send(lp, &req, sizeof(req));
        if (err > 0)
                dp->hs_state = DS_HS_START;
 }
@@ -1147,6 +1160,8 @@ static int __init ds_init(void)
        for (i = 0; i < ARRAY_SIZE(ds_states); i++)
                ds_states[i].handle = ((u64)i << 32);
 
+       kthread_run(ds_thread, NULL, "kldomd");
+
        return vio_register_driver(&ds_driver);
 }