[IA64-SGI] SGI Altix cross partition functionality [2/3]
[powerpc.git] / arch / ia64 / sn / kernel / xpc_partition.c
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
7  */
8
9
10 /*
11  * Cross Partition Communication (XPC) partition support.
12  *
13  *      This is the part of XPC that detects the presence/absence of
14  *      other partitions. It provides a heartbeat and monitors the
15  *      heartbeats of other partitions.
16  *
17  */
18
19
20 #include <linux/kernel.h>
21 #include <linux/sysctl.h>
22 #include <linux/cache.h>
23 #include <linux/mmzone.h>
24 #include <linux/nodemask.h>
25 #include <asm/sn/bte.h>
26 #include <asm/sn/intr.h>
27 #include <asm/sn/sn_sal.h>
28 #include <asm/sn/nodepda.h>
29 #include <asm/sn/addrs.h>
30 #include "xpc.h"
31
32
33 /* XPC is exiting flag (xpc_discovery() checks it to stop scanning early) */
34 int xpc_exiting;
35
36
37 /* SH_IPI_ACCESS shub register values saved by xpc_allow_IPI_ops() */
38 static u64 xpc_sh1_IPI_access;
39 static u64 xpc_sh2_IPI_access0;
40 static u64 xpc_sh2_IPI_access1;
41 static u64 xpc_sh2_IPI_access2;
42 static u64 xpc_sh2_IPI_access3;
43
44
45 /* original protection values for each node */
46 u64 xpc_prot_vec[MAX_COMPACT_NODES];
47
48
49 /* this partition's reserved page */
50 struct xpc_rsvd_page *xpc_rsvd_page;
51
52 /* this partition's XPC variables (within the reserved page) */
53 struct xpc_vars *xpc_vars;
54 struct xpc_vars_part *xpc_vars_part;
55
56
57 /*
58  * For performance reasons, each entry of xpc_partitions[] is cacheline
59  * aligned. And xpc_partitions[] is padded with an additional entry at the
60  * end so that the last legitimate entry doesn't share its cacheline with
61  * another variable.
62  */
63 struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
64
65
66 /*
67  * Generic buffer used to store a local copy of a remote partition's
68  * reserved page or XPC variables.
69  *
70  * xpc_discovery runs only once and is a separate thread that is
71  * very likely going to be processing in parallel with receiving
72  * interrupts, so it allocates its own buffer instead of this one.
73  */
74 char ____cacheline_aligned
75                 xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];
76
77
78 /* systune related variables */
79 int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
80 int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
81
82
83 /*
84  * Given a nasid, get the physical address of the  partition's reserved page
85  * for that nasid. This function returns 0 on any error.
86  */
87 static u64
88 xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
89 {
90         bte_result_t bte_res;
91         s64 status;
92         u64 cookie = 0;
93         u64 rp_pa = nasid;      /* seed with nasid */
94         u64 len = 0;
95
96
97         while (1) {
98
99                 status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
100                                                                 &len);
101
102                 dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
103                         "0x%016lx, address=0x%016lx, len=0x%016lx\n",
104                         status, cookie, rp_pa, len);
105
106                 if (status != SALRET_MORE_PASSES) {
107                         break;
108                 }
109
110                 if (len > buf_size) {
111                         dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len);
112                         status = SALRET_ERROR;
113                         break;
114                 }
115
116                 bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size,
117                                         (BTE_NOTIFY | BTE_WACQUIRE), NULL);
118                 if (bte_res != BTE_SUCCESS) {
119                         dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
120                         status = SALRET_ERROR;
121                         break;
122                 }
123         }
124
125         if (status != SALRET_OK) {
126                 rp_pa = 0;
127         }
128         dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
129         return rp_pa;
130 }
131
132
133 /*
134  * Fill the partition reserved page with the information needed by
135  * other partitions to discover we are alive and establish initial
136  * communications.
137  */
138 struct xpc_rsvd_page *
139 xpc_rsvd_page_init(void)
140 {
141         struct xpc_rsvd_page *rp;
142         AMO_t *amos_page;
143         u64 rp_pa, next_cl, nasid_array = 0;
144         int i, ret;
145
146
147         /* get the local reserved page's address */
148
149         rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0),
150                                         (u64) xpc_remote_copy_buffer,
151                                                 XPC_RSVD_PAGE_ALIGNED_SIZE);
152         if (rp_pa == 0) {
153                 dev_err(xpc_part, "SAL failed to locate the reserved page\n");
154                 return NULL;
155         }
156         rp = (struct xpc_rsvd_page *) __va(rp_pa);
157
158         if (rp->partid != sn_partition_id) {
159                 dev_err(xpc_part, "the reserved page's partid of %d should be "
160                         "%d\n", rp->partid, sn_partition_id);
161                 return NULL;
162         }
163
164         rp->version = XPC_RP_VERSION;
165
166         /*
167          * Place the XPC variables on the cache line following the
168          * reserved page structure.
169          */
170         next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE;
171         xpc_vars = (struct xpc_vars *) next_cl;
172
173         /*
174          * Before clearing xpc_vars, see if a page of AMOs had been previously
175          * allocated. If not we'll need to allocate one and set permissions
176          * so that cross-partition AMOs are allowed.
177          *
178          * The allocated AMO page needs MCA reporting to remain disabled after
179          * XPC has unloaded.  To make this work, we keep a copy of the pointer
180          * to this page (i.e., amos_page) in the struct xpc_vars structure,
181          * which is pointed to by the reserved page, and re-use that saved copy
182          * on subsequent loads of XPC. This AMO page is never freed, and its
183          * memory protections are never restricted.
184          */
185         if ((amos_page = xpc_vars->amos_page) == NULL) {
186                 amos_page = (AMO_t *) mspec_kalloc_page(0);
187                 if (amos_page == NULL) {
188                         dev_err(xpc_part, "can't allocate page of AMOs\n");
189                         return NULL;
190                 }
191
192                 /*
193                  * Open up AMO-R/W to cpu.  This is done for Shub 1.1 systems
194                  * when xpc_allow_IPI_ops() is called via xpc_hb_init().
195                  */
196                 if (!enable_shub_wars_1_1()) {
197                         ret = sn_change_memprotect(ia64_tpa((u64) amos_page),
198                                         PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1,
199                                         &nasid_array);
200                         if (ret != 0) {
201                                 dev_err(xpc_part, "can't change memory "
202                                         "protections\n");
203                                 mspec_kfree_page((unsigned long) amos_page);
204                                 return NULL;
205                         }
206                 }
207         }
208
209         memset(xpc_vars, 0, sizeof(struct xpc_vars));
210
211         /*
212          * Place the XPC per partition specific variables on the cache line
213          * following the XPC variables structure.
214          */
215         next_cl += XPC_VARS_ALIGNED_SIZE;
216         memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) *
217                                                         XP_MAX_PARTITIONS);
218         xpc_vars_part = (struct xpc_vars_part *) next_cl;
219         xpc_vars->vars_part_pa = __pa(next_cl);
220
221         xpc_vars->version = XPC_V_VERSION;
222         xpc_vars->act_nasid = cpuid_to_nasid(0);
223         xpc_vars->act_phys_cpuid = cpu_physical_id(0);
224         xpc_vars->amos_page = amos_page;  /* save for next load of XPC */
225
226
227         /*
228          * Initialize the activation related AMO variables.
229          */
230         xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS);
231         for (i = 1; i < XP_NASID_MASK_WORDS; i++) {
232                 xpc_IPI_init(i + XP_MAX_PARTITIONS);
233         }
234         /* export AMO page's physical address to other partitions */
235         xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page);
236
237         /*
238          * This signifies to the remote partition that our reserved
239          * page is initialized.
240          */
241         (volatile u64) rp->vars_pa = __pa(xpc_vars);
242
243         return rp;
244 }
245
246
247 /*
248  * Change protections to allow IPI operations (and AMO operations on
249  * Shub 1.1 systems).
250  */
251 void
252 xpc_allow_IPI_ops(void)
253 {
254         int node;
255         int nasid;
256
257
258         // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
259
260         if (is_shub2()) {
261                 xpc_sh2_IPI_access0 =
262                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
263                 xpc_sh2_IPI_access1 =
264                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
265                 xpc_sh2_IPI_access2 =
266                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
267                 xpc_sh2_IPI_access3 =
268                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
269
270                 for_each_online_node(node) {
271                         nasid = cnodeid_to_nasid(node);
272                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
273                                                                 -1UL);
274                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
275                                                                 -1UL);
276                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
277                                                                 -1UL);
278                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
279                                                                 -1UL);
280                 }
281
282         } else {
283                 xpc_sh1_IPI_access =
284                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
285
286                 for_each_online_node(node) {
287                         nasid = cnodeid_to_nasid(node);
288                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
289                                                                 -1UL);
290
291                         /*
292                          * Since the BIST collides with memory operations on
293                          * SHUB 1.1 sn_change_memprotect() cannot be used.
294                          */
295                         if (enable_shub_wars_1_1()) {
296                                 /* open up everything */
297                                 xpc_prot_vec[node] = (u64) HUB_L((u64 *)
298                                                 GLOBAL_MMR_ADDR(nasid,
299                                                 SH1_MD_DQLP_MMR_DIR_PRIVEC0));
300                                 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
301                                                 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
302                                                                 -1UL);
303                                 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
304                                                 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
305                                                                 -1UL);
306                         }
307                 }
308         }
309 }
310
311
312 /*
313  * Restrict protections to disallow IPI operations (and AMO operations on
314  * Shub 1.1 systems).
315  */
316 void
317 xpc_restrict_IPI_ops(void)
318 {
319         int node;
320         int nasid;
321
322
323         // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
324
325         if (is_shub2()) {
326
327                 for_each_online_node(node) {
328                         nasid = cnodeid_to_nasid(node);
329                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
330                                                         xpc_sh2_IPI_access0);
331                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
332                                                         xpc_sh2_IPI_access1);
333                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
334                                                         xpc_sh2_IPI_access2);
335                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
336                                                         xpc_sh2_IPI_access3);
337                 }
338
339         } else {
340
341                 for_each_online_node(node) {
342                         nasid = cnodeid_to_nasid(node);
343                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
344                                                         xpc_sh1_IPI_access);
345
346                         if (enable_shub_wars_1_1()) {
347                                 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
348                                                 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
349                                                         xpc_prot_vec[node]);
350                                 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
351                                                 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
352                                                         xpc_prot_vec[node]);
353                         }
354                 }
355         }
356 }
357
358
359 /*
360  * At periodic intervals, scan through all active partitions and ensure
361  * their heartbeat is still active.  If not, the partition is deactivated.
362  */
363 void
364 xpc_check_remote_hb(void)
365 {
366         struct xpc_vars *remote_vars;
367         struct xpc_partition *part;
368         partid_t partid;
369         bte_result_t bres;
370
371
372         remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
373
374         for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
375                 if (partid == sn_partition_id) {
376                         continue;
377                 }
378
379                 part = &xpc_partitions[partid];
380
381                 if (part->act_state == XPC_P_INACTIVE ||
382                                 part->act_state == XPC_P_DEACTIVATING) {
383                         continue;
384                 }
385
386                 /* pull the remote_hb cache line */
387                 bres = xp_bte_copy(part->remote_vars_pa,
388                                         ia64_tpa((u64) remote_vars),
389                                         XPC_VARS_ALIGNED_SIZE,
390                                         (BTE_NOTIFY | BTE_WACQUIRE), NULL);
391                 if (bres != BTE_SUCCESS) {
392                         XPC_DEACTIVATE_PARTITION(part,
393                                                 xpc_map_bte_errors(bres));
394                         continue;
395                 }
396
397                 dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
398                         " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid,
399                         remote_vars->heartbeat, part->last_heartbeat,
400                         remote_vars->kdb_status,
401                         remote_vars->heartbeating_to_mask);
402
403                 if (((remote_vars->heartbeat == part->last_heartbeat) &&
404                         (remote_vars->kdb_status == 0)) ||
405                              !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
406
407                         XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
408                         continue;
409                 }
410
411                 part->last_heartbeat = remote_vars->heartbeat;
412         }
413 }
414
415
416 /*
417  * Get a copy of the remote partition's rsvd page.
418  *
419  * remote_rp points to a buffer that is cacheline aligned for BTE copies and
420  * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE.
421  */
422 static enum xpc_retval
423 xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
424                 struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa)
425 {
426         int bres, i;
427
428
429         /* get the reserved page's physical address */
430
431         *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp,
432                                                 XPC_RSVD_PAGE_ALIGNED_SIZE);
433         if (*remote_rsvd_page_pa == 0) {
434                 return xpcNoRsvdPageAddr;
435         }
436
437
438         /* pull over the reserved page structure */
439
440         bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp),
441                                 XPC_RSVD_PAGE_ALIGNED_SIZE,
442                                 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
443         if (bres != BTE_SUCCESS) {
444                 return xpc_map_bte_errors(bres);
445         }
446
447
448         if (discovered_nasids != NULL) {
449                 for (i = 0; i < XP_NASID_MASK_WORDS; i++) {
450                         discovered_nasids[i] |= remote_rp->part_nasids[i];
451                 }
452         }
453
454
455         /* check that the partid is for another partition */
456
457         if (remote_rp->partid < 1 ||
458                                 remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
459                 return xpcInvalidPartid;
460         }
461
462         if (remote_rp->partid == sn_partition_id) {
463                 return xpcLocalPartid;
464         }
465
466
467         if (XPC_VERSION_MAJOR(remote_rp->version) !=
468                                         XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
469                 return xpcBadVersion;
470         }
471
472         return xpcSuccess;
473 }
474
475
476 /*
477  * Get a copy of the remote partition's XPC variables.
478  *
479  * remote_vars points to a buffer that is cacheline aligned for BTE copies and
480  * assumed to be of size XPC_VARS_ALIGNED_SIZE.
481  */
482 static enum xpc_retval
483 xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
484 {
485         int bres;
486
487
488         if (remote_vars_pa == 0) {
489                 return xpcVarsNotSet;
490         }
491
492
493         /* pull over the cross partition variables */
494
495         bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
496                                 XPC_VARS_ALIGNED_SIZE,
497                                 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
498         if (bres != BTE_SUCCESS) {
499                 return xpc_map_bte_errors(bres);
500         }
501
502         if (XPC_VERSION_MAJOR(remote_vars->version) !=
503                                         XPC_VERSION_MAJOR(XPC_V_VERSION)) {
504                 return xpcBadVersion;
505         }
506
507         return xpcSuccess;
508 }
509
510
511 /*
512  * Prior code has determine the nasid which generated an IPI.  Inspect
513  * that nasid to determine if its partition needs to be activated or
514  * deactivated.
515  *
516  * A partition is consider "awaiting activation" if our partition
517  * flags indicate it is not active and it has a heartbeat.  A
518  * partition is considered "awaiting deactivation" if our partition
519  * flags indicate it is active but it has no heartbeat or it is not
520  * sending its heartbeat to us.
521  *
522  * To determine the heartbeat, the remote nasid must have a properly
523  * initialized reserved page.
524  */
525 static void
526 xpc_identify_act_IRQ_req(int nasid)
527 {
528         struct xpc_rsvd_page *remote_rp;
529         struct xpc_vars *remote_vars;
530         u64 remote_rsvd_page_pa;
531         u64 remote_vars_pa;
532         partid_t partid;
533         struct xpc_partition *part;
534         enum xpc_retval ret;
535
536
537         /* pull over the reserved page structure */
538
539         remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
540
541         ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa);
542         if (ret != xpcSuccess) {
543                 dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
544                         "which sent interrupt, reason=%d\n", nasid, ret);
545                 return;
546         }
547
548         remote_vars_pa = remote_rp->vars_pa;
549         partid = remote_rp->partid;
550         part = &xpc_partitions[partid];
551
552
553         /* pull over the cross partition variables */
554
555         remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
556
557         ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
558         if (ret != xpcSuccess) {
559
560                 dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
561                         "which sent interrupt, reason=%d\n", nasid, ret);
562
563                 XPC_DEACTIVATE_PARTITION(part, ret);
564                 return;
565         }
566
567
568         part->act_IRQ_rcvd++;
569
570         dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
571                 "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
572                 remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
573
574
575         if (part->act_state == XPC_P_INACTIVE) {
576
577                 part->remote_rp_pa = remote_rsvd_page_pa;
578                 dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n",
579                         part->remote_rp_pa);
580
581                 part->remote_vars_pa = remote_vars_pa;
582                 dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
583                         part->remote_vars_pa);
584
585                 part->last_heartbeat = remote_vars->heartbeat;
586                 dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
587                         part->last_heartbeat);
588
589                 part->remote_vars_part_pa = remote_vars->vars_part_pa;
590                 dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
591                         part->remote_vars_part_pa);
592
593                 part->remote_act_nasid = remote_vars->act_nasid;
594                 dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
595                         part->remote_act_nasid);
596
597                 part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
598                 dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
599                         part->remote_act_phys_cpuid);
600
601                 part->remote_amos_page_pa = remote_vars->amos_page_pa;
602                 dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
603                         part->remote_amos_page_pa);
604
605                 xpc_activate_partition(part);
606
607         } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa ||
608                         !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
609
610                 part->reactivate_nasid = nasid;
611                 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
612         }
613 }
614
615
616 /*
617  * Loop through the activation AMO variables and process any bits
618  * which are set.  Each bit indicates a nasid sending a partition
619  * activation or deactivation request.
620  *
621  * Return #of IRQs detected.
622  */
623 int
624 xpc_identify_act_IRQ_sender(void)
625 {
626         int word, bit;
627         u64 nasid_mask;
628         u64 nasid;                      /* remote nasid */
629         int n_IRQs_detected = 0;
630         AMO_t *act_amos;
631         struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
632
633
634         act_amos = xpc_vars->act_amos;
635
636
637         /* scan through act AMO variable looking for non-zero entries */
638         for (word = 0; word < XP_NASID_MASK_WORDS; word++) {
639
640                 nasid_mask = xpc_IPI_receive(&act_amos[word]);
641                 if (nasid_mask == 0) {
642                         /* no IRQs from nasids in this variable */
643                         continue;
644                 }
645
646                 dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
647                         nasid_mask);
648
649
650                 /*
651                  * If this nasid has been added to the machine since
652                  * our partition was reset, this will retain the
653                  * remote nasid in our reserved pages machine mask.
654                  * This is used in the event of module reload.
655                  */
656                 rp->mach_nasids[word] |= nasid_mask;
657
658
659                 /* locate the nasid(s) which sent interrupts */
660
661                 for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
662                         if (nasid_mask & (1UL << bit)) {
663                                 n_IRQs_detected++;
664                                 nasid = XPC_NASID_FROM_W_B(word, bit);
665                                 dev_dbg(xpc_part, "interrupt from nasid %ld\n",
666                                         nasid);
667                                 xpc_identify_act_IRQ_req(nasid);
668                         }
669                 }
670         }
671         return n_IRQs_detected;
672 }
673
674
675 /*
676  * Mark specified partition as active.
677  */
678 enum xpc_retval
679 xpc_mark_partition_active(struct xpc_partition *part)
680 {
681         unsigned long irq_flags;
682         enum xpc_retval ret;
683
684
685         dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
686
687         spin_lock_irqsave(&part->act_lock, irq_flags);
688         if (part->act_state == XPC_P_ACTIVATING) {
689                 part->act_state = XPC_P_ACTIVE;
690                 ret = xpcSuccess;
691         } else {
692                 DBUG_ON(part->reason == xpcSuccess);
693                 ret = part->reason;
694         }
695         spin_unlock_irqrestore(&part->act_lock, irq_flags);
696
697         return ret;
698 }
699
700
701 /*
702  * Notify XPC that the partition is down.
703  */
704 void
705 xpc_deactivate_partition(const int line, struct xpc_partition *part,
706                                 enum xpc_retval reason)
707 {
708         unsigned long irq_flags;
709         partid_t partid = XPC_PARTID(part);
710
711
712         spin_lock_irqsave(&part->act_lock, irq_flags);
713
714         if (part->act_state == XPC_P_INACTIVE) {
715                 XPC_SET_REASON(part, reason, line);
716                 spin_unlock_irqrestore(&part->act_lock, irq_flags);
717                 if (reason == xpcReactivating) {
718                         /* we interrupt ourselves to reactivate partition */
719                         xpc_IPI_send_reactivate(part);
720                 }
721                 return;
722         }
723         if (part->act_state == XPC_P_DEACTIVATING) {
724                 if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
725                                         reason == xpcReactivating) {
726                         XPC_SET_REASON(part, reason, line);
727                 }
728                 spin_unlock_irqrestore(&part->act_lock, irq_flags);
729                 return;
730         }
731
732         part->act_state = XPC_P_DEACTIVATING;
733         XPC_SET_REASON(part, reason, line);
734
735         spin_unlock_irqrestore(&part->act_lock, irq_flags);
736
737         XPC_DISALLOW_HB(partid, xpc_vars);
738
739         dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid,
740                 reason);
741
742         xpc_partition_down(part, reason);
743 }
744
745
746 /*
747  * Mark specified partition as active.
748  */
749 void
750 xpc_mark_partition_inactive(struct xpc_partition *part)
751 {
752         unsigned long irq_flags;
753
754
755         dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
756                 XPC_PARTID(part));
757
758         spin_lock_irqsave(&part->act_lock, irq_flags);
759         part->act_state = XPC_P_INACTIVE;
760         spin_unlock_irqrestore(&part->act_lock, irq_flags);
761         part->remote_rp_pa = 0;
762 }
763
764
765 /*
766  * SAL has provided a partition and machine mask.  The partition mask
767  * contains a bit for each even nasid in our partition.  The machine
768  * mask contains a bit for each even nasid in the entire machine.
769  *
770  * Using those two bit arrays, we can determine which nasids are
771  * known in the machine.  Each should also have a reserved page
772  * initialized if they are available for partitioning.
773  */
774 void
775 xpc_discovery(void)
776 {
777         void *remote_rp_base;
778         struct xpc_rsvd_page *remote_rp;
779         struct xpc_vars *remote_vars;
780         u64 remote_rsvd_page_pa;
781         u64 remote_vars_pa;
782         int region;
783         int max_regions;
784         int nasid;
785         struct xpc_rsvd_page *rp;
786         partid_t partid;
787         struct xpc_partition *part;
788         u64 *discovered_nasids;
789         enum xpc_retval ret;
790
791
792         remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE,
793                                                 GFP_KERNEL, &remote_rp_base);
794         if (remote_rp == NULL) {
795                 return;
796         }
797         remote_vars = (struct xpc_vars *) remote_rp;
798
799
800         discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS,
801                                                         GFP_KERNEL);
802         if (discovered_nasids == NULL) {
803                 kfree(remote_rp_base);
804                 return;
805         }
806         memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS);
807
808         rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
809
810         /*
811          * The term 'region' in this context refers to the minimum number of
812          * nodes that can comprise an access protection grouping. The access
813          * protection is in regards to memory, IOI and IPI.
814          */
815 //>>> move the next two #defines into either include/asm-ia64/sn/arch.h or
816 //>>> include/asm-ia64/sn/addrs.h
817 #define SH1_MAX_REGIONS         64
818 #define SH2_MAX_REGIONS         256
819         max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS;
820
821         for (region = 0; region < max_regions; region++) {
822
823                 if ((volatile int) xpc_exiting) {
824                         break;
825                 }
826
827                 dev_dbg(xpc_part, "searching region %d\n", region);
828
829                 for (nasid = (region * sn_region_size * 2);
830                      nasid < ((region + 1) * sn_region_size * 2);
831                      nasid += 2) {
832
833                         if ((volatile int) xpc_exiting) {
834                                 break;
835                         }
836
837                         dev_dbg(xpc_part, "checking nasid %d\n", nasid);
838
839
840                         if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) {
841                                 dev_dbg(xpc_part, "PROM indicates Nasid %d is "
842                                         "part of the local partition; skipping "
843                                         "region\n", nasid);
844                                 break;
845                         }
846
847                         if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) {
848                                 dev_dbg(xpc_part, "PROM indicates Nasid %d was "
849                                         "not on Numa-Link network at reset\n",
850                                         nasid);
851                                 continue;
852                         }
853
854                         if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
855                                 dev_dbg(xpc_part, "Nasid %d is part of a "
856                                         "partition which was previously "
857                                         "discovered\n", nasid);
858                                 continue;
859                         }
860
861
862                         /* pull over the reserved page structure */
863
864                         ret = xpc_get_remote_rp(nasid, discovered_nasids,
865                                               remote_rp, &remote_rsvd_page_pa);
866                         if (ret != xpcSuccess) {
867                                 dev_dbg(xpc_part, "unable to get reserved page "
868                                         "from nasid %d, reason=%d\n", nasid,
869                                         ret);
870
871                                 if (ret == xpcLocalPartid) {
872                                         break;
873                                 }
874                                 continue;
875                         }
876
877                         remote_vars_pa = remote_rp->vars_pa;
878
879                         partid = remote_rp->partid;
880                         part = &xpc_partitions[partid];
881
882
883                         /* pull over the cross partition variables */
884
885                         ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
886                         if (ret != xpcSuccess) {
887                                 dev_dbg(xpc_part, "unable to get XPC variables "
888                                         "from nasid %d, reason=%d\n", nasid,
889                                         ret);
890
891                                 XPC_DEACTIVATE_PARTITION(part, ret);
892                                 continue;
893                         }
894
895                         if (part->act_state != XPC_P_INACTIVE) {
896                                 dev_dbg(xpc_part, "partition %d on nasid %d is "
897                                         "already activating\n", partid, nasid);
898                                 break;
899                         }
900
901                         /*
902                          * Register the remote partition's AMOs with SAL so it
903                          * can handle and cleanup errors within that address
904                          * range should the remote partition go down. We don't
905                          * unregister this range because it is difficult to
906                          * tell when outstanding writes to the remote partition
907                          * are finished and thus when it is safe to
908                          * unregister. This should not result in wasted space
909                          * in the SAL xp_addr_region table because we should
910                          * get the same page for remote_act_amos_pa after
911                          * module reloads and system reboots.
912                          */
913                         if (sn_register_xp_addr_region(
914                                             remote_vars->amos_page_pa,
915                                                         PAGE_SIZE, 1) < 0) {
916                                 dev_dbg(xpc_part, "partition %d failed to "
917                                         "register xp_addr region 0x%016lx\n",
918                                         partid, remote_vars->amos_page_pa);
919
920                                 XPC_SET_REASON(part, xpcPhysAddrRegFailed,
921                                                 __LINE__);
922                                 break;
923                         }
924
925                         /*
926                          * The remote nasid is valid and available.
927                          * Send an interrupt to that nasid to notify
928                          * it that we are ready to begin activation.
929                          */
930                         dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
931                                 "nasid %d, phys_cpuid 0x%x\n",
932                                 remote_vars->amos_page_pa,
933                                 remote_vars->act_nasid,
934                                 remote_vars->act_phys_cpuid);
935
936                         xpc_IPI_send_activate(remote_vars);
937                 }
938         }
939
940         kfree(discovered_nasids);
941         kfree(remote_rp_base);
942 }
943
944
945 /*
946  * Given a partid, get the nasids owned by that partition from the
947  * remote partitions reserved page.
948  */
949 enum xpc_retval
950 xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
951 {
952         struct xpc_partition *part;
953         u64 part_nasid_pa;
954         int bte_res;
955
956
957         part = &xpc_partitions[partid];
958         if (part->remote_rp_pa == 0) {
959                 return xpcPartitionDown;
960         }
961
962         part_nasid_pa = part->remote_rp_pa +
963                 (u64) &((struct xpc_rsvd_page *) 0)->part_nasids;
964
965         bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
966                                 L1_CACHE_ALIGN(XP_NASID_MASK_BYTES),
967                                 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
968
969         return xpc_map_bte_errors(bte_res);
970 }
971