arch/ia64/sn/kernel/xpc_main.c

   1 /*
   2  * This file is subject to the terms and conditions of the GNU General Public
   3  * License.  See the file "COPYING" in the main directory of this archive
   4  * for more details.
   5  *
   6  * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
   7  */
   8
   9
  10 /*
  11  * Cross Partition Communication (XPC) support - standard version.
  12  *
  13  *      XPC provides a message passing capability that crosses partition
  14  *      boundaries. This module is made up of two parts:
  15  *
  16  *          partition   This part detects the presence/absence of other
  17  *                      partitions. It provides a heartbeat and monitors
  18  *                      the heartbeats of other partitions.
  19  *
  20  *          channel     This part manages the channels and sends/receives
  21  *                      messages across them to/from other partitions.
  22  *
  23  *      There are a couple of additional functions residing in XP, which
  24  *      provide an interface to XPC for its users.
  25  *
  26  *
  27  *      Caveats:
  28  *
  29  *        . We currently have no way to determine which nasid an IPI came
  30  *          from. Thus, xpc_IPI_send() does a remote AMO write followed by
  31  *          an IPI. The AMO indicates where data is to be pulled from, so
  32  *          after the IPI arrives, the remote partition checks the AMO word.
  33  *          The IPI can actually arrive before the AMO however, so other code
  34  *          must periodically check for this case. Also, remote AMO operations
  35  *          do not reliably time out. Thus we do a remote PIO read solely to
  36  *          know whether the remote partition is down and whether we should
  37  *          stop sending IPIs to it. This remote PIO read operation is set up
  38  *          in a special nofault region so SAL knows to ignore (and cleanup)
  39  *          any errors due to the remote AMO write, PIO read, and/or PIO
  40  *          write operations.
  41  *
  42  *          If/when new hardware solves this IPI problem, we should abandon
  43  *          the current approach.
  44  *
  45  */
  46
  47
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <asm/sn/intr.h>
#include <asm/sn/sn_sal.h>
#include <asm/kdebug.h>
#include <asm/uaccess.h>
#include "xpc.h"
  63
  64
  65 /* define two XPC debug device structures to be used with dev_dbg() et al */
  66
  67 struct device_driver xpc_dbg_name = {
  68         .name = "xpc"
  69 };
  70
  71 struct device xpc_part_dbg_subname = {
  72         .bus_id = {0},          /* set to "part" at xpc_init() time */
  73         .driver = &xpc_dbg_name
  74 };
  75
  76 struct device xpc_chan_dbg_subname = {
  77         .bus_id = {0},          /* set to "chan" at xpc_init() time */
  78         .driver = &xpc_dbg_name
  79 };
  80
  81 struct device *xpc_part = &xpc_part_dbg_subname;
  82 struct device *xpc_chan = &xpc_chan_dbg_subname;
  83
  84
  85 /* systune related variables for /proc/sys directories */
  86
  87 static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
  88 static int xpc_hb_min_interval = 1;
  89 static int xpc_hb_max_interval = 10;
  90
  91 static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
  92 static int xpc_hb_check_min_interval = 10;
  93 static int xpc_hb_check_max_interval = 120;
  94
  95 int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
  96 static int xpc_disengage_request_min_timelimit = 0;
  97 static int xpc_disengage_request_max_timelimit = 120;
  98
  99 static ctl_table xpc_sys_xpc_hb_dir[] = {
 100         {
 101                 1,
 102                 "hb_interval",
 103                 &xpc_hb_interval,
 104                 sizeof(int),
 105                 0644,
 106                 NULL,
 107                 &proc_dointvec_minmax,
 108                 &sysctl_intvec,
 109                 NULL,
 110                 &xpc_hb_min_interval,
 111                 &xpc_hb_max_interval
 112         },
 113         {
 114                 2,
 115                 "hb_check_interval",
 116                 &xpc_hb_check_interval,
 117                 sizeof(int),
 118                 0644,
 119                 NULL,
 120                 &proc_dointvec_minmax,
 121                 &sysctl_intvec,
 122                 NULL,
 123                 &xpc_hb_check_min_interval,
 124                 &xpc_hb_check_max_interval
 125         },
 126         {0}
 127 };
 128 static ctl_table xpc_sys_xpc_dir[] = {
 129         {
 130                 1,
 131                 "hb",
 132                 NULL,
 133                 0,
 134                 0555,
 135                 xpc_sys_xpc_hb_dir
 136         },
 137         {
 138                 2,
 139                 "disengage_request_timelimit",
 140                 &xpc_disengage_request_timelimit,
 141                 sizeof(int),
 142                 0644,
 143                 NULL,
 144                 &proc_dointvec_minmax,
 145                 &sysctl_intvec,
 146                 NULL,
 147                 &xpc_disengage_request_min_timelimit,
 148                 &xpc_disengage_request_max_timelimit
 149         },
 150         {0}
 151 };
 152 static ctl_table xpc_sys_dir[] = {
 153         {
 154                 1,
 155                 "xpc",
 156                 NULL,
 157                 0,
 158                 0555,
 159                 xpc_sys_xpc_dir
 160         },
 161         {0}
 162 };
 163 static struct ctl_table_header *xpc_sysctl;
 164
 165 /* non-zero if any remote partition disengage request was timed out */
 166 int xpc_disengage_request_timedout;
 167
 168 /* #of IRQs received */
 169 static atomic_t xpc_act_IRQ_rcvd;
 170
 171 /* IRQ handler notifies this wait queue on receipt of an IRQ */
 172 static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
 173
 174 static unsigned long xpc_hb_check_timeout;
 175
 176 /* notification that the xpc_hb_checker thread has exited */
 177 static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited);
 178
 179 /* notification that the xpc_discovery thread has exited */
 180 static DECLARE_MUTEX_LOCKED(xpc_discovery_exited);
 181
 182
 183 static struct timer_list xpc_hb_timer;
 184
 185
 186 static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
 187
 188
 189 static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
 190 static struct notifier_block xpc_reboot_notifier = {
 191         .notifier_call = xpc_system_reboot,
 192 };
 193
 194 static int xpc_system_die(struct notifier_block *, unsigned long, void *);
 195 static struct notifier_block xpc_die_notifier = {
 196         .notifier_call = xpc_system_die,
 197 };
 198
 199
 200 /*
 201  * Timer function to enforce the timelimit on the partition disengage request.
 202  */
 203 static void
 204 xpc_timeout_partition_disengage_request(unsigned long data)
 205 {
 206         struct xpc_partition *part = (struct xpc_partition *) data;
 207
 208
 209         DBUG_ON(jiffies < part->disengage_request_timeout);
 210
 211         (void) xpc_partition_disengaged(part);
 212
 213         DBUG_ON(part->disengage_request_timeout != 0);
 214         DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
 215 }
 216
 217
 218 /*
 219  * Notify the heartbeat check thread that an IRQ has been received.
 220  */
 221 static irqreturn_t
 222 xpc_act_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs)
 223 {
 224         atomic_inc(&xpc_act_IRQ_rcvd);
 225         wake_up_interruptible(&xpc_act_IRQ_wq);
 226         return IRQ_HANDLED;
 227 }
 228
 229
 230 /*
 231  * Timer to produce the heartbeat.  The timer structures function is
 232  * already set when this is initially called.  A tunable is used to
 233  * specify when the next timeout should occur.
 234  */
 235 static void
 236 xpc_hb_beater(unsigned long dummy)
 237 {
 238         xpc_vars->heartbeat++;
 239
 240         if (jiffies >= xpc_hb_check_timeout) {
 241                 wake_up_interruptible(&xpc_act_IRQ_wq);
 242         }
 243
 244         xpc_hb_timer.expires = jiffies + (xpc_hb_interval * HZ);
 245         add_timer(&xpc_hb_timer);
 246 }
 247
 248
 249 /*
 250  * This thread is responsible for nearly all of the partition
 251  * activation/deactivation.
 252  */
 253 static int
 254 xpc_hb_checker(void *ignore)
 255 {
 256         int last_IRQ_count = 0;
 257         int new_IRQ_count;
 258         int force_IRQ=0;
 259
 260
 261         /* this thread was marked active by xpc_hb_init() */
 262
 263         daemonize(XPC_HB_CHECK_THREAD_NAME);
 264
 265         set_cpus_allowed(current, cpumask_of_cpu(XPC_HB_CHECK_CPU));
 266
 267         xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ);
 268
 269         while (!(volatile int) xpc_exiting) {
 270
 271                 dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
 272                         "been received\n",
 273                         (int) (xpc_hb_check_timeout - jiffies),
 274                         atomic_read(&xpc_act_IRQ_rcvd) - last_IRQ_count);
 275
 276
 277                 /* checking of remote heartbeats is skewed by IRQ handling */
 278                 if (jiffies >= xpc_hb_check_timeout) {
 279                         dev_dbg(xpc_part, "checking remote heartbeats\n");
 280                         xpc_check_remote_hb();
 281
 282                         /*
 283                          * We need to periodically recheck to ensure no
 284                          * IPI/AMO pairs have been missed.  That check
 285                          * must always reset xpc_hb_check_timeout.
 286                          */
 287                         force_IRQ = 1;
 288                 }
 289
 290
 291                 /* check for outstanding IRQs */
 292                 new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
 293                 if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
 294                         force_IRQ = 0;
 295
 296                         dev_dbg(xpc_part, "found an IRQ to process; will be "
 297                                 "resetting xpc_hb_check_timeout\n");
 298
 299                         last_IRQ_count += xpc_identify_act_IRQ_sender();
 300                         if (last_IRQ_count < new_IRQ_count) {
 301                                 /* retry once to help avoid missing AMO */
 302                                 (void) xpc_identify_act_IRQ_sender();
 303                         }
 304                         last_IRQ_count = new_IRQ_count;
 305
 306                         xpc_hb_check_timeout = jiffies +
 307                                            (xpc_hb_check_interval * HZ);
 308                 }
 309
 310                 /* wait for IRQ or timeout */
 311                 (void) wait_event_interruptible(xpc_act_IRQ_wq,
 312                             (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
 313                                         jiffies >= xpc_hb_check_timeout ||
 314                                                 (volatile int) xpc_exiting));
 315         }
 316
 317         dev_dbg(xpc_part, "heartbeat checker is exiting\n");
 318
 319
 320         /* mark this thread as having exited */
 321         up(&xpc_hb_checker_exited);
 322         return 0;
 323 }
 324
 325
 326 /*
 327  * This thread will attempt to discover other partitions to activate
 328  * based on info provided by SAL. This new thread is short lived and
 329  * will exit once discovery is complete.
 330  */
 331 static int
 332 xpc_initiate_discovery(void *ignore)
 333 {
 334         daemonize(XPC_DISCOVERY_THREAD_NAME);
 335
 336         xpc_discovery();
 337
 338         dev_dbg(xpc_part, "discovery thread is exiting\n");
 339
 340         /* mark this thread as having exited */
 341         up(&xpc_discovery_exited);
 342         return 0;
 343 }
 344
 345
 346 /*
 347  * Establish first contact with the remote partititon. This involves pulling
 348  * the XPC per partition variables from the remote partition and waiting for
 349  * the remote partition to pull ours.
 350  */
 351 static enum xpc_retval
 352 xpc_make_first_contact(struct xpc_partition *part)
 353 {
 354         enum xpc_retval ret;
 355
 356
 357         while ((ret = xpc_pull_remote_vars_part(part)) != xpcSuccess) {
 358                 if (ret != xpcRetry) {
 359                         XPC_DEACTIVATE_PARTITION(part, ret);
 360                         return ret;
 361                 }
 362
 363                 dev_dbg(xpc_chan, "waiting to make first contact with "
 364                         "partition %d\n", XPC_PARTID(part));
 365
 366                 /* wait a 1/4 of a second or so */
 367                 (void) msleep_interruptible(250);
 368
 369                 if (part->act_state == XPC_P_DEACTIVATING) {
 370                         return part->reason;
 371                 }
 372         }
 373
 374         return xpc_mark_partition_active(part);
 375 }
 376
 377
 378 /*
 379  * The first kthread assigned to a newly activated partition is the one
 380  * created by XPC HB with which it calls xpc_partition_up(). XPC hangs on to
 381  * that kthread until the partition is brought down, at which time that kthread
 382  * returns back to XPC HB. (The return of that kthread will signify to XPC HB
 383  * that XPC has dismantled all communication infrastructure for the associated
 384  * partition.) This kthread becomes the channel manager for that partition.
 385  *
 386  * Each active partition has a channel manager, who, besides connecting and
 387  * disconnecting channels, will ensure that each of the partition's connected
 388  * channels has the required number of assigned kthreads to get the work done.
 389  */
 390 static void
 391 xpc_channel_mgr(struct xpc_partition *part)
 392 {
 393         while (part->act_state != XPC_P_DEACTIVATING ||
 394                         atomic_read(&part->nchannels_active) > 0 ||
 395                                         !xpc_partition_disengaged(part)) {
 396
 397                 xpc_process_channel_activity(part);
 398
 399
 400                 /*
 401                  * Wait until we've been requested to activate kthreads or
 402                  * all of the channel's message queues have been torn down or
 403                  * a signal is pending.
 404                  *
 405                  * The channel_mgr_requests is set to 1 after being awakened,
 406                  * This is done to prevent the channel mgr from making one pass
 407                  * through the loop for each request, since he will
 408                  * be servicing all the requests in one pass. The reason it's
 409                  * set to 1 instead of 0 is so that other kthreads will know
 410                  * that the channel mgr is running and won't bother trying to
 411                  * wake him up.
 412                  */
 413                 atomic_dec(&part->channel_mgr_requests);
 414                 (void) wait_event_interruptible(part->channel_mgr_wq,
 415                                 (atomic_read(&part->channel_mgr_requests) > 0 ||
 416                                 (volatile u64) part->local_IPI_amo != 0 ||
 417                                 ((volatile u8) part->act_state ==
 418                                                         XPC_P_DEACTIVATING &&
 419                                 atomic_read(&part->nchannels_active) == 0 &&
 420                                 xpc_partition_disengaged(part))));
 421                 atomic_set(&part->channel_mgr_requests, 1);
 422
 423                 // >>> Does it need to wakeup periodically as well? In case we
 424                 // >>> miscalculated the #of kthreads to wakeup or create?
 425         }
 426 }
 427
 428
 429 /*
 430  * When XPC HB determines that a partition has come up, it will create a new
 431  * kthread and that kthread will call this function to attempt to set up the
 432  * basic infrastructure used for Cross Partition Communication with the newly
 433  * upped partition.
 434  *
 435  * The kthread that was created by XPC HB and which setup the XPC
 436  * infrastructure will remain assigned to the partition until the partition
 437  * goes down. At which time the kthread will teardown the XPC infrastructure
 438  * and then exit.
 439  *
 440  * XPC HB will put the remote partition's XPC per partition specific variables
 441  * physical address into xpc_partitions[partid].remote_vars_part_pa prior to
 442  * calling xpc_partition_up().
 443  */
 444 static void
 445 xpc_partition_up(struct xpc_partition *part)
 446 {
 447         DBUG_ON(part->channels != NULL);
 448
 449         dev_dbg(xpc_chan, "activating partition %d\n", XPC_PARTID(part));
 450
 451         if (xpc_setup_infrastructure(part) != xpcSuccess) {
 452                 return;
 453         }
 454
 455         /*
 456          * The kthread that XPC HB called us with will become the
 457          * channel manager for this partition. It will not return
 458          * back to XPC HB until the partition's XPC infrastructure
 459          * has been dismantled.
 460          */
 461
 462         (void) xpc_part_ref(part);      /* this will always succeed */
 463
 464         if (xpc_make_first_contact(part) == xpcSuccess) {
 465                 xpc_channel_mgr(part);
 466         }
 467
 468         xpc_part_deref(part);
 469
 470         xpc_teardown_infrastructure(part);
 471 }
 472
 473
 474 static int
 475 xpc_activating(void *__partid)
 476 {
 477         partid_t partid = (u64) __partid;
 478         struct xpc_partition *part = &xpc_partitions[partid];
 479         unsigned long irq_flags;
 480         struct sched_param param = { sched_priority: MAX_RT_PRIO - 1 };
 481         int ret;
 482
 483
 484         DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
 485
 486         spin_lock_irqsave(&part->act_lock, irq_flags);
 487
 488         if (part->act_state == XPC_P_DEACTIVATING) {
 489                 part->act_state = XPC_P_INACTIVE;
 490                 spin_unlock_irqrestore(&part->act_lock, irq_flags);
 491                 part->remote_rp_pa = 0;
 492                 return 0;
 493         }
 494
 495         /* indicate the thread is activating */
 496         DBUG_ON(part->act_state != XPC_P_ACTIVATION_REQ);
 497         part->act_state = XPC_P_ACTIVATING;
 498
 499         XPC_SET_REASON(part, 0, 0);
 500         spin_unlock_irqrestore(&part->act_lock, irq_flags);
 501
 502         dev_dbg(xpc_part, "bringing partition %d up\n", partid);
 503
 504         daemonize("xpc%02d", partid);
 505
 506         /*
 507          * This thread needs to run at a realtime priority to prevent a
 508          * significant performance degradation.
 509          */
 510         ret = sched_setscheduler(current, SCHED_FIFO, &param);
 511         if (ret != 0) {
 512                 dev_warn(xpc_part, "unable to set pid %d to a realtime "
 513                         "priority, ret=%d\n", current->pid, ret);
 514         }
 515
 516         /* allow this thread and its children to run on any CPU */
 517         set_cpus_allowed(current, CPU_MASK_ALL);
 518
 519         /*
 520          * Register the remote partition's AMOs with SAL so it can handle
 521          * and cleanup errors within that address range should the remote
 522          * partition go down. We don't unregister this range because it is
 523          * difficult to tell when outstanding writes to the remote partition
 524          * are finished and thus when it is safe to unregister. This should
 525          * not result in wasted space in the SAL xp_addr_region table because
 526          * we should get the same page for remote_amos_page_pa after module
 527          * reloads and system reboots.
 528          */
 529         if (sn_register_xp_addr_region(part->remote_amos_page_pa,
 530                                                         PAGE_SIZE, 1) < 0) {
 531                 dev_warn(xpc_part, "xpc_partition_up(%d) failed to register "
 532                         "xp_addr region\n", partid);
 533
 534                 spin_lock_irqsave(&part->act_lock, irq_flags);
 535                 part->act_state = XPC_P_INACTIVE;
 536                 XPC_SET_REASON(part, xpcPhysAddrRegFailed, __LINE__);
 537                 spin_unlock_irqrestore(&part->act_lock, irq_flags);
 538                 part->remote_rp_pa = 0;
 539                 return 0;
 540         }
 541
 542         xpc_allow_hb(partid, xpc_vars);
 543         xpc_IPI_send_activated(part);
 544
 545
 546         /*
 547          * xpc_partition_up() holds this thread and marks this partition as
 548          * XPC_P_ACTIVE by calling xpc_hb_mark_active().
 549          */
 550         (void) xpc_partition_up(part);
 551
 552         xpc_disallow_hb(partid, xpc_vars);
 553         xpc_mark_partition_inactive(part);
 554
 555         if (part->reason == xpcReactivating) {
 556                 /* interrupting ourselves results in activating partition */
 557                 xpc_IPI_send_reactivate(part);
 558         }
 559
 560         return 0;
 561 }
 562
 563
 564 void
 565 xpc_activate_partition(struct xpc_partition *part)
 566 {
 567         partid_t partid = XPC_PARTID(part);
 568         unsigned long irq_flags;
 569         pid_t pid;
 570
 571
 572         spin_lock_irqsave(&part->act_lock, irq_flags);
 573
 574         pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0);
 575
 576         DBUG_ON(part->act_state != XPC_P_INACTIVE);
 577
 578         if (pid > 0) {
 579                 part->act_state = XPC_P_ACTIVATION_REQ;
 580                 XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
 581         } else {
 582                 XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
 583         }
 584
 585         spin_unlock_irqrestore(&part->act_lock, irq_flags);
 586 }
 587
 588
 589 /*
 590  * Handle the receipt of a SGI_XPC_NOTIFY IRQ by seeing whether the specified
 591  * partition actually sent it. Since SGI_XPC_NOTIFY IRQs may be shared by more
 592  * than one partition, we use an AMO_t structure per partition to indicate
 593  * whether a partition has sent an IPI or not.  >>> If it has, then wake up the
 594  * associated kthread to handle it.
 595  *
 596  * All SGI_XPC_NOTIFY IRQs received by XPC are the result of IPIs sent by XPC
 597  * running on other partitions.
 598  *
 599  * Noteworthy Arguments:
 600  *
 601  *      irq - Interrupt ReQuest number. NOT USED.
 602  *
 603  *      dev_id - partid of IPI's potential sender.
 604  *
 605  *      regs - processor's context before the processor entered
 606  *             interrupt code. NOT USED.
 607  */
 608 irqreturn_t
 609 xpc_notify_IRQ_handler(int irq, void *dev_id, struct pt_regs *regs)
 610 {
 611         partid_t partid = (partid_t) (u64) dev_id;
 612         struct xpc_partition *part = &xpc_partitions[partid];
 613
 614
 615         DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
 616
 617         if (xpc_part_ref(part)) {
 618                 xpc_check_for_channel_activity(part);
 619
 620                 xpc_part_deref(part);
 621         }
 622         return IRQ_HANDLED;
 623 }
 624
 625
 626 /*
 627  * Check to see if xpc_notify_IRQ_handler() dropped any IPIs on the floor
 628  * because the write to their associated IPI amo completed after the IRQ/IPI
 629  * was received.
 630  */
 631 void
 632 xpc_dropped_IPI_check(struct xpc_partition *part)
 633 {
 634         if (xpc_part_ref(part)) {
 635                 xpc_check_for_channel_activity(part);
 636
 637                 part->dropped_IPI_timer.expires = jiffies +
 638                                                         XPC_P_DROPPED_IPI_WAIT;
 639                 add_timer(&part->dropped_IPI_timer);
 640                 xpc_part_deref(part);
 641         }
 642 }
 643
 644
 645 void
 646 xpc_activate_kthreads(struct xpc_channel *ch, int needed)
 647 {
 648         int idle = atomic_read(&ch->kthreads_idle);
 649         int assigned = atomic_read(&ch->kthreads_assigned);
 650         int wakeup;
 651
 652
 653         DBUG_ON(needed <= 0);
 654
 655         if (idle > 0) {
 656                 wakeup = (needed > idle) ? idle : needed;
 657                 needed -= wakeup;
 658
 659                 dev_dbg(xpc_chan, "wakeup %d idle kthreads, partid=%d, "
 660                         "channel=%d\n", wakeup, ch->partid, ch->number);
 661
 662                 /* only wakeup the requested number of kthreads */
 663                 wake_up_nr(&ch->idle_wq, wakeup);
 664         }
 665
 666         if (needed <= 0) {
 667                 return;
 668         }
 669
 670         if (needed + assigned > ch->kthreads_assigned_limit) {
 671                 needed = ch->kthreads_assigned_limit - assigned;
 672                 // >>>should never be less than 0
 673                 if (needed <= 0) {
 674                         return;
 675                 }
 676         }
 677
 678         dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
 679                 needed, ch->partid, ch->number);
 680
 681         xpc_create_kthreads(ch, needed);
 682 }
 683
 684
 685 /*
 686  * This function is where XPC's kthreads wait for messages to deliver.
 687  */
 688 static void
 689 xpc_kthread_waitmsgs(struct xpc_partition *part, struct xpc_channel *ch)
 690 {
 691         do {
 692                 /* deliver messages to their intended recipients */
 693
 694                 while ((volatile s64) ch->w_local_GP.get <
 695                                 (volatile s64) ch->w_remote_GP.put &&
 696                                         !((volatile u32) ch->flags &
 697                                                 XPC_C_DISCONNECTING)) {
 698                         xpc_deliver_msg(ch);
 699                 }
 700
 701                 if (atomic_inc_return(&ch->kthreads_idle) >
 702                                                 ch->kthreads_idle_limit) {
 703                         /* too many idle kthreads on this channel */
 704                         atomic_dec(&ch->kthreads_idle);
 705                         break;
 706                 }
 707
 708                 dev_dbg(xpc_chan, "idle kthread calling "
 709                         "wait_event_interruptible_exclusive()\n");
 710
 711                 (void) wait_event_interruptible_exclusive(ch->idle_wq,
 712                                 ((volatile s64) ch->w_local_GP.get <
 713                                         (volatile s64) ch->w_remote_GP.put ||
 714                                 ((volatile u32) ch->flags &
 715                                                 XPC_C_DISCONNECTING)));
 716
 717                 atomic_dec(&ch->kthreads_idle);
 718
 719         } while (!((volatile u32) ch->flags & XPC_C_DISCONNECTING));
 720 }
 721
 722
 723 static int
 724 xpc_daemonize_kthread(void *args)
 725 {
 726         partid_t partid = XPC_UNPACK_ARG1(args);
 727         u16 ch_number = XPC_UNPACK_ARG2(args);
 728         struct xpc_partition *part = &xpc_partitions[partid];
 729         struct xpc_channel *ch;
 730         int n_needed;
 731         unsigned long irq_flags;
 732
 733
 734         daemonize("xpc%02dc%d", partid, ch_number);
 735
 736         dev_dbg(xpc_chan, "kthread starting, partid=%d, channel=%d\n",
 737                 partid, ch_number);
 738
 739         ch = &part->channels[ch_number];
 740
 741         if (!(ch->flags & XPC_C_DISCONNECTING)) {
 742
 743                 /* let registerer know that connection has been established */
 744
 745                 spin_lock_irqsave(&ch->lock, irq_flags);
 746                 if (!(ch->flags & XPC_C_CONNECTCALLOUT)) {
 747                         ch->flags |= XPC_C_CONNECTCALLOUT;
 748                         spin_unlock_irqrestore(&ch->lock, irq_flags);
 749
 750                         xpc_connected_callout(ch);
 751
 752                         /*
 753                          * It is possible that while the callout was being
 754                          * made that the remote partition sent some messages.
 755                          * If that is the case, we may need to activate
 756                          * additional kthreads to help deliver them. We only
 757                          * need one less than total #of messages to deliver.
 758                          */
 759                         n_needed = ch->w_remote_GP.put - ch->w_local_GP.get - 1;
 760                         if (n_needed > 0 &&
 761                                         !(ch->flags & XPC_C_DISCONNECTING)) {
 762                                 xpc_activate_kthreads(ch, n_needed);
 763                         }
 764                 } else {
 765                         spin_unlock_irqrestore(&ch->lock, irq_flags);
 766                 }
 767
 768                 xpc_kthread_waitmsgs(part, ch);
 769         }
 770
 771         if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
 772                 spin_lock_irqsave(&ch->lock, irq_flags);
 773                 if ((ch->flags & XPC_C_CONNECTCALLOUT) &&
 774                                 !(ch->flags & XPC_C_DISCONNECTCALLOUT)) {
 775                         ch->flags |= XPC_C_DISCONNECTCALLOUT;
 776                         spin_unlock_irqrestore(&ch->lock, irq_flags);
 777
 778                         xpc_disconnect_callout(ch, xpcDisconnecting);
 779                 } else {
 780                         spin_unlock_irqrestore(&ch->lock, irq_flags);
 781                 }
 782                 if (atomic_dec_return(&part->nchannels_engaged) == 0) {
 783                         xpc_mark_partition_disengaged(part);
 784                         xpc_IPI_send_disengage(part);
 785                 }
 786         }
 787
 788
 789         xpc_msgqueue_deref(ch);
 790
 791         dev_dbg(xpc_chan, "kthread exiting, partid=%d, channel=%d\n",
 792                 partid, ch_number);
 793
 794         xpc_part_deref(part);
 795         return 0;
 796 }
 797
 798
 799 /*
 800  * For each partition that XPC has established communications with, there is
 801  * a minimum of one kernel thread assigned to perform any operation that
 802  * may potentially sleep or block (basically the callouts to the asynchronous
 803  * functions registered via xpc_connect()).
 804  *
 805  * Additional kthreads are created and destroyed by XPC as the workload
 806  * demands.
 807  *
 808  * A kthread is assigned to one of the active channels that exists for a given
 809  * partition.
 810  */
 811 void
 812 xpc_create_kthreads(struct xpc_channel *ch, int needed)
 813 {
 814         unsigned long irq_flags;
 815         pid_t pid;
 816         u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
 817         struct xpc_partition *part = &xpc_partitions[ch->partid];
 818
 819
 820         while (needed-- > 0) {
 821
 822                 /*
 823                  * The following is done on behalf of the newly created
 824                  * kthread. That kthread is responsible for doing the
 825                  * counterpart to the following before it exits.
 826                  */
 827                 (void) xpc_part_ref(part);
 828                 xpc_msgqueue_ref(ch);
 829                 if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
 830                     atomic_inc_return(&part->nchannels_engaged) == 1) {
 831                         xpc_mark_partition_engaged(part);
 832                 }
 833
 834                 pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0);
 835                 if (pid < 0) {
 836                         /* the fork failed */
 837                         if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
 838                             atomic_dec_return(&part->nchannels_engaged) == 0) {
 839                                 xpc_mark_partition_disengaged(part);
 840                                 xpc_IPI_send_disengage(part);
 841                         }
 842                         xpc_msgqueue_deref(ch);
 843                         xpc_part_deref(part);
 844
 845                         if (atomic_read(&ch->kthreads_assigned) <
 846                                                 ch->kthreads_idle_limit) {
 847                                 /*
 848                                  * Flag this as an error only if we have an
 849                                  * insufficient #of kthreads for the channel
 850                                  * to function.
 851                                  *
 852                                  * No xpc_msgqueue_ref() is needed here since
 853                                  * the channel mgr is doing this.
 854                                  */
 855                                 spin_lock_irqsave(&ch->lock, irq_flags);
 856                                 XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources,
 857                                                                 &irq_flags);
 858                                 spin_unlock_irqrestore(&ch->lock, irq_flags);
 859                         }
 860                         break;
 861                 }
 862
 863                 ch->kthreads_created++; // >>> temporary debug only!!!
 864         }
 865 }
 866
 867
 868 void
 869 xpc_disconnect_wait(int ch_number)
 870 {
 871         unsigned long irq_flags;
 872         partid_t partid;
 873         struct xpc_partition *part;
 874         struct xpc_channel *ch;
 875         int wakeup_channel_mgr;
 876
 877
 878         /* now wait for all callouts to the caller's function to cease */
 879         for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
 880                 part = &xpc_partitions[partid];
 881
 882                 if (!xpc_part_ref(part)) {
 883                         continue;
 884                 }
 885
 886                 ch = &part->channels[ch_number];
 887
 888                 if (!(ch->flags & XPC_C_WDISCONNECT)) {
 889                         xpc_part_deref(part);
 890                         continue;
 891                 }
 892
 893                 (void) down(&ch->wdisconnect_sema);
 894
 895                 spin_lock_irqsave(&ch->lock, irq_flags);
 896                 DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
 897                 wakeup_channel_mgr = 0;
 898
 899                 if (ch->delayed_IPI_flags) {
 900                         if (part->act_state != XPC_P_DEACTIVATING) {
 901                                 spin_lock(&part->IPI_lock);
 902                                 XPC_SET_IPI_FLAGS(part->local_IPI_amo,
 903                                         ch->number, ch->delayed_IPI_flags);
 904                                 spin_unlock(&part->IPI_lock);
 905                                 wakeup_channel_mgr = 1;
 906                         }
 907                         ch->delayed_IPI_flags = 0;
 908                 }
 909
 910                 ch->flags &= ~XPC_C_WDISCONNECT;
 911                 spin_unlock_irqrestore(&ch->lock, irq_flags);
 912
 913                 if (wakeup_channel_mgr) {
 914                         xpc_wakeup_channel_mgr(part);
 915                 }
 916
 917                 xpc_part_deref(part);
 918         }
 919 }
 920
 921
 922 static void
 923 xpc_do_exit(enum xpc_retval reason)
 924 {
 925         partid_t partid;
 926         int active_part_count, printed_waiting_msg = 0;
 927         struct xpc_partition *part;
 928         unsigned long printmsg_time, disengage_request_timeout = 0;
 929
 930
 931         /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
 932         DBUG_ON(xpc_exiting == 1);
 933
 934         /*
 935          * Let the heartbeat checker thread and the discovery thread
 936          * (if one is running) know that they should exit. Also wake up
 937          * the heartbeat checker thread in case it's sleeping.
 938          */
 939         xpc_exiting = 1;
 940         wake_up_interruptible(&xpc_act_IRQ_wq);
 941
 942         /* ignore all incoming interrupts */
 943         free_irq(SGI_XPC_ACTIVATE, NULL);
 944
 945         /* wait for the discovery thread to exit */
 946         down(&xpc_discovery_exited);
 947
 948         /* wait for the heartbeat checker thread to exit */
 949         down(&xpc_hb_checker_exited);
 950
 951
 952         /* sleep for a 1/3 of a second or so */
 953         (void) msleep_interruptible(300);
 954
 955
 956         /* wait for all partitions to become inactive */
 957
 958         printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
 959         xpc_disengage_request_timedout = 0;
 960
 961         do {
 962                 active_part_count = 0;
 963
 964                 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
 965                         part = &xpc_partitions[partid];
 966
 967                         if (xpc_partition_disengaged(part) &&
 968                                         part->act_state == XPC_P_INACTIVE) {
 969                                 continue;
 970                         }
 971
 972                         active_part_count++;
 973
 974                         XPC_DEACTIVATE_PARTITION(part, reason);
 975
 976                         if (part->disengage_request_timeout >
 977                                                 disengage_request_timeout) {
 978                                 disengage_request_timeout =
 979                                                 part->disengage_request_timeout;
 980                         }
 981                 }
 982
 983                 if (xpc_partition_engaged(-1UL)) {
 984                         if (time_after(jiffies, printmsg_time)) {
 985                                 dev_info(xpc_part, "waiting for remote "
 986                                         "partitions to disengage, timeout in "
 987                                         "%ld seconds\n",
 988                                         (disengage_request_timeout - jiffies)
 989                                                                         / HZ);
 990                                 printmsg_time = jiffies +
 991                                         (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
 992                                 printed_waiting_msg = 1;
 993                         }
 994
 995                 } else if (active_part_count > 0) {
 996                         if (printed_waiting_msg) {
 997                                 dev_info(xpc_part, "waiting for local partition"
 998                                         " to disengage\n");
 999                                 printed_waiting_msg = 0;
1000                         }
1001
1002                 } else {
1003                         if (!xpc_disengage_request_timedout) {
1004                                 dev_info(xpc_part, "all partitions have "
1005                                         "disengaged\n");
1006                         }
1007                         break;
1008                 }
1009
1010                 /* sleep for a 1/3 of a second or so */
1011                 (void) msleep_interruptible(300);
1012
1013         } while (1);
1014
1015         DBUG_ON(xpc_partition_engaged(-1UL));
1016
1017
1018         /* indicate to others that our reserved page is uninitialized */
1019         xpc_rsvd_page->vars_pa = 0;
1020
1021         /* now it's time to eliminate our heartbeat */
1022         del_timer_sync(&xpc_hb_timer);
1023         DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
1024
1025         if (reason == xpcUnloading) {
1026                 /* take ourselves off of the reboot_notifier_list */
1027                 (void) unregister_reboot_notifier(&xpc_reboot_notifier);
1028
1029                 /* take ourselves off of the die_notifier list */
1030                 (void) unregister_die_notifier(&xpc_die_notifier);
1031         }
1032
1033         /* close down protections for IPI operations */
1034         xpc_restrict_IPI_ops();
1035
1036
1037         /* clear the interface to XPC's functions */
1038         xpc_clear_interface();
1039
1040         if (xpc_sysctl) {
1041                 unregister_sysctl_table(xpc_sysctl);
1042         }
1043 }
1044
1045
1046 /*
1047  * Called when the system is about to be either restarted or halted.
1048  */
1049 static void
1050 xpc_die_disengage(void)
1051 {
1052         struct xpc_partition *part;
1053         partid_t partid;
1054         unsigned long engaged;
1055         long time, printmsg_time, disengage_request_timeout;
1056
1057
1058         /* keep xpc_hb_checker thread from doing anything (just in case) */
1059         xpc_exiting = 1;
1060
1061         xpc_vars->heartbeating_to_mask = 0;  /* indicate we're deactivated */
1062
1063         for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1064                 part = &xpc_partitions[partid];
1065
1066                 if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part->
1067                                                         remote_vars_version)) {
1068
1069                         /* just in case it was left set by an earlier XPC */
1070                         xpc_clear_partition_engaged(1UL << partid);
1071                         continue;
1072                 }
1073
1074                 if (xpc_partition_engaged(1UL << partid) ||
1075                                         part->act_state != XPC_P_INACTIVE) {
1076                         xpc_request_partition_disengage(part);
1077                         xpc_mark_partition_disengaged(part);
1078                         xpc_IPI_send_disengage(part);
1079                 }
1080         }
1081
1082         time = rtc_time();
1083         printmsg_time = time +
1084                 (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
1085         disengage_request_timeout = time +
1086                 (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
1087
1088         /* wait for all other partitions to disengage from us */
1089
1090         while (1) {
1091                 engaged = xpc_partition_engaged(-1UL);
1092                 if (!engaged) {
1093                         dev_info(xpc_part, "all partitions have disengaged\n");
1094                         break;
1095                 }
1096
1097                 time = rtc_time();
1098                 if (time >= disengage_request_timeout) {
1099                         for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1100                                 if (engaged & (1UL << partid)) {
1101                                         dev_info(xpc_part, "disengage from "
1102                                                 "remote partition %d timed "
1103                                                 "out\n", partid);
1104                                 }
1105                         }
1106                         break;
1107                 }
1108
1109                 if (time >= printmsg_time) {
1110                         dev_info(xpc_part, "waiting for remote partitions to "
1111                                 "disengage, timeout in %ld seconds\n",
1112                                 (disengage_request_timeout - time) /
1113                                                 sn_rtc_cycles_per_second);
1114                         printmsg_time = time +
1115                                         (XPC_DISENGAGE_PRINTMSG_INTERVAL *
1116                                                 sn_rtc_cycles_per_second);
1117                 }
1118         }
1119 }
1120
1121
1122 /*
1123  * This function is called when the system is being rebooted.
1124  */
1125 static int
1126 xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
1127 {
1128         enum xpc_retval reason;
1129
1130
1131         switch (event) {
1132         case SYS_RESTART:
1133                 reason = xpcSystemReboot;
1134                 break;
1135         case SYS_HALT:
1136                 reason = xpcSystemHalt;
1137                 break;
1138         case SYS_POWER_OFF:
1139                 reason = xpcSystemPoweroff;
1140                 break;
1141         default:
1142                 reason = xpcSystemGoingDown;
1143         }
1144
1145         xpc_do_exit(reason);
1146         return NOTIFY_DONE;
1147 }
1148
1149
1150 /*
1151  * This function is called when the system is being rebooted.
1152  */
1153 static int
1154 xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
1155 {
1156         switch (event) {
1157         case DIE_MACHINE_RESTART:
1158         case DIE_MACHINE_HALT:
1159                 xpc_die_disengage();
1160                 break;
1161         case DIE_MCA_MONARCH_ENTER:
1162         case DIE_INIT_MONARCH_ENTER:
1163                 xpc_vars->heartbeat++;
1164                 xpc_vars->heartbeat_offline = 1;
1165                 break;
1166         case DIE_MCA_MONARCH_LEAVE:
1167         case DIE_INIT_MONARCH_LEAVE:
1168                 xpc_vars->heartbeat++;
1169                 xpc_vars->heartbeat_offline = 0;
1170                 break;
1171         }
1172
1173         return NOTIFY_DONE;
1174 }
1175
1176
1177 int __init
1178 xpc_init(void)
1179 {
1180         int ret;
1181         partid_t partid;
1182         struct xpc_partition *part;
1183         pid_t pid;
1184
1185
1186         if (!ia64_platform_is("sn2")) {
1187                 return -ENODEV;
1188         }
1189
1190         /*
1191          * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
1192          * various portions of a partition's reserved page. Its size is based
1193          * on the size of the reserved page header and part_nasids mask. So we
1194          * need to ensure that the other items will fit as well.
1195          */
1196         if (XPC_RP_VARS_SIZE > XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES) {
1197                 dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n");
1198                 return -EPERM;
1199         }
1200         DBUG_ON((u64) xpc_remote_copy_buffer !=
1201                                 L1_CACHE_ALIGN((u64) xpc_remote_copy_buffer));
1202
1203         snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
1204         snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
1205
1206         xpc_sysctl = register_sysctl_table(xpc_sys_dir, 1);
1207
1208         /*
1209          * The first few fields of each entry of xpc_partitions[] need to
1210          * be initialized now so that calls to xpc_connect() and
1211          * xpc_disconnect() can be made prior to the activation of any remote
1212          * partition. NOTE THAT NONE OF THE OTHER FIELDS BELONGING TO THESE
1213          * ENTRIES ARE MEANINGFUL UNTIL AFTER AN ENTRY'S CORRESPONDING
1214          * PARTITION HAS BEEN ACTIVATED.
1215          */
1216         for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1217                 part = &xpc_partitions[partid];
1218
1219                 DBUG_ON((u64) part != L1_CACHE_ALIGN((u64) part));
1220
1221                 part->act_IRQ_rcvd = 0;
1222                 spin_lock_init(&part->act_lock);
1223                 part->act_state = XPC_P_INACTIVE;
1224                 XPC_SET_REASON(part, 0, 0);
1225
1226                 init_timer(&part->disengage_request_timer);
1227                 part->disengage_request_timer.function =
1228                                 xpc_timeout_partition_disengage_request;
1229                 part->disengage_request_timer.data = (unsigned long) part;
1230
1231                 part->setup_state = XPC_P_UNSET;
1232                 init_waitqueue_head(&part->teardown_wq);
1233                 atomic_set(&part->references, 0);
1234         }
1235
1236         /*
1237          * Open up protections for IPI operations (and AMO operations on
1238          * Shub 1.1 systems).
1239          */
1240         xpc_allow_IPI_ops();
1241
1242         /*
1243          * Interrupts being processed will increment this atomic variable and
1244          * awaken the heartbeat thread which will process the interrupts.
1245          */
1246         atomic_set(&xpc_act_IRQ_rcvd, 0);
1247
1248         /*
1249          * This is safe to do before the xpc_hb_checker thread has started
1250          * because the handler releases a wait queue.  If an interrupt is
1251          * received before the thread is waiting, it will not go to sleep,
1252          * but rather immediately process the interrupt.
1253          */
1254         ret = request_irq(SGI_XPC_ACTIVATE, xpc_act_IRQ_handler, 0,
1255                                                         "xpc hb", NULL);
1256         if (ret != 0) {
1257                 dev_err(xpc_part, "can't register ACTIVATE IRQ handler, "
1258                         "errno=%d\n", -ret);
1259
1260                 xpc_restrict_IPI_ops();
1261
1262                 if (xpc_sysctl) {
1263                         unregister_sysctl_table(xpc_sysctl);
1264                 }
1265                 return -EBUSY;
1266         }
1267
1268         /*
1269          * Fill the partition reserved page with the information needed by
1270          * other partitions to discover we are alive and establish initial
1271          * communications.
1272          */
1273         xpc_rsvd_page = xpc_rsvd_page_init();
1274         if (xpc_rsvd_page == NULL) {
1275                 dev_err(xpc_part, "could not setup our reserved page\n");
1276
1277                 free_irq(SGI_XPC_ACTIVATE, NULL);
1278                 xpc_restrict_IPI_ops();
1279
1280                 if (xpc_sysctl) {
1281                         unregister_sysctl_table(xpc_sysctl);
1282                 }
1283                 return -EBUSY;
1284         }
1285
1286
1287         /* add ourselves to the reboot_notifier_list */
1288         ret = register_reboot_notifier(&xpc_reboot_notifier);
1289         if (ret != 0) {
1290                 dev_warn(xpc_part, "can't register reboot notifier\n");
1291         }
1292
1293         /* add ourselves to the die_notifier list (i.e., ia64die_chain) */
1294         ret = register_die_notifier(&xpc_die_notifier);
1295         if (ret != 0) {
1296                 dev_warn(xpc_part, "can't register die notifier\n");
1297         }
1298
1299
1300         /*
1301          * Set the beating to other partitions into motion.  This is
1302          * the last requirement for other partitions' discovery to
1303          * initiate communications with us.
1304          */
1305         init_timer(&xpc_hb_timer);
1306         xpc_hb_timer.function = xpc_hb_beater;
1307         xpc_hb_beater(0);
1308
1309
1310         /*
1311          * The real work-horse behind xpc.  This processes incoming
1312          * interrupts and monitors remote heartbeats.
1313          */
1314         pid = kernel_thread(xpc_hb_checker, NULL, 0);
1315         if (pid < 0) {
1316                 dev_err(xpc_part, "failed while forking hb check thread\n");
1317
1318                 /* indicate to others that our reserved page is uninitialized */
1319                 xpc_rsvd_page->vars_pa = 0;
1320
1321                 /* take ourselves off of the reboot_notifier_list */
1322                 (void) unregister_reboot_notifier(&xpc_reboot_notifier);
1323
1324                 /* take ourselves off of the die_notifier list */
1325                 (void) unregister_die_notifier(&xpc_die_notifier);
1326
1327                 del_timer_sync(&xpc_hb_timer);
1328                 free_irq(SGI_XPC_ACTIVATE, NULL);
1329                 xpc_restrict_IPI_ops();
1330
1331                 if (xpc_sysctl) {
1332                         unregister_sysctl_table(xpc_sysctl);
1333                 }
1334                 return -EBUSY;
1335         }
1336
1337
1338         /*
1339          * Startup a thread that will attempt to discover other partitions to
1340          * activate based on info provided by SAL. This new thread is short
1341          * lived and will exit once discovery is complete.
1342          */
1343         pid = kernel_thread(xpc_initiate_discovery, NULL, 0);
1344         if (pid < 0) {
1345                 dev_err(xpc_part, "failed while forking discovery thread\n");
1346
1347                 /* mark this new thread as a non-starter */
1348                 up(&xpc_discovery_exited);
1349
1350                 xpc_do_exit(xpcUnloading);
1351                 return -EBUSY;
1352         }
1353
1354
1355         /* set the interface to point at XPC's functions */
1356         xpc_set_interface(xpc_initiate_connect, xpc_initiate_disconnect,
1357                           xpc_initiate_allocate, xpc_initiate_send,
1358                           xpc_initiate_send_notify, xpc_initiate_received,
1359                           xpc_initiate_partid_to_nasids);
1360
1361         return 0;
1362 }
1363 module_init(xpc_init);
1364
1365
1366 void __exit
1367 xpc_exit(void)
1368 {
1369         xpc_do_exit(xpcUnloading);
1370 }
1371 module_exit(xpc_exit);
1372
1373
1374 MODULE_AUTHOR("Silicon Graphics, Inc.");
1375 MODULE_DESCRIPTION("Cross Partition Communication (XPC) support");
1376 MODULE_LICENSE("GPL");
1377
1378 module_param(xpc_hb_interval, int, 0);
1379 MODULE_PARM_DESC(xpc_hb_interval, "Number of seconds between "
1380                 "heartbeat increments.");
1381
1382 module_param(xpc_hb_check_interval, int, 0);
1383 MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
1384                 "heartbeat checks.");
1385
1386 module_param(xpc_disengage_request_timelimit, int, 0);
1387 MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
1388                 "for disengage request to complete.");
1389