* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc. All Rights Reserved.
*/
#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/reboot.h>
+#include <linux/completion.h>
#include <asm/sn/intr.h>
#include <asm/sn/sn_sal.h>
#include <asm/kdebug.h>
#include <asm/uaccess.h>
-#include "xpc.h"
+#include <asm/sn/xpc.h>
/* define two XPC debug device structures to be used with dev_dbg() et al */
struct device *xpc_chan = &xpc_chan_dbg_subname;
+static int xpc_kdebug_ignore;
+
+
/* systune related variables for /proc/sys directories */
static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
};
static struct ctl_table_header *xpc_sysctl;
+/* non-zero if any remote partition disengage request was timed out */
+int xpc_disengage_request_timedout;
/* #of IRQs received */
static atomic_t xpc_act_IRQ_rcvd;
static unsigned long xpc_hb_check_timeout;
/* notification that the xpc_hb_checker thread has exited */
-static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited);
+static DECLARE_COMPLETION(xpc_hb_checker_exited);
/* notification that the xpc_discovery thread has exited */
-static DECLARE_MUTEX_LOCKED(xpc_discovery_exited);
+static DECLARE_COMPLETION(xpc_discovery_exited);
static struct timer_list xpc_hb_timer;
/* mark this thread as having exited */
- up(&xpc_hb_checker_exited);
+ complete(&xpc_hb_checker_exited);
return 0;
}
dev_dbg(xpc_part, "discovery thread is exiting\n");
/* mark this thread as having exited */
- up(&xpc_discovery_exited);
+ complete(&xpc_discovery_exited);
return 0;
}
spin_lock_irqsave(&part->act_lock, irq_flags);
- pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0);
-
DBUG_ON(part->act_state != XPC_P_INACTIVE);
- if (pid > 0) {
- part->act_state = XPC_P_ACTIVATION_REQ;
- XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
- } else {
- XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
- }
+ part->act_state = XPC_P_ACTIVATION_REQ;
+ XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+ pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0);
+
+ if (unlikely(pid <= 0)) {
+ spin_lock_irqsave(&part->act_lock, irq_flags);
+ part->act_state = XPC_P_INACTIVE;
+ XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
+ spin_unlock_irqrestore(&part->act_lock, irq_flags);
+ }
}
/* let registerer know that connection has been established */
spin_lock_irqsave(&ch->lock, irq_flags);
- if (!(ch->flags & XPC_C_CONNECTCALLOUT)) {
- ch->flags |= XPC_C_CONNECTCALLOUT;
+ if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
+ ch->flags |= XPC_C_CONNECTEDCALLOUT;
spin_unlock_irqrestore(&ch->lock, irq_flags);
xpc_connected_callout(ch);
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
+
/*
* It is possible that while the callout was being
* made that the remote partition sent some messages.
if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
spin_lock_irqsave(&ch->lock, irq_flags);
- if ((ch->flags & XPC_C_CONNECTCALLOUT) &&
- !(ch->flags & XPC_C_DISCONNECTCALLOUT)) {
- ch->flags |= XPC_C_DISCONNECTCALLOUT;
+ if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+ !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+ ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
spin_unlock_irqrestore(&ch->lock, irq_flags);
- xpc_disconnecting_callout(ch);
- } else {
- spin_unlock_irqrestore(&ch->lock, irq_flags);
+ xpc_disconnect_callout(ch, xpcDisconnecting);
+
+ spin_lock_irqsave(&ch->lock, irq_flags);
+ ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
}
+ spin_unlock_irqrestore(&ch->lock, irq_flags);
if (atomic_dec_return(&part->nchannels_engaged) == 0) {
xpc_mark_partition_disengaged(part);
xpc_IPI_send_disengage(part);
continue;
}
- (void) down(&ch->wdisconnect_sema);
+ wait_for_completion(&ch->wdisconnect_wait);
spin_lock_irqsave(&ch->lock, irq_flags);
DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
xpc_do_exit(enum xpc_retval reason)
{
partid_t partid;
- int active_part_count;
+ int active_part_count, printed_waiting_msg = 0;
struct xpc_partition *part;
- unsigned long printmsg_time;
+ unsigned long printmsg_time, disengage_request_timeout = 0;
/* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
free_irq(SGI_XPC_ACTIVATE, NULL);
/* wait for the discovery thread to exit */
- down(&xpc_discovery_exited);
+ wait_for_completion(&xpc_discovery_exited);
/* wait for the heartbeat checker thread to exit */
- down(&xpc_hb_checker_exited);
+ wait_for_completion(&xpc_hb_checker_exited);
/* sleep for a 1/3 of a second or so */
/* wait for all partitions to become inactive */
- printmsg_time = jiffies;
+ printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+ xpc_disengage_request_timedout = 0;
do {
active_part_count = 0;
active_part_count++;
XPC_DEACTIVATE_PARTITION(part, reason);
- }
- if (active_part_count == 0) {
- break;
+ if (part->disengage_request_timeout >
+ disengage_request_timeout) {
+ disengage_request_timeout =
+ part->disengage_request_timeout;
+ }
}
- if (jiffies >= printmsg_time) {
- dev_info(xpc_part, "waiting for partitions to "
- "deactivate/disengage, active count=%d, remote "
- "engaged=0x%lx\n", active_part_count,
- xpc_partition_engaged(1UL << partid));
-
- printmsg_time = jiffies +
+ if (xpc_partition_engaged(-1UL)) {
+ if (time_after(jiffies, printmsg_time)) {
+ dev_info(xpc_part, "waiting for remote "
+ "partitions to disengage, timeout in "
+ "%ld seconds\n",
+ (disengage_request_timeout - jiffies)
+ / HZ);
+ printmsg_time = jiffies +
(XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+ printed_waiting_msg = 1;
+ }
+
+ } else if (active_part_count > 0) {
+ if (printed_waiting_msg) {
+ dev_info(xpc_part, "waiting for local partition"
+ " to disengage\n");
+ printed_waiting_msg = 0;
+ }
+
+ } else {
+ if (!xpc_disengage_request_timedout) {
+ dev_info(xpc_part, "all partitions have "
+ "disengaged\n");
+ }
+ break;
}
/* sleep for a 1/3 of a second or so */
del_timer_sync(&xpc_hb_timer);
DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
- /* take ourselves off of the reboot_notifier_list */
- (void) unregister_reboot_notifier(&xpc_reboot_notifier);
+ if (reason == xpcUnloading) {
+ /* take ourselves off of the reboot_notifier_list */
+ (void) unregister_reboot_notifier(&xpc_reboot_notifier);
- /* take ourselves off of the die_notifier list */
- (void) unregister_die_notifier(&xpc_die_notifier);
+ /* take ourselves off of the die_notifier list */
+ (void) unregister_die_notifier(&xpc_die_notifier);
+ }
/* close down protections for IPI operations */
xpc_restrict_IPI_ops();
/*
- * Called when the system is about to be either restarted or halted.
+ * This function is called when the system is being rebooted.
+ */
+static int
+xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
+{
+ enum xpc_retval reason;
+
+
+ switch (event) {
+ case SYS_RESTART:
+ reason = xpcSystemReboot;
+ break;
+ case SYS_HALT:
+ reason = xpcSystemHalt;
+ break;
+ case SYS_POWER_OFF:
+ reason = xpcSystemPoweroff;
+ break;
+ default:
+ reason = xpcSystemGoingDown;
+ }
+
+ xpc_do_exit(reason);
+ return NOTIFY_DONE;
+}
+
+
+/*
+ * Notify other partitions to disengage from all references to our memory.
*/
static void
xpc_die_disengage(void)
struct xpc_partition *part;
partid_t partid;
unsigned long engaged;
- long time, print_time, disengage_request_timeout;
+ long time, printmsg_time, disengage_request_timeout;
/* keep xpc_hb_checker thread from doing anything (just in case) */
}
}
- print_time = rtc_time();
- disengage_request_timeout = print_time +
+ time = rtc_time();
+ printmsg_time = time +
+ (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
+ disengage_request_timeout = time +
(xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
/* wait for all other partitions to disengage from us */
- while ((engaged = xpc_partition_engaged(-1UL)) &&
- (time = rtc_time()) < disengage_request_timeout) {
+ while (1) {
+ engaged = xpc_partition_engaged(-1UL);
+ if (!engaged) {
+ dev_info(xpc_part, "all partitions have disengaged\n");
+ break;
+ }
- if (time >= print_time) {
+ time = rtc_time();
+ if (time >= disengage_request_timeout) {
+ for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+ if (engaged & (1UL << partid)) {
+ dev_info(xpc_part, "disengage from "
+ "remote partition %d timed "
+ "out\n", partid);
+ }
+ }
+ break;
+ }
+
+ if (time >= printmsg_time) {
dev_info(xpc_part, "waiting for remote partitions to "
- "disengage, engaged=0x%lx\n", engaged);
- print_time = time + (XPC_DISENGAGE_PRINTMSG_INTERVAL *
+ "disengage, timeout in %ld seconds\n",
+ (disengage_request_timeout - time) /
+ sn_rtc_cycles_per_second);
+ printmsg_time = time +
+ (XPC_DISENGAGE_PRINTMSG_INTERVAL *
sn_rtc_cycles_per_second);
}
}
- dev_info(xpc_part, "finished waiting for remote partitions to "
- "disengage, engaged=0x%lx\n", engaged);
}
/*
- * This function is called when the system is being rebooted.
- */
-static int
-xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
-{
- enum xpc_retval reason;
-
-
- switch (event) {
- case SYS_RESTART:
- reason = xpcSystemReboot;
- break;
- case SYS_HALT:
- reason = xpcSystemHalt;
- break;
- case SYS_POWER_OFF:
- reason = xpcSystemPoweroff;
- break;
- default:
- reason = xpcSystemGoingDown;
- }
-
- xpc_do_exit(reason);
- return NOTIFY_DONE;
-}
-
-
-/*
- * This function is called when the system is being rebooted.
+ * This function is called when the system is being restarted or halted due
+ * to some sort of system failure. If this is the case we need to notify the
+ * other partitions to disengage from all references to our memory.
+ * This function can also be called when our heartbeater could be offlined
+ * for a time. In this case we need to notify other partitions to not worry
+ * about the lack of a heartbeat.
*/
static int
xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
case DIE_MACHINE_HALT:
xpc_die_disengage();
break;
+
+ case DIE_KDEBUG_ENTER:
+ /* Should lack of heartbeat be ignored by other partitions? */
+ if (!xpc_kdebug_ignore) {
+ break;
+ }
+ /* fall through */
case DIE_MCA_MONARCH_ENTER:
case DIE_INIT_MONARCH_ENTER:
xpc_vars->heartbeat++;
xpc_vars->heartbeat_offline = 1;
break;
+
+ case DIE_KDEBUG_LEAVE:
+ /* Is lack of heartbeat being ignored by other partitions? */
+ if (!xpc_kdebug_ignore) {
+ break;
+ }
+ /* fall through */
case DIE_MCA_MONARCH_LEAVE:
case DIE_INIT_MONARCH_LEAVE:
xpc_vars->heartbeat++;
dev_err(xpc_part, "failed while forking discovery thread\n");
/* mark this new thread as a non-starter */
- up(&xpc_discovery_exited);
+ complete(&xpc_discovery_exited);
xpc_do_exit(xpcUnloading);
return -EBUSY;
MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
"for disengage request to complete.");
+module_param(xpc_kdebug_ignore, int, 0);
+MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
+ "other partitions when dropping into kdebug.");
+