2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
6 * Copyright (C) 1992 - 1997, 2000-2002 Silicon Graphics, Inc. All rights reserved.
8 #ifndef _ASM_IA64_SN_IOERROR_HANDLING_H
9 #define _ASM_IA64_SN_IOERROR_HANDLING_H
11 #include <linux/config.h>
12 #include <linux/types.h>
13 #include <linux/devfs_fs_kernel.h>
14 #include <asm/sn/sgi.h>
19 * Basic types required for io error handling interfaces.
23 * Return code from the io error handling interfaces.
26 enum error_return_code_e {
28 ERROR_RETURN_CODE_SUCCESS,
31 ERROR_RETURN_CODE_GENERAL_FAILURE,
33 /* Nth error noticed while handling the first error */
34 ERROR_RETURN_CODE_NESTED_CALL,
36 /* State of the vertex is invalid */
37 ERROR_RETURN_CODE_INVALID_STATE,
40 ERROR_RETURN_CODE_INVALID_ACTION,
42 /* Valid action but cannot set it */
43 ERROR_RETURN_CODE_CANNOT_SET_ACTION,
45 /* Valid action but not possible for the current state */
46 ERROR_RETURN_CODE_CANNOT_PERFORM_ACTION,
48 /* Valid state but cannot change the state of the vertex to it */
49 ERROR_RETURN_CODE_CANNOT_SET_STATE,
52 ERROR_RETURN_CODE_DUPLICATE,
54 /* Reached the root of the system critical graph */
55 ERROR_RETURN_CODE_SYS_CRITICAL_GRAPH_BEGIN,
57 /* Reached the leaf of the system critical graph */
58 ERROR_RETURN_CODE_SYS_CRITICAL_GRAPH_ADD,
60 /* Cannot shutdown the device in hw/sw */
61 ERROR_RETURN_CODE_SHUTDOWN_FAILED,
63 /* Cannot restart the device in hw/sw */
64 ERROR_RETURN_CODE_RESET_FAILED,
66 /* Cannot failover the io subsystem */
67 ERROR_RETURN_CODE_FAILOVER_FAILED,
69 /* No Jump Buffer exists */
70 ERROR_RETURN_CODE_NO_JUMP_BUFFER
73 typedef uint64_t error_return_code_t;
76 * State of the vertex during error handling.
85 /* Trying to decipher the error bits */
88 /* Trying to carry out the action decided upon after
89 * looking at the error bits
93 /* Do not allow any other operations to this vertex from
94 * other parts of the kernel. This is also used to indicate
95 * that the device has been software shutdown.
99 /* This is a transitory state when no new requests are accepted
100 * on behalf of the device. This is usually used when trying to
101 * quiesce all the outstanding operations and preparing the
102 * device for a failover / shutdown etc.
104 ERROR_STATE_SHUTDOWN_IN_PROGRESS,
106 /* This is the state when there is absolutely no activity going
109 ERROR_STATE_SHUTDOWN_COMPLETE,
111 /* This is the state when the device has issued a retry. */
114 /* This is the normal state. This can also be used to indicate
115 * that the device has been software-enabled after software-
116 * shutting down previously.
122 typedef uint64_t error_state_t;
125 * Generic error classes. This is used to classify errors after looking
126 * at the error bits and helpful in deciding on the action.
129 /* Unclassified error */
132 /* LLP transmit error */
133 ERROR_CLASS_LLP_XMIT,
135 /* LLP receive error */
136 ERROR_CLASS_LLP_RECV,
147 /* System coherency error */
148 ERROR_CLASS_SYS_COHERENCY,
150 /* Bad data error (ecc / parity etc) */
151 ERROR_CLASS_BAD_DATA,
153 /* Illegal request packet */
154 ERROR_CLASS_BAD_REQ_PKT,
156 /* Illegal response packet */
157 ERROR_CLASS_BAD_RESP_PKT
160 typedef uint64_t error_class_t;
164 * Error context which the error action can use.
166 typedef void *error_context_t;
167 #define ERROR_CONTEXT_IGNORE ((error_context_t)-1ll)
173 typedef error_return_code_t (*error_action_f)( error_context_t);
174 #define ERROR_ACTION_IGNORE ((error_action_f)-1ll)
176 /* Typical set of error actions */
177 typedef struct error_action_set_s {
178 error_action_f eas_panic;
179 error_action_f eas_shutdown;
180 error_action_f eas_abort;
181 error_action_f eas_retry;
182 error_action_f eas_failover;
183 error_action_f eas_log_n_ignore;
184 error_action_f eas_reset;
185 } error_action_set_t;
188 /* Set of priorities in case multiple error actions/states
189 * are trying to be prescribed for a device.
190 * NOTE : The ordering below encapsulates the priorities. Highest value
191 * corresponds to highest priority.
193 enum error_priority_e {
194 ERROR_PRIORITY_IGNORE,
196 ERROR_PRIORITY_NORMAL,
198 ERROR_PRIORITY_FAILOVER,
199 ERROR_PRIORITY_RETRY,
200 ERROR_PRIORITY_ABORT,
201 ERROR_PRIORITY_SHUTDOWN,
202 ERROR_PRIORITY_RESTART,
206 typedef uint64_t error_priority_t;
208 /* Error state interfaces */
209 #if defined(CONFIG_SGI_IO_ERROR_HANDLING)
210 extern error_return_code_t error_state_set(devfs_handle_t,error_state_t);
211 extern error_state_t error_state_get(devfs_handle_t);
214 /* System critical graph interfaces */
216 extern boolean_t is_sys_critical_vertex(devfs_handle_t);
217 extern devfs_handle_t sys_critical_first_child_get(devfs_handle_t);
218 extern devfs_handle_t sys_critical_next_child_get(devfs_handle_t);
219 extern devfs_handle_t sys_critical_parent_get(devfs_handle_t);
220 extern error_return_code_t sys_critical_graph_vertex_add(devfs_handle_t,
223 /* Error action interfaces */
225 extern error_return_code_t error_action_set(devfs_handle_t,
229 extern error_return_code_t error_action_perform(devfs_handle_t);
232 #define INFO_LBL_ERROR_SKIP_ENV "error_skip_env"
/*
 * v_error_skip_env_get(v, l) - look up the INFO_LBL_ERROR_SKIP_ENV label
 * on vertex v through hwgraph_info_get_LBL().
 *
 * v: vertex handle passed straight through to hwgraph_info_get_LBL().
 * l: an lvalue; its address is cast to (arbitrary_info_t *) and handed to
 *    hwgraph_info_get_LBL() as the place to store the looked-up value.
 *
 * Evaluates to whatever hwgraph_info_get_LBL() returns.  Both parameters
 * are parenthesized so the expansion does not depend on the precedence of
 * the caller's argument expressions.
 */
#define v_error_skip_env_get(v, l)		\
	hwgraph_info_get_LBL((v), INFO_LBL_ERROR_SKIP_ENV, (arbitrary_info_t *)&(l))
237 #define v_error_skip_env_set(v, l, r) \
239 hwgraph_info_replace_LBL(v, INFO_LBL_ERROR_SKIP_ENV, (arbitrary_info_t)l,0) :\
240 hwgraph_info_add_LBL(v, INFO_LBL_ERROR_SKIP_ENV, (arbitrary_info_t)l))
/*
 * v_error_skip_env_clear(v) - remove the INFO_LBL_ERROR_SKIP_ENV label
 * from vertex v through hwgraph_info_remove_LBL(), passing 0 as the last
 * argument.
 *
 * Evaluates to whatever hwgraph_info_remove_LBL() returns.  The parameter
 * is parenthesized for macro hygiene.
 */
#define v_error_skip_env_clear(v)		\
	hwgraph_info_remove_LBL((v), INFO_LBL_ERROR_SKIP_ENV, 0)
245 /* Skip point interfaces */
246 extern error_return_code_t error_skip_point_jump(devfs_handle_t, boolean_t);
247 extern error_return_code_t error_skip_point_clear(devfs_handle_t);
250 #if defined(CONFIG_SGI_IO_ERROR_HANDLING)
253 error_skip_point_mark(devfs_handle_t v)
255 label_t *error_env = NULL;
258 /* Check if we have a valid hwgraph vertex */
260 if (!dev_is_vertex(v))
264 /* There is no error jump buffer for this device vertex. Allocate
267 if (v_error_skip_env_get(v, error_env) != GRAPH_SUCCESS) {
268 error_env = snia_kmem_zalloc(sizeof(label_t), KM_NOSLEEP);
269 /* Unable to allocate memory for jump buffer. This should
270 * be a very rare occurrence.
274 /* Store the jump buffer information on the vertex.*/
275 if (v_error_skip_env_set(v, error_env, 0) != GRAPH_SUCCESS)
278 ASSERT(v_error_skip_env_get(v, error_env) == GRAPH_SUCCESS);
279 code = setjmp(*error_env);
282 #endif /* CONFIG_SGI_IO_ERROR_HANDLING */
284 typedef uint64_t counter_t;
286 extern counter_t error_retry_count_get(devfs_handle_t);
287 extern error_return_code_t error_retry_count_set(devfs_handle_t,counter_t);
288 extern counter_t error_retry_count_increment(devfs_handle_t);
289 extern counter_t error_retry_count_decrement(devfs_handle_t);
291 /* Except for the PIO Read error typically the other errors are handled in
292 * the context of an asynchronous error interrupt.
/*
 * IS_ERROR_INTR_CONTEXT(_ec) - nonzero when error code _ec has any
 * IOECODE_DMA bit set, or is exactly equal to IOECODE_PIO_WRITE.
 *
 * _ec is parenthesized at each use so that a compound argument such as
 * (a | b) is tested as a whole; without the parentheses '&' and '=='
 * would bind to only the last operand of the argument.
 * Note: _ec is evaluated once, or twice when the DMA test is false, so
 * do not pass an expression with side effects.
 */
#define IS_ERROR_INTR_CONTEXT(_ec)	(((_ec) & IOECODE_DMA) || \
					 ((_ec) == IOECODE_PIO_WRITE))
297 /* Some convenience macros on device state. This state is accessed only
298 * through calls to the io error handling layer.
300 #if defined(CONFIG_SGI_IO_ERROR_HANDLING)
301 extern boolean_t is_device_shutdown(devfs_handle_t);
302 #define IS_DEVICE_SHUTDOWN(_d) (is_device_shutdown(_d))
305 #endif /* __KERNEL__ */
306 #endif /* _ASM_IA64_SN_IOERROR_HANDLING_H */