/* linux-2.4: drivers/message/i2o/i2o_block.c */
1 /*
2  * I2O Random Block Storage Class OSM
3  *
4  * (C) Copyright 1999 Red Hat Software
5  *      
6  * Written by Alan Cox, Building Number Three Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License
10  * as published by the Free Software Foundation; either version
11  * 2 of the License, or (at your option) any later version.
12  *
13  * This is a beta test release. Most of the good code was taken
14  * from the nbd driver by Pavel Machek, who in turn took some of it
15  * from loop.c. Isn't free software great for reusability 8)
16  *
17  * Fixes/additions:
18  *      Steve Ralston:  
19  *              Multiple device handling error fixes,
20  *              Added a queue depth.
21  *      Alan Cox:       
22  *              FC920 has an rmw bug. Dont or in the end marker.
23  *              Removed queue walk, fixed for 64bitness.
24  *              Rewrote much of the code over time
25  *              Added indirect block lists
26  *              Handle 64K limits on many controllers
27  *              Don't use indirects on the Promise (breaks)
28  *              Heavily chop down the queue depths
29  *      Deepak Saxena:
30  *              Independent queues per IOP
31  *              Support for dynamic device creation/deletion
32  *              Code cleanup    
33  *              Support for larger I/Os through merge* functions 
34  *              (taken from DAC960 driver)
35  *      Boji T Kannanthanam:
36  *              Set the I2O Block devices to be detected in increasing 
37  *              order of TIDs during boot.
38  *              Search and set the I2O block device that we boot off from  as
39  *              the first device to be claimed (as /dev/i2o/hda)
40  *              Properly attach/detach I2O gendisk structure from the system
41  *              gendisk list. The I2O block devices now appear in 
42  *              /proc/partitions.
43  *
44  *      To do:
45  *              Serial number scanning to find duplicates for FC multipathing
46  */
47
48 #include <linux/major.h>
49
50 #include <linux/module.h>
51
52 #include <linux/sched.h>
53 #include <linux/fs.h>
54 #include <linux/stat.h>
55 #include <linux/pci.h>
56 #include <linux/errno.h>
57 #include <linux/file.h>
58 #include <linux/ioctl.h>
59 #include <linux/i2o.h>
60 #include <linux/blkdev.h>
61 #include <linux/blkpg.h>
62 #include <linux/slab.h>
63 #include <linux/hdreg.h>
64 #include <linux/spinlock.h>
65
66 #include <linux/notifier.h>
67 #include <linux/reboot.h>
68
69 #include <asm/uaccess.h>
70 #include <asm/semaphore.h>
71 #include <linux/completion.h>
72 #include <asm/io.h>
73 #include <asm/atomic.h>
74 #include <linux/smp_lock.h>
75 #include <linux/wait.h>
76
77 #define MAJOR_NR I2O_MAJOR
78
79 #include <linux/blk.h>
80
81 #define MAX_I2OB        16
82
83 #define MAX_I2OB_DEPTH  8
84 #define MAX_I2OB_RETRIES 4
85
86 //#define DRIVERDEBUG
87 #ifdef DRIVERDEBUG
88 #define DEBUG( s ) printk( s )
89 #else
90 #define DEBUG( s )
91 #endif
92
93 /*
94  * Events that this OSM is interested in
95  */
96 #define I2OB_EVENT_MASK         (I2O_EVT_IND_BSA_VOLUME_LOAD |  \
97                                  I2O_EVT_IND_BSA_VOLUME_UNLOAD | \
98                                  I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ | \
99                                  I2O_EVT_IND_BSA_CAPACITY_CHANGE | \
100                                  I2O_EVT_IND_BSA_SCSI_SMART )
101
102
103 /*
104  * I2O Block Error Codes - should be in a header file really...
105  */
106 #define I2O_BSA_DSC_SUCCESS             0x0000
107 #define I2O_BSA_DSC_MEDIA_ERROR         0x0001
108 #define I2O_BSA_DSC_ACCESS_ERROR        0x0002
109 #define I2O_BSA_DSC_DEVICE_FAILURE      0x0003
110 #define I2O_BSA_DSC_DEVICE_NOT_READY    0x0004
111 #define I2O_BSA_DSC_MEDIA_NOT_PRESENT   0x0005
112 #define I2O_BSA_DSC_MEDIA_LOCKED        0x0006
113 #define I2O_BSA_DSC_MEDIA_FAILURE       0x0007
114 #define I2O_BSA_DSC_PROTOCOL_FAILURE    0x0008
115 #define I2O_BSA_DSC_BUS_FAILURE         0x0009
116 #define I2O_BSA_DSC_ACCESS_VIOLATION    0x000A
117 #define I2O_BSA_DSC_WRITE_PROTECTED     0x000B
118 #define I2O_BSA_DSC_DEVICE_RESET        0x000C
119 #define I2O_BSA_DSC_VOLUME_CHANGED      0x000D
120 #define I2O_BSA_DSC_TIMEOUT             0x000E
121
122 /*
123  *      Some of these can be made smaller later
124  */
125
126 static int i2ob_blksizes[MAX_I2OB<<4];
127 static int i2ob_hardsizes[MAX_I2OB<<4];
128 static int i2ob_sizes[MAX_I2OB<<4];
129 static int i2ob_media_change_flag[MAX_I2OB];
130 static u32 i2ob_max_sectors[MAX_I2OB<<4];
131
132 static int i2ob_context;
133
134 /*
135  * I2O Block device descriptor 
136  */
struct i2ob_device
{
        struct i2o_controller *controller;      /* IOP this volume lives behind */
        struct i2o_device *i2odev;              /* Matching I2O device entry; NULL once deleted */
        int unit;                               /* Device unit number -- presumably minor>>4 base; confirm at setup */
        int tid;                                /* I2O target ID used in message headers */
        int flags;
        int refcnt;                             /* Open reference count */
        struct request *head, *tail;            /* Private request list head/tail */
        request_queue_t *req_queue;             /* Per-IOP request queue this device feeds */
        int max_segments;                       /* SG list length limit used by the merge functions */
        int max_direct;         /* Not yet used properly */
        int done_flag;
        int depth;                              /* Queue depth allowed for this device */
        int rcache;                             /* Read cache policy (CACHE_* value, see i2ob_send) */
        int wcache;                             /* Write cache policy (CACHE_* value, see i2ob_send) */
        int power;
};
155
156 /*
157  *      FIXME:
158  *      We should cache align these to avoid ping-ponging lines on SMP
159  *      boxes under heavy I/O load...
160  */
161
struct i2ob_request
{
        struct i2ob_request *next;      /* Free-list linkage (pushed back in i2ob_unhook_request) */
        struct request *req;            /* Block layer request being serviced */
        int num;                        /* Slot index; sent as TxContext (msg+12) and echoed back in m[3] */
};
168
169 /*
170  * Per IOP requst queue information
171  *
172  * We have a separate requeust_queue_t per IOP so that a heavilly
173  * loaded I2O block device on an IOP does not starve block devices
174  * across all I2O controllers.
175  * 
176  */
struct i2ob_iop_queue
{
        atomic_t queue_depth;           /* Requests currently outstanding on this IOP */
        struct i2ob_request request_queue[MAX_I2OB_DEPTH];      /* Fixed pool of request slots */
        struct i2ob_request *i2ob_qhead;        /* Head of the free-slot list */
        request_queue_t req_queue;      /* Block layer queue owned by this IOP */
};
184 static struct i2ob_iop_queue *i2ob_queues[MAX_I2O_CONTROLLERS];
185
186 /*
187  *      Each I2O disk is one of these.
188  */
189
190 static struct i2ob_device i2ob_dev[MAX_I2OB<<4];
191 static int i2ob_dev_count = 0;
192 static struct hd_struct i2ob[MAX_I2OB<<4];
193 static struct gendisk i2ob_gendisk;     /* Declared later */
194
195 /*
196  * Mutex and spin lock for event handling synchronization
197  * evt_msg contains the last event.
198  */
199 static DECLARE_MUTEX_LOCKED(i2ob_evt_sem);
200 static DECLARE_COMPLETION(i2ob_thread_dead);
201 static spinlock_t i2ob_evt_lock = SPIN_LOCK_UNLOCKED;
202 static u32 evt_msg[MSG_FRAME_SIZE];
203
204 static void i2o_block_reply(struct i2o_handler *, struct i2o_controller *,
205          struct i2o_message *);
206 static void i2ob_new_device(struct i2o_controller *, struct i2o_device *);
207 static void i2ob_del_device(struct i2o_controller *, struct i2o_device *);
208 static void i2ob_reboot_event(void);
209 static int i2ob_install_device(struct i2o_controller *, struct i2o_device *, int);
210 static void i2ob_end_request(struct request *);
211 static void i2ob_request(request_queue_t *);
212 static int i2ob_init_iop(unsigned int);
213 static request_queue_t* i2ob_get_queue(kdev_t);
214 static int i2ob_query_device(struct i2ob_device *, int, int, void*, int);
215 static int do_i2ob_revalidate(kdev_t, int);
216 static int i2ob_evt(void *);
217
218 static int evt_pid = 0;
219 static int evt_running = 0;
220 static int scan_unit = 0;
221
222 /*
223  * I2O OSM registration structure...keeps getting bigger and bigger :)
224  */
static struct i2o_handler i2o_block_handler =
{
        i2o_block_reply,                /* Reply/interrupt handler */
        i2ob_new_device,                /* Device arrival callback */
        i2ob_del_device,                /* Device removal callback */
        i2ob_reboot_event,              /* System reboot notification */
        "I2O Block OSM",                /* OSM name */
        0,                              /* context -- presumably filled in by the I2O core at registration; confirm */
        I2O_CLASS_RANDOM_BLOCK_STORAGE  /* Device class this OSM claims */
};
235
236 /**
237  *      i2ob_get        -       Get an I2O message
238  *      @dev:  I2O block device
239  *
240  *      Get a message from the FIFO used for this block device. The message is returned
241  *      or the I2O 'no message' value of 0xFFFFFFFF if nothing is available.
242  */
243
244 static u32 i2ob_get(struct i2ob_device *dev)
245 {
246         struct i2o_controller *c=dev->controller;
247         return I2O_POST_READ32(c);
248 }
249  
250 /**
251  *      i2ob_send               -       Turn a request into a message and send it
252  *      @m: Message offset
253  *      @dev: I2O device
254  *      @ireq: Request structure
255  *      @base: Partition offset
256  *      @unit: Device identity
257  *
258  *      Generate an I2O BSAREAD request. This interface function is called for devices that
259  *      appear to explode when they are fed indirect chain pointers (notably right now this
260  *      appears to afflict Promise hardwre, so be careful what you feed the hardware
261  *
262  *      No cleanup is done by this interface. It is done on the interrupt side when the
263  *      reply arrives
264  *
265  *      To Fix: Generate PCI maps of the buffers
266  */
267  
static int i2ob_send(u32 m, struct i2ob_device *dev, struct i2ob_request *ireq, u32 base, int unit)
{
        struct i2o_controller *c = dev->controller;
        int tid = dev->tid;
        unsigned long msg;              /* Virtual address of the message frame */
        unsigned long mptr;             /* Running pointer into the SG list */
        u64 offset;
        struct request *req = ireq->req;
        struct buffer_head *bh = req->bh;
        int count = req->nr_sectors<<9; /* Bytes left to map; must hit 0 when done */
        char *last = NULL;              /* End address of the previous contiguous run */
        unsigned short size = 0;        /* Size of the SG entry currently being grown */

        // printk(KERN_INFO "i2ob_send called\n");
        /* Map the message to a virtual address */
        msg = c->mem_offset + m;
        
        /*
         * Build the message based on the request.
         */
        __raw_writel(i2ob_context|(unit<<8), msg+8);    /* InitiatorContext: OSM context + unit */
        __raw_writel(ireq->num, msg+12);                /* TxContext: slot index, echoed in reply m[3] */
        __raw_writel(req->nr_sectors << 9, msg+20);     /* Total transfer length in bytes */

        /* 
         * Mask out partitions from now on
         */
        unit &= 0xF0;
                
        /* This can be optimised later - just want to be sure its right for
           starters */
        offset = ((u64)(req->sector+base)) << 9;        /* 64-bit byte offset on the media */
        __raw_writel( offset & 0xFFFFFFFF, msg+24);
        __raw_writel(offset>>32, msg+28);
        mptr=msg+32;
        
        if(req->cmd == READ)
        {
                DEBUG("READ\n");
                __raw_writel(I2O_CMD_BLOCK_READ<<24|HOST_TID<<12|tid, msg+4);
                while(bh!=NULL)
                {
                        /*
                         * Buffer is physically contiguous with the previous
                         * one: grow the existing SG entry (rewritten at
                         * mptr-8) instead of emitting a new one.
                         * NOTE(review): 0x10000000 appears to flag a normal
                         * SG element and 0xD0000000 the final one -- confirm
                         * against the I2O SGL flag definitions.
                         */
                        if(bh->b_data == last) {
                                size += bh->b_size;
                                last += bh->b_size;
                                if(bh->b_reqnext)
                                        __raw_writel(0x10000000|(size), mptr-8);
                                else
                                        __raw_writel(0xD0000000|(size), mptr-8);
                        }
                        else
                        {
                                /* New SG entry: flags+length word, then bus address */
                                if(bh->b_reqnext)
                                        __raw_writel(0x10000000|(bh->b_size), mptr);
                                else
                                        __raw_writel(0xD0000000|(bh->b_size), mptr);
                                __raw_writel(virt_to_bus(bh->b_data), mptr+4);
                                mptr += 8;      
                                size = bh->b_size;
                                last = bh->b_data + size;
                        }

                        count -= bh->b_size;
                        bh = bh->b_reqnext;
                }
                /* Pick the read cache control word from the device policy */
                switch(dev->rcache)
                {
                        case CACHE_NULL:
                                __raw_writel(0, msg+16);break;
                        case CACHE_PREFETCH:
                                __raw_writel(0x201F0008, msg+16);break;
                        case CACHE_SMARTFETCH:
                                /* Only prefetch for larger (>16 sector) reads */
                                if(req->nr_sectors > 16)
                                        __raw_writel(0x201F0008, msg+16);
                                else
                                        __raw_writel(0x001F0000, msg+16);
                                break;
                }                               
                                
//              printk("Reading %d entries %d bytes.\n",
//                      mptr-msg-8, req->nr_sectors<<9);
        }
        else if(req->cmd == WRITE)
        {
                DEBUG("WRITE\n");
                __raw_writel(I2O_CMD_BLOCK_WRITE<<24|HOST_TID<<12|tid, msg+4);
                while(bh!=NULL)
                {
                        /* Same coalescing scheme as the READ path; write SG
                           entries use the 0x14000000/0xD4000000 flag values. */
                        if(bh->b_data == last) {
                                size += bh->b_size;
                                last += bh->b_size;
                                if(bh->b_reqnext)
                                        __raw_writel(0x14000000|(size), mptr-8);
                                else
                                        __raw_writel(0xD4000000|(size), mptr-8);
                        }
                        else
                        {
                                if(bh->b_reqnext)
                                        __raw_writel(0x14000000|(bh->b_size), mptr);
                                else
                                        __raw_writel(0xD4000000|(bh->b_size), mptr);
                                __raw_writel(virt_to_bus(bh->b_data), mptr+4);
                                mptr += 8;      
                                size = bh->b_size;
                                last = bh->b_data + size;
                        }

                        count -= bh->b_size;
                        bh = bh->b_reqnext;
                }

                /* Pick the write cache control word from the device policy */
                switch(dev->wcache)
                {
                        case CACHE_NULL:
                                __raw_writel(0, msg+16);break;
                        case CACHE_WRITETHROUGH:
                                __raw_writel(0x001F0008, msg+16);break;
                        case CACHE_WRITEBACK:
                                __raw_writel(0x001F0010, msg+16);break;
                        case CACHE_SMARTBACK:
                                /* Writeback only pays off for larger transfers */
                                if(req->nr_sectors > 16)
                                        __raw_writel(0x001F0004, msg+16);
                                else
                                        __raw_writel(0x001F0010, msg+16);
                                break;
                        case CACHE_SMARTTHROUGH:
                                if(req->nr_sectors > 16)
                                        __raw_writel(0x001F0004, msg+16);
                                else
                                        __raw_writel(0x001F0010, msg+16);
                }
                                
//              printk("Writing %d entries %d bytes.\n",
//                      mptr-msg-8, req->nr_sectors<<9);
        }
        /* First word last: total message size plus SG list offset */
        __raw_writel(I2O_MESSAGE_SIZE(mptr-msg)>>2 | SGL_OFFSET_8, msg);
        
        /* Every byte of the request must have been mapped above */
        if(count != 0)
        {
                printk(KERN_ERR "Request count botched by %d.\n", count);
        }

        i2o_post_message(c,m);
        atomic_inc(&i2ob_queues[c->unit]->queue_depth);

        return 0;
}
416
417 /*
418  *      Remove a request from the _locked_ request list. We update both the
419  *      list chain and if this is the last item the tail pointer. Caller
420  *      must hold the lock.
421  */
422  
423 static inline void i2ob_unhook_request(struct i2ob_request *ireq, 
424         unsigned int iop)
425 {
426         ireq->next = i2ob_queues[iop]->i2ob_qhead;
427         i2ob_queues[iop]->i2ob_qhead = ireq;
428 }
429
430 /*
431  *      Request completion handler
432  */
433  
434 static inline void i2ob_end_request(struct request *req)
435 {
436         /* FIXME  - pci unmap the request */
437
438         /*
439          * Loop until all of the buffers that are linked
440          * to this request have been marked updated and
441          * unlocked.
442          */
443
444         while (end_that_request_first( req, !req->errors, "i2o block" ));
445
446         /*
447          * It is now ok to complete the request.
448          */
449         end_that_request_last( req );
450         DEBUG("IO COMPLETED\n");
451 }
452
453 /*
454  * Request merging functions
455  */
456
457 static inline int i2ob_new_segment(request_queue_t *q, struct request *req,
458                                   int __max_segments)
459 {
460         int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments;
461
462         if (__max_segments < max_segments)
463                 max_segments = __max_segments;
464
465         if (req->nr_segments < max_segments) {
466                 req->nr_segments++;
467                 return 1;
468         }
469         return 0;
470 }
471
472 static int i2ob_back_merge(request_queue_t *q, struct request *req, 
473                              struct buffer_head *bh, int __max_segments)
474 {
475         if (req->bhtail->b_data + req->bhtail->b_size == bh->b_data)
476                 return 1;
477         return i2ob_new_segment(q, req, __max_segments);
478 }
479
480 static int i2ob_front_merge(request_queue_t *q, struct request *req, 
481                               struct buffer_head *bh, int __max_segments)
482 {
483         if (bh->b_data + bh->b_size == req->bh->b_data)
484                 return 1;
485         return i2ob_new_segment(q, req, __max_segments);
486 }
487
488 static int i2ob_merge_requests(request_queue_t *q,
489                                 struct request *req,
490                                 struct request *next,
491                                 int __max_segments)
492 {
493         int max_segments = i2ob_dev[MINOR(req->rq_dev)].max_segments;
494         int total_segments = req->nr_segments + next->nr_segments;
495
496         if (__max_segments < max_segments)
497                 max_segments = __max_segments;
498
499         if (req->bhtail->b_data + req->bhtail->b_size == next->bh->b_data)
500                 total_segments--;
501     
502         if (total_segments > max_segments)
503                 return 0;
504
505         req->nr_segments = total_segments;
506         return 1;
507 }
508
509 static int i2ob_flush(struct i2o_controller *c, struct i2ob_device *d, int unit)
510 {
511         unsigned long msg;
512         u32 m = i2ob_get(d);
513         
514         if(m == 0xFFFFFFFF)
515                 return -1;
516                 
517         msg = c->mem_offset + m;
518
519         /*
520          *      Ask the controller to write the cache back. This sorts out
521          *      the supertrak firmware flaw and also does roughly the right
522          *      thing for other cases too.
523          */
524                 
525         i2o_raw_writel(FIVE_WORD_MSG_SIZE|SGL_OFFSET_0, msg);
526         i2o_raw_writel(I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|d->tid, msg+4);
527         i2o_raw_writel(i2ob_context|(unit<<8), msg+8);
528         i2o_raw_writel(0, msg+12);
529         i2o_raw_writel(60<<16, msg+16);
530         DEBUG("FLUSH");
531         i2o_post_message(c,m);
532         return 0;
533 }
534                         
535 /*
536  *      OSM reply handler. This gets all the message replies
537  */
538
539 static void i2o_block_reply(struct i2o_handler *h, struct i2o_controller *c, struct i2o_message *msg)
540 {
541         unsigned long flags;
542         struct i2ob_request *ireq = NULL;
543         u8 st;
544         u32 *m = (u32 *)msg;
545         u8 unit = (m[2]>>8)&0xF0;       /* low 4 bits are partition */
546         struct i2ob_device *dev = &i2ob_dev[(unit&0xF0)];
547
548         /*
549          *      Pull the lock over ready
550          */     
551          
552         spin_lock_prefetch(&io_request_lock);
553                 
554         /*
555          * FAILed message
556          */
557         if(m[0] & (1<<13))
558         {
559                 DEBUG("FAIL");
560                 /*
561                  * FAILed message from controller
562                  * We increment the error count and abort it
563                  *
564                  * In theory this will never happen.  The I2O block class
565                  * specification states that block devices never return
566                  * FAILs but instead use the REQ status field...but
567                  * better be on the safe side since no one really follows
568                  * the spec to the book :)
569                  */
570                 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
571                 ireq->req->errors++;
572
573                 spin_lock_irqsave(&io_request_lock, flags);
574                 i2ob_unhook_request(ireq, c->unit);
575                 i2ob_end_request(ireq->req);
576                 spin_unlock_irqrestore(&io_request_lock, flags);
577         
578                 /* Now flush the message by making it a NOP */
579                 m[0]&=0x00FFFFFF;
580                 m[0]|=(I2O_CMD_UTIL_NOP)<<24;
581                 i2o_post_message(c,virt_to_bus(m));
582
583                 return;
584         }
585
586         if(msg->function == I2O_CMD_UTIL_EVT_REGISTER)
587         {
588                 spin_lock(&i2ob_evt_lock);
589                 memcpy(evt_msg, msg, (m[0]>>16)<<2);
590                 spin_unlock(&i2ob_evt_lock);
591                 up(&i2ob_evt_sem);
592                 return;
593         }
594
595         if(!dev->i2odev)
596         {
597                 /*
598                  * This is HACK, but Intel Integrated RAID allows user
599                  * to delete a volume that is claimed, locked, and in use 
600                  * by the OS. We have to check for a reply from a
601                  * non-existent device and flag it as an error or the system 
602                  * goes kaput...
603                  */
604                 ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
605                 ireq->req->errors++;
606                 printk(KERN_WARNING "I2O Block: Data transfer to deleted device!\n");
607                 spin_lock_irqsave(&io_request_lock, flags);
608                 i2ob_unhook_request(ireq, c->unit);
609                 i2ob_end_request(ireq->req);
610                 spin_unlock_irqrestore(&io_request_lock, flags);
611                 return;
612         }       
613
614         /*
615          *      Lets see what is cooking. We stuffed the
616          *      request in the context.
617          */
618                  
619         ireq=&i2ob_queues[c->unit]->request_queue[m[3]];
620         st=m[4]>>24;
621
622         if(st!=0)
623         {
624                 int err;
625                 char *bsa_errors[] = 
626                 { 
627                         "Success", 
628                         "Media Error", 
629                         "Failure communicating to device",
630                         "Device Failure",
631                         "Device is not ready",
632                         "Media not present",
633                         "Media is locked by another user",
634                         "Media has failed",
635                         "Failure communicating to device",
636                         "Device bus failure",
637                         "Device is locked by another user",
638                         "Device is write protected",
639                         "Device has reset",
640                         "Volume has changed, waiting for acknowledgement"
641                 };
642                                 
643                 err = m[4]&0xFFFF;
644                 
645                 /*
646                  *      Device not ready means two things. One is that the
647                  *      the thing went offline (but not a removal media)
648                  *
649                  *      The second is that you have a SuperTrak 100 and the
650                  *      firmware got constipated. Unlike standard i2o card
651                  *      setups the supertrak returns an error rather than
652                  *      blocking for the timeout in these cases. 
653                  *
654                  *      Don't stick a supertrak100 into cache aggressive modes
655                  */
656                  
657                 
658                 printk(KERN_ERR "\n/dev/%s error: %s", dev->i2odev->dev_name, 
659                         bsa_errors[m[4]&0XFFFF]);
660                 if(m[4]&0x00FF0000)
661                         printk(" - DDM attempted %d retries", (m[4]>>16)&0x00FF );
662                 printk(".\n");
663                 ireq->req->errors++;    
664         }
665         else
666                 ireq->req->errors = 0;
667
668         /*
669          *      Dequeue the request. We use irqsave locks as one day we
670          *      may be running polled controllers from a BH...
671          */
672         
673         spin_lock_irqsave(&io_request_lock, flags);
674         i2ob_unhook_request(ireq, c->unit);
675         i2ob_end_request(ireq->req);
676         atomic_dec(&i2ob_queues[c->unit]->queue_depth);
677         
678         /*
679          *      We may be able to do more I/O
680          */
681          
682         i2ob_request(dev->req_queue);
683         spin_unlock_irqrestore(&io_request_lock, flags);
684 }
685
686 /* 
687  * Event handler.  Needs to be a separate thread b/c we may have
688  * to do things like scan a partition table, or query parameters
689  * which cannot be done from an interrupt or from a bottom half.
690  */
691 static int i2ob_evt(void *dummy)
692 {
693         unsigned int evt;
694         unsigned long flags;
695         int unit;
696         int i;
697         //The only event that has data is the SCSI_SMART event.
698         struct i2o_reply {
699                 u32 header[4];
700                 u32 evt_indicator;
701                 u8 ASC;
702                 u8 ASCQ;
703                 u16 pad;
704                 u8 data[16];
705                 } *evt_local;
706
707         lock_kernel();
708         daemonize();
709         unlock_kernel();
710
711         strcpy(current->comm, "i2oblock");
712         evt_running = 1;
713
714         while(1)
715         {
716                 if(down_interruptible(&i2ob_evt_sem))
717                 {
718                         evt_running = 0;
719                         printk("exiting...");
720                         break;
721                 }
722
723                 /*
724                  * Keep another CPU/interrupt from overwriting the 
725                  * message while we're reading it
726                  *
727                  * We stuffed the unit in the TxContext and grab the event mask
728                  * None of the BSA we care about events have EventData
729                  */
730                 spin_lock_irqsave(&i2ob_evt_lock, flags);
731                 evt_local = (struct i2o_reply *)evt_msg;
732                 spin_unlock_irqrestore(&i2ob_evt_lock, flags);
733
734                 unit = le32_to_cpu(evt_local->header[3]);
735                 evt = le32_to_cpu(evt_local->evt_indicator);
736
737                 switch(evt)
738                 {
739                         /*
740                          * New volume loaded on same TID, so we just re-install.
741                          * The TID/controller don't change as it is the same
742                          * I2O device.  It's just new media that we have to
743                          * rescan.
744                          */
745                         case I2O_EVT_IND_BSA_VOLUME_LOAD:
746                         {
747                                 i2ob_install_device(i2ob_dev[unit].i2odev->controller, 
748                                         i2ob_dev[unit].i2odev, unit);
749                                 break;
750                         }
751
752                         /*
753                          * No media, so set all parameters to 0 and set the media
754                          * change flag. The I2O device is still valid, just doesn't
755                          * have media, so we don't want to clear the controller or
756                          * device pointer.
757                          */
758                         case I2O_EVT_IND_BSA_VOLUME_UNLOAD:
759                         {
760                                 for(i = unit; i <= unit+15; i++)
761                                 {
762                                         i2ob_sizes[i] = 0;
763                                         i2ob_hardsizes[i] = 0;
764                                         i2ob_max_sectors[i] = 0;
765                                         i2ob[i].nr_sects = 0;
766                                         i2ob_gendisk.part[i].nr_sects = 0;
767                                 }
768                                 i2ob_media_change_flag[unit] = 1;
769                                 break;
770                         }
771
772                         case I2O_EVT_IND_BSA_VOLUME_UNLOAD_REQ:
773                                 printk(KERN_WARNING "%s: Attempt to eject locked media\n", 
774                                         i2ob_dev[unit].i2odev->dev_name);
775                                 break;
776
777                         /*
778                          * The capacity has changed and we are going to be
779                          * updating the max_sectors and other information 
780                          * about this disk.  We try a revalidate first. If
781                          * the block device is in use, we don't want to
782                          * do that as there may be I/Os bound for the disk
783                          * at the moment.  In that case we read the size 
784                          * from the device and update the information ourselves
785                          * and the user can later force a partition table
786                          * update through an ioctl.
787                          */
788                         case I2O_EVT_IND_BSA_CAPACITY_CHANGE:
789                         {
790                                 u64 size;
791
792                                 if(do_i2ob_revalidate(MKDEV(MAJOR_NR, unit),0) != -EBUSY)
793                                         continue;
794
795                                 if(i2ob_query_device(&i2ob_dev[unit], 0x0004, 0, &size, 8) !=0 )
796                                         i2ob_query_device(&i2ob_dev[unit], 0x0000, 4, &size, 8);
797
798                                 spin_lock_irqsave(&io_request_lock, flags);     
799                                 i2ob_sizes[unit] = (int)(size>>10);
800                                 i2ob_gendisk.part[unit].nr_sects = size>>9;
801                                 i2ob[unit].nr_sects = (int)(size>>9);
802                                 spin_unlock_irqrestore(&io_request_lock, flags);        
803                                 break;
804                         }
805
806                         /* 
807                          * We got a SCSI SMART event, we just log the relevant
808                          * information and let the user decide what they want
809                          * to do with the information.
810                          */
811                         case I2O_EVT_IND_BSA_SCSI_SMART:
812                         {
813                                 char buf[16];
814                                 printk(KERN_INFO "I2O Block: %s received a SCSI SMART Event\n",i2ob_dev[unit].i2odev->dev_name);
815                                 evt_local->data[16]='\0';
816                                 sprintf(buf,"%s",&evt_local->data[0]);
817                                 printk(KERN_INFO "      Disk Serial#:%s\n",buf);
818                                 printk(KERN_INFO "      ASC 0x%02x \n",evt_local->ASC);
819                                 printk(KERN_INFO "      ASCQ 0x%02x \n",evt_local->ASCQ);
820                                 break;
821                         }
822                 
823                         /*
824                          *      Non event
825                          */
826                          
827                         case 0:
828                                 break;
829                                 
830                         /*
831                          * An event we didn't ask for.  Call the card manufacturer
832                          * and tell them to fix their firmware :)
833                          */
834                          
835                         case 0x20:
836                                 /*
837                                  * If a promise card reports 0x20 event then the brown stuff
838                                  * hit the fan big time. The card seems to recover but loses
839                                  * the pending writes. Deeply ungood except for testing fsck
840                                  */
841                                 if(i2ob_dev[unit].i2odev->controller->bus.pci.promise)
842                                         panic("I2O controller firmware failed. Reboot and force a filesystem check.\n");
843                         default:
844                                 printk(KERN_INFO "%s: Received event 0x%X we didn't register for\n"
845                                         KERN_INFO "   Blame the I2O card manufacturer 8)\n", 
846                                         i2ob_dev[unit].i2odev->dev_name, evt);
847                                 break;
848                 }
849         };
850
851         complete_and_exit(&i2ob_thread_dead,0);
852         return 0;
853 }
854
/*
 *	The I2O block driver is listed as one of those that pulls the
 *	front entry off the queue before processing it. This is important
 *	to remember here. If we drop the io lock then CURRENT will change
 *	on us. We must unlink CURRENT in this routine before we return, if
 *	we use it.
 *
 *	Called with io_request_lock held. Moves as many requests as the
 *	per-IOP queue depth allows from the kernel request queue onto our
 *	private queue and posts each one to the controller. Stops early
 *	when the depth limit is hit or no message frame is available.
 */

static void i2ob_request(request_queue_t *q)
{
	struct request *req;
	struct i2ob_request *ireq;
	int unit;
	struct i2ob_device *dev;
	u32 m;
	
	while (!list_empty(&q->queue_head)) {
		/*
		 *	On an IRQ completion if there is an inactive
		 *	request on the queue head it means it isnt yet
		 *	ready to dispatch.
		 */
		req = blkdev_entry_next_request(&q->queue_head);

		if(req->rq_status == RQ_INACTIVE)
			return;
			
		/* 16 minors per unit; the high nibble selects the device */
		unit = MINOR(req->rq_dev);
		dev = &i2ob_dev[(unit&0xF0)];

		/* 
		 *	Queue depths probably belong with some kind of 
		 *	generic IOP commit control. Certainly its not right 
		 *	its global!  
		 */
		if(atomic_read(&i2ob_queues[dev->unit]->queue_depth) >= dev->depth)
			break;
		
		/* Get a message frame from the controller */
		m = i2ob_get(dev);

		if(m==0xFFFFFFFF)
		{
			/*
			 * No frame and nothing in flight whose completion
			 * would restart us later - that should not happen.
			 */
			if(atomic_read(&i2ob_queues[dev->unit]->queue_depth) == 0)
				printk(KERN_ERR "i2o_block: message queue and request queue empty!!\n");
			break;
		}
		/*
		 * Everything ok, so pull from kernel queue onto our queue
		 */
		req->errors = 0;
		blkdev_dequeue_request(req);	
		req->waiting = NULL;
		
		/* Take a slot from the per-IOP request free list */
		ireq = i2ob_queues[dev->unit]->i2ob_qhead;
		i2ob_queues[dev->unit]->i2ob_qhead = ireq->next;
		ireq->req = req;

		/* Post the I/O; the partition start offset comes from i2ob[] */
		i2ob_send(m, dev, ireq, i2ob[unit].start_sect, (unit&0xF0));
	}
}
916
917
918 /*
919  *      SCSI-CAM for ioctl geometry mapping
920  *      Duplicated with SCSI - this should be moved into somewhere common
921  *      perhaps genhd ?
922  *
923  * LBA -> CHS mapping table taken from:
924  *
925  * "Incorporating the I2O Architecture into BIOS for Intel Architecture 
926  *  Platforms" 
927  *
928  * This is an I2O document that is only available to I2O members,
929  * not developers.
930  *
931  * From my understanding, this is how all the I2O cards do this
932  *
 * Disk Size        | Sectors | Heads | Cylinders
 * -----------------+---------+-------+--------------------
 * 1 < X <= 528M    | 63      | 16    | X/(63 * 16 * 512)
 * 528M < X <= 1G   | 63      | 32    | X/(63 * 32 * 512)
 * 1G < X <= 21G    | 63      | 64    | X/(63 * 64 * 512)
 * 21G < X <= 42G   | 63      | 128   | X/(63 * 128 * 512)
 * X > 42G          | 63      | 255   | X/(63 * 255 * 512)
 *
940  */
#define BLOCK_SIZE_528M		1081344
#define BLOCK_SIZE_1G		2097152
#define BLOCK_SIZE_21G		4403200
#define BLOCK_SIZE_42G		8806400
#define BLOCK_SIZE_84G		17612800

/*
 *	Map a capacity (in 512-byte sectors) to a fake CHS geometry the
 *	same way the I2O BIOS does: sectors/track is always 63, the head
 *	count steps up with capacity, and cylinders take the remainder.
 */
static void i2o_block_biosparam(
	unsigned long capacity,
	unsigned short *cyls,
	unsigned char *hds,
	unsigned char *secs) 
{ 
	/* Capacity thresholds and the head count used up to each one */
	static const unsigned long limit[4] = {
		BLOCK_SIZE_528M, BLOCK_SIZE_1G, BLOCK_SIZE_21G, BLOCK_SIZE_42G
	};
	static const unsigned long nheads[4] = { 16, 32, 64, 128 };
	unsigned long heads = 255;		/* default: beyond 42G */
	const unsigned long sectors = 63;	/* maximize sectors per track */
	int band;

	for (band = 0; band < 4; band++) {
		if (capacity <= limit[band]) {
			heads = nheads[band];
			break;
		}
	}

	*secs = (unsigned char) sectors;
	*hds  = (unsigned char) heads;
	*cyls = (unsigned short) (capacity / (heads * sectors));
}
973
974
975 /*
976  *      Rescan the partition tables
977  */
978  
979 static int do_i2ob_revalidate(kdev_t dev, int maxu)
980 {
981         int minor=MINOR(dev);
982         int i;
983         
984         minor&=0xF0;
985
986         i2ob_dev[minor].refcnt++;
987         if(i2ob_dev[minor].refcnt>maxu+1)
988         {
989                 i2ob_dev[minor].refcnt--;
990                 return -EBUSY;
991         }
992         
993         for( i = 15; i>=0 ; i--)
994         {
995                 int m = minor+i;
996                 invalidate_device(MKDEV(MAJOR_NR, m), 1);
997                 i2ob_gendisk.part[m].start_sect = 0;
998                 i2ob_gendisk.part[m].nr_sects = 0;
999         }
1000
1001         /*
1002          *      Do a physical check and then reconfigure
1003          */
1004          
1005         i2ob_install_device(i2ob_dev[minor].controller, i2ob_dev[minor].i2odev,
1006                 minor);
1007         i2ob_dev[minor].refcnt--;
1008         return 0;
1009 }
1010
1011 /*
1012  *      Issue device specific ioctl calls.
1013  */
1014
1015 static int i2ob_ioctl(struct inode *inode, struct file *file,
1016                      unsigned int cmd, unsigned long arg)
1017 {
1018         struct i2ob_device *dev;
1019         int minor;
1020
1021         /* Anyone capable of this syscall can do *real bad* things */
1022
1023         if (!capable(CAP_SYS_ADMIN))
1024                 return -EPERM;
1025         if (!inode)
1026                 return -EINVAL;
1027         minor = MINOR(inode->i_rdev);
1028         if (minor >= (MAX_I2OB<<4))
1029                 return -ENODEV;
1030
1031         dev = &i2ob_dev[minor];
1032         switch (cmd) {
1033                 case HDIO_GETGEO:
1034                 {
1035                         struct hd_geometry g;
1036                         int u=minor&0xF0;
1037                         i2o_block_biosparam(i2ob_sizes[u]<<1, 
1038                                 &g.cylinders, &g.heads, &g.sectors);
1039                         g.start = i2ob[minor].start_sect;
1040                         return copy_to_user((void *)arg,&g, sizeof(g))?-EFAULT:0;
1041                 }
1042                 
1043                 case BLKI2OGRSTRAT:
1044                         return put_user(dev->rcache, (int *)arg);
1045                 case BLKI2OGWSTRAT:
1046                         return put_user(dev->wcache, (int *)arg);
1047                 case BLKI2OSRSTRAT:
1048                         if(arg<0||arg>CACHE_SMARTFETCH)
1049                                 return -EINVAL;
1050                         dev->rcache = arg;
1051                         break;
1052                 case BLKI2OSWSTRAT:
1053                         if(arg!=0 && (arg<CACHE_WRITETHROUGH || arg>CACHE_SMARTBACK))
1054                                 return -EINVAL;
1055                         dev->wcache = arg;
1056                         break;
1057         
1058                 case BLKRRPART:
1059                         if(!capable(CAP_SYS_ADMIN))
1060                                 return -EACCES;
1061                         return do_i2ob_revalidate(inode->i_rdev,1);
1062                         
1063                 default:
1064                         return blk_ioctl(inode->i_rdev, cmd, arg);
1065         }
1066         return 0;
1067 }
1068
/*
 *	Close the block device down. On the final release (refcnt hits
 *	zero) the controller cache is flushed, the media is unlocked and
 *	powered down, and the device claim is dropped.
 */
 
static int i2ob_release(struct inode *inode, struct file *file)
{
	struct i2ob_device *dev;
	int minor;

	minor = MINOR(inode->i_rdev);
	if (minor >= (MAX_I2OB<<4))
		return -ENODEV;
	/* All 16 partition minors share one per-unit device slot */
	dev = &i2ob_dev[(minor&0xF0)];

	/*
	 * This is to deal with the case of an application
	 * opening a device and then the device disappears while
	 * it's in use, and then the application tries to release
	 * it.  ex: Unmounting a deleted RAID volume at reboot. 
	 * If we send messages, it will just cause FAILs since
	 * the TID no longer exists.
	 */
	if(!dev->i2odev)
		return 0;

	if (dev->refcnt <= 0)
		printk(KERN_ALERT "i2ob_release: refcount(%d) <= 0\n", dev->refcnt);
	dev->refcnt--;
	if(dev->refcnt==0)
	{
		/*
		 *	Flush the onboard cache on unmount
		 */
		u32 msg[5];
		int *query_done = &dev->done_flag;
		msg[0] = (FIVE_WORD_MSG_SIZE|SGL_OFFSET_0);
		msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
		msg[2] = i2ob_context|0x40000000;
		msg[3] = (u32)query_done;
		msg[4] = 60<<16;	/* flush timeout in the upper 16 bits */
		DEBUG("Flushing...");
		i2o_post_wait(dev->controller, msg, 20, 60);

		/*
		 *	Unlock the media
		 *	NOTE(review): msg[4] = -1 presumably means "any media
		 *	identifier" - confirm against the I2O BSA spec.
		 */
		msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
		msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
		msg[2] = i2ob_context|0x40000000;
		msg[3] = (u32)query_done;
		msg[4] = -1;
		DEBUG("Unlocking...");
		i2o_post_wait(dev->controller, msg, 20, 2);
		DEBUG("Unlocked.\n");

		/*
		 *	Power down: different power state for removable media
		 *	(flags bits 3/4) than for fixed disks.
		 *	NOTE(review): msg[0] says FOUR_WORD but msg[4] is used
		 *	and 20 bytes (5 words) are posted; msg[2]/msg[3] keep
		 *	the values set above - looks intentional since
		 *	i2o_post_wait() handles the context words, but confirm
		 *	against the message layer before changing anything.
		 */
		msg[0] = FOUR_WORD_MSG_SIZE|SGL_OFFSET_0;
		msg[1] = I2O_CMD_BLOCK_POWER<<24 | HOST_TID << 12 | dev->tid;
		if(dev->flags & (1<<3|1<<4))	/* Removable */
			msg[4] = 0x21 << 24;
		else
			msg[4] = 0x24 << 24;

		if(i2o_post_wait(dev->controller, msg, 20, 60)==0)
			dev->power = 0x24;

		/*
		 * Now unclaim the device.
		 */

		if (i2o_release_device(dev->i2odev, &i2o_block_handler))
			printk(KERN_ERR "i2ob_release: controller rejected unclaim.\n");
		
		DEBUG("Unclaim\n");
	}
	return 0;
}
1145
/*
 *	Open the block device. On the first open of a unit we claim the
 *	device from the IOP, power it up if needed, and mount and lock
 *	the media.
 */
 
static int i2ob_open(struct inode *inode, struct file *file)
{
	int minor;
	struct i2ob_device *dev;
	
	if (!inode)
		return -EINVAL;
	minor = MINOR(inode->i_rdev);
	if (minor >= MAX_I2OB<<4)
		return -ENODEV;
	/* All 16 partition minors share one per-unit device slot */
	dev=&i2ob_dev[(minor&0xF0)];

	/* Device was deleted behind our back (e.g. RAID volume removed) */
	if(!dev->i2odev)	
		return -ENODEV;
	
	if(dev->refcnt++==0)
	{ 
		u32 msg[6];
		
		DEBUG("Claim ");
		if(i2o_claim_device(dev->i2odev, &i2o_block_handler))
		{
			dev->refcnt--;
			printk(KERN_INFO "I2O Block: Could not open device\n");
			return -EBUSY;
		}
		DEBUG("Claimed ");
		/*
		 *	Power up if needed
		 *
		 *	NOTE(review): msg[2]/msg[3] are never set for the
		 *	messages below - presumably i2o_post_wait() fills in
		 *	the context words itself; confirm against the message
		 *	layer.
		 */

		if(dev->power > 0x1f)
		{
			msg[0] = FOUR_WORD_MSG_SIZE|SGL_OFFSET_0;
			msg[1] = I2O_CMD_BLOCK_POWER<<24 | HOST_TID << 12 | dev->tid;
			msg[4] = 0x02 << 24;
			if(i2o_post_wait(dev->controller, msg, 20, 60) == 0)
				dev->power = 0x02;
		}

		/*
		 *	Mount the media if needed. Note that we don't use
		 *	the lock bit. Since we have to issue a lock if it
		 *	refuses a mount (quite possible) then we might as
		 *	well just send two messages out.
		 */
		msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;		
		msg[1] = I2O_CMD_BLOCK_MMOUNT<<24|HOST_TID<<12|dev->tid;
		msg[4] = -1;
		msg[5] = 0;
		DEBUG("Mount ");
		i2o_post_wait(dev->controller, msg, 24, 2);

		/*
		 *	Lock the media
		 */
		msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
		msg[1] = I2O_CMD_BLOCK_MLOCK<<24|HOST_TID<<12|dev->tid;
		msg[4] = -1;
		DEBUG("Lock ");
		i2o_post_wait(dev->controller, msg, 20, 2);
		DEBUG("Ready.\n");
	}		
	return 0;
}
1215
1216 /*
1217  *      Issue a device query
1218  */
1219  
1220 static int i2ob_query_device(struct i2ob_device *dev, int table, 
1221         int field, void *buf, int buflen)
1222 {
1223         return i2o_query_scalar(dev->controller, dev->tid,
1224                 table, field, buf, buflen);
1225 }
1226
1227
1228 /*
1229  *      Install the I2O block device we found.
1230  */
1231  
1232 static int i2ob_install_device(struct i2o_controller *c, struct i2o_device *d, int unit)
1233 {
1234         u64 size;
1235         u32 blocksize;
1236         u8 type;
1237         u16 power;
1238         u32 flags, status;
1239         struct i2ob_device *dev=&i2ob_dev[unit];
1240         int i;
1241
1242         /*
1243          * For logging purposes...
1244          */
1245         printk(KERN_INFO "i2ob: Installing tid %d device at unit %d\n", 
1246                         d->lct_data.tid, unit); 
1247
1248         /*
1249          *      Ask for the current media data. If that isn't supported
1250          *      then we ask for the device capacity data
1251          */
1252         if(i2ob_query_device(dev, 0x0004, 1, &blocksize, 4) != 0
1253           || i2ob_query_device(dev, 0x0004, 0, &size, 8) !=0 )
1254         {
1255                 i2ob_query_device(dev, 0x0000, 3, &blocksize, 4);
1256                 i2ob_query_device(dev, 0x0000, 4, &size, 8);
1257         }
1258         
1259         if(i2ob_query_device(dev, 0x0000, 2, &power, 2)!=0)
1260                 power = 0;
1261         i2ob_query_device(dev, 0x0000, 5, &flags, 4);
1262         i2ob_query_device(dev, 0x0000, 6, &status, 4);
1263         i2ob_sizes[unit] = (int)(size>>10);
1264         for(i=unit; i <= unit+15 ; i++)
1265                 i2ob_hardsizes[i] = blocksize;
1266         i2ob_gendisk.part[unit].nr_sects = size>>9;
1267         i2ob[unit].nr_sects = (int)(size>>9);
1268
1269         /*
1270          * Max number of Scatter-Gather Elements
1271          */     
1272
1273         i2ob_dev[unit].power = power;   /* Save power state in device proper */
1274         i2ob_dev[unit].flags = flags;
1275
1276         for(i=unit;i<=unit+15;i++)
1277         {
1278                 i2ob_dev[i].power = power;      /* Save power state */
1279                 i2ob_dev[unit].flags = flags;   /* Keep the type info */
1280                 i2ob_max_sectors[i] = 96;       /* 256 might be nicer but many controllers 
1281                                                    explode on 65536 or higher */
1282                 i2ob_dev[i].max_segments = (d->controller->status_block->inbound_frame_size - 7) / 2;
1283                 
1284                 i2ob_dev[i].rcache = CACHE_SMARTFETCH;
1285                 i2ob_dev[i].wcache = CACHE_WRITETHROUGH;
1286                 
1287                 if(d->controller->battery == 0)
1288                         i2ob_dev[i].wcache = CACHE_WRITETHROUGH;
1289
1290                 if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.promise)
1291                         i2ob_dev[i].wcache = CACHE_WRITETHROUGH;
1292
1293                 if(d->controller->type == I2O_TYPE_PCI && d->controller->bus.pci.short_req)
1294                 {
1295                         i2ob_max_sectors[i] = 8;
1296                         i2ob_dev[i].max_segments = 8;
1297                 }
1298         }
1299
1300         sprintf(d->dev_name, "%s%c", i2ob_gendisk.major_name, 'a' + (unit>>4));
1301
1302         printk(KERN_INFO "%s: Max segments %d, queue depth %d, byte limit %d.\n",
1303                  d->dev_name, i2ob_dev[unit].max_segments, i2ob_dev[unit].depth, i2ob_max_sectors[unit]<<9);
1304
1305         i2ob_query_device(dev, 0x0000, 0, &type, 1);
1306
1307         printk(KERN_INFO "%s: ", d->dev_name);
1308         switch(type)
1309         {
1310                 case 0: printk("Disk Storage");break;
1311                 case 4: printk("WORM");break;
1312                 case 5: printk("CD-ROM");break;
1313                 case 7: printk("Optical device");break;
1314                 default:
1315                         printk("Type %d", type);
1316         }
1317         if(status&(1<<10))
1318                 printk("(RAID)");
1319
1320         if((flags^status)&(1<<4|1<<3))  /* Missing media or device */
1321         {
1322                 printk(KERN_INFO " Not loaded.\n");
1323                 /* Device missing ? */
1324                 if((flags^status)&(1<<4))
1325                         return 1;
1326         }
1327         else
1328         {
1329                 printk(": %dMB, %d byte sectors",
1330                         (int)(size>>20), blocksize);
1331         }
1332         if(status&(1<<0))
1333         {
1334                 u32 cachesize;
1335                 i2ob_query_device(dev, 0x0003, 0, &cachesize, 4);
1336                 cachesize>>=10;
1337                 if(cachesize>4095)
1338                         printk(", %dMb cache", cachesize>>10);
1339                 else
1340                         printk(", %dKb cache", cachesize);
1341         }
1342         printk(".\n");
1343         printk(KERN_INFO "%s: Maximum sectors/read set to %d.\n", 
1344                 d->dev_name, i2ob_max_sectors[unit]);
1345
1346         /* 
1347          * If this is the first I2O block device found on this IOP,
1348          * we need to initialize all the queue data structures
1349          * before any I/O can be performed. If it fails, this
1350          * device is useless.
1351          */
1352         if(!i2ob_queues[c->unit]) {
1353                 if(i2ob_init_iop(c->unit))
1354                         return 1;
1355         }
1356
1357         /* 
1358          * This will save one level of lookup/indirection in critical 
1359          * code so that we can directly get the queue ptr from the
1360          * device instead of having to go the IOP data structure.
1361          */
1362         dev->req_queue = &i2ob_queues[c->unit]->req_queue;
1363
1364         grok_partitions(&i2ob_gendisk, unit>>4, 1<<4, (long)(size>>9));
1365
1366         /*
1367          * Register for the events we're interested in and that the
1368          * device actually supports.
1369          */
1370         i2o_event_register(c, d->lct_data.tid, i2ob_context, unit, 
1371                 (I2OB_EVENT_MASK & d->lct_data.event_capabilities));
1372
1373         return 0;
1374 }
1375
1376 /*
1377  * Initialize IOP specific queue structures.  This is called
1378  * once for each IOP that has a block device sitting behind it.
1379  */
1380 static int i2ob_init_iop(unsigned int unit)
1381 {
1382         int i;
1383
1384         i2ob_queues[unit] = (struct i2ob_iop_queue *) kmalloc(sizeof(struct i2ob_iop_queue), GFP_ATOMIC);
1385         if(!i2ob_queues[unit])
1386         {
1387                 printk(KERN_WARNING "Could not allocate request queue for I2O block device!\n");
1388                 return -1;
1389         }
1390
1391         for(i = 0; i< MAX_I2OB_DEPTH; i++)
1392         {
1393                 i2ob_queues[unit]->request_queue[i].next =  &i2ob_queues[unit]->request_queue[i+1];
1394                 i2ob_queues[unit]->request_queue[i].num = i;
1395         }
1396         
1397         /* Queue is MAX_I2OB + 1... */
1398         i2ob_queues[unit]->request_queue[i].next = NULL;
1399         i2ob_queues[unit]->i2ob_qhead = &i2ob_queues[unit]->request_queue[0];
1400         atomic_set(&i2ob_queues[unit]->queue_depth, 0);
1401
1402         blk_init_queue(&i2ob_queues[unit]->req_queue, i2ob_request);
1403         blk_queue_headactive(&i2ob_queues[unit]->req_queue, 0);
1404         i2ob_queues[unit]->req_queue.back_merge_fn = i2ob_back_merge;
1405         i2ob_queues[unit]->req_queue.front_merge_fn = i2ob_front_merge;
1406         i2ob_queues[unit]->req_queue.merge_requests_fn = i2ob_merge_requests;
1407         i2ob_queues[unit]->req_queue.queuedata = &i2ob_queues[unit];
1408
1409         return 0;
1410 }
1411
1412 /*
1413  * Get the request queue for the given device.
1414  */     
1415 static request_queue_t* i2ob_get_queue(kdev_t dev)
1416 {
1417         int unit = MINOR(dev)&0xF0;
1418         return i2ob_dev[unit].req_queue;
1419 }
1420
/*
 * Probe the I2O subsystem for block class devices.
 *
 * Called twice from i2ob_probe(): first with bios=1 to claim only the
 * devices the BIOS marked as boot devices (bios_info == 0x80), then
 * with bios=0 for everything else, so the boot volume always lands on
 * the first unit.
 */
static void i2ob_scan(int bios)
{
	int i;
	int warned = 0;		/* warn only once when we run out of units */

	struct i2o_device *d, *b=NULL;
	struct i2o_controller *c;
	struct i2ob_device *dev;
		
	for(i=0; i< MAX_I2O_CONTROLLERS; i++)
	{
		c=i2o_find_controller(i);
	
		if(c==NULL)
			continue;

		/*
		 *    The device list connected to the I2O Controller is doubly linked
		 * Here we traverse the end of the list , and start claiming devices
		 * from that end. This assures that within an I2O controller at least
		 * the newly created volumes get claimed after the older ones, thus
		 * mapping to same major/minor (and hence device file name) after 
		 * every reboot.
		 * The exception being: 
		 * 1. If there was a TID reuse.
		 * 2. There was more than one I2O controller. 
		 */

		if(!bios)
		{
			/* Find the tail of the device list */
			for (d=c->devices;d!=NULL;d=d->next)
			if(d->next == NULL)
				b = d;
		}
		else
			b = c->devices;

		/* Boot pass walks head->tail, normal pass tail->head */
		while(b != NULL)
		{
			d=b;
			if(bios)
				b = b->next;
			else
				b = b->prev;

			/* Only random block storage class devices... */
			if(d->lct_data.class_id!=I2O_CLASS_RANDOM_BLOCK_STORAGE)
				continue;

			/* ...that are not already owned by another user */
			if(d->lct_data.user_tid != 0xFFF)
				continue;

			if(bios)
			{
				if(d->lct_data.bios_info != 0x80)
					continue;
				printk(KERN_INFO "Claiming as Boot device: Controller %d, TID %d\n", c->unit, d->lct_data.tid);
			}
			else
			{
				if(d->lct_data.bios_info == 0x80)
					continue; /*Already claimed on pass 1 */
			}

			if(i2o_claim_device(d, &i2o_block_handler))
			{
				printk(KERN_WARNING "i2o_block: Controller %d, TID %d\n", c->unit,
					d->lct_data.tid);
				printk(KERN_WARNING "\t%sevice refused claim! Skipping installation\n", bios?"Boot d":"D");
				continue;
			}

			if(scan_unit<MAX_I2OB<<4)
			{
				/*
				 * Get the device and fill in the
				 * Tid and controller.
				 */
				dev=&i2ob_dev[scan_unit];
				dev->i2odev = d; 
				dev->controller = c;
				dev->unit = c->unit;
				dev->tid = d->lct_data.tid;

				if(i2ob_install_device(c,d,scan_unit))
					printk(KERN_WARNING "Could not install I2O block device\n");
				else
				{
					/* Each unit consumes 16 minors */
					scan_unit+=16;
					i2ob_dev_count++;

					/* We want to know when device goes away */
					i2o_device_notify_on(d, &i2o_block_handler);
				}
			}
			else
			{
				if(!warned++)
					printk(KERN_WARNING "i2o_block: too many device, registering only %d.\n", scan_unit>>4);
			}
			/* Drop the claim taken for the probe */
			i2o_release_device(d, &i2o_block_handler);
		}
		i2o_unlock_controller(c);
	}
}
1528
static void i2ob_probe(void)
{
	/*
	 *	Some overhead/redundancy involved here, while trying to
	 *	claim the first boot volume encountered as /dev/i2o/hda
	 *	everytime. All the i2o_controllers are searched and the
	 *	first i2o block device marked as bootable is claimed
	 *	If an I2O block device was booted off , the bios sets
	 *	its bios_info field to 0x80, this what we search for.
	 *	Assuming that the bootable volume is /dev/i2o/hda
	 *	everytime will prevent any kernel panic while mounting
	 *	root partition
	 */

	/* Pass 1: only devices flagged bootable by the BIOS */
	printk(KERN_INFO "i2o_block: Checking for Boot device...\n");
	i2ob_scan(1);

	/*
	 *	Now the remainder.
	 */
	printk(KERN_INFO "i2o_block: Checking for I2O Block devices...\n");
	i2ob_scan(0);
}
1552
1553
1554 /*
1555  * New device notification handler.  Called whenever a new
1556  * I2O block storage device is added to the system.
1557  * 
1558  * Should we spin lock around this to keep multiple devs from 
1559  * getting updated at the same time? 
1560  * 
1561  */
1562 void i2ob_new_device(struct i2o_controller *c, struct i2o_device *d)
1563 {
1564         struct i2ob_device *dev;
1565         int unit = 0;
1566
1567         printk(KERN_INFO "i2o_block: New device detected\n");
1568         printk(KERN_INFO "   Controller %d Tid %d\n",c->unit, d->lct_data.tid);
1569
1570         /* Check for available space */
1571         if(i2ob_dev_count>=MAX_I2OB<<4)
1572         {
1573                 printk(KERN_ERR "i2o_block: No more devices allowed!\n");
1574                 return;
1575         }
1576         for(unit = 0; unit < (MAX_I2OB<<4); unit += 16)
1577         {
1578                 if(!i2ob_dev[unit].i2odev)
1579                         break;
1580         }
1581
1582         if(i2o_claim_device(d, &i2o_block_handler))
1583         {
1584                 printk(KERN_INFO "i2o_block: Unable to claim device. Installation aborted\n");
1585                 return;
1586         }
1587
1588         dev = &i2ob_dev[unit];
1589         dev->i2odev = d; 
1590         dev->controller = c;
1591         dev->tid = d->lct_data.tid;
1592
1593         if(i2ob_install_device(c,d,unit))
1594                 printk(KERN_ERR "i2o_block: Could not install new device\n");
1595         else    
1596         {
1597                 i2ob_dev_count++;
1598                 i2o_device_notify_on(d, &i2o_block_handler);
1599         }
1600
1601         i2o_release_device(d, &i2o_block_handler);
1602  
1603         return;
1604 }
1605
1606 /*
1607  * Deleted device notification handler.  Called when a device we
1608  * are talking to has been deleted by the user or some other
1609  * mysterious fource outside the kernel.
1610  */
1611 void i2ob_del_device(struct i2o_controller *c, struct i2o_device *d)
1612 {       
1613         int unit = 0;
1614         int i = 0;
1615         unsigned long flags;
1616
1617         spin_lock_irqsave(&io_request_lock, flags);
1618
1619         /*
1620          * Need to do this...we somtimes get two events from the IRTOS
1621          * in a row and that causes lots of problems.
1622          */
1623         i2o_device_notify_off(d, &i2o_block_handler);
1624
1625         printk(KERN_INFO "I2O Block Device Deleted\n");
1626
1627         for(unit = 0; unit < MAX_I2OB<<4; unit += 16)
1628         {
1629                 if(i2ob_dev[unit].i2odev == d)
1630                 {
1631                         printk(KERN_INFO "  /dev/%s: Controller %d Tid %d\n", 
1632                                 d->dev_name, c->unit, d->lct_data.tid);
1633                         break;
1634                 }
1635         }
1636         if(unit >= MAX_I2OB<<4)
1637         {
1638                 printk(KERN_ERR "i2ob_del_device called, but not in dev table!\n");
1639                 spin_unlock_irqrestore(&io_request_lock, flags);
1640                 return;
1641         }
1642
1643         /* 
1644          * This will force errors when i2ob_get_queue() is called
1645          * by the kenrel.
1646          */
1647         i2ob_dev[unit].req_queue = NULL;
1648         for(i = unit; i <= unit+15; i++)
1649         {
1650                 i2ob_dev[i].i2odev = NULL;
1651                 i2ob_sizes[i] = 0;
1652                 i2ob_hardsizes[i] = 0;
1653                 i2ob_max_sectors[i] = 0;
1654                 i2ob[i].nr_sects = 0;
1655                 i2ob_gendisk.part[i].nr_sects = 0;
1656         }
1657         spin_unlock_irqrestore(&io_request_lock, flags);
1658
1659         /*
1660          * Decrease usage count for module
1661          */     
1662
1663         while(i2ob_dev[unit].refcnt--)
1664                 MOD_DEC_USE_COUNT;
1665
1666         i2ob_dev[unit].refcnt = 0;
1667         
1668         i2ob_dev[i].tid = 0;
1669
1670         /* 
1671          * Do we need this?
1672          * The media didn't really change...the device is just gone
1673          */
1674         i2ob_media_change_flag[unit] = 1;
1675
1676         i2ob_dev_count--;       
1677 }
1678
1679 /*
1680  *      Have we seen a media change ?
1681  */
1682 static int i2ob_media_change(kdev_t dev)
1683 {
1684         int i=MINOR(dev);
1685         i>>=4;
1686         if(i2ob_media_change_flag[i])
1687         {
1688                 i2ob_media_change_flag[i]=0;
1689                 return 1;
1690         }
1691         return 0;
1692 }
1693
1694 static int i2ob_revalidate(kdev_t dev)
1695 {
1696         return do_i2ob_revalidate(dev, 0);
1697 }
1698
/*
 * Reboot notifier.  This is called by i2o_core when the system
 * shuts down.  For every open device (refcnt != 0) we flush the
 * controller's onboard write cache and then unlock the media so
 * the hardware is in a safe state before power-off.
 */
static void i2ob_reboot_event(void)
{
        int i;
        
        for(i=0;i<MAX_I2OB;i++)
        {
                /* Device state lives on the unpartitioned minor (i<<4) */
                struct i2ob_device *dev=&i2ob_dev[(i<<4)];
                
                if(dev->refcnt!=0)
                {
                        /*
                         *      Flush the onboard cache
                         */
                        u32 msg[5];
                        int *query_done = &dev->done_flag;
                        msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
                        msg[1] = I2O_CMD_BLOCK_CFLUSH<<24|HOST_TID<<12|dev->tid;
                        msg[2] = i2ob_context|0x40000000;
                        /* NOTE(review): pointer truncated to u32 - this
                           assumes a 32-bit kernel; verify on 64-bit builds */
                        msg[3] = (u32)query_done;
                        msg[4] = 60<<16;        /* 60s cache-flush timeout in the high word */
                        
                        DEBUG("Flushing...");
                        i2o_post_wait(dev->controller, msg, 20, 60);

                        DEBUG("Unlocking...");
                        /*
                         *      Unlock the media
                         */
                        msg[0] = FIVE_WORD_MSG_SIZE|SGL_OFFSET_0;
                        msg[1] = I2O_CMD_BLOCK_MUNLOCK<<24|HOST_TID<<12|dev->tid;
                        msg[2] = i2ob_context|0x40000000;
                        msg[3] = (u32)query_done;
                        msg[4] = -1;            /* -1 = unlock all media in the device */
                        i2o_post_wait(dev->controller, msg, 20, 2);
                        
                        DEBUG("Unlocked.\n");
                }
        }       
}
1742
/*
 * Block device operations exported to the kernel block layer.
 * (GNU-style labelled initializers, as used throughout 2.4 drivers.)
 */
static struct block_device_operations i2ob_fops =
{
        owner:                  THIS_MODULE,
        open:                   i2ob_open,
        release:                i2ob_release,
        ioctl:                  i2ob_ioctl,
        check_media_change:     i2ob_media_change,
        revalidate:             i2ob_revalidate,
};
1752
/*
 * Gendisk descriptor: MAX_I2OB devices under /dev/i2o/hd*, each with
 * 16 minors (minor_shift 4 => 15 partitions + the whole disk).
 */
static struct gendisk i2ob_gendisk = 
{
        major:          MAJOR_NR,
        major_name:     "i2o/hd",
        minor_shift:    4,              /* 16 minors per unit */
        max_p:          1<<4,
        part:           i2ob,           /* per-minor partition table */
        sizes:          i2ob_sizes,     /* per-minor size in kB */
        nr_real:        MAX_I2OB,
        fops:           &i2ob_fops,
};
1764
1765
1766 /*
1767  * And here should be modules and kernel interface 
1768  *  (Just smiley confuses emacs :-)
1769  */
1770
1771 static int i2o_block_init(void)
1772 {
1773         int i;
1774
1775         printk(KERN_INFO "I2O Block Storage OSM v0.9\n");
1776         printk(KERN_INFO "   (c) Copyright 1999-2001 Red Hat Software.\n");
1777         
1778         /*
1779          *      Register the block device interfaces
1780          */
1781
1782         if (register_blkdev(MAJOR_NR, "i2o_block", &i2ob_fops)) {
1783                 printk(KERN_ERR "Unable to get major number %d for i2o_block\n",
1784                        MAJOR_NR);
1785                 return -EIO;
1786         }
1787 #ifdef MODULE
1788         printk(KERN_INFO "i2o_block: registered device at major %d\n", MAJOR_NR);
1789 #endif
1790
1791         /*
1792          *      Now fill in the boiler plate
1793          */
1794          
1795         blksize_size[MAJOR_NR] = i2ob_blksizes;
1796         hardsect_size[MAJOR_NR] = i2ob_hardsizes;
1797         blk_size[MAJOR_NR] = i2ob_sizes;
1798         max_sectors[MAJOR_NR] = i2ob_max_sectors;
1799         blk_dev[MAJOR_NR].queue = i2ob_get_queue;
1800         
1801         blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), i2ob_request);
1802         blk_queue_headactive(BLK_DEFAULT_QUEUE(MAJOR_NR), 0);
1803
1804         for (i = 0; i < MAX_I2OB << 4; i++) {
1805                 i2ob_dev[i].refcnt = 0;
1806                 i2ob_dev[i].flags = 0;
1807                 i2ob_dev[i].controller = NULL;
1808                 i2ob_dev[i].i2odev = NULL;
1809                 i2ob_dev[i].tid = 0;
1810                 i2ob_dev[i].head = NULL;
1811                 i2ob_dev[i].tail = NULL;
1812                 i2ob_dev[i].depth = MAX_I2OB_DEPTH;
1813                 i2ob_blksizes[i] = 1024;
1814                 i2ob_max_sectors[i] = 2;
1815         }
1816         
1817         /*
1818          *      Set up the queue
1819          */
1820         for(i = 0; i < MAX_I2O_CONTROLLERS; i++)
1821         {
1822                 i2ob_queues[i] = NULL;
1823         }
1824
1825         /*
1826          *      Register the OSM handler as we will need this to probe for
1827          *      drives, geometry and other goodies.
1828          */
1829
1830         if(i2o_install_handler(&i2o_block_handler)<0)
1831         {
1832                 unregister_blkdev(MAJOR_NR, "i2o_block");
1833                 blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
1834                 printk(KERN_ERR "i2o_block: unable to register OSM.\n");
1835                 return -EINVAL;
1836         }
1837         i2ob_context = i2o_block_handler.context;        
1838
1839         /*
1840          * Initialize event handling thread
1841          */
1842         init_MUTEX_LOCKED(&i2ob_evt_sem);
1843         evt_pid = kernel_thread(i2ob_evt, NULL, CLONE_SIGHAND);
1844         if(evt_pid < 0)
1845         {
1846                 printk(KERN_ERR 
1847                         "i2o_block: Could not initialize event thread.  Aborting\n");
1848                 i2o_remove_handler(&i2o_block_handler);
1849                 return 0;
1850         }
1851
1852         /*
1853          *      Finally see what is actually plugged in to our controllers
1854          */
1855         for (i = 0; i < MAX_I2OB; i++)
1856                 register_disk(&i2ob_gendisk, MKDEV(MAJOR_NR,i<<4), 1<<4,
1857                         &i2ob_fops, 0);
1858         i2ob_probe();
1859
1860         /*
1861          *      Adding i2ob_gendisk into the gendisk list.
1862          */
1863         add_gendisk(&i2ob_gendisk);
1864
1865         return 0;
1866 }
1867
1868
/*
 * Module teardown: stop the event thread, detach from device event
 * notification, give in-flight controller callbacks time to drain,
 * then unwind the OSM handler, the block major and the request queue.
 * Ordering here is deliberate - do not reshuffle.
 */
static void i2o_block_exit(void)
{
        int i;
        
        if(evt_running) {
                printk(KERN_INFO "Killing I2O block threads...");
                i = kill_proc(evt_pid, SIGTERM, 1);
                if(!i) {
                        printk("waiting...");
                }
                /* Be sure it died */
                wait_for_completion(&i2ob_thread_dead);
                printk("done.\n");
        }

        /*
         * Unregister for updates from any devices..otherwise we still
         * get them and the core jumps to random memory :O
         */
        if(i2ob_dev_count) {
                struct i2o_device *d;
                for(i = 0; i < MAX_I2OB; i++)
                /* Event mask 0 disables all event delivery for the TID */
                if((d=i2ob_dev[i<<4].i2odev)) {
                        i2o_device_notify_off(d, &i2o_block_handler);
                        i2o_event_register(d->controller, d->lct_data.tid, 
                                i2ob_context, i<<4, 0);
                }
        }
        
        /*
         *      We may get further callbacks for ourself. The i2o_core
         *      code handles this case reasonably sanely. The problem here
         *      is we shouldn't get them .. but a couple of cards feel 
         *      obliged to tell us stuff we dont care about.
         *
         *      This isnt ideal at all but will do for now.
         */
         
        /* Sleep ~1s so straggling replies land before the handler goes */
        set_current_state(TASK_UNINTERRUPTIBLE);
        schedule_timeout(HZ);
        
        /*
         *      Flush the OSM
         */

        i2o_remove_handler(&i2o_block_handler);
                 
        /*
         *      Return the block device
         */
        if (unregister_blkdev(MAJOR_NR, "i2o_block") != 0)
                printk("i2o_block: cleanup_module failed\n");

        /*
         * free request queue
         */
        blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));

        del_gendisk(&i2ob_gendisk);
}
1929
/* Module metadata and entry/exit hooks - no symbols are exported */
EXPORT_NO_SYMBOLS;
MODULE_AUTHOR("Red Hat Software");
MODULE_DESCRIPTION("I2O Block Device OSM");
MODULE_LICENSE("GPL");

module_init(i2o_block_init);
module_exit(i2o_block_exit);