3 * Authors: Dave Boutcher <boutcher@us.ibm.com>
4 * Ryan Arnold <ryanarn@us.ibm.com>
5 * Colin Devilbiss <devilbis@us.ibm.com>
7 * (C) Copyright 2000 IBM Corporation
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License as
11 * published by the Free Software Foundation; either version 2 of the
12 * License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 ***************************************************************************
23 * This routine provides access to disk space (termed "DASD" in historical
24 * IBM terms) owned and managed by an OS/400 partition running on the
25 * same box as this Linux partition.
27 * All disk operations are performed by sending messages back and forth to
28 * the OS/400 partition.
30 * This device driver can either use its own major number, or it can
31 * pretend to be an IDE drive (Major #3). Currently it doesn't
32 * emulate all the other IDE majors. This is controlled with a
33 * CONFIG option. You can either call this an elegant solution to the
34 * fact that a lot of software doesn't recognize a new disk major number...
35 * or you can call this a really ugly hack. Your choice.
38 #include <linux/major.h>
39 #include <linux/config.h>
/* Decide if we are using our own major or pretending to be an IDE drive.
 * If we are using our own majors, we only support 3 partition bits per
 * physical disk....so with minor numbers 0-255 we get a maximum of 64 disks.
 * If we are emulating IDE, we get 16 partitions per disk, with a maximum of
 * 16 disks.
 */
#ifdef CONFIG_VIODASD_IDE
#define MAJOR_NR IDE0_MAJOR
#define PARTITION_SHIFT 6
#define do_viodasd_request do_hd_request
static int numdsk = 16;
static int viodasd_max_disk = 16;
#define VIOD_DEVICE_NAME "hd"
#define VIOD_GENHD_NAME "hd"
#else
/* Native mode: our own major, 3 partition bits (8 minors per disk) */
#define MAJOR_NR VIODASD_MAJOR
#define PARTITION_SHIFT 3
static int numdsk = 32;
static int viodasd_max_disk = 32;
#define VIOD_DEVICE_NAME "viod"
#ifdef CONFIG_DEVFS_FS
#define VIOD_GENHD_NAME "viod"
#else
#define VIOD_GENHD_NAME "iSeries/vd"
#endif /* CONFIG_DEVFS */
#endif /* CONFIG_VIODASD_IDE */
/* Driver version string printed at init time */
#define VIODASD_VERS "1.02"
/* NOTE(review): presumably tells <linux/blk.h> this driver supplies its own
 * end_request handling — confirm against the 2.4 blk.h conventions. */
#define LOCAL_END_REQUEST
72 #include <linux/sched.h>
73 #include <linux/timer.h>
74 #include <asm/uaccess.h>
75 #include <linux/module.h>
76 #include <linux/kernel.h>
77 #include <linux/blk.h>
78 #include <linux/genhd.h>
79 #include <linux/hdreg.h>
81 #include <linux/proc_fs.h>
82 #include <linux/errno.h>
83 #include <linux/init.h>
84 #include <linux/vmalloc.h>
85 #include <linux/string.h>
86 #include <linux/pci.h>
88 #include <asm/iSeries/HvTypes.h>
89 #include <asm/iSeries/HvLpEvent.h>
90 #include <asm/iSeries/HvLpConfig.h>
92 #include <asm/iSeries/iSeries_proc.h>
94 MODULE_DESCRIPTION("iSeries Virtual DASD");
95 MODULE_AUTHOR("Dave Boutcher");
/* Maximum number of DMA (scatter/gather) entries per block LP event */
#define VIOMAXBLOCKDMA 12

/* Fake PCI device used to anchor the iSeries vio DMA mappings */
extern struct pci_dev * iSeries_vio_dev;

/* The LP event exchanged with the OS/400 hosting partition for block I/O.
 * NOTE(review): several struct members were lost in this extraction; only
 * the skeleton is visible here. */
struct vioblocklpevent {
	struct HvLpEvent event;
	struct {		// Used during open
	struct {		// Used during rw
	} dmaInfo[VIOMAXBLOCKDMA];

/* Flag bit in mFlags: disk is read-only */
#define vioblockflags_ro 0x0001

/* Subtype codes for the block-I/O major subtype */
enum vioblocksubtype {
	vioblockopen = 0x0001,
	vioblockclose = 0x0002,
	vioblockread = 0x0003,
	vioblockwrite = 0x0004,
	vioblockflush = 0x0005,
	vioblockcheck = 0x0007
/* In a perfect world we will perform better if we get page-aligned I/O
 * requests, in multiples of pages. At least peg our block size to the
 * hypervisor page size. */
static int blksize = HVPAGESIZE;	/* in bytes */

static DECLARE_WAIT_QUEUE_HEAD(viodasd_wait);

/* Parameter block passed (via the event correlation token) to the
 * interrupt handler so it can post results back to the waiter */
struct viodasd_waitevent {
	struct semaphore *sem;
	int changed;		/* Used only for check_change */

/* All our disk-related global structures */
static struct hd_struct *viodasd_partitions;
static int *viodasd_sizes;
static int *viodasd_sectsizes;
static int *viodasd_maxsectors;
extern struct gendisk viodasd_gendsk;

/* Figure out the biggest I/O request (in sectors) we can accept */
#define VIODASD_MAXSECTORS (4096 / 512 * VIOMAXBLOCKDMA)

/* Keep some statistics on what's happening for the PROC file system.
 * NOTE(review): the surrounding struct/array declaration for viod_stats
 * was lost in extraction; ntce[] counts requests by DMA-list length. */
	long ntce[VIOMAXBLOCKDMA];

/* Number of disk I/O requests we've sent to OS/400 */
static int numReqOut;

/* This is our internal structure for keeping track of disk devices.
 * NOTE(review): members (size, cylinders, tracks, sectors, bytesPerSector,
 * readOnly, useCount are referenced elsewhere) were lost in extraction. */
struct viodasd_device {

/* When we get a disk I/O request we take it off the general request queue
 * and keep it on this private list until OS/400 responds. */
static LIST_HEAD(reqlist);
/* Handle reads from the proc file system: report the module open count,
 * the number of devices, and per-disk read/write statistics from
 * viod_stats (index 0 = reads, index 1 = writes).
 * NOTE(review): declarations of i/j/len, some sprintf calls and closing
 * braces were lost in extraction; code is kept exactly as found. */
static int proc_read(char *buf, char **start, off_t offset,
		     int blen, int *eof, void *data)
	       "viod Module opened %d times. Major number %d\n",
	       MOD_IN_USE, MAJOR_NR);
	len += sprintf(buf + len, "viod %d devices\n", numdsk);
	/* NOTE(review): hard-coded 16 here, not numdsk — verify intent */
	for (i = 0; i < 16; i++) {
		if (viod_stats[i][0].tot || viod_stats[i][1].tot) {
			"DISK %2.2d: rd %-10.10ld wr %-10.10ld (no buffer list rd %-10.10ld wr %-10.10ld\n",
			i, viod_stats[i][0].tot,
			viod_stats[i][1].tot,
			viod_stats[i][0].nobh,
			viod_stats[i][1].nobh);
			/* Histogram of DMA-list lengths used for reads */
			len += sprintf(buf + len, "rd DMA: ");
			for (j = 0; j < VIOMAXBLOCKDMA; j++)
				len += sprintf(buf + len, " [%2.2d] %ld",
					       viod_stats[i][0].ntce[j]);
			/* Histogram of DMA-list lengths used for writes */
			len += sprintf(buf + len, "\nwr DMA: ");
			for (j = 0; j < VIOMAXBLOCKDMA; j++)
				len += sprintf(buf + len, " [%2.2d] %ld",
					       viod_stats[i][1].ntce[j]);
			len += sprintf(buf + len, "\n");
/* Handle writes to our proc file system entry.
 * NOTE(review): the body was lost in extraction. */
static int proc_write(struct file *file, const char *buffer,
		      unsigned long count, void *data)
/* Set up our proc file system entry under the iSeries proc directory.
 * NOTE(review): the assignment of the create_proc_entry() result to "ent"
 * (and any NULL check) was lost in extraction — as shown, "ent" is used
 * uninitialized. */
void viodasd_proc_init(struct proc_dir_entry *iSeries_proc)
	struct proc_dir_entry *ent;
	create_proc_entry("viodasd", S_IFREG | S_IRUSR, iSeries_proc);
	ent->read_proc = proc_read;
	ent->write_proc = proc_write;
/* Clean up the proc file system entry created by viodasd_proc_init() */
void viodasd_proc_delete(struct proc_dir_entry *iSeries_proc)
	remove_proc_entry("viodasd", iSeries_proc);
/* Complete a request: retire each buffer head, then (once the whole
 * request is done) retire the request itself.
 * NOTE(review): end_that_request_first() returning non-zero means more
 * buffers remain; the early return on that path was lost in extraction. */
static void viodasd_end_request(struct request *req, int uptodate)
	if (end_that_request_first(req, uptodate, VIOD_DEVICE_NAME))
	end_that_request_last(req);
/* This rebuilds the partition information for a single disk device:
 * flush and invalidate every existing partition of the disk, clear its
 * hd_struct entries, then re-read the partition table. */
static int viodasd_revalidate(kdev_t dev)
	int device_no = DEVICE_NR(dev);
	int part0 = (device_no << PARTITION_SHIFT);	/* first minor of this disk */
	int npart = (1 << PARTITION_SHIFT);		/* minors per disk */
	struct super_block *sb;
	/* Disk was never sized (never successfully opened): nothing to do */
	if (viodasd_devices[device_no].size == 0)
	/* Walk partitions last-to-first, flushing each in-use one */
	for (i = npart - 1; i >= 0; i--) {
		if (viodasd_partitions[minor].nr_sects != 0) {
			devp = MKDEV(MAJOR_NR, minor);
			sb = get_super(devp);
			invalidate_inodes(sb);
			invalidate_buffers(devp);
		viodasd_partitions[minor].start_sect = 0;
		viodasd_partitions[minor].nr_sects = 0;
	/* Re-read partitions; device size is in bytes, >> 9 gives sectors */
	grok_partitions(&viodasd_gendsk, device_no, npart,
			viodasd_devices[device_no].size >> 9);
/* This is the actual open code. It gets called from the external
 * open entry point, as well as from the init code when we're figuring
 * out what disks we have.  Sends a vioblockopen LP event to the hosting
 * OS/400 partition, then sleeps on a semaphore that the interrupt
 * handler posts when the response arrives. */
static int internal_open(int device_no)
	struct viodasd_waitevent we;
	/* This semaphore is raised in the interrupt handler */
	DECLARE_MUTEX_LOCKED(Semaphore);

	/* Check that we are dealing with a valid hosting partition */
	if (viopath_hostLp == HvLpIndexInvalid) {
		printk(KERN_WARNING_VIO "Invalid hosting partition\n");

	/* Send the open event to OS/400; &we travels in the correlation
	 * token so the handler can find our wait structure */
	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
			HvLpEvent_Type_VirtualIo,
			viomajorsubtype_blockio |
			HvLpEvent_AckInd_DoAck,
			HvLpEvent_AckType_ImmediateAck,
			(u64) (unsigned long) &we,
			((u64) device_no << 48), 0, 0,
		printk(KERN_WARNING_VIO "bad rc on signalLpEvent %d\n", (int) hvrc);

	/* Wait for the interrupt handler to get the response */

	/* Check the return code */
		printk(KERN_WARNING_VIO "bad rc opening disk: %d\n", (int) we.rc);

	/* If this is the first open of this device, update the device information */
	/* If this is NOT the first open, assume that it isn't changing */
	if (viodasd_devices[device_no].useCount == 0) {
		if (viodasd_devices[device_no].size > 0) {
			/* size is in bytes; >> 9 converts to 512-byte sectors */
			u64 tmpint = viodasd_devices[device_no].size >> 9;
			viodasd_partitions[device_no << PARTITION_SHIFT].
			/* Now the value divided by 1024 (i.e. size in KB) */
			tmpint = tmpint >> 1;
			viodasd_sizes[device_no << PARTITION_SHIFT] =
			/* Propagate the sector size to every minor of this disk */
			for (i = (device_no << PARTITION_SHIFT);
			     i < ((device_no + 1) << PARTITION_SHIFT); i++)
				viodasd_sectsizes[i] =
				    viodasd_devices[device_no].
	/* If the size of the device changed, weird things are happening! */
	if (viodasd_sizes[device_no << PARTITION_SHIFT] !=
	    viodasd_devices[device_no].size >> 10) {
		printk(KERN_WARNING_VIO
		       "disk size change (%dK to %dK) for device %d\n",
		       viodasd_sizes[device_no << PARTITION_SHIFT],
		       (int) viodasd_devices[device_no].size >> 10,

	/* Bump the use count */
	viodasd_devices[device_no].useCount++;
/* This is the actual release code. It gets called from the external
 * release entry point, as well as from the init code when we're figuring
 * out what disks we have. */
static int internal_release(int device_no)
	/* Send the event to OS/400. We DON'T expect a response */
	HvLpEvent_Rc hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
			HvLpEvent_Type_VirtualIo,
			viomajorsubtype_blockio
			HvLpEvent_AckInd_NoAck,
			HvLpEvent_AckType_ImmediateAck,
	/* Drop the use count taken by internal_open() */
	viodasd_devices[device_no].useCount--;
	printk(KERN_WARNING_VIO "bad rc sending event to OS/400 %d\n", (int) hvrc);
447 /* External open entry point.
449 static int viodasd_open(struct inode *ino, struct file *fil)
453 /* Do a bunch of sanity checks */
455 printk(KERN_WARNING_VIO "no inode provided in open\n");
459 if (MAJOR(ino->i_rdev) != MAJOR_NR) {
460 printk(KERN_WARNING_VIO "Wierd error...wrong major number on open\n");
464 device_no = DEVICE_NR(ino->i_rdev);
465 if (device_no > numdsk) {
466 printk(KERN_WARNING_VIO "Invalid minor device number %d in open\n",
471 /* Call the actual open code */
472 if (internal_open(device_no) == 0) {
473 if (fil && fil->f_mode) {
474 if (fil->f_mode & 2) {
475 if (viodasd_devices[device_no].readOnly) {
476 internal_release(device_no);
488 /* External release entry point.
490 static int viodasd_release(struct inode *ino, struct file *fil)
494 /* Do a bunch of sanity checks */
496 printk(KERN_WARNING_VIO "no inode provided in release\n");
500 if (MAJOR(ino->i_rdev) != MAJOR_NR) {
501 printk(KERN_WARNING_VIO
502 "Wierd error...wrong major number on release\n");
506 device_no = DEVICE_NR(ino->i_rdev);
507 if (device_no > numdsk) {
511 /* Just to be paranoid, sync the device */
512 fsync_dev(ino->i_rdev);
514 /* Call the actual release code */
515 internal_release(device_no);
521 /* External ioctl entry point.
523 static int viodasd_ioctl(struct inode *ino, struct file *fil,
524 unsigned int cmd, unsigned long arg)
529 DECLARE_MUTEX_LOCKED(Semaphore);
533 printk(KERN_WARNING_VIO "no inode provided in ioctl\n");
537 if (MAJOR(ino->i_rdev) != MAJOR_NR) {
538 printk(KERN_WARNING_VIO "Wierd error...wrong major number on ioctl\n");
542 device_no = DEVICE_NR(ino->i_rdev);
543 if (device_no > numdsk) {
544 printk(KERN_WARNING_VIO "Invalid minor device number %d in ioctl\n",
551 /* return the device size in sectors */
555 verify_area(VERIFY_WRITE, (long *) arg, sizeof(long));
559 put_user(viodasd_partitions[MINOR(ino->i_rdev)].nr_sects,
567 fsync_dev(ino->i_rdev);
568 invalidate_buffers(ino->i_rdev);
569 hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
570 HvLpEvent_Type_VirtualIo,
571 viomajorsubtype_blockio
573 HvLpEvent_AckInd_DoAck,
574 HvLpEvent_AckType_ImmediateAck,
579 (u64) (unsigned long)
587 printk(KERN_WARNING_VIO "bad rc on sync signalLpEvent %d\n",
600 verify_area(VERIFY_WRITE, (long *) arg, sizeof(long));
603 put_user(read_ahead[MAJOR_NR], (long *) arg);
611 read_ahead[MAJOR_NR] = arg;
615 viodasd_revalidate(ino->i_rdev);
620 unsigned char sectors;
622 unsigned short cylinders;
624 struct hd_geometry *geo =
625 (struct hd_geometry *) arg;
629 err = verify_area(VERIFY_WRITE, geo, sizeof(*geo));
633 sectors = viodasd_devices[device_no].sectors;
637 heads = viodasd_devices[device_no].tracks;
641 cylinders = viodasd_devices[device_no].cylinders;
644 viodasd_partitions[MINOR(ino->i_rdev)].
645 nr_sects / (sectors * heads);
647 put_user(sectors, &geo->sectors);
648 put_user(heads, &geo->heads);
649 put_user(cylinders, &geo->cylinders);
651 put_user(viodasd_partitions[MINOR(ino->i_rdev)].
652 start_sect, (long *) &geo->start);
657 #define PRTIOC(x) case x: printk(KERN_WARNING_VIO "got unsupported FD ioctl " #x "\n"); \
669 PRTIOC(FDSETEMSGTRESH);
670 PRTIOC(FDSETMAXERRS);
671 PRTIOC(FDGETMAXERRS);
675 PRTIOC(FDGETDRVSTAT);
676 PRTIOC(FDPOLLDRVSTAT);
678 PRTIOC(FDGETFDCSTAT);
/* Send an actual I/O request to OS/400: translate the kernel request
 * into a vioblocklpevent (offset, length, and up to VIOMAXBLOCKDMA DMA
 * descriptors) and signal it to the hosting partition.  Returns 0 on
 * success; the response is matched up later via the correlation token
 * (which holds req->buffer). */
static int send_request(struct request *req)
	struct vioblocklpevent *bevent;
	struct scatterlist sg[VIOMAXBLOCKDMA];
	struct buffer_head *bh;
	int device_no = DEVICE_NR(req->rq_dev);

	/* Note that this SHOULD always be 512...but lets be architecturally correct */
	sect_size = hardsect_size[MAJOR_NR][device_no];

	/* Figure out the starting sector and length */
	viodasd_partitions[MINOR(req->rq_dev)].start_sect) *
	len = req->nr_sectors * sect_size;

	/* More paranoia checks: request must lie inside the partition */
	if ((req->sector + req->nr_sectors) >
	    (viodasd_partitions[MINOR(req->rq_dev)].start_sect +
	     viodasd_partitions[MINOR(req->rq_dev)].nr_sects)) {
		printk(KERN_WARNING_VIO "Invalid request offset & length\n");
		printk(KERN_WARNING_VIO "req->sector: %ld, req->nr_sectors: %ld\n",
		       req->sector, req->nr_sectors);
		printk(KERN_WARNING_VIO "RQ_DEV: %d, minor: %d\n", req->rq_dev,

	/* Pick DMA direction and subcommand; statindex 0=read, 1=write */
	if (req->cmd == READ) {
		direction = PCI_DMA_FROMDEVICE;
		viocmd = viomajorsubtype_blockio | vioblockread;
		direction = PCI_DMA_TODEVICE;
		viocmd = viomajorsubtype_blockio | vioblockwrite;
	viod_stats[device_no][statindex].tot++;

	/* Now build the scatter-gather list */
	memset(&sg, 0x00, sizeof(sg));

	/* See if this is a swap I/O (without a bh pointer) or a regular I/O */
	/* OK...this loop takes buffers from the request and adds them to the SG
	   until we're done, or until we hit a maximum. If we hit a maximum we'll
	   just finish this request later */
	while ((bh) && (sgindex < VIOMAXBLOCKDMA)) {
		sg[sgindex].address = bh->b_data;
		sg[sgindex].length = bh->b_size;
	nsg = pci_map_sg(iSeries_vio_dev, sg, sgindex, direction);
	if ((nsg == 0) || (sg_dma_len(sg) == 0)
	    || (sg_dma_address(sg) == 0xFFFFFFFF)) {
		printk(KERN_WARNING_VIO "error getting sg tces\n");
	/* No buffer-head path (e.g. swap I/O): map the flat buffer */
	viod_stats[device_no][statindex].nobh++;
	sg_dma_address(sg) = pci_map_single(iSeries_vio_dev, req->buffer,
	if (sg_dma_address(sg) == 0xFFFFFFFF) {
		printk(KERN_WARNING_VIO
		       "error allocating tce for address %p len %ld\n",
		       req->buffer, (long) len);
	sg_dma_len(sg) = len;
	/* Record how many DMA entries this request used */
	viod_stats[device_no][statindex].ntce[sgindex]++;

	/* This optimization handles a single DMA block */
	/* Send the open event to OS/400 */
	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
			HvLpEvent_Type_VirtualIo,
			viomajorsubtype_blockio
			HvLpEvent_AckInd_DoAck,
			HvLpEvent_AckType_ImmediateAck,
			(u64) (unsigned long)
	/* Multi-descriptor path: build the event in an explicit buffer */
	bevent = (struct vioblocklpevent *) vio_get_event_buffer(viomajorsubtype_blockio);
	if (bevent == NULL) {
		printk(KERN_WARNING_VIO
		       "error allocating disk event buffer\n");
	/* Now build up the actual request. Note that we store the pointer */
	/* to the request buffer in the correlation token so we can match */
	/* this response up later */
	memset(bevent, 0x00, sizeof(struct vioblocklpevent));
	bevent->event.xFlags.xValid = 1;
	bevent->event.xFlags.xFunction = HvLpEvent_Function_Int;
	bevent->event.xFlags.xAckInd = HvLpEvent_AckInd_DoAck;
	bevent->event.xFlags.xAckType =
	    HvLpEvent_AckType_ImmediateAck;
	bevent->event.xType = HvLpEvent_Type_VirtualIo;
	bevent->event.xSubtype = viocmd;
	bevent->event.xSourceLp = HvLpConfig_getLpIndex();
	bevent->event.xTargetLp = viopath_hostLp;
	/* NOTE(review): this computed size is immediately overwritten by the
	 * full-struct size below, making the first assignment dead code —
	 * verify which of the two was intended. */
	bevent->event.xSizeMinus1 =
	    offsetof(struct vioblocklpevent,
	    (sizeof(bevent->u.rwData.dmaInfo[0]) * (sgindex)) - 1;
	bevent->event.xSizeMinus1 =
	    sizeof(struct vioblocklpevent) - 1;
	bevent->event.xSourceInstanceId =
	    viopath_sourceinst(viopath_hostLp);
	bevent->event.xTargetInstanceId =
	    viopath_targetinst(viopath_hostLp);
	bevent->event.xCorrelationToken =
	    (u64) (unsigned long) req->buffer;
	bevent->mVersion = VIOVERSION;
	bevent->mDisk = device_no;
	bevent->u.rwData.mOffset = start;

	/* Copy just the dma information from the sg list into the request */
	for (sgindex = 0; sgindex < nsg; sgindex++) {
		bevent->u.rwData.dmaInfo[sgindex].mToken =
		    sg_dma_address(&sg[sgindex]);
		bevent->u.rwData.dmaInfo[sgindex].mLen =
		    sg_dma_len(&sg[sgindex]);

	/* Send the request */
	hvrc = HvCallEvent_signalLpEvent(&bevent->event);
	vio_free_event_buffer(viomajorsubtype_blockio, bevent);

	if (hvrc != HvLpEvent_Rc_Good) {
		printk(KERN_WARNING_VIO "error sending disk event to OS/400 (rcp %d)\n", (int) hvrc);

	/* If the request was successful, bump the number of outstanding */
867 /* This is the external request processing routine
869 static void do_viodasd_request(request_queue_t * q)
877 device_no = CURRENT_DEV;
878 if (device_no > numdsk) {
879 printk(KERN_WARNING_VIO "Invalid device # %d\n", CURRENT_DEV);
880 viodasd_end_request(CURRENT, 0);
884 if (viodasd_gendsk.sizes == NULL) {
885 printk(KERN_WARNING_VIO
886 "Ouch! viodasd_gendsk.sizes is NULL\n");
887 viodasd_end_request(CURRENT, 0);
891 /* If the queue is plugged, don't dequeue anything right now */
892 if ((q) && (q->plugged)) {
896 /* If we already have the maximum number of requests outstanding to OS/400
897 just bail out. We'll come back later */
898 if (numReqOut >= VIOMAXREQ)
901 /* get the current request, then dequeue it from the queue */
903 blkdev_dequeue_request(req);
905 /* Try sending the request */
906 if (send_request(req) == 0) {
907 list_add_tail(&req->queue, &reqlist);
909 viodasd_end_request(req, 0);
/* Check for changed disks: send a vioblockcheck event to OS/400 and wait
 * (on a semaphore posted by the interrupt handler) for the answer, which
 * arrives in we.changed. */
static int viodasd_check_change(kdev_t dev)
	struct viodasd_waitevent we;
	int device_no = DEVICE_NR(dev);
	/* This semaphore is raised in the interrupt handler */
	DECLARE_MUTEX_LOCKED(Semaphore);

	/* Check that we are dealing with a valid hosting partition */
	if (viopath_hostLp == HvLpIndexInvalid) {
		printk(KERN_WARNING_VIO "Invalid hosting partition\n");

	/* Send the open event to OS/400; &we rides in the correlation token */
	hvrc = HvCallEvent_signalLpEventFast(viopath_hostLp,
			HvLpEvent_Type_VirtualIo,
			viomajorsubtype_blockio |
			HvLpEvent_AckInd_DoAck,
			HvLpEvent_AckType_ImmediateAck,
			(u64) (unsigned long) &we,
			((u64) device_no << 48), 0, 0,
		printk(KERN_WARNING_VIO "bad rc on signalLpEvent %d\n", (int) hvrc);

	/* Wait for the interrupt handler to get the response */

	/* Check the return code. If bad, assume no change */
	printk(KERN_WARNING_VIO "bad rc on check_change. Assuming no change\n");
/* Our block device operations table (2.4-era gcc "label:" initializers).
 * NOTE(review): the open: initializer line was lost in extraction. */
static struct block_device_operations viodasd_fops = {
	release:viodasd_release,
	check_media_change:viodasd_check_change,
	revalidate:viodasd_revalidate
/* Our gendisk structure; positional initializers, several fields filled
 * in at init time (major, major_name, part, sizes, nr_real). */
struct gendisk viodasd_gendsk = {
	0,			/* major - fill in later */
	1 << PARTITION_SHIFT,	/* minors per disk */
	NULL,			/* partition array - fill in later */
	NULL,			/* block sizes - fill in later */
	NULL,			/* "real device" pointer */
	&viodasd_fops		/* operations */
991 /* This routine handles incoming block LP events
993 static void vioHandleBlockEvent(struct HvLpEvent *event)
995 struct scatterlist sg[VIOMAXBLOCKDMA];
996 struct vioblocklpevent *bevent = (struct vioblocklpevent *) event;
1000 struct viodasd_waitevent *pwe;
1001 unsigned long flags;
1004 if (event == NULL) {
1005 /* Notification that a partition went away! */
1008 // First, we should NEVER get an int here...only acks
1009 if (event->xFlags.xFunction == HvLpEvent_Function_Int) {
1010 printk(KERN_WARNING_VIO
1011 "Yikes! got an int in viodasd event handler!\n");
1012 if (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck) {
1013 event->xRc = HvLpEvent_Rc_InvalidSubtype;
1014 HvCallEvent_ackLpEvent(event);
1018 switch (event->xSubtype & VIOMINOR_SUBTYPE_MASK) {
1020 /* Handle a response to an open request. We get all the disk information
1021 * in the response, so update it. The correlation token contains a pointer to
1022 * a waitevent structure that has a semaphore in it. update the return code
1023 * in the waitevent structure and post the semaphore to wake up the guy who
1024 * sent the request */
1027 (struct viodasd_waitevent *) (unsigned long) event->
1029 pwe->rc = event->xRc;
1030 if (event->xRc == HvLpEvent_Rc_Good) {
1031 viodasd_devices[bevent->mDisk].size =
1032 bevent->u.openData.mDiskLen;
1033 viodasd_devices[bevent->mDisk].cylinders =
1034 bevent->u.openData.mCylinders;
1035 viodasd_devices[bevent->mDisk].tracks =
1036 bevent->u.openData.mTracks;
1037 viodasd_devices[bevent->mDisk].sectors =
1038 bevent->u.openData.mSectors;
1039 viodasd_devices[bevent->mDisk].bytesPerSector =
1040 bevent->u.openData.mBytesPerSector;
1041 viodasd_devices[bevent->mDisk].readOnly =
1042 bevent->mFlags & vioblockflags_ro;
1044 if (viodasd_max_disk !=
1045 bevent->u.openData.mMaxDisks) {
1047 bevent->u.openData.mMaxDisks;
1056 /* For read and write requests, decrement the number of outstanding requests,
1057 * Free the DMA buffers we allocated, and find the matching request by
1058 * using the buffer pointer we stored in the correlation token.
1063 /* Free the DMA buffers */
1066 memset(sg, 0x00, sizeof(sg));
1068 maxsg = (((bevent->event.xSizeMinus1 + 1) -
1069 offsetof(struct vioblocklpevent,
1070 u.rwData.dmaInfo)) /
1071 sizeof(bevent->u.rwData.dmaInfo[0]));
1074 while ((i < maxsg) &&
1075 (bevent->u.rwData.dmaInfo[i].mLen > 0) &&
1076 (i < VIOMAXBLOCKDMA)) {
1077 sg_dma_address(&sg[i]) =
1078 bevent->u.rwData.dmaInfo[i].mToken;
1079 sg_dma_len(&sg[i]) =
1080 bevent->u.rwData.dmaInfo[i].mLen;
1081 nsect += bevent->u.rwData.dmaInfo[i].mLen;
1085 pci_unmap_sg(iSeries_vio_dev,
1088 (bevent->event.xSubtype ==
1089 (viomajorsubtype_blockio | vioblockread)) ?
1090 PCI_DMA_FROMDEVICE : PCI_DMA_TODEVICE);
1093 /* Since this is running in interrupt mode, we need to make sure we're not
1094 * stepping on any global I/O operations
1096 spin_lock_irqsave(&io_request_lock, flags);
1098 /* Decrement the number of outstanding requests */
1101 /* Now find the matching request in OUR list (remember we moved the request
1102 * from the global list to our list when we got it)
1104 req = blkdev_entry_to_request(reqlist.next);
1105 while ((&req->queue != &reqlist) &&
1106 ((u64) (unsigned long) req->buffer !=
1107 bevent->event.xCorrelationToken))
1108 req = blkdev_entry_to_request(req->queue.next);
1110 if (&req->queue == &reqlist) {
1111 printk(KERN_WARNING_VIO
1112 "Yikes! Could not find matching buffer %p in reqlist\n",
1117 /* Remove the request from our list */
1118 list_del(&req->queue);
1120 /* Calculate the number of sectors from the length in bytes */
1123 if (event->xRc != HvLpEvent_Rc_Good) {
1124 printk(KERN_WARNING_VIO "read/wrute error %d:%d\n", event->xRc,
1125 bevent->mSubTypeRc);
1126 viodasd_end_request(req, 0);
1128 if (nsect != req->current_nr_sectors) {
1129 printk(KERN_WARNING_VIO
1130 "Yikes...non bh i/o # sect doesn't match!!!\n");
1132 viodasd_end_request(req, 1);
1135 while ((nsect > 0) && (req->bh)) {
1136 nsect -= req->current_nr_sectors;
1137 viodasd_end_request(req, 1);
1140 printk(KERN_WARNING_VIO
1141 "Yikes...sectors left over on a request!!!\n");
1144 /* If the original request could not handle all the buffers, re-send
1148 if (send_request(req) == 0) {
1149 list_add_tail(&req->queue,
1152 viodasd_end_request(req, 0);
1158 /* Finally, send more requests */
1159 do_viodasd_request(NULL);
1161 spin_unlock_irqrestore(&io_request_lock, flags);
1165 up((void *) (unsigned long) event->xCorrelationToken);
1170 (struct viodasd_waitevent *) (unsigned long) event->
1172 pwe->rc = event->xRc;
1173 pwe->changed = bevent->u.check.changed;
1178 printk(KERN_WARNING_VIO "invalid subtype!");
1179 if (event->xFlags.xAckInd == HvLpEvent_AckInd_DoAck) {
1180 event->xRc = HvLpEvent_Rc_InvalidSubtype;
1181 HvCallEvent_ackLpEvent(event);
/* This routine tries to clean up anything we allocated/registered:
 * sync every disk, free the per-major tables, and unregister the
 * block device. */
static void cleanup2(void)
/* Free a pointer and NULL it (the "if (x)" guard is redundant for kfree
 * but harmless) */
#define CLEANIT(x) if (x) {kfree(x); x=NULL;}
	for (i = 0; i < numdsk; i++)
		fsync_dev(MKDEV(MAJOR_NR, i));
	read_ahead[MAJOR_NR] = 0;
	CLEANIT(viodasd_devices);
	CLEANIT(blk_size[MAJOR_NR]);
	CLEANIT(blksize_size[MAJOR_NR]);
	CLEANIT(hardsect_size[MAJOR_NR]);
	CLEANIT(max_sectors[MAJOR_NR]);
	CLEANIT(viodasd_gendsk.part);
	/* NOTE(review): redundant — CLEANIT already NULLed these two */
	blk_size[MAJOR_NR] = NULL;
	blksize_size[MAJOR_NR] = NULL;
	devfs_unregister_blkdev(MAJOR_NR, VIOD_DEVICE_NAME);
/* Initialize the whole device driver. Handle module and non-module
 * cases: open the path to the hosting partition, register the block
 * device and queue, allocate all per-major tables, probe each disk by
 * opening it, and register its partitions. */
__init int viodasd_init(void)
	int *viodasd_blksizes;
	int numpart = numdsk << PARTITION_SHIFT;	/* total minors */

	/* Try to open to our host lp */
	if (viopath_hostLp == HvLpIndexInvalid) {
	if (viopath_hostLp == HvLpIndexInvalid) {
		printk(KERN_WARNING_VIO "%s: invalid hosting partition\n",

	/*
	 * Do the devfs_register. This works even if devfs is not
	 * configured.
	 */
	if (devfs_register_blkdev
	    (MAJOR_NR, VIOD_DEVICE_NAME, &viodasd_fops)) {
		printk(KERN_WARNING_VIO "%s: unable to get major number %d\n",
		       VIOD_DEVICE_NAME, MAJOR_NR);

	printk(KERN_INFO_VIO
	       "%s: Disk vers %s, major %d, max disks %d, hosting partition %d\n",
	       VIOD_DEVICE_NAME, VIODASD_VERS, MAJOR_NR, numdsk,

	/* No root device yet: claim the first partition of disk 0 */
	if (ROOT_DEV == NODEV) {
		ROOT_DEV = MKDEV(MAJOR_NR,1);
		printk(KERN_INFO_VIO
		       "Claiming root file system as first partition of first virtual disk");

	/* Do the blk device initialization */
	blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST);
	read_ahead[MAJOR_NR] = 8;	/* 8 sector (4kB) read ahead */

	/* Start filling in gendsk structure */
	viodasd_gendsk.major = MAJOR_NR;
	viodasd_gendsk.major_name = VIOD_GENHD_NAME;
	viodasd_gendsk.nr_real = numdsk;
	add_gendisk(&viodasd_gendsk);

	/* Actually open the path to the hosting partition */
	rc = viopath_open(viopath_hostLp, viomajorsubtype_blockio, VIOMAXREQ+2);
		printk(KERN_WARNING_VIO "error opening path to host partition %d\n",
	blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
	printk("%s: opened path to hosting partition %d\n",
	       VIOD_DEVICE_NAME, viopath_hostLp);

	/*
	 * Initialize our request handler
	 */
	vio_setHandler(viomajorsubtype_blockio, vioHandleBlockEvent);

	/*
	 * Now fill in all the device driver info
	 */
	    kmalloc(numdsk * sizeof(struct viodasd_device), GFP_KERNEL);
	if (!viodasd_devices) {
	memset(viodasd_devices, 0x00,
	       numdsk * sizeof(struct viodasd_device));

	viodasd_sizes = kmalloc(numpart * sizeof(int), GFP_KERNEL);
	if (!viodasd_sizes) {
	memset(viodasd_sizes, 0x00, numpart * sizeof(int));
	blk_size[MAJOR_NR] = viodasd_gendsk.sizes = viodasd_sizes;

	viodasd_partitions =
	    kmalloc(numpart * sizeof(struct hd_struct), GFP_KERNEL);
	if (!viodasd_partitions) {
	memset(viodasd_partitions, 0x00,
	       numpart * sizeof(struct hd_struct));
	viodasd_gendsk.part = viodasd_partitions;

	viodasd_blksizes = kmalloc(numpart * sizeof(int), GFP_KERNEL);
	if (!viodasd_blksizes) {
	for (i = 0; i < numpart; i++)
		viodasd_blksizes[i] = blksize;
	blksize_size[MAJOR_NR] = viodasd_blksizes;

	viodasd_sectsizes = kmalloc(numpart * sizeof(int), GFP_KERNEL);
	if (!viodasd_sectsizes) {
	for (i = 0; i < numpart; i++)
		viodasd_sectsizes[i] = 0;
	hardsect_size[MAJOR_NR] = viodasd_sectsizes;

	viodasd_maxsectors = kmalloc(numpart * sizeof(int), GFP_KERNEL);
	if (!viodasd_maxsectors) {
	for (i = 0; i < numpart; i++)
		viodasd_maxsectors[i] = VIODASD_MAXSECTORS;
	max_sectors[MAJOR_NR] = viodasd_maxsectors;

	/* Probe each disk by opening it; note <= because internal_open
	 * shrinks viodasd_max_disk as it learns the real disk count */
	viodasd_max_disk = numdsk;
	for (i = 0; i <= viodasd_max_disk; i++) {
		// Note that internal_open has two side effects:
		// a) it updates the size of the disk
		// b) it updates viodasd_max_disk
		if (internal_open(i) == 0) {
			printk(KERN_INFO_VIO
			       "%s: Currently %d disks connected\n",
			       (int) viodasd_max_disk + 1);
			register_disk(&viodasd_gendsk,
				      i << PARTITION_SHIFT),
				      1 << PARTITION_SHIFT, &viodasd_fops,
				      viodasd_partitions[i <<
			printk(KERN_INFO_VIO
			       "%s: Disk %2.2d size %dM, sectors %d, heads %d, cylinders %d, sectsize %d\n",
			       VIOD_DEVICE_NAME, i,
			       (int) (viodasd_devices[i].size /
			       (int) viodasd_devices[i].sectors,
			       (int) viodasd_devices[i].tracks,
			       (int) viodasd_devices[i].cylinders,
			       (int) viodasd_sectsizes[i <<
			/* Log each non-empty partition of this disk */
			for (j = (i << PARTITION_SHIFT) + 1;
			     j < ((i + 1) << PARTITION_SHIFT); j++) {
				if (viodasd_gendsk.part[j].nr_sects)
					printk(KERN_INFO_VIO
					       "%s: Disk %2.2d partition %2.2d start sector %ld, # sector %ld\n",
					       VIOD_DEVICE_NAME, i,
					       j - (i << PARTITION_SHIFT),
					       viodasd_gendsk.part[j].
					       viodasd_gendsk.part[j].
			/* Probe open done; drop the reference again */
			internal_release(i);

	/*
	 * Create the proc entry
	 */
	iSeries_proc_callback(&viodasd_proc_init);
/* Module exit: sync every minor, tear down the request queue and the
 * proc entry (remaining cleanup is in cleanup2(), elided here). */
void viodasd_exit(void)
	for (i = 0; i < numdsk << PARTITION_SHIFT; i++)
		fsync_dev(MKDEV(MAJOR_NR, i));
	blk_cleanup_queue(BLK_DEFAULT_QUEUE(MAJOR_NR));
	iSeries_proc_callback(&viodasd_proc_delete);
1413 module_init(viodasd_init);
1414 module_exit(viodasd_exit);