1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Copyright (C) 2000 Stelias Computing, Inc.
5 * Copyright (C) 2000 Red Hat, Inc.
6 * Copyright (C) 2000 TurboLinux, Inc.
7 * Copyright (C) 2000 Los Alamos National Laboratory.
8 * Copyright (C) 2000, 2001 Tacit Networks, Inc.
9 * Copyright (C) 2000 Peter J. Braam
10 * Copyright (C) 2001 Mountain View Data, Inc.
11 * Copyright (C) 2001 Cluster File Systems, Inc.
13 * This file is part of InterMezzo, http://www.inter-mezzo.org.
15 * InterMezzo is free software; you can redistribute it and/or
16 * modify it under the terms of version 2 of the GNU General Public
17 * License as published by the Free Software Foundation.
19 * InterMezzo is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with InterMezzo; if not, write to the Free Software
26 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
28 * This file manages file I/O
34 #include <asm/bitops.h>
35 #include <asm/uaccess.h>
36 #include <asm/system.h>
38 #include <linux/errno.h>
40 #include <linux/ext2_fs.h>
41 #include <linux/slab.h>
42 #include <linux/vmalloc.h>
43 #include <linux/sched.h>
44 #include <linux/stat.h>
45 #include <linux/string.h>
46 #include <linux/locks.h>
47 #include <linux/blkdev.h>
48 #include <linux/init.h>
49 #include <linux/smp_lock.h>
50 #define __NO_VERSION__
51 #include <linux/module.h>
53 #include <linux/intermezzo_fs.h>
54 #include <linux/intermezzo_psdev.h>
55 #include <linux/fsfilter.h>
57 * these are initialized in super.c
/* Prototype only; the definition is not part of this view.  The function is
 * installed further down as the .permission hook of presto_file_iops. */
59 extern int presto_permission(struct inode *inode, int mask);
/*
 * presto_open_upcall - send an open upcall for dentry @de on device @minor.
 *
 * Visible steps: a PAGE_SIZE scratch buffer is allocated; presto_path() is
 * called with @de, the fileset's fset_dentry and that buffer, and
 * MYPATHLEN() derives the length passed along with the path; a
 * lento_vfs_context is zeroed and, when the dentry data holds a remote_ino
 * greater than zero, loaded with the remote inode number and generation;
 * izo_upc_open() is then called and the scratch buffer is freed.
 *
 * NOTE(review): the embedded line numbers skip (62 -> 66, 73 -> 76,
 * 87 -> 89, ...), so the braces, the short declarations (rc, path, buffer,
 * pathlen), the allocation-failure test and the return statements are not
 * visible here.  The function presumably returns rc -- confirm against the
 * complete file.
 */
62 static int presto_open_upcall(int minor, struct dentry *de)
66 struct presto_file_set *fset;
68 struct lento_vfs_context info;
69 struct presto_dentry_data *dd = presto_d2d(de);
/* scratch buffer for the path; released with PRESTO_FREE at the end */
71 PRESTO_ALLOC(buffer, PAGE_SIZE);
73 CERROR("PRESTO: out of memory!\n");
76 fset = presto_fset(de);
77 path = presto_path(de, fset->fset_dentry, buffer, PAGE_SIZE);
78 pathlen = MYPATHLEN(buffer, path);
80 CDEBUG(D_FILE, "de %p, dd %p\n", de, dd);
/* no remote inode number recorded yet: request a file id first */
81 if (dd->remote_ino == 0) {
82 rc = presto_get_fileid(minor, fset, de);
/* start from an all-zero context so unset fields are 0 */
84 memset (&info, 0, sizeof(info));
/* a remote inode number is known: pass ino and generation to the upcall */
85 if (dd->remote_ino > 0) {
86 info.remote_ino = dd->remote_ino;
87 info.remote_generation = dd->remote_generation;
/* NOTE(review): presumably the else-branch of the test above (the
 * "} else {" line is not visible); the message says the open is then
 * fetched by name. */
89 CERROR("get_fileid failed %d, ino: %Lx, fetching by name\n", rc,
92 rc = izo_upc_open(minor, pathlen, path, fset->fset_name, &info);
93 PRESTO_FREE(buffer, PAGE_SIZE);
/*
 * open_check_dod - decide whether opening @file needs "DOD" handling
 * (cf. the FSET_DATA_ON_DEMAND fileset flag tested below).
 *
 * Each visible test logs a reason for NOT doing DOD:
 *   - the cache's minor is a lento minor (ISLENTO);
 *   - the dentry is an open-by-inode lookup (izo_dentry_is_ilookup);
 *   - the fileset does not have FSET_DATA_ON_DEMAND set;
 *   - the open carries O_TRUNC;
 *   - the dentry has PRESTO_DONT_JOURNAL set;
 *   - the dentry has PRESTO_DATA set;
 *   - the filter's tr_all_data() reports true for the inode.
 *
 * NOTE(review): none of the return statements are visible in this view.
 * presto_file_open() issues presto_open_upcall() when this function returns
 * non-zero, so the "not doing DOD" branches presumably return 0 and the
 * fall-through returns non-zero -- confirm against the complete file.
 */
97 static inline int open_check_dod(struct file *file,
98 struct presto_file_set *fset)
100 int gen, is_iopen = 0, minor;
101 struct presto_cache *cache = fset->fset_cache;
104 minor = presto_c2m(cache);
106 if ( ISLENTO(minor) ) {
107 CDEBUG(D_CACHE, "is lento, not doing DOD.\n");
111 /* Files are only ever opened by inode during backfetches, when by
112 * definition we have the authoritative copy of the data. No DOD. */
113 is_iopen = izo_dentry_is_ilookup(file->f_dentry, &inum, &gen);
116 CDEBUG(D_CACHE, "doing iopen, not doing DOD.\n");
120 if (!(fset->fset_flags & FSET_DATA_ON_DEMAND)) {
121 CDEBUG(D_CACHE, "fileset not on demand.\n");
125 if (file->f_flags & O_TRUNC) {
126 CDEBUG(D_CACHE, "fileset dod: O_TRUNC.\n");
130 if (presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL)) {
131 CDEBUG(D_CACHE, "file under .intermezzo, not doing DOD\n");
135 if (presto_chk(file->f_dentry, PRESTO_DATA)) {
136 CDEBUG(D_CACHE, "PRESTO_DATA is set, not doing DOD.\n");
/* ask the underlying filesystem's transaction ops whether all data is
 * present; the log message calls that case "not sparse" */
140 if (cache->cache_filter->o_trops->tr_all_data(file->f_dentry->d_inode)) {
141 CDEBUG(D_CACHE, "file not sparse, not doing DOD.\n");
/*
 * presto_file_open - open hook, installed as presto_file_fops.open.
 *
 * Visible steps:
 *   - presto_prep() resolves the cache and fileset for the dentry;
 *   - for a non-lento minor and an O_RDWR/O_WRONLY open, a permit is taken
 *     with presto_get_permit() and given back with presto_put_permit();
 *   - when open_check_dod() returns non-zero, PRESTO_ATTR | PRESTO_DATA are
 *     set on the dentry and presto_open_upcall() is issued;
 *   - an O_TRUNC open also gets PRESTO_ATTR | PRESTO_DATA set;
 *   - the open method obtained through filter_c2cffops() is called;
 *   - a presto_file_data is allocated, filled from the current task
 *     (fsuid, fsgid, groups) and the inode (mode, uid, gid, version) and
 *     stored in file->private_data; another path stores NULL instead.
 *
 * NOTE(review): the embedded line numbers skip throughout, so the error
 * paths, the conditions guarding several branches and the return statements
 * are not visible here.
 */
148 static int presto_file_open(struct inode *inode, struct file *file)
151 struct file_operations *fops;
152 struct presto_cache *cache;
153 struct presto_file_set *fset;
154 struct presto_file_data *fdata;
/* non-zero when the open requests write access; same flag test as the
 * permit check further down */
155 int writable = (file->f_flags & (O_RDWR | O_WRONLY));
160 if (presto_prep(file->f_dentry, &cache, &fset) < 0) {
165 minor = presto_c2m(cache);
167 CDEBUG(D_CACHE, "DATA_OK: %d, ino: %ld, islento: %d\n",
168 presto_chk(file->f_dentry, PRESTO_DATA), inode->i_ino,
/* write opens by anything other than lento must be able to get a permit */
171 if ( !ISLENTO(minor) && (file->f_flags & O_RDWR ||
172 file->f_flags & O_WRONLY)) {
173 CDEBUG(D_CACHE, "calling presto_get_permit\n");
174 if ( presto_get_permit(inode) < 0 ) {
178 presto_put_permit(inode);
181 if (open_check_dod(file, fset)) {
182 CDEBUG(D_CACHE, "presto_open_upcall\n");
183 CDEBUG(D_CACHE, "dentry: %p setting DATA, ATTR\n", file->f_dentry);
/* the flags are set before the upcall is issued */
184 presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
185 rc = presto_open_upcall(minor, file->f_dentry);
188 CERROR("%s: returning error %d\n", __FUNCTION__, rc);
194 /* file was truncated upon open: do not refetch */
195 if (file->f_flags & O_TRUNC) {
196 CDEBUG(D_CACHE, "setting DATA, ATTR\n");
197 presto_set(file->f_dentry, PRESTO_ATTR | PRESTO_DATA);
/* hand the open to the file operations supplied by the cache filter */
200 fops = filter_c2cffops(cache->cache_filter);
202 CDEBUG(D_CACHE, "calling fs open\n");
203 rc = fops->open(inode, file);
/* NOTE(review): the condition guarding this allocation and its failure
 * check are not visible; presumably it is tied to `writable` -- confirm. */
212 PRESTO_ALLOC(fdata, sizeof(*fdata));
217 /* LOCK: XXX check that the kernel lock protects this alloc */
218 fdata->fd_do_lml = 0;
219 fdata->fd_bytes_written = 0;
/* snapshot the opener's credentials and the inode's ownership and mode */
220 fdata->fd_fsuid = current->fsuid;
221 fdata->fd_fsgid = current->fsgid;
222 fdata->fd_mode = file->f_dentry->d_inode->i_mode;
223 fdata->fd_uid = file->f_dentry->d_inode->i_uid;
224 fdata->fd_gid = file->f_dentry->d_inode->i_gid;
225 fdata->fd_ngroups = current->ngroups;
/* NOTE(review): assumes fd_groups has room for current->ngroups entries;
 * the array's declaration is not visible here -- confirm. */
226 for (i=0 ; i < current->ngroups ; i++)
227 fdata->fd_groups[i] = current->groups[i];
/* NOTE(review): the next two assignments are presumably the two arms of an
 * if/else whose condition line is not visible. */
229 fdata->fd_info.flags = LENTO_FL_KML;
231 /* this is for the case of DOD,
232 reint_close will adjust flags if needed */
233 fdata->fd_info.flags = 0;
236 presto_getversion(&fdata->fd_version, inode);
237 file->private_data = fdata;
/* alternative path: no per-open data is attached to the file */
239 file->private_data = NULL;
/*
 * presto_adjust_lml - overwrite the lento_vfs_context stored in @file's
 * per-open data (file->private_data) with a copy of *@info.
 *
 * NOTE(review): lines between the declaration and the memcpy, and the
 * return statement, are not visible in this view (embedded numbering jumps
 * 249 -> 256); any check on fdata and the return value cannot be confirmed
 * from here.
 */
246 int presto_adjust_lml(struct file *file, struct lento_vfs_context *info)
248 struct presto_file_data *fdata =
249 (struct presto_file_data *) file->private_data;
256 memcpy(&fdata->fd_info, info, sizeof(*info));
/*
 * presto_file_release - release hook, installed as presto_file_fops.release.
 *
 * Visible steps: presto_prep() resolves the cache and fileset; the release
 * method obtained through filter_c2cffops() is called when present; if the
 * per-open data exists and has fd_do_lml set, a permit is taken, the
 * context's updated_time is set from the inode's i_mtime, presto_do_close()
 * is called and the permit is given back; finally the per-open data is
 * freed and file->private_data is cleared.
 *
 * NOTE(review): error handling after presto_prep()/presto_get_permit() and
 * the return statement are not visible in this view.
 */
262 static int presto_file_release(struct inode *inode, struct file *file)
265 struct file_operations *fops;
266 struct presto_cache *cache;
267 struct presto_file_set *fset;
268 struct presto_file_data *fdata =
269 (struct presto_file_data *)file->private_data;
272 rc = presto_prep(file->f_dentry, &cache, &fset);
278 fops = filter_c2cffops(cache->cache_filter);
/* the underlying release method is optional */
279 if (fops && fops->release)
280 rc = fops->release(inode, file);
282 CDEBUG(D_CACHE, "islento = %d (minor %d), rc %d, data %p\n",
283 ISLENTO(cache->cache_psdev->uc_minor),
284 cache->cache_psdev->uc_minor, rc, fdata);
286 /* this file was modified: ignore close errors, write KML */
287 if (fdata && fdata->fd_do_lml) {
288 /* XXX: remove when lento gets file granularity cd */
289 if ( presto_get_permit(inode) < 0 ) {
/* record the inode's current mtime in the context before closing; rc from
 * the underlying release is overwritten by presto_do_close() */
294 fdata->fd_info.updated_time = file->f_dentry->d_inode->i_mtime;
295 rc = presto_do_close(fset, file);
296 presto_put_permit(inode);
/* NOTE(review): whether this free is guarded by a NULL test on fdata is not
 * visible (embedded numbering jumps 296 -> 300). */
300 PRESTO_FREE(fdata, sizeof(*fdata));
301 file->private_data = NULL;
/*
 * presto_apply_write_policy - account @res freshly written bytes against
 * @file and journal a close once the fileset's threshold is reached.
 *
 * Visible behaviour: only when the fileset has FSET_JCLOSE_ON_WRITE set and
 * the cache's minor is not a lento minor, @res is added to
 * fdata->fd_bytes_written; once that counter reaches fset->fset_file_maxio
 * the inode version is sampled, a permit is taken, presto_journal_close()
 * is called, the permit is given back and the counter is reset to 0.
 *
 * fdata is dereferenced without a NULL test here; the visible caller
 * (presto_file_write) only calls this when fdata is non-NULL.
 *
 * NOTE(review): several lines are absent from this view (the embedded
 * numbering skips), including the terminators of the multi-line comments
 * below, the declarations of rec/error and the remaining arguments of
 * presto_journal_close().
 */
308 static void presto_apply_write_policy(struct file *file,
309 struct presto_file_set *fset, loff_t res)
311 struct presto_file_data *fdata =
312 (struct presto_file_data *)file->private_data;
313 struct presto_cache *cache = fset->fset_cache;
314 struct presto_version new_file_ver;
318 /* Here we do a journal close after a fixed or a specified
319 amount of KBytes, currently a global parameter set with
320 sysctl. If files are open for a long time, this gives added
321 protection. (XXX todo: per cache, add ioctl, handle
322 journaling in a thread, add more options etc.)
325 if ((fset->fset_flags & FSET_JCLOSE_ON_WRITE) &&
326 (!ISLENTO(cache->cache_psdev->uc_minor))) {
327 fdata->fd_bytes_written += res;
329 if (fdata->fd_bytes_written >= fset->fset_file_maxio) {
330 presto_getversion(&new_file_ver,
331 file->f_dentry->d_inode);
332 /* This is really heavy weight and should be fixed
333 ASAP. At most we should be recording the number
334 of bytes written and not locking the kernel,
335 wait for permits, etc, on the write path. SHP
338 if ( presto_get_permit(file->f_dentry->d_inode) < 0 ) {
340 /* we must be disconnected, not to worry */
344 error = presto_journal_close(&rec, fset, file,
348 presto_put_permit(file->f_dentry->d_inode);
351 CERROR("presto_close: cannot journal close\n");
352 /* XXX these errors are really bad */
/* start counting towards the next journal close from zero */
356 fdata->fd_bytes_written = 0;
/*
 * presto_file_write - write hook, installed as presto_file_fops.write.
 *
 * Visible steps:
 *   - presto_prep() resolves the cache and fileset;
 *   - space is reserved with presto_reserve_space(): 2 * PRESTO_REQHIGH
 *     plus (blocks + 3) filesystem blocks, where blocks is
 *     (size >> s_blocksize_bits) + 1;
 *   - with fset_lml.fd_lock read-held, do_lml_here is computed (non-empty
 *     write, fd_do_lml still 0, dentry not PRESTO_DONT_JOURNAL) and
 *     fd_do_lml is set to 1;
 *   - a transaction is started, the inode version sampled, an LML close
 *     record written with presto_write_lml_close(), its offset stored in
 *     fd_lml_offset, and the transaction committed;
 *   - the write method obtained through filter_c2cffops() performs the I/O;
 *   - on a positive result with non-NULL fdata,
 *     presto_apply_write_policy() is called;
 *   - the reserved space is released.
 *
 * NOTE(review): many lines are absent from this view (the embedded
 * numbering skips), including the declarations of error, res, res_size,
 * rec, handle and do_lml_here, the error checks, the conditions guarding
 * the LML block, and the return statement.
 */
361 static ssize_t presto_file_write(struct file *file, const char *buf,
362 size_t size, loff_t *off)
366 struct presto_cache *cache;
367 struct presto_file_set *fset;
368 struct file_operations *fops;
372 unsigned long blocks;
373 struct presto_file_data *fdata;
376 error = presto_prep(file->f_dentry, &cache, &fset);
/* number of filesystem blocks the request may touch, rounded up by one */
382 blocks = (size >> file->f_dentry->d_inode->i_sb->s_blocksize_bits) + 1;
383 /* XXX 3 is for ext2 indirect blocks ... */
384 res_size = 2 * PRESTO_REQHIGH + ((blocks+3)
385 << file->f_dentry->d_inode->i_sb->s_blocksize_bits);
387 error = presto_reserve_space(fset->fset_cache, res_size);
/* NOTE(review): size is a size_t but is printed with %d here -- confirm
 * the format is acceptable for the targets this builds on. */
388 CDEBUG(D_INODE, "Reserved %Ld for %d\n", res_size, size);
394 CDEBUG(D_INODE, "islento %d, minor: %d\n",
395 ISLENTO(cache->cache_psdev->uc_minor),
396 cache->cache_psdev->uc_minor);
399 * XXX this lock should become a per inode lock when
400 * Vinny's changes are in; we could just use i_sem.
402 read_lock(&fset->fset_lml.fd_lock);
/* NOTE(review): fdata is dereferenced on the next statement without a NULL
 * test, yet it is NULL-tested before presto_apply_write_policy() below, and
 * presto_file_open() has a path that stores NULL in private_data -- confirm
 * that path cannot reach this write hook. */
403 fdata = (struct presto_file_data *)file->private_data;
404 do_lml_here = size && (fdata->fd_do_lml == 0) &&
405 !presto_chk(file->f_dentry, PRESTO_DONT_JOURNAL);
/* NOTE(review): fd_do_lml is written while only the read side of fd_lock is
 * held; the condition guarding this assignment is not visible. */
408 fdata->fd_do_lml = 1;
409 read_unlock(&fset->fset_lml.fd_lock);
412 There might be a bug here. We need to make
413 absolutely sure that the ext3_file_write commits
414 after our transaction that writes the LML record.
415 Nesting the file write helps if new blocks are allocated.
419 struct presto_version file_version;
420 /* handle different space reqs from file system below! */
421 handle = presto_trans_start(fset, file->f_dentry->d_inode,
423 if ( IS_ERR(handle) ) {
/* the reservation made above is given back on this error path */
424 presto_release_space(fset->fset_cache, res_size);
425 CERROR("presto_write: no space for transaction\n");
429 presto_getversion(&file_version, file->f_dentry->d_inode);
430 res = presto_write_lml_close(&rec, fset, file,
431 fdata->fd_info.remote_ino,
432 fdata->fd_info.remote_generation,
433 &fdata->fd_info.remote_version,
/* remember the offset of the LML record that was just written */
435 fdata->fd_lml_offset = rec.offset;
437 CERROR("intermezzo: PANIC failed to write LML\n");
442 presto_trans_commit(fset, handle);
/* hand the actual I/O to the file operations supplied by the cache filter */
445 fops = filter_c2cffops(cache->cache_filter);
446 res = fops->write(file, buf, size, off);
448 CDEBUG(D_FILE, "file write returns short write: size %d, res %d\n", size, res);
/* only bytes that were actually written count towards the write policy */
451 if ( (res > 0) && fdata )
452 presto_apply_write_policy(file, fset, res);
455 presto_release_space(fset->fset_cache, res_size);
/*
 * File operations table for InterMezzo regular files: write, open and
 * release are the hooks defined above in this file; presto_ioctl is not
 * defined in this view.
 *
 * NOTE(review): the closing "};" of the initializer is not visible in this
 * view.
 */
459 struct file_operations presto_file_fops = {
460 .write = presto_file_write,
461 .open = presto_file_open,
462 .release = presto_file_release,
463 .ioctl = presto_ioctl
/*
 * Inode operations table for InterMezzo regular files: permission and
 * setattr hooks, plus set_ext_attr when CONFIG_FS_EXT_ATTR is defined.
 * presto_permission is declared extern near the top of this file;
 * presto_setattr and presto_set_ext_attr are not defined in this view.
 *
 * NOTE(review): the matching "#endif" and the closing "};" are not visible
 * in this view.
 */
466 struct inode_operations presto_file_iops = {
467 .permission = presto_permission,
468 .setattr = presto_setattr,
469 #ifdef CONFIG_FS_EXT_ATTR
470 .set_ext_attr = presto_set_ext_attr,
/*
 * izo_purge_file - look up @file under the cache mount point of @fset and
 * run two izo_do_truncate() calls on it inside a KML_OPCODE_TRUNC
 * transaction.
 *
 * Visible steps: the path "<cache_mtpt>/<file>" is built in a freshly
 * allocated buffer and resolved with izo_lookup_file(); if the inode's
 * i_atime is later than CURRENT_TIME - 5 the existing comment treats the
 * race as lost; otherwise a transaction is started, the current i_size is
 * saved, izo_do_truncate() is called with (0, oldsize) and then with
 * (oldsize, 0); the transaction is committed if the handle is valid and
 * the path buffer is freed.
 *
 * NOTE(review): the assignment of `dentry`, the error checks, the cleanup
 * labels and the return statement are not visible in this view (the
 * embedded numbering skips).  The two truncate calls presumably shrink the
 * file to zero and then restore its previous size -- the argument order of
 * izo_do_truncate() should be confirmed.
 */
474 /* FIXME: I bet we want to add a lock here and in presto_file_open. */
475 int izo_purge_file(struct presto_file_set *fset, char *file)
481 struct dentry *dentry;
485 /* FIXME: not mtpt it's gone */
/* len counts both strings plus the '/' separator; the allocation adds one
 * more byte for the terminating NUL written by sprintf below */
486 len = strlen(fset->fset_cache->cache_mtpt) + strlen(file) + 1;
487 PRESTO_ALLOC(path, len + 1);
491 sprintf(path, "%s/%s", fset->fset_cache->cache_mtpt, file);
492 rc = izo_lookup_file(fset, path, &nd);
497 /* FIXME: take a lock here */
/* accessed within the last 5 units of CURRENT_TIME: do not purge */
499 if (dentry->d_inode->i_atime > CURRENT_TIME - 5) {
500 /* We lost the race; this file was accessed while we were doing
501 * ioctls and lookups and whatnot. */
506 /* FIXME: Check if this file is open. */
508 handle = presto_trans_start(fset, dentry->d_inode, KML_OPCODE_TRUNC);
509 if (IS_ERR(handle)) {
514 /* FIXME: Write LML record */
/* remember the size so the second truncate call can use it */
516 oldsize = dentry->d_inode->i_size;
517 rc = izo_do_truncate(fset, dentry, 0, oldsize);
520 rc = izo_do_truncate(fset, dentry, oldsize, 0);
525 /* FIXME: clear LML record */
528 /* FIXME: release the lock here */
/* commit only a handle that was actually obtained */
531 if (handle != NULL && !IS_ERR(handle))
532 presto_trans_commit(fset, handle);
/* size matches the len + 1 passed to PRESTO_ALLOC above */
534 PRESTO_FREE(path, len + 1);