1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
4 * Author: Peter J. Braam <braam@clusterfs.com>
5 * Copyright (C) 1998 Stelias Computing Inc
6 * Copyright (C) 1999 Red Hat Inc.
8 * This file is part of InterMezzo, http://www.inter-mezzo.org.
10 * InterMezzo is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * InterMezzo is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with InterMezzo; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 * This file implements basic routines supporting the semantics
25 #include <linux/types.h>
26 #include <linux/kernel.h>
27 #include <linux/sched.h>
29 #include <linux/stat.h>
30 #include <linux/errno.h>
31 #include <linux/vmalloc.h>
32 #include <linux/slab.h>
33 #include <linux/locks.h>
34 #include <asm/segment.h>
35 #include <asm/uaccess.h>
36 #include <linux/string.h>
37 #include <linux/smp_lock.h>
39 #include <linux/intermezzo_fs.h>
40 #include <linux/intermezzo_psdev.h>
/*
 * presto_walk - resolve a pathname into *nd.
 *
 * The lookup flags start out as LOOKUP_POSITIVE; LOOKUP_FOLLOW is not
 * requested on any visible line, matching the rationale in the comment
 * below.  When path_init() returns non-zero, the result of path_walk()
 * is stored in err.
 *
 * NOTE(review): this listing has lost lines (braces, the declaration of
 * err, the return statement), so the value returned when path_init()
 * yields zero cannot be confirmed from here.
 */
42 int presto_walk(const char *name, struct nameidata *nd)
45 /* we do not follow symlinks to support symlink operations
46 correctly. The vfs should always hand us resolved dentries
47 so we should not be required to use LOOKUP_FOLLOW. At the
48 reintegrating end, lento again should be working with the
49 resolved pathname and not the symlink. SHP
50 XXX: This code implies that direct symlinks do not work. SHP
52 unsigned int flags = LOOKUP_POSITIVE;
56 if (path_init(name, flags, nd))
57 err = path_walk(name, nd);
62 /* find the presto minor device for this inode */
/*
 * presto_i2m - return the minor number of the psdev serving an inode.
 *
 * Looks the cache up with presto_get_cache(inode) and returns
 * cache->cache_psdev->uc_minor.  A CERROR naming the device and inode
 * number is emitted on the cannot-find-cache path.
 *
 * NOTE(review): the test guarding that CERROR and the value returned
 * on that path sit on lines missing from this listing.  Callers such as
 * presto_permit_upcall compare the result against 0, so a negative
 * return on failure is presumably intended -- confirm.
 */
63 int presto_i2m(struct inode *inode)
65 struct presto_cache *cache;
67 cache = presto_get_cache(inode);
68 CDEBUG(D_PSDEV, "\n");
70 CERROR("PRESTO: BAD: cannot find cache for dev %d, ino %ld\n",
71 inode->i_dev, inode->i_ino);
76 return cache->cache_psdev->uc_minor;
/*
 * presto_f2m - return the psdev minor number for a fileset.
 *
 * Follows fset->fset_cache->cache_psdev->uc_minor with no NULL checks on
 * the visible lines, so every pointer in that chain must be valid.
 */
79 inline int presto_f2m(struct presto_file_set *fset)
81 return fset->fset_cache->cache_psdev->uc_minor;
/*
 * presto_c2m - return the psdev minor number for a cache.
 *
 * Reads cache->cache_psdev->uc_minor with no NULL checks on the visible
 * lines, so both cache and cache->cache_psdev must be valid.
 */
85 inline int presto_c2m(struct presto_cache *cache)
87 return cache->cache_psdev->uc_minor;
91 /* XXX check this out */
/*
 * presto_path2fileset - look up the fileset that a pathname belongs to.
 *
 * Visible steps, in order: presto_walk(name, &nd), do_revalidate() on the
 * resulting dentry, then presto_fset(nd.dentry).  On a failure path the
 * error code is wrapped with ERR_PTR(), so callers should test the
 * result with IS_ERR/PTR_ERR (presto_set_max_kml_size uses PTR_ERR).
 *
 * NOTE(review): the error tests between these steps, the release of nd
 * and the final return are on lines missing from this listing; whether a
 * NULL result from presto_fset() is converted to an error cannot be
 * confirmed from here.
 */
92 struct presto_file_set *presto_path2fileset(const char *name)
95 struct presto_file_set *fileset;
99 error = presto_walk(name, &nd);
102 error = do_revalidate(nd.dentry);
105 fileset = presto_fset(nd.dentry);
109 fileset = ERR_PTR(error);
115 /* check a flag on this dentry or fset root. Semantics:
116 - most flags: test if it is set
117 - PRESTO_ATTR, PRESTO_DATA return 1 if FSET_INSYNC is set on the fileset
/*
 * presto_chk - test a flag on a dentry, with fileset-level shortcuts.
 *
 * dentry: dentry whose presto data (presto_d2d) holds dd_flags.
 * flag:   bit(s) to test.
 *
 * Visible behaviour:
 *  - the minor is taken from presto_i2m(dentry->d_inode) and used to
 *    index izo_channels[]; the uc_no_filter channel setting is tested
 *    first;
 *  - for PRESTO_ATTR or PRESTO_DATA, a fileset with FSET_INSYNC set is
 *    logged as in sync;
 *  - otherwise the result is dd_flags & flag, i.e. the masked bits
 *    themselves rather than a normalised 0/1.
 *
 * NOTE(review): the values returned by the two shortcut branches, and
 * any check that minor is non-negative or fset is non-NULL before use,
 * are on lines missing from this listing.
 */
119 int presto_chk(struct dentry *dentry, int flag)
122 struct presto_file_set *fset = presto_fset(dentry);
125 minor = presto_i2m(dentry->d_inode);
126 if ( izo_channels[minor].uc_no_filter ) {
131 /* if the fileset is in sync DATA and ATTR are OK */
133 (flag == PRESTO_ATTR || flag == PRESTO_DATA) &&
134 (fset->fset_flags & FSET_INSYNC) ) {
135 CDEBUG(D_INODE, "fset in sync (ino %ld)!\n",
136 fset->fset_dentry->d_inode->i_ino);
142 return (presto_d2d(dentry)->dd_flags & flag);
145 /* set a bit in the dentry flags */
/*
 * presto_set - OR a flag into the presto data attached to a dentry.
 *
 * dentry: target dentry; a debug line is logged only when it has an
 *         inode.
 * flag:   bit(s) to set in presto_d2d(dentry)->dd_flags.
 *
 * A dentry with no presto data is reported through CERROR.
 *
 * NOTE(review): in that CERROR, the conversion %*s takes d_name.len as a
 * minimum field width, not a maximum, so the name is printed up to its
 * terminating NUL; %.*s would bound it.  The message also has no
 * trailing newline.  Whether the function returns right after the
 * CERROR is on a line missing from this listing.
 */
146 void presto_set(struct dentry *dentry, int flag)
149 if ( dentry->d_inode ) {
150 CDEBUG(D_INODE, "SET ino %ld, flag %x\n",
151 dentry->d_inode->i_ino, flag);
153 if ( presto_d2d(dentry) == NULL) {
154 CERROR("dentry without d_fsdata in presto_set: %p: %*s", dentry,
155 dentry->d_name.len, dentry->d_name.name);
158 presto_d2d(dentry)->dd_flags |= flag;
162 /* given a path: complete the closes on the fset */
/*
 * lento_complete_closes - run presto_complete_lml() on the fileset that
 * contains a path.
 *
 * Visible steps, in order: presto_walk(path, &nd); a presto_ispresto()
 * check on the inode of the dentry; presto_fset(dentry), with the CERROR
 * text for a missing fileset; finally presto_complete_lml(fset), whose
 * result is stored in error.
 *
 * NOTE(review): the assignment of dentry, the error codes chosen on the
 * failure branches, the release of nd and the return statement are on
 * lines missing from this listing.
 */
163 int lento_complete_closes(char *path)
166 struct dentry *dentry;
168 struct presto_file_set *fset;
171 error = presto_walk(path, &nd);
180 if ( !presto_ispresto(dentry->d_inode) ) {
185 fset = presto_fset(dentry);
188 CERROR("No fileset!\n");
193 /* transactions and locking are internal to this function */
194 error = presto_complete_lml(fset);
203 /* given a path: write a close record and cancel an LML record, finally
204 call truncate LML. Lento is doing this so it goes in with uid/gid's
/*
 * lento_cancel_lml - act on the LML/KML for a path, as directed by the
 * bits in info->flags.
 *
 * path:              pathname resolved with presto_walk().
 * remote_generation,
 * remote_version:    visible in the signature; their use is not visible.
 * info:              context whose flags select the work below.
 *
 * Visible sequence:
 *  1. resolve the path, check presto_ispresto() and fetch the fileset;
 *  2. open a transaction with presto_trans_start(..., PRESTO_OP_RELEASE)
 *     and test the handle with IS_ERR();
 *  3. LENTO_FL_CANCEL_LML:   presto_clear_lml_close(fset, lml_offset);
 *  4. LENTO_FL_WRITE_KML:    set private_data and f_dentry on a local
 *     file object, take the inode version with presto_getversion(), and
 *     call presto_journal_close();
 *  5. LENTO_FL_WRITE_EXPECT: presto_write_last_rcvd();
 *  6. presto_trans_commit(); then, for LENTO_FL_CANCEL_LML,
 *     presto_truncate_lml(fset).
 *
 * presto_trans_commit() appears once after each of steps 3-5 as well as
 * at step 6; presumably the earlier ones belong to error branches.
 *
 * NOTE(review): part of the parameter list (including lml_offset), the
 * declarations of nd, rec, file, handle and error, the error tests and
 * the return statement are on lines missing from this listing.
 */
207 int lento_cancel_lml(char *path,
210 __u32 remote_generation,
211 __u32 remote_version,
212 struct lento_vfs_context *info)
216 struct dentry *dentry;
218 struct presto_file_set *fset;
220 struct presto_version new_ver;
224 error = presto_walk(path, &nd);
232 if ( !presto_ispresto(dentry->d_inode) ) {
237 fset = presto_fset(dentry);
241 CERROR("No fileset!\n");
246 /* this only requires a transaction below which is automatic */
247 handle = presto_trans_start(fset, dentry->d_inode, PRESTO_OP_RELEASE);
248 if ( IS_ERR(handle) ) {
254 if (info->flags & LENTO_FL_CANCEL_LML) {
255 error = presto_clear_lml_close(fset, lml_offset);
257 presto_trans_commit(fset, handle);
264 if (info->flags & LENTO_FL_WRITE_KML) {
266 file.private_data = NULL;
267 file.f_dentry = dentry;
268 presto_getversion(&new_ver, dentry->d_inode);
269 error = presto_journal_close(&rec, fset, &file, dentry,
273 presto_trans_commit(fset, handle);
278 if (info->flags & LENTO_FL_WRITE_EXPECT) {
279 error = presto_write_last_rcvd(&rec, fset, info);
282 presto_trans_commit(fset, handle);
287 presto_trans_commit(fset, handle);
289 if (info->flags & LENTO_FL_CANCEL_LML) {
290 presto_truncate_lml(fset);
301 /* given a dentry, operate on the flags in its dentry. Used by downcalls */
/*
 * izo_mark_dentry - update the dd_flags of the presto data on a dentry.
 *
 * dentry:   dentry whose presto_d2d() data is modified.
 * and_flag: mask ANDed into dd_flags first (pass ~0 to keep all bits).
 * or_flag:  bits ORed into dd_flags afterwards.
 * res:      receives the resulting dd_flags.
 *
 * A dentry with no presto data is reported through CERROR.
 *
 * NOTE(review): the end of the parameter list, the error value returned
 * after the CERROR, any NULL guard around the store through res, and
 * the final return are on lines missing from this listing.
 */
302 int izo_mark_dentry(struct dentry *dentry, int and_flag, int or_flag,
307 if (presto_d2d(dentry) == NULL) {
308 CERROR("InterMezzo: no ddata for inode %ld in %s\n",
309 dentry->d_inode->i_ino, __FUNCTION__);
313 CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n",
314 dentry->d_inode->i_ino, and_flag, or_flag,
315 presto_d2d(dentry)->dd_flags);
317 presto_d2d(dentry)->dd_flags &= and_flag;
318 presto_d2d(dentry)->dd_flags |= or_flag;
320 *res = presto_d2d(dentry)->dd_flags;
325 /* given a dentry, operate on the flags in its cache. Used by mark_ioctl */
/*
 * izo_mark_cache - update the cache_flags of the cache behind a dentry.
 *
 * dentry:   dentry used to find the cache via
 *           presto_get_cache(dentry->d_inode).
 * and_flag: mask ANDed into cache_flags first (pass ~0 to keep all bits).
 * or_flag:  bits ORed into cache_flags afterwards.
 * res:      receives the resulting cache_flags, cast to int.
 *
 * A dentry with no presto data, or an inode with no cache, is reported
 * through CERROR.  The debug line prints the dd_flags of the dentry, not
 * the cache flags that are about to change.
 *
 * NOTE(review): the end of the parameter list, the test guarding the
 * second CERROR, the error values returned, any NULL guard around the
 * store through res, and the final return are on lines missing from
 * this listing.
 */
326 int izo_mark_cache(struct dentry *dentry, int and_flag, int or_flag,
329 struct presto_cache *cache;
331 if (presto_d2d(dentry) == NULL) {
332 CERROR("InterMezzo: no ddata for inode %ld in %s\n",
333 dentry->d_inode->i_ino, __FUNCTION__);
337 CDEBUG(D_INODE, "inode: %ld, and flag %x, or flag %x, dd_flags %x\n",
338 dentry->d_inode->i_ino, and_flag, or_flag,
339 presto_d2d(dentry)->dd_flags);
341 cache = presto_get_cache(dentry->d_inode);
343 CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n");
347 cache->cache_flags &= and_flag;
348 cache->cache_flags |= or_flag;
350 *res = (int)cache->cache_flags;
/*
 * presto_set_max_kml_size - set kml_truncate_size on the fileset that
 * contains a path.
 *
 * path:     pathname handed to presto_path2fileset().
 * max_size: value stored in fset->kml_truncate_size, logged in bytes.
 *
 * Returns PTR_ERR(fset) on the lookup-failure branch.
 *
 * NOTE(review): the test guarding that early return (presumably
 * IS_ERR), the trailing arguments of the CDEBUG call and the success
 * return are on lines missing from this listing.
 */
355 int presto_set_max_kml_size(const char *path, unsigned long max_size)
357 struct presto_file_set *fset;
361 fset = presto_path2fileset(path);
364 return PTR_ERR(fset);
367 fset->kml_truncate_size = max_size;
368 CDEBUG(D_CACHE, "KML truncate size set to %lu bytes for fset %s.\n",
/*
 * izo_mark_fset - update the fset_flags of the fileset behind a dentry.
 *
 * dentry:   dentry used to find the fileset via presto_fset().
 * and_flag: mask ANDed into fset_flags first (pass ~0 to keep all bits).
 * or_flag:  bits ORed into fset_flags afterwards.
 * res:      receives the resulting fset_flags, cast to int.
 *
 * On the failure branch the inode is marked bad with make_bad_inode().
 *
 * NOTE(review): the CERROR text says "cannot find cache in
 * izo_mark_cache" although this is izo_mark_fset and the lookup is for a
 * fileset -- it looks like a copy-paste leftover.
 * NOTE(review): callers in this file (presto_get_permit visibly does not
 * show its last argument; izo_revoke_permit passes NULL) rely on res
 * being optional, but the guard around the store through res, like the
 * test guarding the CERROR and the return values, is on a line missing
 * from this listing -- confirm it exists.
 */
375 int izo_mark_fset(struct dentry *dentry, int and_flag, int or_flag,
378 struct presto_file_set *fset;
380 fset = presto_fset(dentry);
382 CERROR("PRESTO: BAD: cannot find cache in izo_mark_cache\n");
383 make_bad_inode(dentry->d_inode);
386 fset->fset_flags &= and_flag;
387 fset->fset_flags |= or_flag;
389 *res = (int)fset->fset_flags;
394 /* talk to Lento about the permit */
/*
 * presto_permit_upcall - ask Lento for the permit covering a dentry.
 *
 * Visible sequence:
 *  1. take the minor from presto_i2m(); a negative value is treated as
 *     failure;
 *  2. fetch the fileset with presto_fset();
 *  3. when presto_lento_up(minor) is false, consult FSET_STEAL_PERMIT on
 *     the fileset;
 *  4. allocate a PAGE_SIZE buffer, build the path of dentry relative to
 *     fset->fset_dentry with presto_path(), and call izo_upc_permit();
 *  5. free the buffer.
 *
 * NOTE(review): fsetnamelen is computed with strlen() but is not among
 * the arguments of the visible izo_upc_permit() call; it may be unused.
 * NOTE(review): the return values of the early-exit branches, the NULL
 * tests on fset and buffer, and the final return are on lines missing
 * from this listing.
 */
395 static int presto_permit_upcall(struct dentry *dentry)
402 struct presto_file_set *fset = NULL;
406 if ( (minor = presto_i2m(dentry->d_inode)) < 0) {
411 fset = presto_fset(dentry);
417 if ( !presto_lento_up(minor) ) {
418 if ( fset->fset_flags & FSET_STEAL_PERMIT ) {
427 PRESTO_ALLOC(buffer, PAGE_SIZE);
429 CERROR("PRESTO: out of memory!\n");
433 path = presto_path(dentry, fset->fset_dentry, buffer, PAGE_SIZE);
434 pathlen = MYPATHLEN(buffer, path);
435 fsetnamelen = strlen(fset->fset_name);
436 rc = izo_upc_permit(minor, dentry, pathlen, path, fset->fset_name);
437 PRESTO_FREE(buffer, PAGE_SIZE);
442 /* get a write permit for the fileset of this inode
443 * - if this returns a negative value there was an error
444 * - if 0 is returned the permit was already in the kernel -- or --
445 * Lento gave us the permit without reintegration
446 * - lento returns the number of records it reintegrated
448 * Note that if this fileset has branches, a permit will -never- be given to a normal
449 * process for writing in the data area (ie, outside of .intermezzo)
/*
 * presto_get_permit - take a reference on the write permit of the
 * fileset that owns an inode, asking Lento for the permit if needed.
 *
 * Visible sequence:
 *  1. early exits: ISLENTO(minor); an inode with an empty i_dentry list;
 *     a first alias flagged PRESTO_DONT_JOURNAL; no fileset; a fileset
 *     with FSET_HAS_BRANCHES;
 *  2. under fset_permit_lock, when FSET_HASPERMIT is already set, bump
 *     fset_permit_count and unlock;
 *  3. otherwise bump fset_permit_upcall_count.  The task that takes it
 *     to 1 drops the lock, calls presto_permit_upcall() on the fileset
 *     root, retakes the lock, decrements the upcall count, sets
 *     FSET_HASPERMIT and bumps fset_permit_count on the success and
 *     ENOTCONN branches, then wakes fset_permit_queue;
 *  4. any other task sleeps interruptibly on fset_permit_queue until the
 *     upcall count reads 0 under the lock or a signal is pending, then
 *     re-checks FSET_HASPERMIT.
 *
 * Only the first dentry on inode->i_dentry is consulted.
 *
 * NOTE(review): the upcall result is compared with positive ENOTCONN;
 * confirm the sign convention of presto_permit_upcall/izo_upc_permit.
 * NOTE(review): the No-alias CERROR narrows i_ino to int.
 * NOTE(review): the return values of every branch, the loop construct
 * and schedule() call of the wait, the last argument of both
 * izo_mark_fset() calls and the statements under the final
 * FSET_HASPERMIT test are on lines missing from this listing.
 */
451 int presto_get_permit(struct inode * inode)
454 struct presto_file_set *fset;
455 int minor = presto_i2m(inode);
464 if ( ISLENTO(minor) ) {
469 if (list_empty(&inode->i_dentry)) {
470 CERROR("No alias for inode %d\n", (int) inode->i_ino);
475 de = list_entry(inode->i_dentry.next, struct dentry, d_alias);
477 if (presto_chk(de, PRESTO_DONT_JOURNAL)) {
482 fset = presto_fset(de);
484 CERROR("Presto: no fileset in presto_get_permit!\n");
489 if (fset->fset_flags & FSET_HAS_BRANCHES) {
494 spin_lock(&fset->fset_permit_lock);
495 if (fset->fset_flags & FSET_HASPERMIT) {
496 fset->fset_permit_count++;
497 CDEBUG(D_INODE, "permit count now %d, inode %lx\n",
498 fset->fset_permit_count, inode->i_ino);
499 spin_unlock(&fset->fset_permit_lock);
504 /* Allow reintegration to proceed without locks -SHP */
505 fset->fset_permit_upcall_count++;
506 if (fset->fset_permit_upcall_count == 1) {
507 spin_unlock(&fset->fset_permit_lock);
508 rc = presto_permit_upcall(fset->fset_dentry);
509 spin_lock(&fset->fset_permit_lock);
510 fset->fset_permit_upcall_count--;
512 izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT,
514 fset->fset_permit_count++;
515 } else if (rc == ENOTCONN) {
516 CERROR("InterMezzo: disconnected operation. stealing permit.\n");
517 izo_mark_fset(fset->fset_dentry, ~0, FSET_HASPERMIT,
519 fset->fset_permit_count++;
520 /* set a disconnected flag here to stop upcalls */
523 CERROR("InterMezzo: presto_permit_upcall failed: %d\n", rc);
525 /* go to sleep here and try again? */
527 wake_up_interruptible(&fset->fset_permit_queue);
529 /* Someone is already doing an upcall; go to sleep. */
530 DECLARE_WAITQUEUE(wait, current);
532 spin_unlock(&fset->fset_permit_lock);
533 add_wait_queue(&fset->fset_permit_queue, &wait);
535 set_current_state(TASK_INTERRUPTIBLE);
537 spin_lock(&fset->fset_permit_lock);
538 if (fset->fset_permit_upcall_count == 0)
540 spin_unlock(&fset->fset_permit_lock);
542 if (signal_pending(current)) {
543 remove_wait_queue(&fset->fset_permit_queue,
549 remove_wait_queue(&fset->fset_permit_queue, &wait);
550 /* We've been woken up: do we have the permit? */
551 if (fset->fset_flags & FSET_HASPERMIT)
552 /* FIXME: Is this the right thing? */
556 CDEBUG(D_INODE, "permit count now %d, ino %ld (likely 1), "
557 "rc %d\n", fset->fset_permit_count, inode->i_ino, rc);
558 spin_unlock(&fset->fset_permit_lock);
/*
 * presto_put_permit - drop one reference on the write permit of the
 * fileset that owns an inode.
 *
 * Visible sequence:
 *  1. early exits: ISLENTO(minor); an inode with an empty i_dentry list;
 *     no fileset; a first alias flagged PRESTO_DONT_JOURNAL;
 *  2. under fset_permit_lock, when FSET_HASPERMIT is set, decrement
 *     fset_permit_count if it is positive and complain otherwise; when
 *     the permit is not held, force the count to 0 and complain;
 *  3. when FSET_PERMIT_WAITING is set and the count has reached 0, wake
 *     the sleepers on fset_permit_queue (izo_revoke_permit sleeps there);
 *  4. release the lock.
 *
 * Only the first dentry on inode->i_dentry is consulted.  Note that the
 * fileset lookup precedes the PRESTO_DONT_JOURNAL test here, the reverse
 * of the order used in presto_get_permit.
 *
 * NOTE(review): the No-alias CERROR narrows i_ino to int.
 * NOTE(review): the else keywords pairing the branches in step 2, the
 * return values and the NULL test on fset are on lines missing from this
 * listing.
 */
563 int presto_put_permit(struct inode * inode)
566 struct presto_file_set *fset;
567 int minor = presto_i2m(inode);
575 if ( ISLENTO(minor) ) {
580 if (list_empty(&inode->i_dentry)) {
581 CERROR("No alias for inode %d\n", (int) inode->i_ino);
586 de = list_entry(inode->i_dentry.next, struct dentry, d_alias);
588 fset = presto_fset(de);
590 CERROR("InterMezzo: no fileset in %s!\n", __FUNCTION__);
595 if (presto_chk(de, PRESTO_DONT_JOURNAL)) {
600 spin_lock(&fset->fset_permit_lock);
601 if (fset->fset_flags & FSET_HASPERMIT) {
602 if (fset->fset_permit_count > 0)
603 fset->fset_permit_count--;
605 CERROR("Put permit while permit count is 0, "
606 "inode %ld!\n", inode->i_ino);
608 fset->fset_permit_count = 0;
609 CERROR("InterMezzo: put permit while no permit, inode %ld, "
610 "flags %x!\n", inode->i_ino, fset->fset_flags);
613 CDEBUG(D_INODE, "permit count now %d, inode %ld\n",
614 fset->fset_permit_count, inode->i_ino);
616 if (fset->fset_flags & FSET_PERMIT_WAITING &&
617 fset->fset_permit_count == 0) {
618 CDEBUG(D_INODE, "permit count now 0, ino %ld, wake sleepers\n",
620 wake_up_interruptible(&fset->fset_permit_queue);
622 spin_unlock(&fset->fset_permit_lock);
/*
 * presto_getversion - snapshot the version fields of an inode.
 *
 * presto_version: output; pv_mtime, pv_ctime and pv_size are overwritten.
 * inode:          source of i_mtime, i_ctime and i_size.
 *
 * Each value is cast to __u64 before being stored.  No other field of
 * *presto_version is written on the visible lines.
 */
628 void presto_getversion(struct presto_version * presto_version,
629 struct inode * inode)
631 presto_version->pv_mtime = (__u64)inode->i_mtime;
632 presto_version->pv_ctime = (__u64)inode->i_ctime;
633 presto_version->pv_size = (__u64)inode->i_size;
637 /* If uuid is non-null, it is the uuid of the peer that's making the revocation
638 * request. If it is null, this request was made locally, without external
639 * pressure to give up the permit. This most often occurs when a client
642 * FIXME: this function needs to be refactored slightly once we start handling
/*
 * izo_revoke_permit - give the write permit of a fileset back, waiting
 * for current holders to drop their references first.
 *
 * dentry: dentry used to find the minor (presto_i2m) and the fileset.
 * uuid:   16-byte identifier forwarded to izo_upc_revoke_permit().
 *
 * Visible sequence:
 *  1. under fset_permit_lock, refuse when FSET_PERMIT_WAITING is already
 *     set (only one waiter is supported);
 *  2. when fset_permit_count is not 0, set FSET_PERMIT_WAITING, drop the
 *     lock and sleep interruptibly on fset_permit_queue, re-testing the
 *     count under the lock; a pending signal ends the wait;
 *  3. call izo_upc_revoke_permit(minor, fset->fset_name, uuid);
 *  4. clear FSET_PERMIT_WAITING and FSET_HASPERMIT with izo_mark_fset()
 *     and release the lock.
 *
 * The izo_mark_fset() calls pass NULL as the result pointer.
 *
 * NOTE(review): the loop construct and schedule() call of the wait, the
 * labels or jumps joining steps 2 and 3, the error handling after the
 * upcall (a spin_unlock is visible right after it) and every return
 * value are on lines missing from this listing.
 */
645 int izo_revoke_permit(struct dentry *dentry, __u8 uuid[16])
647 struct presto_file_set *fset;
648 DECLARE_WAITQUEUE(wait, current);
653 minor = presto_i2m(dentry->d_inode);
659 fset = presto_fset(dentry);
665 spin_lock(&fset->fset_permit_lock);
666 if (fset->fset_flags & FSET_PERMIT_WAITING) {
667 CERROR("InterMezzo: Two processes are waiting on the same permit--this not yet supported! Aborting this particular permit request...\n");
669 spin_unlock(&fset->fset_permit_lock);
673 if (fset->fset_permit_count == 0)
676 /* Something is still using this permit. Mark that we're waiting for it
677 * and go to sleep. */
678 rc = izo_mark_fset(dentry, ~0, FSET_PERMIT_WAITING, NULL);
679 spin_unlock(&fset->fset_permit_lock);
685 add_wait_queue(&fset->fset_permit_queue, &wait);
687 set_current_state(TASK_INTERRUPTIBLE);
689 spin_lock(&fset->fset_permit_lock);
690 if (fset->fset_permit_count == 0)
692 spin_unlock(&fset->fset_permit_lock);
694 if (signal_pending(current)) {
695 /* FIXME: there must be a better thing to return... */
696 remove_wait_queue(&fset->fset_permit_queue, &wait);
701 /* FIXME: maybe there should be a timeout here. */
706 remove_wait_queue(&fset->fset_permit_queue, &wait);
708 /* By this point fset->fset_permit_count is zero and we're holding the
710 CDEBUG(D_CACHE, "InterMezzo: releasing permit inode %ld\n",
711 dentry->d_inode->i_ino);
714 rc = izo_upc_revoke_permit(minor, fset->fset_name, uuid);
716 spin_unlock(&fset->fset_permit_lock);
722 izo_mark_fset(fset->fset_dentry, ~FSET_PERMIT_WAITING, 0, NULL);
723 izo_mark_fset(fset->fset_dentry, ~FSET_HASPERMIT, 0, NULL);
724 spin_unlock(&fset->fset_permit_lock);
/*
 * presto_is_read_only - report whether a fileset is read-only for the
 * current caller.
 *
 * The minor comes from fset->fset_cache->cache_psdev.  ISLENTO(minor)
 * selects which pair of bits applies: FSET_LENTO_RO / CACHE_LENTO_RO
 * when it is true, FSET_CLIENT_RO / CACHE_CLIENT_RO otherwise.  The
 * fileset flags are tested first; failing that, the result is 1 when the
 * selected bit is set in cache->cache_flags and 0 when it is not.
 *
 * NOTE(review): the statement executed when the fileset bit is set
 * (presumably a return of 1) and the declarations of minor and mask are
 * on lines missing from this listing.
 */
729 inline int presto_is_read_only(struct presto_file_set * fset)
732 struct presto_cache *cache = fset->fset_cache;
734 minor= cache->cache_psdev->uc_minor;
735 mask= (ISLENTO(minor)? FSET_LENTO_RO : FSET_CLIENT_RO);
736 if ( fset->fset_flags & mask )
738 mask= (ISLENTO(minor)? CACHE_LENTO_RO : CACHE_CLIENT_RO);
739 return ((cache->cache_flags & mask)? 1 : 0);