#include <asm/ioctls.h>
#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/stats.h>
* providing that certain rules are followed:
*
* SK_CONN, SK_DATA, can be set or cleared at any time.
- * after a set, svc_sock_enqueue must be called.
+ * after a set, svc_sock_enqueue must be called.
* after a clear, the socket must be read/accepted
* if this succeeds, it must be set again.
* SK_CLOSE can set at any time. It is never cleared.
+ * sk_inuse contains a bias of '1' until SK_DEAD is set.
+ * so when sk_inuse hits zero, we know the socket is dead
+ * and no-one is using it.
+ * SK_DEAD can only be set while SK_BUSY is held which ensures
+ * no other thread will be using the socket or will try to
+ * set SK_DEAD.
*
*/
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
- int *errp, int pmap_reg);
+ int *errp, int flags);
+static void svc_delete_socket(struct svc_sock *svsk);
static void svc_udp_data_ready(struct sock *, int);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
}
#endif
+static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len)
+{
+ switch (addr->sa_family) {
+ case AF_INET:
+ snprintf(buf, len, "%u.%u.%u.%u, port=%u",
+ NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
+ htons(((struct sockaddr_in *) addr)->sin_port));
+ break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ case AF_INET6:
+ snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
+ NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
+ htons(((struct sockaddr_in6 *) addr)->sin6_port));
+ break;
+#endif
+ default:
+ snprintf(buf, len, "unknown address type: %d", addr->sa_family);
+ break;
+ }
+ return buf;
+}
+
+/**
+ * svc_print_addr - Format rq_addr field for printing
+ * @rqstp: svc_rqst struct containing address to print
+ * @buf: target buffer for formatted address
+ * @len: length of target buffer
+ *
+ */
+char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
+{
+ return __svc_print_addr((struct sockaddr *) &rqstp->rq_addr, buf, len);
+}
+EXPORT_SYMBOL_GPL(svc_print_addr);
+
/*
* Queue up an idle server thread. Must have pool->sp_lock held.
* Note: this is really a stack rather than a queue, so that we only
svsk->sk_sk, rqstp);
svc_thread_dequeue(pool, rqstp);
if (rqstp->rq_sock)
- printk(KERN_ERR
+ printk(KERN_ERR
"svc_sock_enqueue: server %p, rq_sock=%p!\n",
rqstp, rqstp->rq_sock);
rqstp->rq_sock = svsk;
static inline void
svc_sock_put(struct svc_sock *svsk)
{
- if (atomic_dec_and_test(&svsk->sk_inuse) &&
- test_bit(SK_DEAD, &svsk->sk_flags)) {
+ if (atomic_dec_and_test(&svsk->sk_inuse)) {
+ BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags));
+
dprintk("svc: releasing dead socket\n");
if (svsk->sk_sock->file)
sockfd_put(svsk->sk_sock);
size_t base = xdr->page_base;
unsigned int pglen = xdr->page_len;
unsigned int flags = MSG_MORE;
+ char buf[RPC_MAX_ADDRBUFLEN];
slen = xdr->len;
if (xdr->tail[0].iov_len) {
result = kernel_sendpage(sock, rqstp->rq_respages[0],
((unsigned long)xdr->tail[0].iov_base)
- & (PAGE_SIZE-1),
+ & (PAGE_SIZE-1),
xdr->tail[0].iov_len, 0);
if (result > 0)
len += result;
}
out:
- dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %x)\n",
- rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len,
- rqstp->rq_addr.sin_addr.s_addr);
+ dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n",
+ rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len,
+ xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf)));
return len;
}
if (!serv)
return 0;
- spin_lock(&serv->sv_lock);
+ spin_lock_bh(&serv->sv_lock);
list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) {
int onelen = one_sock_name(buf+len, svsk);
if (toclose && strcmp(toclose, buf+len) == 0)
else
len += onelen;
}
- spin_unlock(&serv->sv_lock);
+ spin_unlock_bh(&serv->sv_lock);
if (closesk)
/* Should unregister with portmap, but you cannot
* unregister just one protocol...
*/
- svc_delete_socket(closesk);
+ svc_close_socket(closesk);
else if (toclose)
return -ENOENT;
return len;
static int
svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen)
{
- struct msghdr msg;
- struct socket *sock;
- int len, alen;
-
- rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
- sock = rqstp->rq_sock->sk_sock;
-
- msg.msg_name = &rqstp->rq_addr;
- msg.msg_namelen = sizeof(rqstp->rq_addr);
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
-
- msg.msg_flags = MSG_DONTWAIT;
+ struct svc_sock *svsk = rqstp->rq_sock;
+ struct msghdr msg = {
+ .msg_flags = MSG_DONTWAIT,
+ };
+ int len;
- len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT);
+ len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
+ msg.msg_flags);
/* sock_recvmsg doesn't fill in the name/namelen, so we must..
- * possibly we should cache this in the svc_sock structure
- * at accept time. FIXME
*/
- alen = sizeof(rqstp->rq_addr);
- kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen);
+ memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen);
+ rqstp->rq_addrlen = svsk->sk_remotelen;
dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
- rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len);
+ svsk, iov[0].iov_base, iov[0].iov_len, len);
return len;
}
return svc_deferred_recv(rqstp);
}
+ if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
+ svc_delete_socket(svsk);
+ return 0;
+ }
+
clear_bit(SK_DATA, &svsk->sk_flags);
while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
if (err == -EAGAIN) {
tv.tv_sec = xtime.tv_sec;
tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
skb_set_timestamp(skb, &tv);
- /* Don't enable netstamp, sunrpc doesn't
+ /* Don't enable netstamp, sunrpc doesn't
need that much accuracy */
}
skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp);
return 0;
}
local_bh_enable();
- skb_free_datagram(svsk->sk_sk, skb);
+ skb_free_datagram(svsk->sk_sk, skb);
} else {
/* we can use it in-place */
rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr);
svsk->sk_sendto = svc_udp_sendto;
/* initialise setting must have enough space to
- * receive and respond to one request.
+ * receive and respond to one request.
* svc_udp_recvfrom will re-adjust if necessary
*/
svc_sock_setbufsize(svsk->sk_sock,
struct socket *newsock;
struct svc_sock *newsvsk;
int err, slen;
+ char buf[RPC_MAX_ADDRBUFLEN];
dprintk("svc: tcp_accept %p sock %p\n", svsk, sock);
if (!sock)
}
/* Ideally, we would want to reject connections from unauthorized
- * hosts here, but when we get encription, the IP of the host won't
- * tell us anything. For now just warn about unpriv connections.
+ * hosts here, but when we get encryption, the IP of the host won't
+ * tell us anything. For now just warn about unpriv connections.
*/
if (ntohs(sin.sin_port) >= 1024) {
dprintk(KERN_WARNING
- "%s: connect from unprivileged port: %u.%u.%u.%u:%d\n",
- serv->sv_name,
- NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+ "%s: connect from unprivileged port: %s\n",
+ serv->sv_name,
+ __svc_print_addr((struct sockaddr *) &sin, buf,
+ sizeof(buf)));
}
-
- dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name,
- NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+ dprintk("%s: connect from %s\n", serv->sv_name,
+ __svc_print_addr((struct sockaddr *) &sin, buf,
+ sizeof(buf)));
/* make sure that a write doesn't block forever when
* low on memory
*/
newsock->sk->sk_sndtimeo = HZ*30;
- if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0)))
+ if (!(newsvsk = svc_setup_socket(serv, newsock, &err,
+ (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY))))
goto failed;
+ memcpy(&newsvsk->sk_remote, &sin, slen);
+ newsvsk->sk_remotelen = slen;
+ svc_sock_received(newsvsk);
/* make sure that we don't have too many active connections.
* If we have, something must be dropped.
"sockets, consider increasing the "
"number of nfsd threads\n",
serv->sv_name);
- printk(KERN_NOTICE "%s: last TCP connect from "
- "%u.%u.%u.%u:%d\n",
- serv->sv_name,
- NIPQUAD(sin.sin_addr.s_addr),
- ntohs(sin.sin_port));
+ printk(KERN_NOTICE
+ "%s: last TCP connect from %s\n",
+ serv->sv_name, buf);
}
/*
* Always select the oldest socket. It's not fair,
* on the number of threads which will access the socket.
*
* rcvbuf just needs to be able to hold a few requests.
- * Normally they will be removed from the queue
+ * Normally they will be removed from the queue
* as soon a a complete request arrives.
*/
svc_sock_setbufsize(svsk->sk_sock,
if (len < want) {
dprintk("svc: short recvfrom while reading record length (%d of %lu)\n",
- len, want);
+ len, want);
svc_sock_received(svsk);
return -EAGAIN; /* record header not complete */
}
* bit set in the fragment length header.
* But apparently no known nfs clients send fragmented
* records. */
- printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (non-terminal)\n",
- (unsigned long) svsk->sk_reclen);
+ if (net_ratelimit())
+ printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx"
+ " (non-terminal)\n",
+ (unsigned long) svsk->sk_reclen);
goto err_delete;
}
svsk->sk_reclen &= 0x7fffffff;
dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen);
if (svsk->sk_reclen > serv->sv_max_mesg) {
- printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx (large)\n",
- (unsigned long) svsk->sk_reclen);
+ if (net_ratelimit())
+ printk(KERN_NOTICE "RPC: bad TCP reclen 0x%08lx"
+ " (large)\n",
+ (unsigned long) svsk->sk_reclen);
goto err_delete;
}
}
rqstp->rq_sock->sk_server->sv_name,
(sent<0)?"got error":"sent only",
sent, xbufp->len);
- svc_delete_socket(rqstp->rq_sock);
+ set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags);
+ svc_sock_enqueue(rqstp->rq_sock);
sent = -EAGAIN;
}
return sent;
tp->nonagle = 1; /* disable Nagle's algorithm */
/* initialise setting must have enough space to
- * receive and respond to one request.
+ * receive and respond to one request.
* svc_tcp_recvfrom will re-adjust if necessary
*/
svc_sock_setbufsize(svsk->sk_sock,
set_bit(SK_CHNGBUF, &svsk->sk_flags);
set_bit(SK_DATA, &svsk->sk_flags);
- if (sk->sk_state != TCP_ESTABLISHED)
+ if (sk->sk_state != TCP_ESTABLISHED)
set_bit(SK_CLOSE, &svsk->sk_flags);
}
}
spin_lock_bh(&serv->sv_lock);
list_for_each(le, &serv->sv_permsocks) {
- struct svc_sock *svsk =
+ struct svc_sock *svsk =
list_entry(le, struct svc_sock, sk_list);
set_bit(SK_CHNGBUF, &svsk->sk_flags);
}
rqstp, timeout);
if (rqstp->rq_sock)
- printk(KERN_ERR
+ printk(KERN_ERR
"svc_recv: service %p, socket not NULL!\n",
rqstp);
if (waitqueue_active(&rqstp->rq_wait))
- printk(KERN_ERR
+ printk(KERN_ERR
"svc_recv: service %p, wait queue active!\n",
rqstp);
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
rqstp->rq_pages[i] = p;
}
+ rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
+ BUG_ON(pages >= RPCSVC_MAXPAGES);
/* Make arg->head point to first page and arg->pages point to rest */
arg = &rqstp->rq_arg;
return len;
}
-/*
+/*
* Drop request
*/
void
* Initialize socket for RPC use and create svc_sock struct
* XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF.
*/
-static struct svc_sock *
-svc_setup_socket(struct svc_serv *serv, struct socket *sock,
- int *errp, int pmap_register)
+static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
+ struct socket *sock,
+ int *errp, int flags)
{
struct svc_sock *svsk;
struct sock *inet;
+ int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
+ int is_temporary = flags & SVC_SOCK_TEMPORARY;
dprintk("svc: svc_setup_socket %p\n", sock);
if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
svsk->sk_odata = inet->sk_data_ready;
svsk->sk_owspace = inet->sk_write_space;
svsk->sk_server = serv;
- atomic_set(&svsk->sk_inuse, 0);
+ atomic_set(&svsk->sk_inuse, 1);
svsk->sk_lastrecv = get_seconds();
spin_lock_init(&svsk->sk_defer_lock);
INIT_LIST_HEAD(&svsk->sk_deferred);
svc_tcp_init(svsk);
spin_lock_bh(&serv->sv_lock);
- if (!pmap_register) {
+ if (is_temporary) {
set_bit(SK_TEMP, &svsk->sk_flags);
list_add(&svsk->sk_list, &serv->sv_tempsocks);
serv->sv_tmpcnt++;
dprintk("svc: svc_setup_socket created %p (inet %p)\n",
svsk, svsk->sk_sk);
- clear_bit(SK_BUSY, &svsk->sk_flags);
- svc_sock_enqueue(svsk);
return svsk;
}
else if (so->state > SS_UNCONNECTED)
err = -EISCONN;
else {
- svsk = svc_setup_socket(serv, so, &err, 1);
- if (svsk)
+ svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS);
+ if (svsk) {
+ svc_sock_received(svsk);
err = 0;
+ }
}
if (err) {
sockfd_put(so);
/*
* Create socket for RPC service.
*/
-static int
-svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin)
+static int svc_create_socket(struct svc_serv *serv, int protocol,
+ struct sockaddr_in *sin, int flags)
{
struct svc_sock *svsk;
struct socket *sock;
int error;
int type;
+ char buf[RPC_MAX_ADDRBUFLEN];
- dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n",
- serv->sv_program->pg_name, protocol,
- NIPQUAD(sin->sin_addr.s_addr),
- ntohs(sin->sin_port));
+ dprintk("svc: svc_create_socket(%s, %d, %s)\n",
+ serv->sv_program->pg_name, protocol,
+ __svc_print_addr((struct sockaddr *) sin, buf,
+ sizeof(buf)));
if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) {
printk(KERN_WARNING "svc: only UDP and TCP "
goto bummer;
}
- if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL)
- return 0;
+ if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) {
+ svc_sock_received(svsk);
+ return ntohs(inet_sk(svsk->sk_sk)->sport);
+ }
bummer:
dprintk("svc: svc_create_socket error = %d\n", -error);
/*
* Remove a dead socket
*/
-void
+static void
svc_delete_socket(struct svc_sock *svsk)
{
struct svc_serv *serv;
if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags))
list_del_init(&svsk->sk_list);
- /*
+ /*
* We used to delete the svc_sock from whichever list
* it's sk_ready node was on, but we don't actually
* need to. This is because the only time we're called
* while still attached to a queue, the queue itself
* is about to be destroyed (in svc_destroy).
*/
- if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
+ if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) {
+ BUG_ON(atomic_read(&svsk->sk_inuse)<2);
+ atomic_dec(&svsk->sk_inuse);
if (test_bit(SK_TEMP, &svsk->sk_flags))
serv->sv_tmpcnt--;
+ }
- /* This atomic_inc should be needed - svc_delete_socket
- * should have the semantic of dropping a reference.
- * But it doesn't yet....
- */
- atomic_inc(&svsk->sk_inuse);
spin_unlock_bh(&serv->sv_lock);
+}
+
+void svc_close_socket(struct svc_sock *svsk)
+{
+ set_bit(SK_CLOSE, &svsk->sk_flags);
+ if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
+ /* someone else will have to effect the close */
+ return;
+
+ atomic_inc(&svsk->sk_inuse);
+ svc_delete_socket(svsk);
+ clear_bit(SK_BUSY, &svsk->sk_flags);
svc_sock_put(svsk);
}
-/*
- * Make a socket for nfsd and lockd
+/**
+ * svc_makesock - Make a socket for nfsd and lockd
+ * @serv: RPC server structure
+ * @protocol: transport protocol to use
+ * @port: port to use
+ * @flags: requested socket characteristics
+ *
*/
-int
-svc_makesock(struct svc_serv *serv, int protocol, unsigned short port)
+int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port,
+ int flags)
{
- struct sockaddr_in sin;
+ struct sockaddr_in sin = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = INADDR_ANY,
+ .sin_port = htons(port),
+ };
dprintk("svc: creating socket proto = %d\n", protocol);
- sin.sin_family = AF_INET;
- sin.sin_addr.s_addr = INADDR_ANY;
- sin.sin_port = htons(port);
- return svc_create_socket(serv, protocol, &sin);
+ return svc_create_socket(serv, protocol, &sin, flags);
}
/*
- * Handle defer and revisit of requests
+ * Handle defer and revisit of requests
*/
static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
dr->handle.owner = rqstp->rq_server;
dr->prot = rqstp->rq_prot;
- dr->addr = rqstp->rq_addr;
+ memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen);
+ dr->addrlen = rqstp->rq_addrlen;
dr->daddr = rqstp->rq_daddr;
dr->argslen = rqstp->rq_arg.len >> 2;
memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2);
rqstp->rq_arg.page_len = 0;
rqstp->rq_arg.len = dr->argslen<<2;
rqstp->rq_prot = dr->prot;
- rqstp->rq_addr = dr->addr;
+ memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen);
+ rqstp->rq_addrlen = dr->addrlen;
rqstp->rq_daddr = dr->daddr;
rqstp->rq_respages = rqstp->rq_pages;
return dr->argslen<<2;
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk)
{
struct svc_deferred_req *dr = NULL;
-
+
if (!test_bit(SK_DEFERRED, &svsk->sk_flags))
return NULL;
spin_lock_bh(&svsk->sk_defer_lock);