X-Git-Url: http://git.rot13.org/?a=blobdiff_plain;f=net%2Fsunrpc%2Fsvcsock.c;h=22f61aee4824cea33b419529f62bb5eee599f5fd;hb=c4d3efafcc933fd2ffd169d7dc4f980393a13796;hp=ff1f8bf680aab43e741249912409b09013ba6fad;hpb=bcd8b54f7d9c980b945d3f1cc48f76f2fa6e2e1b;p=powerpc.git diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index ff1f8bf680..22f61aee48 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -36,11 +36,13 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -58,10 +60,16 @@ * providing that certain rules are followed: * * SK_CONN, SK_DATA, can be set or cleared at any time. - * after a set, svc_sock_enqueue must be called. + * after a set, svc_sock_enqueue must be called. * after a clear, the socket must be read/accepted * if this succeeds, it must be set again. * SK_CLOSE can set at any time. It is never cleared. + * sk_inuse contains a bias of '1' until SK_DEAD is set. + * so when sk_inuse hits zero, we know the socket is dead + * and no-one is using it. + * SK_DEAD can only be set while SK_BUSY is held which ensures + * no other thread will be using the socket or will try to + * set SK_DEAD. * */ @@ -69,10 +77,12 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, - int *errp, int pmap_reg); + int *errp, int flags); +static void svc_delete_socket(struct svc_sock *svsk); static void svc_udp_data_ready(struct sock *, int); static int svc_udp_recvfrom(struct svc_rqst *); static int svc_udp_sendto(struct svc_rqst *); +static void svc_close_socket(struct svc_sock *svsk); static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk); static int svc_deferred_recv(struct svc_rqst *rqstp); @@ -114,6 +124,41 @@ static inline void svc_reclassify_socket(struct socket *sock) } #endif +static char *__svc_print_addr(struct sockaddr *addr, char *buf, size_t len) +{ + switch (addr->sa_family) { + case AF_INET: + snprintf(buf, len, "%u.%u.%u.%u, port=%u", + NIPQUAD(((struct sockaddr_in *) addr)->sin_addr), + htons(((struct sockaddr_in *) addr)->sin_port)); + break; + + case AF_INET6: + snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u", + NIP6(((struct sockaddr_in6 *) addr)->sin6_addr), + htons(((struct sockaddr_in6 *) addr)->sin6_port)); + break; + + default: + snprintf(buf, len, "unknown address type: %d", addr->sa_family); + break; + } + return buf; +} + +/** + * svc_print_addr - Format rq_addr field for printing + * @rqstp: svc_rqst struct containing address to print + * @buf: target buffer for formatted address + * @len: length of target buffer + * + */ +char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len) +{ + return __svc_print_addr(svc_addr(rqstp), buf, len); +} +EXPORT_SYMBOL_GPL(svc_print_addr); + /* * Queue up an idle server thread. Must have pool->sp_lock held. * Note: this is really a stack rather than a queue, so that we only @@ -245,7 +290,7 @@ svc_sock_enqueue(struct svc_sock *svsk) svsk->sk_sk, rqstp); svc_thread_dequeue(pool, rqstp); if (rqstp->rq_sock) - printk(KERN_ERR + printk(KERN_ERR "svc_sock_enqueue: server %p, rq_sock=%p!\n", rqstp, rqstp->rq_sock); rqstp->rq_sock = svsk; @@ -329,8 +374,9 @@ void svc_reserve(struct svc_rqst *rqstp, int space) static inline void svc_sock_put(struct svc_sock *svsk) { - if (atomic_dec_and_test(&svsk->sk_inuse) && - test_bit(SK_DEAD, &svsk->sk_flags)) { + if (atomic_dec_and_test(&svsk->sk_inuse)) { + BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags)); + dprintk("svc: releasing dead socket\n"); if (svsk->sk_sock->file) sockfd_put(svsk->sk_sock); @@ -402,6 +448,42 @@ svc_wake_up(struct svc_serv *serv) } } +union svc_pktinfo_u { + struct in_pktinfo pkti; + struct in6_pktinfo pkti6; +}; +#define SVC_PKTINFO_SPACE \ + CMSG_SPACE(sizeof(union svc_pktinfo_u)) + +static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) +{ + switch (rqstp->rq_sock->sk_sk->sk_family) { + case AF_INET: { + struct in_pktinfo *pki = CMSG_DATA(cmh); + + cmh->cmsg_level = SOL_IP; + cmh->cmsg_type = IP_PKTINFO; + pki->ipi_ifindex = 0; + pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr; + cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); + } + break; + + case AF_INET6: { + struct in6_pktinfo *pki = CMSG_DATA(cmh); + + cmh->cmsg_level = SOL_IPV6; + cmh->cmsg_type = IPV6_PKTINFO; + pki->ipi6_ifindex = 0; + ipv6_addr_copy(&pki->ipi6_addr, + &rqstp->rq_daddr.addr6); + cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); + } + break; + } + return; +} + /* * Generic sendto routine */ @@ -411,9 +493,11 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) struct svc_sock *svsk = rqstp->rq_sock; struct socket *sock = svsk->sk_sock; int slen; - char buffer[CMSG_SPACE(sizeof(struct in_pktinfo))]; - struct cmsghdr *cmh = (struct cmsghdr *)buffer; - struct in_pktinfo *pki = (struct in_pktinfo *)CMSG_DATA(cmh); + union { + struct cmsghdr hdr; + long all[SVC_PKTINFO_SPACE / sizeof(long)]; + } buffer; + struct cmsghdr *cmh = &buffer.hdr; int len = 0; int result; int size; @@ -421,25 +505,20 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) size_t base = xdr->page_base; unsigned int pglen = xdr->page_len; unsigned int flags = MSG_MORE; + char buf[RPC_MAX_ADDRBUFLEN]; slen = xdr->len; if (rqstp->rq_prot == IPPROTO_UDP) { - /* set the source and destination */ - struct msghdr msg; - msg.msg_name = &rqstp->rq_addr; - msg.msg_namelen = sizeof(rqstp->rq_addr); - msg.msg_iov = NULL; - msg.msg_iovlen = 0; - msg.msg_flags = MSG_MORE; - - msg.msg_control = cmh; - msg.msg_controllen = sizeof(buffer); - cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); - cmh->cmsg_level = SOL_IP; - cmh->cmsg_type = IP_PKTINFO; - pki->ipi_ifindex = 0; - pki->ipi_spec_dst.s_addr = rqstp->rq_daddr; + struct msghdr msg = { + .msg_name = &rqstp->rq_addr, + .msg_namelen = rqstp->rq_addrlen, + .msg_control = cmh, + .msg_controllen = sizeof(buffer), + .msg_flags = MSG_MORE, + }; + + svc_set_cmsg_data(rqstp, cmh); if (sock_sendmsg(sock, &msg, 0) < 0) goto out; @@ -476,16 +555,16 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) if (xdr->tail[0].iov_len) { result = kernel_sendpage(sock, rqstp->rq_respages[0], ((unsigned long)xdr->tail[0].iov_base) - & (PAGE_SIZE-1), + & (PAGE_SIZE-1), xdr->tail[0].iov_len, 0); if (result > 0) len += result; } out: - dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %x)\n", - rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, xdr->len, len, - rqstp->rq_addr.sin_addr.s_addr); + dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", + rqstp->rq_sock, xdr->head[0].iov_base, xdr->head[0].iov_len, + xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); return len; } @@ -520,7 +599,7 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) if (!serv) return 0; - spin_lock(&serv->sv_lock); + spin_lock_bh(&serv->sv_lock); list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) { int onelen = one_sock_name(buf+len, svsk); if (toclose && strcmp(toclose, buf+len) == 0) @@ -528,12 +607,12 @@ svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) else len += onelen; } - spin_unlock(&serv->sv_lock); + spin_unlock_bh(&serv->sv_lock); if (closesk) /* Should unregister with portmap, but you cannot * unregister just one protocol... */ - svc_delete_socket(closesk); + svc_close_socket(closesk); else if (toclose) return -ENOENT; return len; @@ -560,31 +639,22 @@ svc_recv_available(struct svc_sock *svsk) static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) { - struct msghdr msg; - struct socket *sock; - int len, alen; - - rqstp->rq_addrlen = sizeof(rqstp->rq_addr); - sock = rqstp->rq_sock->sk_sock; - - msg.msg_name = &rqstp->rq_addr; - msg.msg_namelen = sizeof(rqstp->rq_addr); - msg.msg_control = NULL; - msg.msg_controllen = 0; - - msg.msg_flags = MSG_DONTWAIT; + struct svc_sock *svsk = rqstp->rq_sock; + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT, + }; + int len; - len = kernel_recvmsg(sock, &msg, iov, nr, buflen, MSG_DONTWAIT); + len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, + msg.msg_flags); /* sock_recvmsg doesn't fill in the name/namelen, so we must.. - * possibly we should cache this in the svc_sock structure - * at accept time. FIXME */ - alen = sizeof(rqstp->rq_addr); - kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen); + memcpy(&rqstp->rq_addr, &svsk->sk_remote, svsk->sk_remotelen); + rqstp->rq_addrlen = svsk->sk_remotelen; dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", - rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); + svsk, iov[0].iov_base, iov[0].iov_len, len); return len; } @@ -654,6 +724,23 @@ svc_write_space(struct sock *sk) } } +static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp, + struct cmsghdr *cmh) +{ + switch (rqstp->rq_sock->sk_sk->sk_family) { + case AF_INET: { + struct in_pktinfo *pki = CMSG_DATA(cmh); + rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; + break; + } + case AF_INET6: { + struct in6_pktinfo *pki = CMSG_DATA(cmh); + ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr); + break; + } + } +} + /* * Receive a datagram from a UDP socket. */ @@ -663,7 +750,18 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) struct svc_sock *svsk = rqstp->rq_sock; struct svc_serv *serv = svsk->sk_server; struct sk_buff *skb; + union { + struct cmsghdr hdr; + long all[SVC_PKTINFO_SPACE / sizeof(long)]; + } buffer; + struct cmsghdr *cmh = &buffer.hdr; int err, len; + struct msghdr msg = { + .msg_name = svc_addr(rqstp), + .msg_control = cmh, + .msg_controllen = sizeof(buffer), + .msg_flags = MSG_DONTWAIT, + }; if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags)) /* udp sockets need large rcvbuf as all pending @@ -683,8 +781,15 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) return svc_deferred_recv(rqstp); } + if (test_bit(SK_CLOSE, &svsk->sk_flags)) { + svc_delete_socket(svsk); + return 0; + } + clear_bit(SK_DATA, &svsk->sk_flags); - while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { + while ((err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, + 0, 0, MSG_PEEK | MSG_DONTWAIT)) < 0 || + (skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) { if (err == -EAGAIN) { svc_sock_received(svsk); return err; @@ -692,16 +797,13 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) /* possibly an icmp error */ dprintk("svc: recvfrom returned error %d\n", -err); } - if (skb->tstamp.off_sec == 0) { - struct timeval tv; - - tv.tv_sec = xtime.tv_sec; - tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; - skb_set_timestamp(skb, &tv); - /* Don't enable netstamp, sunrpc doesn't + rqstp->rq_addrlen = sizeof(rqstp->rq_addr); + if (skb->tstamp.tv64 == 0) { + skb->tstamp = ktime_get_real(); + /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ } - skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp); + svsk->sk_sk->sk_stamp = skb->tstamp; set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ /* @@ -712,13 +814,18 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) len = skb->len - sizeof(struct udphdr); rqstp->rq_arg.len = len; - rqstp->rq_prot = IPPROTO_UDP; + rqstp->rq_prot = IPPROTO_UDP; - /* Get sender address */ - rqstp->rq_addr.sin_family = AF_INET; - rqstp->rq_addr.sin_port = skb->h.uh->source; - rqstp->rq_addr.sin_addr.s_addr = skb->nh.iph->saddr; - rqstp->rq_daddr = skb->nh.iph->daddr; + if (cmh->cmsg_level != IPPROTO_IP || + cmh->cmsg_type != IP_PKTINFO) { + if (net_ratelimit()) + printk("rpcsvc: received unknown control message:" + "%d/%d\n", + cmh->cmsg_level, cmh->cmsg_type); + skb_free_datagram(svsk->sk_sk, skb); + return 0; + } + svc_udp_get_dest_address(rqstp, cmh); if (skb_is_nonlinear(skb)) { /* we have to copy */ @@ -730,7 +837,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) return 0; } local_bh_enable(); - skb_free_datagram(svsk->sk_sk, skb); + skb_free_datagram(svsk->sk_sk, skb); } else { /* we can use it in-place */ rqstp->rq_arg.head[0].iov_base = skb->data + sizeof(struct udphdr); @@ -775,13 +882,16 @@ svc_udp_sendto(struct svc_rqst *rqstp) static void svc_udp_init(struct svc_sock *svsk) { + int one = 1; + mm_segment_t oldfs; + svsk->sk_sk->sk_data_ready = svc_udp_data_ready; svsk->sk_sk->sk_write_space = svc_write_space; svsk->sk_recvfrom = svc_udp_recvfrom; svsk->sk_sendto = svc_udp_sendto; /* initialise setting must have enough space to - * receive and respond to one request. + * receive and respond to one request. * svc_udp_recvfrom will re-adjust if necessary */ svc_sock_setbufsize(svsk->sk_sock, @@ -790,6 +900,13 @@ svc_udp_init(struct svc_sock *svsk) set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */ set_bit(SK_CHNGBUF, &svsk->sk_flags); + + oldfs = get_fs(); + set_fs(KERNEL_DS); + /* make sure we get destination address info */ + svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO, + (char __user *)&one, sizeof(one)); + set_fs(oldfs); } /* @@ -862,18 +979,34 @@ svc_tcp_data_ready(struct sock *sk, int count) wake_up_interruptible(sk->sk_sleep); } +static inline int svc_port_is_privileged(struct sockaddr *sin) +{ + switch (sin->sa_family) { + case AF_INET: + return ntohs(((struct sockaddr_in *)sin)->sin_port) + < PROT_SOCK; + case AF_INET6: + return ntohs(((struct sockaddr_in6 *)sin)->sin6_port) + < PROT_SOCK; + default: + return 0; + } +} + /* * Accept a TCP connection */ static void svc_tcp_accept(struct svc_sock *svsk) { - struct sockaddr_in sin; + struct sockaddr_storage addr; + struct sockaddr *sin = (struct sockaddr *) &addr; struct svc_serv *serv = svsk->sk_server; struct socket *sock = svsk->sk_sock; struct socket *newsock; struct svc_sock *newsvsk; int err, slen; + char buf[RPC_MAX_ADDRBUFLEN]; dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); if (!sock) @@ -894,8 +1027,7 @@ svc_tcp_accept(struct svc_sock *svsk) set_bit(SK_CONN, &svsk->sk_flags); svc_sock_enqueue(svsk); - slen = sizeof(sin); - err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen); + err = kernel_getpeername(newsock, sin, &slen); if (err < 0) { if (net_ratelimit()) printk(KERN_WARNING "%s: peername failed (err %d)!\n", @@ -904,27 +1036,30 @@ svc_tcp_accept(struct svc_sock *svsk) } /* Ideally, we would want to reject connections from unauthorized - * hosts here, but when we get encription, the IP of the host won't - * tell us anything. For now just warn about unpriv connections. + * hosts here, but when we get encryption, the IP of the host won't + * tell us anything. For now just warn about unpriv connections. */ - if (ntohs(sin.sin_port) >= 1024) { + if (!svc_port_is_privileged(sin)) { dprintk(KERN_WARNING - "%s: connect from unprivileged port: %u.%u.%u.%u:%d\n", - serv->sv_name, - NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); + "%s: connect from unprivileged port: %s\n", + serv->sv_name, + __svc_print_addr(sin, buf, sizeof(buf))); } - - dprintk("%s: connect from %u.%u.%u.%u:%04x\n", serv->sv_name, - NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); + dprintk("%s: connect from %s\n", serv->sv_name, + __svc_print_addr(sin, buf, sizeof(buf))); /* make sure that a write doesn't block forever when * low on memory */ newsock->sk->sk_sndtimeo = HZ*30; - if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 0))) + if (!(newsvsk = svc_setup_socket(serv, newsock, &err, + (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) goto failed; + memcpy(&newsvsk->sk_remote, sin, slen); + newsvsk->sk_remotelen = slen; + svc_sock_received(newsvsk); /* make sure that we don't have too many active connections. * If we have, something must be dropped. @@ -947,11 +1082,9 @@ svc_tcp_accept(struct svc_sock *svsk) "sockets, consider increasing the " "number of nfsd threads\n", serv->sv_name); - printk(KERN_NOTICE "%s: last TCP connect from " - "%u.%u.%u.%u:%d\n", - serv->sv_name, - NIPQUAD(sin.sin_addr.s_addr), - ntohs(sin.sin_port)); + printk(KERN_NOTICE + "%s: last TCP connect from %s\n", + serv->sv_name, buf); } /* * Always select the oldest socket. It's not fair, @@ -1025,7 +1158,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) * on the number of threads which will access the socket. * * rcvbuf just needs to be able to hold a few requests. - * Normally they will be removed from the queue + * Normally they will be removed from the queue * as soon a a complete request arrives. */ svc_sock_setbufsize(svsk->sk_sock, @@ -1050,7 +1183,7 @@ svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len < want) { dprintk("svc: short recvfrom while reading record length (%d of %lu)\n", - len, want); + len, want); svc_sock_received(svsk); return -EAGAIN; /* record header not complete */ } @@ -1176,7 +1309,8 @@ svc_tcp_sendto(struct svc_rqst *rqstp) rqstp->rq_sock->sk_server->sv_name, (sent<0)?"got error":"sent only", sent, xbufp->len); - svc_delete_socket(rqstp->rq_sock); + set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags); + svc_sock_enqueue(rqstp->rq_sock); sent = -EAGAIN; } return sent; @@ -1207,7 +1341,7 @@ svc_tcp_init(struct svc_sock *svsk) tp->nonagle = 1; /* disable Nagle's algorithm */ /* initialise setting must have enough space to - * receive and respond to one request. + * receive and respond to one request. * svc_tcp_recvfrom will re-adjust if necessary */ svc_sock_setbufsize(svsk->sk_sock, @@ -1216,7 +1350,7 @@ svc_tcp_init(struct svc_sock *svsk) set_bit(SK_CHNGBUF, &svsk->sk_flags); set_bit(SK_DATA, &svsk->sk_flags); - if (sk->sk_state != TCP_ESTABLISHED) + if (sk->sk_state != TCP_ESTABLISHED) set_bit(SK_CLOSE, &svsk->sk_flags); } } @@ -1232,7 +1366,7 @@ svc_sock_update_bufs(struct svc_serv *serv) spin_lock_bh(&serv->sv_lock); list_for_each(le, &serv->sv_permsocks) { - struct svc_sock *svsk = + struct svc_sock *svsk = list_entry(le, struct svc_sock, sk_list); set_bit(SK_CHNGBUF, &svsk->sk_flags); } @@ -1252,7 +1386,7 @@ svc_sock_update_bufs(struct svc_serv *serv) int svc_recv(struct svc_rqst *rqstp, long timeout) { - struct svc_sock *svsk =NULL; + struct svc_sock *svsk = NULL; struct svc_serv *serv = rqstp->rq_server; struct svc_pool *pool = rqstp->rq_pool; int len, i; @@ -1264,11 +1398,11 @@ svc_recv(struct svc_rqst *rqstp, long timeout) rqstp, timeout); if (rqstp->rq_sock) - printk(KERN_ERR + printk(KERN_ERR "svc_recv: service %p, socket not NULL!\n", rqstp); if (waitqueue_active(&rqstp->rq_wait)) - printk(KERN_ERR + printk(KERN_ERR "svc_recv: service %p, wait queue active!\n", rqstp); @@ -1349,7 +1483,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout) svsk->sk_lastrecv = get_seconds(); clear_bit(SK_OLD, &svsk->sk_flags); - rqstp->rq_secure = ntohs(rqstp->rq_addr.sin_port) < 1024; + rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp)); rqstp->rq_chandle.defer = svc_defer; if (serv->sv_stats) @@ -1357,7 +1491,7 @@ svc_recv(struct svc_rqst *rqstp, long timeout) return len; } -/* +/* * Drop request */ void @@ -1462,12 +1596,14 @@ svc_age_temp_sockets(unsigned long closure) * Initialize socket for RPC use and create svc_sock struct * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. */ -static struct svc_sock * -svc_setup_socket(struct svc_serv *serv, struct socket *sock, - int *errp, int pmap_register) +static struct svc_sock *svc_setup_socket(struct svc_serv *serv, + struct socket *sock, + int *errp, int flags) { struct svc_sock *svsk; struct sock *inet; + int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); + int is_temporary = flags & SVC_SOCK_TEMPORARY; dprintk("svc: svc_setup_socket %p\n", sock); if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { @@ -1495,7 +1631,7 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock, svsk->sk_odata = inet->sk_data_ready; svsk->sk_owspace = inet->sk_write_space; svsk->sk_server = serv; - atomic_set(&svsk->sk_inuse, 0); + atomic_set(&svsk->sk_inuse, 1); svsk->sk_lastrecv = get_seconds(); spin_lock_init(&svsk->sk_defer_lock); INIT_LIST_HEAD(&svsk->sk_deferred); @@ -1509,7 +1645,7 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock, svc_tcp_init(svsk); spin_lock_bh(&serv->sv_lock); - if (!pmap_register) { + if (is_temporary) { set_bit(SK_TEMP, &svsk->sk_flags); list_add(&svsk->sk_list, &serv->sv_tempsocks); serv->sv_tmpcnt++; @@ -1529,8 +1665,6 @@ svc_setup_socket(struct svc_serv *serv, struct socket *sock, dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); - clear_bit(SK_BUSY, &svsk->sk_flags); - svc_sock_enqueue(svsk); return svsk; } @@ -1553,9 +1687,11 @@ int svc_addsock(struct svc_serv *serv, else if (so->state > SS_UNCONNECTED) err = -EISCONN; else { - svsk = svc_setup_socket(serv, so, &err, 1); - if (svsk) + svsk = svc_setup_socket(serv, so, &err, SVC_SOCK_DEFAULTS); + if (svsk) { + svc_sock_received(svsk); err = 0; + } } if (err) { sockfd_put(so); @@ -1569,18 +1705,18 @@ EXPORT_SYMBOL_GPL(svc_addsock); /* * Create socket for RPC service. */ -static int -svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) +static int svc_create_socket(struct svc_serv *serv, int protocol, + struct sockaddr *sin, int len, int flags) { struct svc_sock *svsk; struct socket *sock; int error; int type; + char buf[RPC_MAX_ADDRBUFLEN]; - dprintk("svc: svc_create_socket(%s, %d, %u.%u.%u.%u:%d)\n", - serv->sv_program->pg_name, protocol, - NIPQUAD(sin->sin_addr.s_addr), - ntohs(sin->sin_port)); + dprintk("svc: svc_create_socket(%s, %d, %s)\n", + serv->sv_program->pg_name, protocol, + __svc_print_addr(sin, buf, sizeof(buf))); if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { printk(KERN_WARNING "svc: only UDP and TCP " @@ -1589,15 +1725,15 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) } type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; - if ((error = sock_create_kern(PF_INET, type, protocol, &sock)) < 0) + error = sock_create_kern(sin->sa_family, type, protocol, &sock); + if (error < 0) return error; svc_reclassify_socket(sock); if (type == SOCK_STREAM) - sock->sk->sk_reuse = 1; /* allow address reuse */ - error = kernel_bind(sock, (struct sockaddr *) sin, - sizeof(*sin)); + sock->sk->sk_reuse = 1; /* allow address reuse */ + error = kernel_bind(sock, sin, len); if (error < 0) goto bummer; @@ -1606,8 +1742,10 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) goto bummer; } - if ((svsk = svc_setup_socket(serv, sock, &error, 1)) != NULL) - return 0; + if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { + svc_sock_received(svsk); + return ntohs(inet_sk(svsk->sk_sk)->sport); + } bummer: dprintk("svc: svc_create_socket error = %d\n", -error); @@ -1618,7 +1756,7 @@ bummer: /* * Remove a dead socket */ -void +static void svc_delete_socket(struct svc_sock *svsk) { struct svc_serv *serv; @@ -1637,43 +1775,73 @@ svc_delete_socket(struct svc_sock *svsk) if (!test_and_set_bit(SK_DETACHED, &svsk->sk_flags)) list_del_init(&svsk->sk_list); - /* + /* * We used to delete the svc_sock from whichever list * it's sk_ready node was on, but we don't actually * need to. This is because the only time we're called * while still attached to a queue, the queue itself * is about to be destroyed (in svc_destroy). */ - if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) + if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) { + BUG_ON(atomic_read(&svsk->sk_inuse)<2); + atomic_dec(&svsk->sk_inuse); if (test_bit(SK_TEMP, &svsk->sk_flags)) serv->sv_tmpcnt--; + } - /* This atomic_inc should be needed - svc_delete_socket - * should have the semantic of dropping a reference. - * But it doesn't yet.... - */ - atomic_inc(&svsk->sk_inuse); spin_unlock_bh(&serv->sv_lock); +} + +static void svc_close_socket(struct svc_sock *svsk) +{ + set_bit(SK_CLOSE, &svsk->sk_flags); + if (test_and_set_bit(SK_BUSY, &svsk->sk_flags)) + /* someone else will have to effect the close */ + return; + + atomic_inc(&svsk->sk_inuse); + svc_delete_socket(svsk); + clear_bit(SK_BUSY, &svsk->sk_flags); svc_sock_put(svsk); } -/* - * Make a socket for nfsd and lockd +void svc_force_close_socket(struct svc_sock *svsk) +{ + set_bit(SK_CLOSE, &svsk->sk_flags); + if (test_bit(SK_BUSY, &svsk->sk_flags)) { + /* Waiting to be processed, but no threads left, + * So just remove it from the waiting list + */ + list_del_init(&svsk->sk_ready); + clear_bit(SK_BUSY, &svsk->sk_flags); + } + svc_close_socket(svsk); +} + +/** + * svc_makesock - Make a socket for nfsd and lockd + * @serv: RPC server structure + * @protocol: transport protocol to use + * @port: port to use + * @flags: requested socket characteristics + * */ -int -svc_makesock(struct svc_serv *serv, int protocol, unsigned short port) +int svc_makesock(struct svc_serv *serv, int protocol, unsigned short port, + int flags) { - struct sockaddr_in sin; + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr.s_addr = INADDR_ANY, + .sin_port = htons(port), + }; dprintk("svc: creating socket proto = %d\n", protocol); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = INADDR_ANY; - sin.sin_port = htons(port); - return svc_create_socket(serv, protocol, &sin); + return svc_create_socket(serv, protocol, (struct sockaddr *) &sin, + sizeof(sin), flags); } /* - * Handle defer and revisit of requests + * Handle defer and revisit of requests */ static void svc_revisit(struct cache_deferred_req *dreq, int too_many) @@ -1718,7 +1886,8 @@ svc_defer(struct cache_req *req) dr->handle.owner = rqstp->rq_server; dr->prot = rqstp->rq_prot; - dr->addr = rqstp->rq_addr; + memcpy(&dr->addr, &rqstp->rq_addr, rqstp->rq_addrlen); + dr->addrlen = rqstp->rq_addrlen; dr->daddr = rqstp->rq_daddr; dr->argslen = rqstp->rq_arg.len >> 2; memcpy(dr->args, rqstp->rq_arg.head[0].iov_base-skip, dr->argslen<<2); @@ -1742,7 +1911,8 @@ static int svc_deferred_recv(struct svc_rqst *rqstp) rqstp->rq_arg.page_len = 0; rqstp->rq_arg.len = dr->argslen<<2; rqstp->rq_prot = dr->prot; - rqstp->rq_addr = dr->addr; + memcpy(&rqstp->rq_addr, &dr->addr, dr->addrlen); + rqstp->rq_addrlen = dr->addrlen; rqstp->rq_daddr = dr->daddr; rqstp->rq_respages = rqstp->rq_pages; return dr->argslen<<2; @@ -1752,7 +1922,7 @@ static int svc_deferred_recv(struct svc_rqst *rqstp) static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk) { struct svc_deferred_req *dr = NULL; - + if (!test_bit(SK_DEFERRED, &svsk->sk_flags)) return NULL; spin_lock_bh(&svsk->sk_defer_lock);