about summary refs log tree commit diff
path: root/src/network
diff options
context:
space:
mode:
Diffstat (limited to 'src/network')
-rw-r--r--src/network/accept4.c4
-rw-r--r--src/network/dns_parse.c11
-rw-r--r--src/network/gai_strerror.c2
-rw-r--r--src/network/getaddrinfo.c7
-rw-r--r--src/network/gethostbyaddr.c2
-rw-r--r--src/network/gethostbyaddr_r.c5
-rw-r--r--src/network/gethostbyname2.c2
-rw-r--r--src/network/gethostbyname2_r.c6
-rw-r--r--src/network/getifaddrs.c14
-rw-r--r--src/network/getnameinfo.c9
-rw-r--r--src/network/getservbyport_r.c4
-rw-r--r--src/network/inet_pton.c1
-rw-r--r--src/network/lookup.h2
-rw-r--r--src/network/lookup_ipliteral.c4
-rw-r--r--src/network/lookup_name.c47
-rw-r--r--src/network/res_mkquery.c1
-rw-r--r--src/network/res_msend.c183
-rw-r--r--src/network/res_send.c10
-rw-r--r--src/network/sendmsg.c7
19 files changed, 248 insertions, 73 deletions
diff --git a/src/network/accept4.c b/src/network/accept4.c
index 59ab1726..765a38ed 100644
--- a/src/network/accept4.c
+++ b/src/network/accept4.c
@@ -9,6 +9,10 @@ int accept4(int fd, struct sockaddr *restrict addr, socklen_t *restrict len, int
 	if (!flg) return accept(fd, addr, len);
 	int ret = socketcall_cp(accept4, fd, addr, len, flg, 0, 0);
 	if (ret>=0 || (errno != ENOSYS && errno != EINVAL)) return ret;
+	if (flg & ~(SOCK_CLOEXEC|SOCK_NONBLOCK)) {
+		errno = EINVAL;
+		return -1;
+	}
 	ret = accept(fd, addr, len);
 	if (ret<0) return ret;
 	if (flg & SOCK_CLOEXEC)
diff --git a/src/network/dns_parse.c b/src/network/dns_parse.c
index e6ee19d9..09813112 100644
--- a/src/network/dns_parse.c
+++ b/src/network/dns_parse.c
@@ -1,7 +1,7 @@
 #include <string.h>
 #include "lookup.h"
 
-int __dns_parse(const unsigned char *r, int rlen, int (*callback)(void *, int, const void *, int, const void *), void *ctx)
+int __dns_parse(const unsigned char *r, int rlen, int (*callback)(void *, int, const void *, int, const void *, int), void *ctx)
 {
 	int qdcount, ancount;
 	const unsigned char *p;
@@ -12,21 +12,20 @@ int __dns_parse(const unsigned char *r, int rlen, int (*callback)(void *, int, c
 	p = r+12;
 	qdcount = r[4]*256 + r[5];
 	ancount = r[6]*256 + r[7];
-	if (qdcount+ancount > 64) return -1;
 	while (qdcount--) {
 		while (p-r < rlen && *p-1U < 127) p++;
-		if (*p>193 || (*p==193 && p[1]>254) || p>r+rlen-6)
+		if (p>r+rlen-6)
 			return -1;
 		p += 5 + !!*p;
 	}
 	while (ancount--) {
 		while (p-r < rlen && *p-1U < 127) p++;
-		if (*p>193 || (*p==193 && p[1]>254) || p>r+rlen-6)
+		if (p>r+rlen-12)
 			return -1;
 		p += 1 + !!*p;
 		len = p[8]*256 + p[9];
-		if (p+len > r+rlen) return -1;
-		if (callback(ctx, p[1], p+10, len, r) < 0) return -1;
+		if (len+10 > r+rlen-p) return -1;
+		if (callback(ctx, p[1], p+10, len, r, rlen) < 0) return -1;
 		p += 10 + len;
 	}
 	return 0;
diff --git a/src/network/gai_strerror.c b/src/network/gai_strerror.c
index 9596580e..56b71503 100644
--- a/src/network/gai_strerror.c
+++ b/src/network/gai_strerror.c
@@ -6,7 +6,7 @@ static const char msgs[] =
 	"Name does not resolve\0"
 	"Try again\0"
 	"Non-recoverable error\0"
-	"Unknown error\0"
+	"Name has no usable address\0"
 	"Unrecognized address family or invalid length\0"
 	"Unrecognized socket type\0"
 	"Unrecognized service\0"
diff --git a/src/network/getaddrinfo.c b/src/network/getaddrinfo.c
index efaab306..64ad259a 100644
--- a/src/network/getaddrinfo.c
+++ b/src/network/getaddrinfo.c
@@ -16,6 +16,7 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
 	char canon[256], *outcanon;
 	int nservs, naddrs, nais, canon_len, i, j, k;
 	int family = AF_UNSPEC, flags = 0, proto = 0, socktype = 0;
+	int no_family = 0;
 	struct aibuf *out;
 
 	if (!host && !serv) return EAI_NONAME;
@@ -66,9 +67,11 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
 				pthread_setcancelstate(
 					PTHREAD_CANCEL_DISABLE, &cs);
 				int r = connect(s, ta[i], tl[i]);
+				int saved_errno = errno;
 				pthread_setcancelstate(cs, 0);
 				close(s);
 				if (!r) continue;
+				errno = saved_errno;
 			}
 			switch (errno) {
 			case EADDRNOTAVAIL:
@@ -80,7 +83,7 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
 			default:
 				return EAI_SYSTEM;
 			}
-			if (family == tf[i]) return EAI_NONAME;
+			if (family == tf[i]) no_family = 1;
 			family = tf[1-i];
 		}
 	}
@@ -91,6 +94,8 @@ int getaddrinfo(const char *restrict host, const char *restrict serv, const stru
 	naddrs = __lookup_name(addrs, canon, host, family, flags);
 	if (naddrs < 0) return naddrs;
 
+	if (no_family) return EAI_NODATA;
+
 	nais = nservs * naddrs;
 	canon_len = strlen(canon);
 	out = calloc(1, nais * sizeof(*out) + canon_len + 1);
diff --git a/src/network/gethostbyaddr.c b/src/network/gethostbyaddr.c
index 598e2241..c3cacaac 100644
--- a/src/network/gethostbyaddr.c
+++ b/src/network/gethostbyaddr.c
@@ -20,5 +20,5 @@ struct hostent *gethostbyaddr(const void *a, socklen_t l, int af)
 		err = gethostbyaddr_r(a, l, af, h,
 			(void *)(h+1), size-sizeof *h, &res, &h_errno);
 	} while (err == ERANGE);
-	return err ? 0 : h;
+	return res;
 }
diff --git a/src/network/gethostbyaddr_r.c b/src/network/gethostbyaddr_r.c
index 0f1e61aa..ceaf3935 100644
--- a/src/network/gethostbyaddr_r.c
+++ b/src/network/gethostbyaddr_r.c
@@ -54,10 +54,11 @@ int gethostbyaddr_r(const void *a, socklen_t l, int af,
 	case EAI_OVERFLOW:
 		return ERANGE;
 	default:
-	case EAI_MEMORY:
-	case EAI_SYSTEM:
 	case EAI_FAIL:
 		*err = NO_RECOVERY;
+		return EBADMSG;
+	case EAI_SYSTEM:
+		*err = NO_RECOVERY;
 		return errno;
 	case 0:
 		break;
diff --git a/src/network/gethostbyname2.c b/src/network/gethostbyname2.c
index dc9d6621..bd0da7f8 100644
--- a/src/network/gethostbyname2.c
+++ b/src/network/gethostbyname2.c
@@ -21,5 +21,5 @@ struct hostent *gethostbyname2(const char *name, int af)
 		err = gethostbyname2_r(name, af, h,
 			(void *)(h+1), size-sizeof *h, &res, &h_errno);
 	} while (err == ERANGE);
-	return err ? 0 : h;
+	return res;
 }
diff --git a/src/network/gethostbyname2_r.c b/src/network/gethostbyname2_r.c
index fc894877..a5eb67fe 100644
--- a/src/network/gethostbyname2_r.c
+++ b/src/network/gethostbyname2_r.c
@@ -22,7 +22,10 @@ int gethostbyname2_r(const char *name, int af,
 	if (cnt<0) switch (cnt) {
 	case EAI_NONAME:
 		*err = HOST_NOT_FOUND;
-		return ENOENT;
+		return 0;
+	case EAI_NODATA:
+		*err = NO_DATA;
+		return 0;
 	case EAI_AGAIN:
 		*err = TRY_AGAIN;
 		return EAGAIN;
@@ -30,7 +33,6 @@ int gethostbyname2_r(const char *name, int af,
 	case EAI_FAIL:
 		*err = NO_RECOVERY;
 		return EBADMSG;
-	case EAI_MEMORY:
 	case EAI_SYSTEM:
 		*err = NO_RECOVERY;
 		return errno;
diff --git a/src/network/getifaddrs.c b/src/network/getifaddrs.c
index fed75bd8..74df4d6c 100644
--- a/src/network/getifaddrs.c
+++ b/src/network/getifaddrs.c
@@ -39,8 +39,8 @@ struct ifaddrs_storage {
 };
 
 struct ifaddrs_ctx {
-	struct ifaddrs_storage *first;
-	struct ifaddrs_storage *last;
+	struct ifaddrs *first;
+	struct ifaddrs *last;
 	struct ifaddrs_storage *hash[IFADDRS_HASH_SIZE];
 };
 
@@ -195,9 +195,9 @@ static int netlink_msg_to_ifaddr(void *pctx, struct nlmsghdr *h)
 	}
 
 	if (ifs->ifa.ifa_name) {
-		if (!ctx->first) ctx->first = ifs;
-		if (ctx->last) ctx->last->ifa.ifa_next = &ifs->ifa;
-		ctx->last = ifs;
+		if (!ctx->first) ctx->first = &ifs->ifa;
+		if (ctx->last) ctx->last->ifa_next = &ifs->ifa;
+		ctx->last = &ifs->ifa;
 	} else {
 		free(ifs);
 	}
@@ -210,7 +210,7 @@ int getifaddrs(struct ifaddrs **ifap)
 	int r;
 	memset(ctx, 0, sizeof *ctx);
 	r = __rtnetlink_enumerate(AF_UNSPEC, AF_UNSPEC, netlink_msg_to_ifaddr, ctx);
-	if (r == 0) *ifap = &ctx->first->ifa;
-	else freeifaddrs(&ctx->first->ifa);
+	if (r == 0) *ifap = ctx->first;
+	else freeifaddrs(ctx->first);
 	return r;
 }
diff --git a/src/network/getnameinfo.c b/src/network/getnameinfo.c
index 949e1811..133c15b3 100644
--- a/src/network/getnameinfo.c
+++ b/src/network/getnameinfo.c
@@ -58,6 +58,7 @@ static void reverse_hosts(char *buf, const unsigned char *a, unsigned scopeid, i
 		if ((p=strchr(line, '#'))) *p++='\n', *p=0;
 
 		for (p=line; *p && !isspace(*p); p++);
+		if (!*p) continue;
 		*p++ = 0;
 		if (__lookup_ipliteral(&iplit, line, AF_UNSPEC)<=0)
 			continue;
@@ -108,10 +109,10 @@ static void reverse_services(char *buf, int port, int dgram)
 	__fclose_ca(f);
 }
 
-static int dns_parse_callback(void *c, int rr, const void *data, int len, const void *packet)
+static int dns_parse_callback(void *c, int rr, const void *data, int len, const void *packet, int plen)
 {
 	if (rr != RR_PTR) return 0;
-	if (__dn_expand(packet, (const unsigned char *)packet + 512,
+	if (__dn_expand(packet, (const unsigned char *)packet + plen,
 	    data, c, 256) <= 0)
 		*(char *)c = 0;
 	return 0;
@@ -161,8 +162,10 @@ int getnameinfo(const struct sockaddr *restrict sa, socklen_t sl,
 			query[3] = 0; /* don't need AD flag */
 			int rlen = __res_send(query, qlen, reply, sizeof reply);
 			buf[0] = 0;
-			if (rlen > 0)
+			if (rlen > 0) {
+				if (rlen > sizeof reply) rlen = sizeof reply;
 				__dns_parse(reply, rlen, dns_parse_callback, buf);
+			}
 		}
 		if (!*buf) {
 			if (flags & NI_NAMEREQD) return EAI_NONAME;
diff --git a/src/network/getservbyport_r.c b/src/network/getservbyport_r.c
index b7f21c6b..e4cc3079 100644
--- a/src/network/getservbyport_r.c
+++ b/src/network/getservbyport_r.c
@@ -26,7 +26,7 @@ int getservbyport_r(int port, const char *prots,
 	/* Align buffer */
 	i = (uintptr_t)buf & sizeof(char *)-1;
 	if (!i) i = sizeof(char *);
-	if (buflen < 3*sizeof(char *)-i)
+	if (buflen <= 3*sizeof(char *)-i)
 		return ERANGE;
 	buf += sizeof(char *)-i;
 	buflen -= sizeof(char *)-i;
@@ -46,6 +46,8 @@ int getservbyport_r(int port, const char *prots,
 	case EAI_MEMORY:
 	case EAI_SYSTEM:
 		return ENOMEM;
+	case EAI_OVERFLOW:
+		return ERANGE;
 	default:
 		return ENOENT;
 	case 0:
diff --git a/src/network/inet_pton.c b/src/network/inet_pton.c
index d36c3689..bcbdd9ef 100644
--- a/src/network/inet_pton.c
+++ b/src/network/inet_pton.c
@@ -54,6 +54,7 @@ int inet_pton(int af, const char *restrict s, void *restrict a0)
 			if (s[j]!='.' || (i<6 && brk<0)) return 0;
 			need_v4=1;
 			i++;
+			ip[i&7]=0;
 			break;
 		}
 		s += j+1;
diff --git a/src/network/lookup.h b/src/network/lookup.h
index ef662725..54b2f8b5 100644
--- a/src/network/lookup.h
+++ b/src/network/lookup.h
@@ -50,6 +50,6 @@ hidden int __lookup_ipliteral(struct address buf[static 1], const char *name, in
 hidden int __get_resolv_conf(struct resolvconf *, char *, size_t);
 hidden int __res_msend_rc(int, const unsigned char *const *, const int *, unsigned char *const *, int *, int, const struct resolvconf *);
 
-hidden int __dns_parse(const unsigned char *, int, int (*)(void *, int, const void *, int, const void *), void *);
+hidden int __dns_parse(const unsigned char *, int, int (*)(void *, int, const void *, int, const void *, int), void *);
 
 #endif
diff --git a/src/network/lookup_ipliteral.c b/src/network/lookup_ipliteral.c
index 2fddab73..1e766206 100644
--- a/src/network/lookup_ipliteral.c
+++ b/src/network/lookup_ipliteral.c
@@ -15,7 +15,7 @@ int __lookup_ipliteral(struct address buf[static 1], const char *name, int famil
 	struct in6_addr a6;
 	if (__inet_aton(name, &a4) > 0) {
 		if (family == AF_INET6) /* wrong family */
-			return EAI_NONAME;
+			return EAI_NODATA;
 		memcpy(&buf[0].addr, &a4, sizeof a4);
 		buf[0].family = AF_INET;
 		buf[0].scopeid = 0;
@@ -34,7 +34,7 @@ int __lookup_ipliteral(struct address buf[static 1], const char *name, int famil
 	if (inet_pton(AF_INET6, name, &a6) <= 0)
 		return 0;
 	if (family == AF_INET) /* wrong family */
-		return EAI_NONAME;
+		return EAI_NODATA;
 
 	memcpy(&buf[0].addr, &a6, sizeof a6);
 	buf[0].family = AF_INET6;
diff --git a/src/network/lookup_name.c b/src/network/lookup_name.c
index b5232ce8..35218185 100644
--- a/src/network/lookup_name.c
+++ b/src/network/lookup_name.c
@@ -79,7 +79,7 @@ static int name_from_hosts(struct address buf[static MAXADDRS], char canon[stati
 		case 0:
 			continue;
 		default:
-			badfam = EAI_NONAME;
+			badfam = EAI_NODATA;
 			break;
 		}
 
@@ -102,45 +102,50 @@ struct dpc_ctx {
 	struct address *addrs;
 	char *canon;
 	int cnt;
+	int rrtype;
 };
 
 #define RR_A 1
 #define RR_CNAME 5
 #define RR_AAAA 28
 
-static int dns_parse_callback(void *c, int rr, const void *data, int len, const void *packet)
+#define ABUF_SIZE 4800
+
+static int dns_parse_callback(void *c, int rr, const void *data, int len, const void *packet, int plen)
 {
 	char tmp[256];
+	int family;
 	struct dpc_ctx *ctx = c;
-	if (ctx->cnt >= MAXADDRS) return -1;
+	if (rr == RR_CNAME) {
+		if (__dn_expand(packet, (const unsigned char *)packet + plen,
+		    data, tmp, sizeof tmp) > 0 && is_valid_hostname(tmp))
+			strcpy(ctx->canon, tmp);
+		return 0;
+	}
+	if (ctx->cnt >= MAXADDRS) return 0;
+	if (rr != ctx->rrtype) return 0;
 	switch (rr) {
 	case RR_A:
 		if (len != 4) return -1;
-		ctx->addrs[ctx->cnt].family = AF_INET;
-		ctx->addrs[ctx->cnt].scopeid = 0;
-		memcpy(ctx->addrs[ctx->cnt++].addr, data, 4);
+		family = AF_INET;
 		break;
 	case RR_AAAA:
 		if (len != 16) return -1;
-		ctx->addrs[ctx->cnt].family = AF_INET6;
-		ctx->addrs[ctx->cnt].scopeid = 0;
-		memcpy(ctx->addrs[ctx->cnt++].addr, data, 16);
-		break;
-	case RR_CNAME:
-		if (__dn_expand(packet, (const unsigned char *)packet + 512,
-		    data, tmp, sizeof tmp) > 0 && is_valid_hostname(tmp))
-			strcpy(ctx->canon, tmp);
+		family = AF_INET6;
 		break;
 	}
+	ctx->addrs[ctx->cnt].family = family;
+	ctx->addrs[ctx->cnt].scopeid = 0;
+	memcpy(ctx->addrs[ctx->cnt++].addr, data, len);
 	return 0;
 }
 
 static int name_from_dns(struct address buf[static MAXADDRS], char canon[static 256], const char *name, int family, const struct resolvconf *conf)
 {
-	unsigned char qbuf[2][280], abuf[2][512];
+	unsigned char qbuf[2][280], abuf[2][ABUF_SIZE];
 	const unsigned char *qp[2] = { qbuf[0], qbuf[1] };
 	unsigned char *ap[2] = { abuf[0], abuf[1] };
-	int qlens[2], alens[2];
+	int qlens[2], alens[2], qtypes[2];
 	int i, nq = 0;
 	struct dpc_ctx ctx = { .addrs = buf, .canon = canon };
 	static const struct { int af; int rr; } afrr[2] = {
@@ -153,7 +158,8 @@ static int name_from_dns(struct address buf[static MAXADDRS], char canon[static
 			qlens[nq] = __res_mkquery(0, name, 1, afrr[i].rr,
 				0, 0, 0, qbuf[nq], sizeof *qbuf);
 			if (qlens[nq] == -1)
-				return EAI_NONAME;
+				return 0;
+			qtypes[nq] = afrr[i].rr;
 			qbuf[nq][3] = 0; /* don't need AD flag */
 			/* Ensure query IDs are distinct. */
 			if (nq && qbuf[nq][0] == qbuf[0][0])
@@ -171,11 +177,14 @@ static int name_from_dns(struct address buf[static MAXADDRS], char canon[static
 		if ((abuf[i][3] & 15) != 0) return EAI_FAIL;
 	}
 
-	for (i=0; i<nq; i++)
+	for (i=nq-1; i>=0; i--) {
+		ctx.rrtype = qtypes[i];
+		if (alens[i] > sizeof(abuf[i])) alens[i] = sizeof abuf[i];
 		__dns_parse(abuf[i], alens[i], dns_parse_callback, &ctx);
+	}
 
 	if (ctx.cnt) return ctx.cnt;
-	return EAI_NONAME;
+	return EAI_NODATA;
 }
 
 static int name_from_dns_search(struct address buf[static MAXADDRS], char canon[static 256], const char *name, int family)
diff --git a/src/network/res_mkquery.c b/src/network/res_mkquery.c
index 33f50cb9..614bf786 100644
--- a/src/network/res_mkquery.c
+++ b/src/network/res_mkquery.c
@@ -13,6 +13,7 @@ int __res_mkquery(int op, const char *dname, int class, int type,
 	int n;
 
 	if (l && dname[l-1]=='.') l--;
+	if (l && dname[l-1]=='.') return -1;
 	n = 17+l+!!l;
 	if (l>253 || buflen<n || op>15u || class>255u || type>255u)
 		return -1;
diff --git a/src/network/res_msend.c b/src/network/res_msend.c
index 3e018009..86c2fcf4 100644
--- a/src/network/res_msend.c
+++ b/src/network/res_msend.c
@@ -1,5 +1,6 @@
 #include <sys/socket.h>
 #include <netinet/in.h>
+#include <netinet/tcp.h>
 #include <netdb.h>
 #include <arpa/inet.h>
 #include <stdint.h>
@@ -16,17 +17,65 @@
 
 static void cleanup(void *p)
 {
-	__syscall(SYS_close, (intptr_t)p);
+	struct pollfd *pfd = p; /* cancellation cleanup: p is a pollfd array, and every open descriptor in it gets closed */
+	for (int i=0; pfd[i].fd >= -1; i++) /* an fd below -1 terminates the array; -1 marks a slot with no socket */
+		if (pfd[i].fd >= 0) __syscall(SYS_close, pfd[i].fd);
 }
 
 static unsigned long mtime()
 {
 	struct timespec ts;
-	clock_gettime(CLOCK_REALTIME, &ts);
+	if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0 && errno == ENOSYS) /* prefer the monotonic clock for the millisecond timestamp computed below */
+		clock_gettime(CLOCK_REALTIME, &ts); /* fallback taken only when the monotonic request fails with ENOSYS */
 	return (unsigned long)ts.tv_sec * 1000
 		+ ts.tv_nsec / 1000000;
 }
 
+static int start_tcp(struct pollfd *pfd, int family, const void *sa, socklen_t sl, const unsigned char *q, int ql)
+{ /* Start a non-blocking TCP exchange for query q (ql bytes) with the server at sa, recording the socket in *pfd. Returns the number of bytes already sent (0 if none), or -1 on failure with pfd->fd reset to -1. */
+	struct msghdr mh = { /* message to send: a two-byte length (high byte first) followed by the query itself */
+		.msg_name = (void *)sa,
+		.msg_namelen = sl,
+		.msg_iovlen = 2,
+		.msg_iov = (struct iovec [2]){
+			{ .iov_base = (uint8_t[]){ ql>>8, ql }, .iov_len = 2 },
+			{ .iov_base = (void *)q, .iov_len = ql } }
+	};
+	int r;
+	int fd = socket(family, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); /* not checked here; a failed socket() makes the calls below fail and ends in the -1 return */
+	pfd->fd = fd;
+	pfd->events = POLLOUT; /* default: wait until the socket becomes writable */
+	if (!setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN_CONNECT, /* first attempt: send the query together with the connection request */
+	    &(int){1}, sizeof(int))) {
+		r = sendmsg(fd, &mh, MSG_FASTOPEN|MSG_NOSIGNAL);
+		if (r == ql+2) pfd->events = POLLIN; /* length prefix and query fully sent: wait for the reply instead */
+		if (r >= 0) return r; /* a partial send is reported as the count sent so far */
+		if (errno == EINPROGRESS) return 0; /* connection still being set up, nothing sent yet */
+	}
+	r = connect(fd, sa, sl); /* second attempt: plain non-blocking connect */
+	if (!r || errno == EINPROGRESS) return 0;
+	close(fd); /* both attempts failed: release the socket and mark the slot empty */
+	pfd->fd = -1;
+	return -1;
+}
+
+static void step_mh(struct msghdr *mh, size_t n)
+{
+	/* Adjust iovec in msghdr to skip first n bytes; mh is modified in place. */
+	while (mh->msg_iovlen && n >= mh->msg_iov->iov_len) { /* drop each leading entry that n covers completely */
+		n -= mh->msg_iov->iov_len;
+		mh->msg_iov++;
+		mh->msg_iovlen--;
+	}
+	if (!mh->msg_iovlen) return; /* all entries consumed, so there is nothing left to trim */
+	mh->msg_iov->iov_base = (char *)mh->msg_iov->iov_base + n; /* skip the remaining n bytes inside the first surviving entry */
+	mh->msg_iov->iov_len -= n;
+}
+
+/* Internal contract for __res_msend[_rc]: asize must be >=512, nqueries
+ * must be sufficiently small to be safe as VLA size. In practice it's
+ * either 1 or 2, anyway. */
+
 int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 	const int *qlens, unsigned char *const *answers, int *alens, int asize,
 	const struct resolvconf *conf)
@@ -44,7 +93,10 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 	int next;
 	int i, j;
 	int cs;
-	struct pollfd pfd;
+	struct pollfd pfd[nqueries+2];
+	int qpos[nqueries], apos[nqueries];
+	unsigned char alen_buf[nqueries][2];
+	int r;
 	unsigned long t0, t1, t2;
 
 	pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cs);
@@ -68,29 +120,22 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 	}
 
 	/* Get local address and open/bind a socket */
-	sa.sin.sin_family = family;
 	fd = socket(family, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
 
 	/* Handle case where system lacks IPv6 support */
 	if (fd < 0 && family == AF_INET6 && errno == EAFNOSUPPORT) {
+		for (i=0; i<nns && conf->ns[i].family == AF_INET6; i++); /* stop at the first nameserver that is not IPv6; index with i, not nns, so each entry is actually examined */
+		if (i==nns) { /* no non-IPv6 nameserver exists and IPv6 sockets are unavailable: nothing can be queried */
+			pthread_setcancelstate(cs, 0);
+			return -1;
+		}
 		fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
 		family = AF_INET;
+		sl = sizeof sa.sin; /* keep the address length in step with the AF_INET fallback */
 	}
-	if (fd < 0 || bind(fd, (void *)&sa, sl) < 0) {
-		if (fd >= 0) close(fd);
-		pthread_setcancelstate(cs, 0);
-		return -1;
-	}
-
-	/* Past this point, there are no errors. Each individual query will
-	 * yield either no reply (indicated by zero length) or an answer
-	 * packet which is up to the caller to interpret. */
-
-	pthread_cleanup_push(cleanup, (void *)(intptr_t)fd);
-	pthread_setcancelstate(cs, 0);
 
 	/* Convert any IPv4 addresses in a mixed environment to v4-mapped */
-	if (family == AF_INET6) {
+	if (fd >= 0 && family == AF_INET6) {
 		setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &(int){0}, sizeof 0);
 		for (i=0; i<nns; i++) {
 			if (ns[i].sin.sin_family != AF_INET) continue;
@@ -104,16 +149,38 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 		}
 	}
 
+	sa.sin.sin_family = family;
+	if (fd < 0 || bind(fd, (void *)&sa, sl) < 0) {
+		if (fd >= 0) close(fd);
+		pthread_setcancelstate(cs, 0);
+		return -1;
+	}
+
+	/* Past this point, there are no errors. Each individual query will
+	 * yield either no reply (indicated by zero length) or an answer
+	 * packet which is up to the caller to interpret. */
+
+	for (i=0; i<nqueries; i++) pfd[i].fd = -1;
+	pfd[nqueries].fd = fd;
+	pfd[nqueries].events = POLLIN;
+	pfd[nqueries+1].fd = -2;
+
+	pthread_cleanup_push(cleanup, pfd);
+	pthread_setcancelstate(cs, 0);
+
 	memset(alens, 0, sizeof *alens * nqueries);
 
-	pfd.fd = fd;
-	pfd.events = POLLIN;
 	retry_interval = timeout / attempts;
 	next = 0;
 	t0 = t2 = mtime();
 	t1 = t2 - retry_interval;
 
 	for (; t2-t0 < timeout; t2=mtime()) {
+		/* This is the loop exit condition: that all queries
+		 * have an accepted answer. */
+		for (i=0; i<nqueries && alens[i]>0; i++);
+		if (i==nqueries) break;
+
 		if (t2-t1 >= retry_interval) {
 			/* Query all configured namservers in parallel */
 			for (i=0; i<nqueries; i++)
@@ -127,10 +194,20 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 		}
 
 		/* Wait for a response, or until time to retry */
-		if (poll(&pfd, 1, t1+retry_interval-t2) <= 0) continue;
+		if (poll(pfd, nqueries+1, t1+retry_interval-t2) <= 0) continue;
 
-		while ((rlen = recvfrom(fd, answers[next], asize, 0,
-		  (void *)&sa, (socklen_t[1]){sl})) >= 0) {
+		while (next < nqueries) {
+			struct msghdr mh = {
+				.msg_name = (void *)&sa,
+				.msg_namelen = sl,
+				.msg_iovlen = 1,
+				.msg_iov = (struct iovec []){
+					{ .iov_base = (void *)answers[next],
+					  .iov_len = asize }
+				}
+			};
+			rlen = recvmsg(fd, &mh, 0);
+			if (rlen < 0) break;
 
 			/* Ignore non-identifiable packets */
 			if (rlen < 4) continue;
@@ -170,12 +247,72 @@ int __res_msend_rc(int nqueries, const unsigned char *const *queries,
 			else
 				memcpy(answers[i], answers[next], rlen);
 
-			if (next == nqueries) goto out;
+			/* Ignore further UDP if all slots full or TCP-mode */
+			if (next == nqueries) pfd[nqueries].events = 0;
+
+			/* If answer is truncated (TC bit), fallback to TCP */
+			if ((answers[i][2] & 2) || (mh.msg_flags & MSG_TRUNC)) {
+				alens[i] = -1;
+				pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, 0);
+				r = start_tcp(pfd+i, family, ns+j, sl, queries[i], qlens[i]);
+				pthread_setcancelstate(cs, 0);
+				if (r >= 0) {
+					qpos[i] = r;
+					apos[i] = 0;
+				}
+				continue;
+			}
+		}
+
+		for (i=0; i<nqueries; i++) if (pfd[i].revents & POLLOUT) {
+			struct msghdr mh = {
+				.msg_iovlen = 2,
+				.msg_iov = (struct iovec [2]){
+					{ .iov_base = (uint8_t[]){ qlens[i]>>8, qlens[i] }, .iov_len = 2 },
+					{ .iov_base = (void *)queries[i], .iov_len = qlens[i] } }
+			};
+			step_mh(&mh, qpos[i]);
+			r = sendmsg(pfd[i].fd, &mh, MSG_NOSIGNAL);
+			if (r < 0) goto out;
+			qpos[i] += r;
+			if (qpos[i] == qlens[i]+2)
+				pfd[i].events = POLLIN;
+		}
+
+		for (i=0; i<nqueries; i++) if (pfd[i].revents & POLLIN) {
+			struct msghdr mh = {
+				.msg_iovlen = 2,
+				.msg_iov = (struct iovec [2]){
+					{ .iov_base = alen_buf[i], .iov_len = 2 },
+					{ .iov_base = answers[i], .iov_len = asize } }
+			};
+			step_mh(&mh, apos[i]);
+			r = recvmsg(pfd[i].fd, &mh, 0);
+			if (r <= 0) goto out;
+			apos[i] += r;
+			if (apos[i] < 2) continue;
+			int alen = alen_buf[i][0]*256 + alen_buf[i][1];
+			if (alen < 13) goto out;
+			if (apos[i] < alen+2 && apos[i] < asize+2)
+				continue;
+			int rcode = answers[i][3] & 15;
+			if (rcode != 0 && rcode != 3)
+				goto out;
+
+			/* Storing the length here commits the accepted answer.
+			 * Immediately close TCP socket so as not to consume
+			 * resources we no longer need. */
+			alens[i] = alen;
+			__syscall(SYS_close, pfd[i].fd);
+			pfd[i].fd = -1;
 		}
 	}
 out:
 	pthread_cleanup_pop(1);
 
+	/* Disregard any incomplete TCP results */
+	for (i=0; i<nqueries; i++) if (alens[i]<0) alens[i] = 0;
+
 	return 0;
 }
 
diff --git a/src/network/res_send.c b/src/network/res_send.c
index ee4abf1f..9593164d 100644
--- a/src/network/res_send.c
+++ b/src/network/res_send.c
@@ -1,8 +1,16 @@
 #include <resolv.h>
+#include <string.h>
 
 int __res_send(const unsigned char *msg, int msglen, unsigned char *answer, int anslen)
 {
-	int r = __res_msend(1, &msg, &msglen, &answer, &anslen, anslen);
+	int r;
+	if (anslen < 512) {
+		unsigned char buf[512];
+		r = __res_send(msg, msglen, buf, sizeof buf);
+		if (r >= 0) memcpy(answer, buf, r < anslen ? r : anslen);
+		return r;
+	}
+	r = __res_msend(1, &msg, &msglen, &answer, &anslen, anslen);
 	return r<0 || !anslen ? -1 : anslen;
 }
 
diff --git a/src/network/sendmsg.c b/src/network/sendmsg.c
index 80cc5f41..acdfdf29 100644
--- a/src/network/sendmsg.c
+++ b/src/network/sendmsg.c
@@ -8,13 +8,16 @@ ssize_t sendmsg(int fd, const struct msghdr *msg, int flags)
 {
 #if LONG_MAX > INT_MAX
 	struct msghdr h;
-	struct cmsghdr chbuf[1024/sizeof(struct cmsghdr)+1], *c;
+	/* Kernels before 2.6.38 set SCM_MAX_FD to 255, allocate enough
+	 * space to support an SCM_RIGHTS ancillary message with 255 fds.
+	 * Kernels since 2.6.38 set SCM_MAX_FD to 253. */
+	struct cmsghdr chbuf[CMSG_SPACE(255*sizeof(int))/sizeof(struct cmsghdr)+1], *c;
 	if (msg) {
 		h = *msg;
 		h.__pad1 = h.__pad2 = 0;
 		msg = &h;
 		if (h.msg_controllen) {
-			if (h.msg_controllen > 1024) {
+			if (h.msg_controllen > sizeof chbuf) {
 				errno = ENOMEM;
 				return -1;
 			}