about summary refs log tree commit diff
diff options
context:
space:
mode:
author Rich Felker <dalias@aerifal.cx> 2014-06-07 04:09:21 -0400
committer Rich Felker <dalias@aerifal.cx> 2014-06-07 04:09:21 -0400
commit 246e752d9e7c472735444815163f0a22e5bc4161 (patch)
tree 98f25f819b88a2200e63613d1b5bd5d7665c0cda
parent f616294914e7c289791d856dca636bbccad5fef7 (diff)
download musl-246e752d9e7c472735444815163f0a22e5bc4161.tar.gz
musl-246e752d9e7c472735444815163f0a22e5bc4161.tar.xz
musl-246e752d9e7c472735444815163f0a22e5bc4161.zip
avoid spurious lookup failures from badly-behaved nameservers
the results of a dns query, whether it's performed as part of one of
the standard name-resolving functions or directly by res_send, should
be a function of the query, not of the particular nameserver that
responds to it. thus, all responses which indicate a failure or
refusal by the nameserver, as opposed to a positive or negative result
for the query, should be ignored.

the strategy used is to re-issue the query immediately (but with a
limit on the number of retries, in case the server is really broken)
when a response code of 2 (server failure, typically transient) is
seen, and otherwise take no action on bad responses (which generally
indicate a misconfigured nameserver or one which the client does not
have permission to use), allowing the normal retry interval to apply
and of course accepting responses from other nameservers queried in
parallel.

empirically this matches the traditional resolver behavior for
nameservers that respond with a code of 2 in the case where there is
just a single nameserver configured. the behavior diverges when
multiple nameservers are available, since musl is querying them in
parallel. in this case we are mildly more aggressive at retrying.
-rw-r--r-- src/network/res_msend.c 27
1 files changed, 22 insertions, 5 deletions
diff --git a/src/network/res_msend.c b/src/network/res_msend.c
index 5192b4d8..35f106dd 100644
--- a/src/network/res_msend.c
+++ b/src/network/res_msend.c
@@ -34,7 +34,7 @@ int __res_msend(int nqueries, const unsigned char *const *queries,
 	FILE *f, _f;
 	unsigned char _buf[256];
 	char line[64], *s, *z;
-	int timeout = 5000, attempts = 2, retry_interval;
+	int timeout = 5000, attempts = 2, retry_interval, servfail_retry;
 	union {
 		struct sockaddr_in sin;
 		struct sockaddr_in6 sin6;
@@ -152,6 +152,7 @@ int __res_msend(int nqueries, const unsigned char *const *queries,
 							qlens[i], MSG_NOSIGNAL,
 							(void *)&ns[j], sl);
 			t1 = t2;
+			servfail_retry = 2 * nqueries;
 		}
 
 		/* Wait for a response, or until time to retry */
@@ -160,12 +161,12 @@ int __res_msend(int nqueries, const unsigned char *const *queries,
 		while ((rlen = recvfrom(fd, answers[next], asize, 0,
 		  (void *)&sa, (socklen_t[1]){sl})) >= 0) {
 
-			/* Ignore non-identifiable packets (no query id) */
-			if (rlen < 2) continue;
+			/* Ignore non-identifiable packets */
+			if (rlen < 4) continue;
 
 			/* Ignore replies from addresses we didn't send to */
-			for (i=0; i<nns && memcmp(ns+i, &sa, sl); i++);
-			if (i==nns) continue;
+			for (j=0; j<nns && memcmp(ns+j, &sa, sl); j++);
+			if (j==nns) continue;
 
 			/* Find which query this answer goes with, if any */
 			for (i=next; i<nqueries && (
@@ -174,6 +175,22 @@ int __res_msend(int nqueries, const unsigned char *const *queries,
 			if (i==nqueries) continue;
 			if (alens[i]) continue;
 
+			/* Only accept positive or negative responses;
+			 * retry immediately on server failure, and ignore
+			 * all other codes such as refusal. */
+			switch (answers[next][3] & 15) {
+			case 0:
+			case 3:
+				break;
+			case 2:
+				if (servfail_retry && servfail_retry--)
+					sendto(fd, queries[i],
+						qlens[i], MSG_NOSIGNAL,
+						(void *)&ns[j], sl);
+			default:
+				continue;
+			}
+
 			/* Store answer in the right slot, or update next
 			 * available temp slot if it's already in place. */
 			alens[i] = rlen;