summary refs log tree commit diff
path: root/nscd
diff options
context:
space:
mode:
authorCarlos O'Donell <carlos@systemhalted.org>2015-03-13 09:49:24 -0400
committerCarlos O'Donell <carlos@systemhalted.org>2015-03-13 09:49:24 -0400
commitcf9313e7d1dd42addd6cf8c9277f0f18a62cdeff (patch)
tree6bbf8900b476073a3d30cf24aae7b317b65344fc /nscd
parent7d67a196b6548562070ac11fc673c0d3d263d846 (diff)
downloadglibc-cf9313e7d1dd42addd6cf8c9277f0f18a62cdeff.tar.gz
glibc-cf9313e7d1dd42addd6cf8c9277f0f18a62cdeff.tar.xz
glibc-cf9313e7d1dd42addd6cf8c9277f0f18a62cdeff.zip
Enhance nscd's inotify support (Bug 14906).
In bug 14906 the user complains that the inotify support in nscd
is not sufficient when it comes to detecting changes in the
configurationfiles that should be watched for the various databases.

The current nscd implementation uses inotify to watch for changes in
the configuration files, but adds watches only for IN_DELETE_SELF and
IN_MODIFY. These watches are insufficient to cover even the most basic
uses by a system administrator. For example using emacs or vim to edit
a configuration file should trigger a reload but it might not if
the editors use move to atomically update the file. This atomic update
changes the inode and thus removes the notification on the file (as
inotify is based on inodes). Thus the inotify support in nscd for
configuration files is insufficient to account for the average use
cases of system administrators and users.

The inotify support is significantly enhanced and described here:
https://www.sourceware.org/ml/libc-alpha/2015-02/msg00504.html

Tested on x86_64 with and without inotify support.
Diffstat (limited to 'nscd')
-rw-r--r--nscd/cache.c28
-rw-r--r--nscd/connections.c358
-rw-r--r--nscd/nscd.h60
3 files changed, 338 insertions, 108 deletions
diff --git a/nscd/cache.c b/nscd/cache.c
index ea9f72311b..f0200c303c 100644
--- a/nscd/cache.c
+++ b/nscd/cache.c
@@ -272,28 +272,38 @@ prune_cache (struct database_dyn *table, time_t now, int fd)
       while (runp != NULL)
 	{
 #ifdef HAVE_INOTIFY
-	  if (runp->inotify_descr == -1)
+	  if (runp->inotify_descr[TRACED_FILE] == -1)
 #endif
 	    {
 	      struct stat64 st;
 
 	      if (stat64 (runp->fname, &st) < 0)
 		{
+		  /* Print a diagnostic that the traced file was missing.
+		     We must not disable tracing since the file might return
+		     shortly and we want to reload it at the next pruning.
+		     Disabling tracing here would go against the configuration
+		     as specified by the user via check-files.  */
 		  char buf[128];
-		  /* We cannot stat() the file, disable file checking if the
-		     file does not exist.  */
-		  dbg_log (_("cannot stat() file `%s': %s"),
+		  dbg_log (_("checking for monitored file `%s': %s"),
 			   runp->fname, strerror_r (errno, buf, sizeof (buf)));
-		  if (errno == ENOENT)
-		    table->check_file = 0;
 		}
 	      else
 		{
-		  if (st.st_mtime != table->file_mtime)
+		  /* This must be `!=` to catch cases where users turn the
+		     clocks back and we still want to detect any time difference
+		     in mtime.  */
+		  if (st.st_mtime != runp->mtime)
 		    {
-		      /* The file changed.  Invalidate all entries.  */
+		      dbg_log (_("monitored file `%s` changed (mtime)"),
+			       runp->fname);
+		      /* The file changed. Invalidate all entries.  */
 		      now = LONG_MAX;
-		      table->file_mtime = st.st_mtime;
+		      runp->mtime = st.st_mtime;
+#ifdef HAVE_INOTIFY
+		      /* Attempt to install a watch on the file.  */
+		      install_watches (runp);
+#endif
 		    }
 		}
 	    }
diff --git a/nscd/connections.c b/nscd/connections.c
index 985eab6a30..cba5e6ad9d 100644
--- a/nscd/connections.c
+++ b/nscd/connections.c
@@ -957,6 +957,44 @@ cannot change socket to nonblocking mode: %s"),
     finish_drop_privileges ();
 }
 
+#ifdef HAVE_INOTIFY
+#define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
+#define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
+void
+install_watches (struct traced_file *finfo)
+{
+  /* Use inotify support if we have it.  */
+  if (finfo->inotify_descr[TRACED_FILE] < 0)
+    finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
+							   finfo->fname,
+							   TRACED_FILE_MASK);
+  if (finfo->inotify_descr[TRACED_FILE] < 0)
+    {
+      dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
+		 finfo->fname, strerror (errno));
+      return;
+    }
+  dbg_log (_("monitoring file `%s` (%d)"),
+	   finfo->fname, finfo->inotify_descr[TRACED_FILE]);
+  /* Additionally listen for events in the file's parent directory.
+     We do this because the file to be watched might be
+     deleted and then added back again.  When it is added back again
+     we must re-add the watch.  We must also cover IN_MOVED_TO to
+     detect a file being moved into the directory.  */
+  if (finfo->inotify_descr[TRACED_DIR] < 0)
+    finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
+							  finfo->dname,
+							  TRACED_DIR_MASK);
+  if (finfo->inotify_descr[TRACED_DIR] < 0)
+    {
+      dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
+		 finfo->fname, strerror (errno));
+      return;
+    }
+  dbg_log (_("monitoring directory `%s` (%d)"),
+	   finfo->dname, finfo->inotify_descr[TRACED_DIR]);
+}
+#endif
 
 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
 
@@ -981,30 +1019,22 @@ register_traced_file (size_t dbidx, struct traced_file *finfo)
     return;
 
   if (__glibc_unlikely (debug_level > 0))
-    dbg_log (_("register trace file %s for database %s"),
+    dbg_log (_("monitoring file %s for database %s"),
 	     finfo->fname, dbnames[dbidx]);
 
 #ifdef HAVE_INOTIFY
-  if (inotify_fd < 0
-      || (finfo->inotify_descr = inotify_add_watch (inotify_fd, finfo->fname,
-						    IN_DELETE_SELF
-						    | IN_MODIFY)) < 0)
+  install_watches (finfo);
 #endif
+  struct stat64 st;
+  if (stat64 (finfo->fname, &st) < 0)
     {
-      /* We need the modification date of the file.  */
-      struct stat64 st;
-
-      if (stat64 (finfo->fname, &st) < 0)
-	{
-	  /* We cannot stat() the file, disable file checking.  */
-	  dbg_log (_("cannot stat() file `%s': %s"),
-		   finfo->fname, strerror (errno));
-	  return;
-	}
-
-      finfo->inotify_descr = -1;
-      finfo->mtime = st.st_mtime;
+      /* We cannot stat() the file. Set mtime to zero and try again later.  */
+      dbg_log (_("stat failed for file `%s'; will try again later: %s"),
+	       finfo->fname, strerror (errno));
+      finfo->mtime = 0;
     }
+  else
+    finfo->mtime = st.st_mtime;
 
   /* Queue up the file name.  */
   finfo->next = dbs[dbidx].traced_files;
@@ -1029,20 +1059,27 @@ invalidate_cache (char *key, int fd)
   for (number = pwddb; number < lastdb; ++number)
     if (strcmp (key, dbnames[number]) == 0)
       {
-	if (number == hstdb)
+	struct traced_file *runp = dbs[number].traced_files;
+	while (runp != NULL)
 	  {
-	    struct traced_file *runp = dbs[hstdb].traced_files;
-	    while (runp != NULL)
-	      if (runp->call_res_init)
-		{
-		  res_init ();
-		  break;
-		}
-	      else
-		runp = runp->next;
+	    /* Make sure we reload from file when checking mtime.  */
+	    runp->mtime = 0;
+#ifdef HAVE_INOTIFY
+	    /* During an invalidation we try to reload the traced
+	       file watches.  This allows the user to re-sync if
+	       inotify events were lost.  Similar to what we do during
+	       pruning.  */
+	    install_watches (runp);
+#endif
+	    if (runp->call_res_init)
+	      {
+		res_init ();
+		break;
+	      }
+	    runp = runp->next;
 	  }
 	break;
-    }
+      }
 
   if (number == lastdb)
     {
@@ -1880,11 +1917,28 @@ union __inev
   char buf[sizeof (struct inotify_event) + PATH_MAX];
 };
 
+/* Returns 0 if the file is there otherwise -1.  */
+int
+check_file (struct traced_file *finfo)
+{
+  struct stat64 st;
+  /* We could check mtime and if different re-add
+     the watches, and invalidate the database, but we
+     don't because we are called from inotify_check_files
+     which should be doing that work.  If sufficient inotify
+     events were lost then the next pruning or invalidation
+     will do the stat and mtime check.  We don't do it here to
+     keep the logic simple.  */
+  if (stat64 (finfo->fname, &st) < 0)
+    return -1;
+  return 0;
+}
+
 /* Process the inotify event in INEV. If the event matches any of the files
    registered with a database then mark that database as requiring its cache
    to be cleared. We indicate the cache needs clearing by setting
    TO_CLEAR[DBCNT] to true for the matching database.  */
-static inline void
+static void
 inotify_check_files (bool *to_clear, union __inev *inev)
 {
   /* Check which of the files changed.  */
@@ -1894,16 +1948,124 @@ inotify_check_files (bool *to_clear, union __inev *inev)
 
       while (finfo != NULL)
 	{
-	  /* Inotify event watch descriptor matches.  */
-	  if (finfo->inotify_descr == inev->i.wd)
+	  /* The configuration file was moved or deleted.
+	     We stop watching it at that point, and reinitialize.  */
+	  if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
+	      && ((inev->i.mask & IN_MOVE_SELF)
+		  || (inev->i.mask & IN_DELETE_SELF)
+		  || (inev->i.mask & IN_IGNORED)))
+	    {
+	      int ret;
+	      bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
+
+	      if (check_file (finfo) == 0)
+	        {
+		  dbg_log (_("ignored inotify event for `%s` (file exists)"),
+			   finfo->fname);
+		  return;
+		}
+
+	      dbg_log (_("monitored file `%s` was %s, removing watch"),
+		       finfo->fname, moved ? "moved" : "deleted");
+	      /* File was moved out, remove the watch.  Watches are
+		 automatically removed when the file is deleted.  */
+	      if (moved)
+		{
+		  ret = inotify_rm_watch (inotify_fd, inev->i.wd);
+		  if (ret < 0)
+		    dbg_log (_("failed to remove file watch `%s`: %s"),
+			     finfo->fname, strerror (errno));
+		}
+	      finfo->inotify_descr[TRACED_FILE] = -1;
+	      to_clear[dbcnt] = true;
+	      if (finfo->call_res_init)
+	        res_init ();
+	      return;
+	    }
+	  /* The configuration file was open for writing and has just closed.
+	     We reset the cache and reinitialize.  */
+	  if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
+	      && inev->i.mask & IN_CLOSE_WRITE)
 	    {
 	      /* Mark cache as needing to be cleared and reinitialize.  */
+	      dbg_log (_("monitored file `%s` was written to"), finfo->fname);
 	      to_clear[dbcnt] = true;
 	      if (finfo->call_res_init)
 	        res_init ();
 	      return;
 	    }
+	  /* The parent directory was moved or deleted.  We trigger one last
+	     invalidation.  At the next pruning or invalidation we may add
+	     this watch back if the file is present again.  */
+	  if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
+	      && ((inev->i.mask & IN_DELETE_SELF)
+		  || (inev->i.mask & IN_MOVE_SELF)
+		  || (inev->i.mask & IN_IGNORED)))
+	    {
+	      bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
+	      /* The directory watch may have already been removed
+		 but we don't know so we just remove it again and
+		 ignore the error.  Then we remove the file watch.
+		 Note: watches are automatically removed for deleted
+		 files.  */
+	      if (moved)
+		inotify_rm_watch (inotify_fd, inev->i.wd);
+	      if (finfo->inotify_descr[TRACED_FILE] != -1)
+		{
+		  dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
+			   finfo->dname, moved ? "moved" : "deleted", finfo->fname);
+		  if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
+		    dbg_log (_("failed to remove file watch `%s`: %s"),
+			     finfo->dname, strerror (errno));
+		}
+	      finfo->inotify_descr[TRACED_FILE] = -1;
+	      finfo->inotify_descr[TRACED_DIR] = -1;
+	      to_clear[dbcnt] = true;
+	      if (finfo->call_res_init)
+	        res_init ();
+	      /* Continue to the next entry since this might be the
+		 parent directory for multiple registered files and
+		 we want to remove watches for all registered files.  */
+	      continue;
+	    }
+	  /* The parent directory had a create or moved to event.  */
+	  if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
+	      && ((inev->i.mask & IN_MOVED_TO)
+		  || (inev->i.mask & IN_CREATE))
+	      && strcmp (inev->i.name, finfo->sfname) == 0)
+	    {
+	      /* We detected a directory change.  We look for the creation
+		 of the file we are tracking or the move of the same file
+		 into the directory.  */
+	      int ret;
+	      dbg_log (_("monitored file `%s` was %s, adding watch"),
+		       finfo->fname,
+		       inev->i.mask & IN_CREATE ? "created" : "moved into place");
+	      /* File was moved in or created.  Regenerate the watch.  */
+	      if (finfo->inotify_descr[TRACED_FILE] != -1)
+		inotify_rm_watch (inotify_fd,
+				  finfo->inotify_descr[TRACED_FILE]);
+
+	      ret = inotify_add_watch (inotify_fd,
+				       finfo->fname,
+				       TRACED_FILE_MASK);
+	      if (ret < 0)
+		dbg_log (_("failed to add file watch `%s`: %s"),
+			 finfo->fname, strerror (errno));
+
+	      finfo->inotify_descr[TRACED_FILE] = ret;
+
+	      /* The file is new or moved so mark cache as needing to
+		 be cleared and reinitialize.  */
+	      to_clear[dbcnt] = true;
+	      if (finfo->call_res_init)
+		res_init ();
 
+	      /* Done re-adding the watch.  Don't return, we may still
+		 have other files in this same directory, same watch
+		 descriptor, and need to process them.  */
+	    }
+	  /* Other events are ignored, and we move on to the next file.  */
 	  finfo = finfo->next;
         }
     }
@@ -1925,6 +2087,51 @@ clear_db_cache (bool *to_clear)
       }
 }
 
+int
+handle_inotify_events (void)
+{
+  bool to_clear[lastdb] = { false, };
+  union __inev inev;
+
+  /* Read all inotify events for files registered via
+     register_traced_file().  */
+  while (1)
+    {
+      /* Potentially read multiple events into buf.  */
+      ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
+					     &inev.buf,
+					     sizeof (inev)));
+      if (nb < (ssize_t) sizeof (struct inotify_event))
+	{
+	  /* Not even 1 event.  */
+	  if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
+	    return -1;
+	  /* Done reading events that are ready.  */
+	  break;
+	}
+      /* Process all events.  The normal inotify interface delivers
+	 complete events on a read and never a partial event.  */
+      char *eptr = &inev.buf[0];
+      ssize_t count;
+      while (1)
+	{
+	  /* Check which of the files changed.  */
+	  inotify_check_files (to_clear, &inev);
+	  count = sizeof (struct inotify_event) + inev.i.len;
+	  eptr += count;
+	  nb -= count;
+	  if (nb >= (ssize_t) sizeof (struct inotify_event))
+	    memcpy (&inev, eptr, nb);
+	  else
+	    break;
+	}
+      continue;
+    }
+  /* Actually perform the cache clearing.  */
+  clear_db_cache (to_clear);
+  return 0;
+}
+
 #endif
 
 static void
@@ -2031,42 +2238,20 @@ main_loop_poll (void)
 	    {
 	      if (conns[1].revents != 0)
 		{
-		  bool to_clear[lastdb] = { false, };
-		  union __inev inev;
-
-		  /* Read all inotify events for files registered via
-		     register_traced_file().  */
-		  while (1)
+		  int ret;
+		  ret = handle_inotify_events ();
+		  if (ret == -1)
 		    {
-		      ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
-							     sizeof (inev)));
-		      if (nb < (ssize_t) sizeof (struct inotify_event))
-			{
-			  if (__builtin_expect (nb == -1 && errno != EAGAIN,
-						0))
-			    {
-			      /* Something went wrong when reading the inotify
-				 data.  Better disable inotify.  */
-			      dbg_log (_("\
-disabled inotify after read error %d"),
-				       errno);
-			      conns[1].fd = -1;
-			      firstfree = 1;
-			      if (nused == 2)
-				nused = 1;
-			      close (inotify_fd);
-			      inotify_fd = -1;
-			    }
-			  break;
-			}
-
-		      /* Check which of the files changed.  */
-		      inotify_check_files (to_clear, &inev);
+		      /* Something went wrong when reading the inotify
+			 data.  Better disable inotify.  */
+		      dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
+		      conns[1].fd = -1;
+		      firstfree = 1;
+		      if (nused == 2)
+			nused = 1;
+		      close (inotify_fd);
+		      inotify_fd = -1;
 		    }
-
-		  /* Actually perform the cache clearing.  */
-		  clear_db_cache (to_clear);
-
 		  --n;
 		}
 
@@ -2234,37 +2419,18 @@ main_loop_epoll (int efd)
 # ifdef HAVE_INOTIFY
 	else if (revs[cnt].data.fd == inotify_fd)
 	  {
-	    bool to_clear[lastdb] = { false, };
-	    union __inev inev;
-
-	    /* Read all inotify events for files registered via
-	       register_traced_file().  */
-	    while (1)
+	    int ret;
+	    ret = handle_inotify_events ();
+	    if (ret == -1)
 	      {
-		ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
-						 sizeof (inev)));
-		if (nb < (ssize_t) sizeof (struct inotify_event))
-		  {
-		    if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
-		      {
-			/* Something went wrong when reading the inotify
-			   data.  Better disable inotify.  */
-			dbg_log (_("disabled inotify after read error %d"),
-				 errno);
-			(void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
-					  NULL);
-			close (inotify_fd);
-			inotify_fd = -1;
-		      }
-		    break;
-		  }
-
-		/* Check which of the files changed.  */
-		inotify_check_files(to_clear, &inev);
+		/* Something went wrong when reading the inotify
+		   data.  Better disable inotify.  */
+		dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
+		(void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
+		close (inotify_fd);
+		inotify_fd = -1;
+		break;
 	      }
-
-	    /* Actually perform the cache clearing.  */
-	    clear_db_cache (to_clear);
 	  }
 # endif
 # ifdef HAVE_NETLINK
@@ -2301,7 +2467,9 @@ main_loop_epoll (int efd)
 	  no reply in too long of a time.  */
       time_t laststart = now - ACCEPT_TIMEOUT;
       assert (starttime[sock] == 0);
+# ifdef HAVE_INOTIFY
       assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
+# endif
       assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
       for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
 	if (starttime[cnt] != 0 && starttime[cnt] < laststart)
diff --git a/nscd/nscd.h b/nscd/nscd.h
index 17a0a96278..75c2eccea0 100644
--- a/nscd/nscd.h
+++ b/nscd/nscd.h
@@ -61,17 +61,67 @@ typedef enum
    80% of the thread stack size.  */
 #define MAX_STACK_USE ((8 * NSCD_THREAD_STACKSIZE) / 10)
 
-
-/* Registered filename used to fill database.  */
+/* Records the file registered per database that when changed
+   or modified requires invalidating the database.  */
 struct traced_file
 {
+  /* Tracks the last modified time of the traced file.  */
   time_t mtime;
+  /* Support multiple registered files per database.  */
   struct traced_file *next;
   int call_res_init;
-  int inotify_descr;
+  /* Requires Inotify support to do anything useful.  */
+#define TRACED_FILE	0
+#define TRACED_DIR	1
+  int inotify_descr[2];
+# ifndef PATH_MAX
+#  define PATH_MAX 1024
+# endif
+  /* The parent directory is used to scan for creation/deletion.  */
+  char dname[PATH_MAX];
+  /* Just the name of the file with no directory component.  */
+  char *sfname;
+  /* The full-path name of the registered file.  */
   char fname[];
 };
 
+/* Initialize a `struct traced_file`.  As input we need the name
+   of the file, and if invalidation requires calling res_init.
+   If CRINIT is 1 then res_init will be called after invalidation
+   or if the traced file is changed in any way, otherwise it will
+   not.  */
+static inline void
+init_traced_file(struct traced_file *file, const char *fname, int crinit)
+{
+   char *dname;
+   file->mtime = 0;
+   file->inotify_descr[TRACED_FILE] = -1;
+   file->inotify_descr[TRACED_DIR] = -1;
+   strcpy (file->fname, fname);
+   /* Compute the parent directory name and store a copy.  The copy makes
+      it much faster to add/remove watches while nscd is running instead
+      of computing this over and over again in a temp buffer.  */
+   file->dname[0] = '\0';
+   dname = strrchr (fname, '/');
+   if (dname != NULL)
+     {
+       size_t len = (size_t)(dname - fname);
+       if (len > sizeof (file->dname))
+	 abort ();
+       strncpy (file->dname, file->fname, len);
+       file->dname[len] = '\0';
+     }
+   /* The basename is the name just after the last forward slash.  */
+   file->sfname = &dname[1];
+   file->call_res_init = crinit;
+}
+
+#define define_traced_file(id, filename) 			\
+static union							\
+{								\
+  struct traced_file file;					\
+  char buf[sizeof (struct traced_file) + sizeof (filename)];	\
+} id##_traced_file;
 
 /* Structure describing dynamic part of one database.  */
 struct database_dyn
@@ -90,7 +140,6 @@ struct database_dyn
   int propagate;
   struct traced_file *traced_files;
   const char *db_filename;
-  time_t file_mtime;
   size_t suggested_module;
   size_t max_db_size;
 
@@ -211,6 +260,9 @@ void do_exit (int child_ret, int errnum, const char *format, ...);
 /* connections.c */
 extern void nscd_init (void);
 extern void register_traced_file (size_t dbidx, struct traced_file *finfo);
+#ifdef HAVE_INOTIFY
+extern void install_watches (struct traced_file *finfo);
+#endif
 extern void close_sockets (void);
 extern void start_threads (void) __attribute__ ((__noreturn__));