about summary refs log tree commit diff
path: root/nptl/perf.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2002-11-26 22:50:54 +0000
committerUlrich Drepper <drepper@redhat.com>2002-11-26 22:50:54 +0000
commit76a50749f7af5935ba3739e815aa6a16ae4440d1 (patch)
treec16eac47f220f03fea74d80ef0a4e774809e94b7 /nptl/perf.c
parent6938e63f714b15c377d8cbf8e97b6f15b0e1b692 (diff)
downloadglibc-76a50749f7af5935ba3739e815aa6a16ae4440d1.tar.gz
glibc-76a50749f7af5935ba3739e815aa6a16ae4440d1.tar.xz
glibc-76a50749f7af5935ba3739e815aa6a16ae4440d1.zip
Initial revision cvs/initial
2002-11-26  Ulrich Drepper  <drepper@redhat.com>
	* allocatestack.c (queue_stack): Don't remove stack from list here.
	Do it in the caller.  Correct condition to prematurely terminate
	loop to free stacks.
	(__deallocate_stack): Remove stack from list here.
2002-11-26  Ulrich Drepper  <drepper@redhat.com>
	* Makefile (tests): Add tst-stack1.
	* tst-stack1.c: New file.
	* allocatestack.c (allocate_stack): Initialize the TCB on a user
	provided stack.
	* pthread_attr_getstack.c: Return bottom of the thread area.
2002-11-25  Ulrich Drepper  <drepper@redhat.com>
	* Makefile (libpthread-routines): Add pt-allocrtsig and
	pthread_kill_other_threads.
	* pt-allocrtsig.c: New file.
	* pthread_kill_other_threads.c: New file.
	* sysdeps/unix/sysv/linux/allocrtsig.c: Add additional aliases for
	all three functions.
	* sysdeps/unix/sysv/linux/Makefile (sysdep_routines): Remove
	allocrtsig.
	* sysdeps/unix/sysv/linux/Versions (libc:GLIBC_PRIVATE): Export
	__libc_current_sigrtmin_private, __libc_current_sigrtmax_private,
	and __libc_allocate_rtsig_private.
	* Versions (libpthread): Export pthread_kill_other_threads_np,
	__libc_current_sigrtmin, and __libc_current_sigrtmax.
2002-11-24  Ulrich Drepper  <drepper@redhat.com>

	* allocatestack.c (allocate_stack): stackaddr in attribute points to
	the end of the stack.  Adjust computations.
	When mprotect call fails dequeue stack and free it.
	* pthread_attr_setstack.c: Store top of the stack in stackaddr
	attribute.
	* pthread_getattr_np.c: Likewise.

	* descr.h (IS_DETACHED): Add some more parentheses to prevent
	surprises.

2002-11-23  Ulrich Drepper  <drepper@redhat.com>

	* sysdeps/pthread/pthread.h (pthread_self): __THROW must come before
	attribute definitions.  Patch by Luca Barbieri <ldb@ldb.ods.org>.

2002-11-22  Ulrich Drepper  <drepper@redhat.com>

	* pthread_getspecific.c: Optimize access to first 2nd-level array.
	* pthread_setspecific.c: Likewise.

2002-11-21  Ulrich Drepper  <drepper@redhat.com>

	* sysdeps/unix/sysv/linux/i386/createthread.c: Remove CLONE_ flags
	definitions.  Get them from the official place.
	* sysdeps/unix/sysv/linux/i386/fork.c: Likewise.

	* sysdeps/unix/sysv/linux/i386/createthread.c: Update CLONE_* flags.
	Use new CLONE_ flags in clone() calls.

	* sysdeps/unix/sysv/linux/fork.c: Use ARCH_FORK to actually fork.
	* sysdeps/unix/sysv/linux/i386/fork.c: New file.

	* Versions: Add pthread_* functions for libc.
	* forward.c: New file.

	* sysdeps/pthread/Makefile (libpthread-sysdeps_routines): Add
	errno-loc.
	* herrno.c: New file.
	* res.c: New file.

	* Makefile (libpthread-routines): Remove sem_post, sem_wait,
	sem_trywait, and sem_timedwait.  Add herrno and res.
	* sem_init.c: Don't initialize lock and waiters members.
	* sem_open.c: Likewise.
	* sem_post.c: Removed.
	* sem_wait.c: Removed.
	* sem_trywait.c: Removed.
	* sem_timedwait.c: Removed.
	* sysdeps/unix/sysv/linux/i386/i486/lowlevelsem.S: Complete rewrite.
	Includes full implementations of sem_post, sem_wait, sem_trywait,
	and sem_timedwait.
	* sysdeps/unix/sysv/linux/i386/lowlevelsem.h (lll_sem_post): Adjust
	for new implementation.
	* sysdeps/unix/sysv/linux/internaltypes.h (struct sem): Remove lock
	and waiters fields.

	* tst-sem3.c: Improve error message.
	* tst-signal3.c: Likewise.

	* init.c (__pthread_initialize_minimal): Use set_tid_address syscall
	to tell the kernel about the termination futex and to initialize tid
	member.  Don't initialize main_thread.
	* descr.h (struct pthread): Remove main_thread member.
	* cancellation.c (__do_cancel): Remove code handling main thread.
	The main thread is not special anymore.

	* allocatestack.c (__reclaim_stacks): Mark stacks as unused.  Add
	size of the stacks to stack_cache_actsize.

	* pt-readv.c: Add missing "defined".
	* pt-sigwait.c: Likewise.
	* pt-writev.c: Likewise.

2002-11-09  Ulrich Drepper  <drepper@redhat.com>

	* Versions: Export __connect from libpthread.
	Patch by Luca Barbieri <ldb@ldb.ods.org>.

	* Makefile (libpthread-routines): Add pt-raise.
	* sysdeps/unix/sysv/linux/raise.c: New file.
	* sysdeps/unix/sysv/linux/pt-raise.c: New file.
	* sysdeps/generic/pt-raise.c: New file.

	* pthread_cond_init.c: Initialize all data elements of the condvar
	structure.  Patch by Luca Barbieri <ldb@ldb.ods.org>.

	* pthread_attr_init.c: Actually implement 2.0 compatibility version.
	* pthread_create.c: Likewise.

	* Makefile (tests): Add tst-key1, tst-key2, tst-key3.
	* tst-key1.c: New file.
	* tst-key2.c: New file.
	* tst-key3.c: New file.

	* Versions: Export pthread_detach for version GLIBC_2.0.
	Reported by Saurabh Desai <sdesai@austin.ibm.com>.

2002-11-08  Ulrich Drepper  <drepper@redhat.com>

	* pthread_key_create.c: Terminate search after an unused key was found.
	Patch by Luca Barbieri <ldb@ldb.ods.org>.

	* sysdeps/unix/sysv/linux/i386/pthread_once.S: Return zero.
	Patch by Luca Barbieri <ldb@ldb.ods.org>.

2002-10-10  Ulrich Drepper  <drepper@redhat.com>

	* sysdeps/unix/sysv/linux/i386/i486/lowlevelsem.S: Use slow generic
	dynamic lookup for errno in PIC.

	* allocatestack.c (get_cached_stack): Rearrange code slightly to
	release the stack lock as soon as possible.
	Call _dl_allocate_tls_init for TCB from the cache to re-initialize
	the static TLS block.
	(allocate_stack): Call _dl_allocate_tls_init for user-provided stack.

	* cancellation.c: Renamed from cancelation.c.
	* Makefile: Adjust accordingly.
	* pthreadP.h (CANCELLATION_P): Renamed from CANCELATION_P.
	* cleanup_defer.c: Use CANCELLATION_P.
	* pthread_testcancel.c: Likewise.
	* descr.h: Fix spelling in comments.
	* init.c: Likewise.
	* pthread_getattr_np.c: Likewise.
	* pthread_getschedparam.c: Likewise.
	* pthread_setschedparam.c: Likewise.
	* Versions: Likewise.

	* pt-pselect.c: New file.
	* Makefile (libpthread-routines): Add pt-pselect.
	* Versions: Add pselect.

	* tst-cancel4.c: New file.
	* Makefile (tests): Add tst-cancel4.

2002-10-09  Ulrich Drepper  <drepper@redhat.com>

	* pthread_mutex_lock.c: Always record lock ownership.
	* pthread_mutex_timedlock.c: Likewise.
	* pthread_mutex_trylock.c: Likewise.

	* pt-readv.c: New file.
	* pt-writev.c: New file.
	* pt-creat.c: New file.
	* pt-msgrcv.c: New file.
	* pt-msgsnd.c: New file.
	* pt-poll.c: New file.
	* pt-select.c: New file.
	* pt-sigpause.c: New file.
	* pt-sigsuspend.c: New file.
	* pt-sigwait.c: New file.
	* pt-sigwaitinfo.c: New file.
	* pt-waitid.c: New file.
	* Makefile (libpthread-routines): Add pt-readv, pt-writev, pt-creat,
	pt-msgrcv, pt-msgsnd, pt-poll, pt-select, pt-sigpause, pt-sigsuspend,
	pt-sigwait, pt-sigwaitinfo, and pt-waitid.
	* Versions: Add all the new functions.

	* tst-exit1.c: New file.
	* Makefile (tests): Add tst-exit1.

	* sem_timedwait.c: Minor optimization for more optimal fastpath.

2002-10-08  Ulrich Drepper  <drepper@redhat.com>

	* pt-fcntl.c: Only enable asynchronous cancellation for F_SETLKW.

	* pthread_join.c: Enable asynchronous cancellation around lll_wait_tid
	call.  pthread_join is an official cancellation point.
	* pthread_timedjoin.c: Likewise.

	* pthread_cond_wait.c: Revert order in which internal lock are dropped
	and the condvar's mutex are retrieved.
	* pthread_cond_timedwait.c: Likewise.
	Reported by dice@saros.East.Sun.COM.

2002-10-07  Ulrich Drepper  <drepper@redhat.com>

	* pthreadP.h: Cut out all type definitions and move them...
	* sysdeps/unix/sysv/linux/internaltypes.h: ...here.  New file.
	* pthreadP.h: Include <internaltypes.h>.

	* sysdeps/unix/sysv/linux/i386/lowlevelsem.h (lll_sem_post): Little
	performance tweaks.

	* sem_trywait.c: Shuffle #includes around to get right order.
	* sem_timedwait.c: Likewise.
	* sem_post.c: Likewise.
	* sem_wait.c: Likewise.

	* nptl 0.3 released.

	* Makefile (tests): Add tst-signal3.
	* tst-signal3.c: New file.

2002-10-05  Ulrich Drepper  <drepper@redhat.com>

	* sysdeps/unix/sysv/linux/i386/lowlevelsem.h: Tell the compiler that
	the asms modify the sem object.
	(__lll_sem_timedwait): Now takes struct sem* as first parameter.

	* sysdeps/unix/sysv/linux/i386/bits/semaphore.h (sem_t): Don't expose
	the actual members.
	* pthreadP.h (struct sem): New type.  Actual semaphore type.
	* semaphoreP.h: Include pthreadP.h.
	* sem_getvalue.c: Adjust to sem_t change.
	* sem_init.c: Likewise.
	* sem_open.c: Likewise.
	* sem_post.c: Likewise.
	* sem_timedwait.c: Likewise.
	* sem_trywait.c: Likewise.
	* sem_wait.c: Likewise.

2002-10-04  Ulrich Drepper  <drepper@redhat.com>

	* Makefile (tests): Add tst-basic2, tst-exec1, tst-exec2, tst-exec3.
	* tst-basic2.c: New file.
	* tst-exec1.c: New file.
	* tst-exec2.c: New file.
	* tst-exec3.c: New file.

	* tst-fork1.c: Remove extra */.

	* nptl 0.2 released.  The API for IA-32 is complete.
Diffstat (limited to 'nptl/perf.c')
-rw-r--r--nptl/perf.c749
1 files changed, 749 insertions, 0 deletions
diff --git a/nptl/perf.c b/nptl/perf.c
new file mode 100644
index 0000000000..e94ccf8a6b
--- /dev/null
+++ b/nptl/perf.c
@@ -0,0 +1,749 @@
+/* Copyright (C) 2002 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#define _GNU_SOURCE	1
+#include <argp.h>
+#include <error.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/param.h>
+#include <sys/types.h>
+
+#ifndef MAX_THREADS
+# define MAX_THREADS		100000
+#endif
+#ifndef DEFAULT_THREADS
+# define DEFAULT_THREADS	50
+#endif
+
+
+#define OPT_TO_THREAD		300
+#define OPT_TO_PROCESS		301
+#define OPT_SYNC_SIGNAL		302
+#define OPT_SYNC_JOIN		303
+#define OPT_TOPLEVEL		304
+
+
+/* Command line options.  Entries with a NULL name and a zero key carry
+   only a documentation string; argp prints them as headers in front of
+   the options that follow them in the --help output.  The keys are
+   interpreted by parse_opt.  */
+static const struct argp_option options[] =
+  {
+    { NULL, 0, NULL, 0, "\
+This is a test for threads so we allow the user to select the number of \
+threads which are used at any one time.  Independently the total number of \
+rounds can be selected.  This is the total number of threads which will have \
+run when the process terminates:" },
+    { "threads", 't', "NUMBER", 0, "Number of threads used at once" },
+    { "starts", 's', "NUMBER", 0, "Total number of working threads" },
+    { "toplevel", OPT_TOPLEVEL, "NUMBER", 0,
+      "Number of toplevel threads which start the other threads; this \
+implies --sync-join" },
+
+    { NULL, 0, NULL, 0, "\
+Each thread can do one of two things: sleep or do work.  The latter is 100% \
+CPU bound.  The work load is the probability a thread does work.  All values \
+from zero to 100 (inclusive) are valid.  How often each thread repeats this \
+can be determined by the number of rounds.  The work cost determines how long \
+each work session (not sleeping) takes.  If it is zero a thread would \
+effectively do nothing.  By setting the number of rounds to zero the thread \
+does no work at all and pure thread creation times can be measured." },
+    { "workload", 'w', "PERCENT", 0, "Percentage of time spent working" },
+    { "workcost", 'c', "NUMBER", 0,
+      "Factor in the cost of each round of working" },
+    { "rounds", 'r', "NUMBER", 0, "Number of rounds each thread runs" },
+
+    { NULL, 0, NULL, 0, "\
+There are a number of different methods how thread creation can be \
+synchronized.  Synchronization is necessary since the number of concurrently \
+running threads is limited." },
+    { "sync-signal", OPT_SYNC_SIGNAL, NULL, 0,
+      "Synchronize using a signal (default)" },
+    { "sync-join", OPT_SYNC_JOIN, NULL, 0, "Synchronize using pthread_join" },
+
+    { NULL, 0, NULL, 0, "\
+One parameter for each thread's execution is the size of the stack.  If this \
+parameter is not used the system's default stack size is used.  If many \
+threads are used the stack size should be chosen quite small." },
+    { "stacksize", 'S', "BYTES", 0, "Size of a thread's stack" },
+    { "guardsize", 'g', "BYTES", 0,
+      "Size of stack guard area; must fit into the stack" },
+
+    { NULL, 0, NULL, 0, "Signal options:" },
+    { "to-thread", OPT_TO_THREAD, NULL, 0, "Send signal to main thread" },
+    { "to-process", OPT_TO_PROCESS, NULL, 0,
+      "Send signal to process (default)" },
+
+    { NULL, 0, NULL, 0, "Administrative options:" },
+    { "progress", 'p', NULL, 0, "Show signs of progress" },
+    { "timing", 'T', NULL, 0,
+      "Measure time from startup to the last thread finishing" },
+    { NULL, 0, NULL, 0, NULL }
+  };
+
+/* Prototype for option handler.  The definition follows after main.  */
+static error_t parse_opt (int key, char *arg, struct argp_state *state);
+
+/* Data structure to communicate with argp functions.  Only the option
+   table and the parser callback are provided; all remaining members
+   are implicitly zero-initialized.  */
+static struct argp argp =
+{
+  options, parse_opt
+};
+
+
+/* Run-time parameters.  The initializers are the defaults; parse_opt
+   overwrites them according to the command line.  */
+
+/* Number of threads used at once (-t).  */
+static unsigned long int threads = DEFAULT_THREADS;
+/* Chance, in percent, that a round does work instead of sleeping (-w).  */
+static unsigned long int workload = 75;
+/* Number of iterations of the computation loop in one work round (-c).  */
+static unsigned long int workcost = 20;
+/* Number of rounds each thread runs (-r).  */
+static unsigned long int rounds = 10;
+/* Number of worker threads still to be started (-s); counted down in
+   main while threads are created.  */
+static long int starts = 5000;
+/* Requested stack size (-S); zero means the default is used.  */
+static unsigned long int stacksize;
+/* Requested guard size (-g); -1 means the default is used.  */
+static long int guardsize = -1;
+/* True if progress characters are written to standard error (-p).  */
+static bool progress;
+/* True if the run time is measured and printed (-T).  */
+static bool timing;
+/* True if finished threads signal the main thread with pthread_kill,
+   false if they signal the process with kill.  */
+static bool to_thread;
+/* Number of toplevel threads which start the worker threads when
+   synchronizing with pthread_join (--toplevel).  */
+static unsigned long int toplevel = 1;
+
+
+/* Counter of active threads in sync-signal mode: incremented by main
+   before a thread is created, decremented by thread_function once the
+   work is done.  Both happen with running_mutex held.  */
+static long int running;
+static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+/* Process ID and main thread handle, recorded in main; they are the
+   targets of the SIGUSR1 signals sent by thread_function.  */
+static pid_t pid;
+static pthread_t tmain;
+
+/* Clock used for the timing and the time read from it at startup.  */
+static clockid_t cl;
+static struct timespec start_time;
+
+
+/* Accumulated result of all work rounds, protected by sum_mutex.  It is
+   printed as "Result" at the end of the run.  */
+static pthread_mutex_t sum_mutex = PTHREAD_MUTEX_INITIALIZER;
+unsigned int sum;
+
+/* Selected synchronization method.  Being zero-initialized the variable
+   starts out as sync_signal.  */
+static enum
+  {
+    sync_signal,
+    sync_join
+  }
+sync_method;
+
+
+/* We use 64bit values for the times.  */
+typedef unsigned long long int hp_timing_t;
+
+
+/* Attributes for all created threads.  */
+static pthread_attr_t attr;
+
+
+/* Workload of one thread.  ARG is converted to an integer and used as
+   the seed of the thread's private random number generator.  The
+   function runs `rounds' rounds; in each of them it either performs a
+   computation of `workcost' iterations, whose result is added to `sum',
+   or it sleeps for 10ms.  The chance for the computation is `workload'
+   percent.  Always returns NULL.  */
+static void *
+work (void *arg)
+{
+  /* rand_r returns one of RAND_MAX + 1 different values.  Values at or
+     above the largest multiple of 100 which is not larger than this
+     count must be rejected, otherwise the result of the modulo
+     operation is biased towards small numbers.  */
+  const unsigned int nvalues = (unsigned int) RAND_MAX + 1u;
+  const unsigned int limit = nvalues - nvalues % 100;
+  unsigned long int i;
+  unsigned int state = (unsigned long int) arg;
+
+  for (i = 0; i < rounds; ++i)
+    {
+      /* Determine what to do.  */
+      unsigned int rnum;
+
+      /* Equal distribution.  */
+      do
+	rnum = rand_r (&state);
+      while (rnum >= limit);
+
+      rnum %= 100;
+
+      if (rnum < workload)
+	{
+	  /* Same type as workcost so that the comparison below is not
+	     between signed and unsigned and the counter cannot
+	     overflow.  */
+	  unsigned long int j;
+	  int a[4] = { i, rnum, i + rnum, rnum - i };
+
+	  if (progress)
+	    write (STDERR_FILENO, "c", 1);
+
+	  /* Some arbitrary computation to keep the CPU busy.  */
+	  for (j = 0; j < workcost; ++j)
+	    {
+	      a[0] += a[3] >> 12;
+	      a[1] += a[2] >> 20;
+	      a[2] += a[1] ^ 0x3423423;
+	      a[3] += a[0] - a[1];
+	    }
+
+	  /* Using the result prevents the compiler from optimizing the
+	     computation away.  */
+	  pthread_mutex_lock (&sum_mutex);
+	  sum += a[0] + a[1] + a[2] + a[3];
+	  pthread_mutex_unlock (&sum_mutex);
+	}
+      else
+	{
+	  /* Just sleep.  */
+	  struct timespec tv;
+
+	  tv.tv_sec = 0;
+	  tv.tv_nsec = 10000000;
+
+	  if (progress)
+	    write (STDERR_FILENO, "w", 1);
+
+	  nanosleep (&tv, NULL);
+	}
+    }
+
+  return NULL;
+}
+
+
+/* Start routine of the threads created by main when synchronizing with
+   signals.  After the work is done the thread is removed from the
+   count of running threads.  If it was the last one and no more threads
+   are to be started, the results are printed and the process exits.
+   Otherwise SIGUSR1 is sent, either to the main thread or to the
+   process, to announce that a thread has finished.  */
+static void *
+thread_function (void *arg)
+{
+  bool all_done;
+
+  work (arg);
+
+  pthread_mutex_lock (&running_mutex);
+  --running;
+  all_done = running <= 0 && starts <= 0;
+
+  if (all_done)
+    {
+      /* We are done.  The mutex is intentionally not released, the
+	 process terminates here.  */
+      if (progress)
+	write (STDERR_FILENO, "\n", 1);
+
+      if (timing)
+	{
+	  struct timespec now;
+
+	  if (clock_gettime (cl, &now) == 0)
+	    {
+	      /* Compute the difference to the start time in place and
+		 borrow from the seconds if necessary.  */
+	      now.tv_sec -= start_time.tv_sec;
+	      now.tv_nsec -= start_time.tv_nsec;
+	      if (now.tv_nsec < 0)
+		{
+		  --now.tv_sec;
+		  now.tv_nsec += 1000000000;
+		}
+
+	      printf ("\nRuntime: %lu.%09lu seconds\n",
+		      (unsigned long int) now.tv_sec,
+		      (unsigned long int) now.tv_nsec);
+	    }
+	}
+
+      printf ("Result: %08x\n", sum);
+
+      exit (0);
+    }
+
+  pthread_mutex_unlock (&running_mutex);
+
+  if (sync_method == sync_signal)
+    {
+      if (! to_thread)
+	/* Use this code to test sending a signal to the process.  */
+	kill (pid, SIGUSR1);
+      else
+	/* This code sends a signal to the main thread.  */
+	pthread_kill (tmain, SIGUSR1);
+    }
+
+  if (progress)
+    write (STDERR_FILENO, "f", 1);
+
+  return NULL;
+}
+
+
+/* Work assignment for one toplevel thread.  main fills in one object
+   per toplevel thread and passes its address to start_threads.  */
+struct start_info
+{
+  /* Total number of worker threads the toplevel thread has to start.  */
+  unsigned int starts;
+  /* Number of thread handles the toplevel thread keeps, i.e., the
+     maximum number of its worker threads existing at one time.  */
+  unsigned int threads;
+};
+
+
+/* Start routine of a toplevel thread when synchronizing with
+   pthread_join.  ARG points to a struct start_info.  The function
+   starts si->starts threads running `work' while keeping at most
+   si->threads handles: the handles are used round-robin, and a slot
+   which is still occupied is joined before it is reused.  Once all
+   threads are started the remaining ones are joined.  Failures to
+   create or join a thread terminate the process.  Always returns
+   NULL.  */
+static void *
+start_threads (void *arg)
+{
+  struct start_info *si = arg;
+  unsigned int starts = si->starts;
+  unsigned int state = starts;
+  unsigned int n;
+  unsigned int i = 0;
+  int err;
+
+  if (progress)
+    write (STDERR_FILENO, "T", 1);
+
+  /* Without a slot for a thread handle nothing can be started.  The
+     array below would have no elements and the index would never wrap
+     around.  */
+  if (si->threads == 0)
+    return NULL;
+
+  pthread_t ths[si->threads];
+
+  /* A zero handle marks an unused slot.  */
+  memset (ths, '\0', sizeof (pthread_t) * si->threads);
+
+  while (starts-- > 0)
+    {
+      if (ths[i] != 0)
+	{
+	  /* Wait for the threads in the order they were created.  */
+	  err = pthread_join (ths[i], NULL);
+	  if (err != 0)
+	    error (EXIT_FAILURE, err, "cannot join thread");
+
+	  if (progress)
+	    write (STDERR_FILENO, "f", 1);
+	}
+
+      /* The argument is the seed for the thread.  Convert it to an
+	 integer type as wide as a pointer first; converting an
+	 unsigned int directly is a size mismatch where pointers are
+	 wider.  */
+      err = pthread_create (&ths[i], &attr, work,
+			    (void *) (unsigned long int) (rand_r (&state)
+							  + starts + i));
+
+      if (err != 0)
+	error (EXIT_FAILURE, err, "cannot start thread");
+
+      if (progress)
+	write (STDERR_FILENO, "t", 1);
+
+      if (++i == si->threads)
+	i = 0;
+    }
+
+  /* Join the remaining threads, beginning with the oldest one.  */
+  n = i;
+  do
+    {
+      if (ths[i] != 0)
+	{
+	  err = pthread_join (ths[i], NULL);
+	  if (err != 0)
+	    error (EXIT_FAILURE, err, "cannot join thread");
+
+	  if (progress)
+	    write (STDERR_FILENO, "f", 1);
+	}
+
+      if (++i == si->threads)
+	i = 0;
+    }
+  while (i != n);
+
+  if (progress)
+    write (STDERR_FILENO, "F", 1);
+
+  return NULL;
+}
+
+
+/* Program entry point.  Parses the command line, sets up the thread
+   attributes and then runs the test with the selected synchronization
+   method:
+
+   - sync_signal: the main thread creates detached threads running
+     thread_function and waits for SIGUSR1 whenever the limit of
+     concurrently running threads is reached.  The last thread to
+     finish prints the result and terminates the process.
+
+   - sync_join: `toplevel' threads running start_threads are created,
+     the starts and the thread limit are distributed among them, and the
+     main thread joins them before it prints the result.
+
+   The function does not return in either case.  */
+int
+main (int argc, char *argv[])
+{
+  int remaining;
+  sigset_t ss;
+  pthread_t th;
+  bool cont = true;
+
+  /* Parse and process arguments.  */
+  argp_parse (&argp, argc, argv, 0, &remaining, NULL);
+
+  if (toplevel > threads)
+    {
+      printf ("resetting number of toplevel threads to %lu to not surpass number of concurrent threads\n",
+	      threads);
+      toplevel = threads;
+    }
+
+  /* Without a usable clock the timing is silently disabled.  */
+  if (timing)
+    {
+      if (clock_getcpuclockid (0, &cl) != 0
+	  || clock_gettime (cl, &start_time) != 0)
+	timing = false;
+    }
+
+  /* We need this later.  */
+  pid = getpid ();
+  tmain = pthread_self ();
+
+  /* We use signal SIGUSR1 for communication between the threads and
+     the main thread.  We only want synchronous notification.  */
+  if (sync_method == sync_signal)
+    {
+      sigemptyset (&ss);
+      sigaddset (&ss, SIGUSR1);
+      if (sigprocmask (SIG_BLOCK, &ss, NULL) != 0)
+	error (EXIT_FAILURE, errno, "cannot set signal mask");
+    }
+
+  /* Create the thread attributes.  */
+  pthread_attr_init (&attr);
+
+  /* If the user provided a stack size use it.  */
+  if (stacksize != 0
+      && pthread_attr_setstacksize (&attr, stacksize) != 0)
+    puts ("could not set stack size; will use default");
+  /* And stack guard size.  */
+  if (guardsize != -1
+      && pthread_attr_setguardsize (&attr, guardsize) != 0)
+    puts ("invalid stack guard size; will use default");
+
+  /* All threads are created detached if we are not using pthread_join
+     to synchronize.  */
+  if (sync_method != sync_join)
+    pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
+
+  if (sync_method == sync_signal)
+    {
+      while (1)
+	{
+	  int err;
+	  bool do_wait = false;
+
+	  /* NOTE(review): `starts' is tested before it is decremented,
+	     so one more thread than requested is started unless the
+	     process has been terminated by a worker before.  */
+	  pthread_mutex_lock (&running_mutex);
+	  if (starts-- < 0)
+	    cont = false;
+	  else
+	    do_wait = ++running >= threads && starts > 0;
+
+	  pthread_mutex_unlock (&running_mutex);
+
+	  if (! cont)
+	    break;
+
+	  if (progress)
+	    write (STDERR_FILENO, "t", 1);
+
+	  /* The threads are detached, the handle is never used again.
+	     The remaining number of starts serves as the seed.  */
+	  err = pthread_create (&th, &attr, thread_function,
+				(void *) starts);
+	  if (err != 0)
+	    error (EXIT_FAILURE, err, "cannot start thread %ld", starts);
+
+	  /* The limit is reached, wait until a thread announces that it
+	     has finished.  */
+	  if (do_wait)
+	    sigwaitinfo (&ss, NULL);
+	}
+
+      /* Do nothing anymore.  One of the threads will terminate the
+	 program.  */
+      sigfillset (&ss);
+      sigdelset (&ss, SIGINT);
+      while (1)
+	sigsuspend (&ss);
+    }
+  else
+    {
+      pthread_t ths[toplevel];
+      struct start_info si[toplevel];
+      unsigned int i;
+
+      /* Distribute the starts and the thread limit.  Dividing what is
+	 left by the number of toplevel threads not yet created gives
+	 the remainder of the divisions to the later threads.  */
+      for (i = 0; i < toplevel; ++i)
+	{
+	  unsigned int child_starts = starts / (toplevel - i);
+	  unsigned int child_threads = threads / (toplevel - i);
+	  int err;
+
+	  si[i].starts = child_starts;
+	  si[i].threads = child_threads;
+
+	  err = pthread_create (&ths[i], &attr, start_threads, &si[i]);
+	  if (err != 0)
+	    error (EXIT_FAILURE, err, "cannot start thread");
+
+	  starts -= child_starts;
+	  threads -= child_threads;
+	}
+
+      for (i = 0; i < toplevel; ++i)
+	{
+	  int err = pthread_join (ths[i], NULL);
+
+	  if (err != 0)
+	    error (EXIT_FAILURE, err, "cannot join thread");
+	}
+
+      /* We are done.  */
+      if (progress)
+	write (STDERR_FILENO, "\n", 1);
+
+      if (timing)
+	{
+	  struct timespec end_time;
+
+	  if (clock_gettime (cl, &end_time) == 0)
+	    {
+	      end_time.tv_sec -= start_time.tv_sec;
+	      end_time.tv_nsec -= start_time.tv_nsec;
+	      if (end_time.tv_nsec < 0)
+		{
+		  end_time.tv_nsec += 1000000000;
+		  --end_time.tv_sec;
+		}
+
+	      printf ("\nRuntime: %lu.%09lu seconds\n",
+		      (unsigned long int) end_time.tv_sec,
+		      (unsigned long int) end_time.tv_nsec);
+	    }
+	}
+
+      printf ("Result: %08x\n", sum);
+
+      exit (0);
+    }
+
+  /* NOTREACHED */
+  return 0;
+}
+
+
+/* Handle program arguments.  This is the parser callback for argp:
+   KEY identifies the option and ARG is its argument, if it takes one.
+   Values which are out of range are reported on standard output and
+   otherwise ignored, i.e., the previous setting is kept.  STATE is not
+   used.  Returns zero if KEY was handled, ARGP_ERR_UNKNOWN
+   otherwise.  */
+static error_t
+parse_opt (int key, char *arg, struct argp_state *state)
+{
+  unsigned long int num;
+  long int snum;
+
+  switch (key)
+    {
+    case 't':
+      num = strtoul (arg, NULL, 0);
+      /* The limit itself is a valid value.  */
+      if (num <= MAX_THREADS)
+	threads = num;
+      else
+	/* MAX_THREADS can be defined by the user, convert it to a
+	   type known to match the format.  */
+	printf ("\
+number of threads limited to %lu; recompile with a higher limit if necessary\n",
+		(unsigned long int) MAX_THREADS);
+      break;
+
+    case 'w':
+      num = strtoul (arg, NULL, 0);
+      if (num <= 100)
+	workload = num;
+      else
+	puts ("workload must be between 0 and 100 percent");
+      break;
+
+    case 'c':
+      workcost = strtoul (arg, NULL, 0);
+      break;
+
+    case 'r':
+      rounds = strtoul (arg, NULL, 0);
+      break;
+
+    case 's':
+      starts = strtoul (arg, NULL, 0);
+      break;
+
+    case 'S':
+      num = strtoul (arg, NULL, 0);
+      if (num >= PTHREAD_STACK_MIN)
+	stacksize = num;
+      else
+	/* PTHREAD_STACK_MIN is not necessarily an expression of type
+	   int.  */
+	printf ("minimum stack size is %ld\n", (long int) PTHREAD_STACK_MIN);
+      break;
+
+    case 'g':
+      snum = strtol (arg, NULL, 0);
+      if (snum < 0)
+	printf ("invalid guard size %s\n", arg);
+      else
+	guardsize = snum;
+      break;
+
+    case 'p':
+      progress = true;
+      break;
+
+    case 'T':
+      timing = true;
+      break;
+
+    case OPT_TO_THREAD:
+      to_thread = true;
+      break;
+
+    case OPT_TO_PROCESS:
+      to_thread = false;
+      break;
+
+    case OPT_SYNC_SIGNAL:
+      sync_method = sync_signal;
+      break;
+
+    case OPT_SYNC_JOIN:
+      sync_method = sync_join;
+      break;
+
+    case OPT_TOPLEVEL:
+      num = strtoul (arg, NULL, 0);
+      if (num <= MAX_THREADS)
+	toplevel = num;
+      else
+	printf ("\
+number of threads limited to %lu; recompile with a higher limit if necessary\n",
+		(unsigned long int) MAX_THREADS);
+      /* Toplevel threads are only used when synchronizing with
+	 pthread_join.  */
+      sync_method = sync_join;
+      break;
+
+    default:
+      return ARGP_ERR_UNKNOWN;
+    }
+
+  return 0;
+}
+
+
+/* Return the clock frequency of the CPU in Hz, or zero if it cannot be
+   determined.  A nonzero result is cached, so /proc/cpuinfo is read at
+   most once successfully.  */
+static hp_timing_t
+get_clockfreq (void)
+{
+  /* We read the information from the /proc filesystem.  It contains at
+     least one line like
+	cpu MHz         : 497.840237
+     or also
+	cpu MHz         : 497.841
+     We search for this line and convert the number in an integer.  */
+  static hp_timing_t result;
+  hp_timing_t freq = 0;
+  int fd;
+
+  /* If this function was called before, we know the result.  */
+  if (result != 0)
+    return result;
+
+  fd = open ("/proc/cpuinfo", O_RDONLY);
+  if (__builtin_expect (fd != -1, 1))
+    {
+      /* XXX AFAIK the /proc filesystem can generate "files" only up
+         to a size of 4096 bytes.  */
+      char buf[4096];
+      ssize_t n;
+
+      n = read (fd, buf, sizeof buf);
+      /* The expectation must be expressed for the comparison, not for
+	 the byte count.  */
+      if (__builtin_expect (n > 0, 1))
+	{
+	  char *mhz = memmem (buf, n, "cpu MHz", 7);
+
+	  if (__builtin_expect (mhz != NULL, 1))
+	    {
+	      char *endp = buf + n;
+	      int seen_decpoint = 0;
+	      int ndigits = 0;
+
+	      /* Search for the beginning of the string.  */
+	      while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
+		++mhz;
+
+	      while (mhz < endp && *mhz != '\n')
+		{
+		  if (*mhz >= '0' && *mhz <= '9')
+		    {
+		      /* Six digits after the decimal point of a value
+			 in MHz give Hz.  Further digits are dropped,
+			 they would scale the result by powers of
+			 ten.  */
+		      if (! seen_decpoint || ndigits < 6)
+			{
+			  freq *= 10;
+			  freq += *mhz - '0';
+			  if (seen_decpoint)
+			    ++ndigits;
+			}
+		    }
+		  else if (*mhz == '.')
+		    seen_decpoint = 1;
+
+		  ++mhz;
+		}
+
+	      /* Compensate for missing digits at the end.  */
+	      while (ndigits++ < 6)
+		freq *= 10;
+	    }
+	}
+
+      close (fd);
+    }
+
+  /* Store the value only when it is complete.  */
+  result = freq;
+
+  return result;
+}
+
+
+/* Store in *CLOCK_ID the ID of the CPU-time clock of process PID.
+   Only the calling process is supported, named either by zero or by
+   its own process ID.  Returns zero on success, EPERM for any other
+   process and ENOENT if no such clock is available.  */
+int
+clock_getcpuclockid (pid_t pid, clockid_t *clock_id)
+{
+  int result = EPERM;
+
+  /* We don't allow any process ID but our own.  */
+  if (pid == 0 || pid == getpid ())
+    {
+#ifdef CLOCK_PROCESS_CPUTIME_ID
+      /* Store the number.  */
+      *clock_id = CLOCK_PROCESS_CPUTIME_ID;
+      result = 0;
+#else
+      /* We don't have a timer for that.  */
+      result = ENOENT;
+#endif
+    }
+
+  return result;
+}
+
+
+/* Store the current value of the CPU's time stamp counter in VAR.  The
+   two halves of the result are requested in separate registers: the
+   "A" constraint stands for the edx:eax pair only on 32-bit x86, while
+   this form is also correct on x86-64.  */
+#define HP_TIMING_NOW(Var) \
+  do									      \
+    {									      \
+      unsigned int hp_lo_, hp_hi_;					      \
+      __asm__ __volatile__ ("rdtsc" : "=a" (hp_lo_), "=d" (hp_hi_));	      \
+      (Var) = ((hp_timing_t) hp_hi_ << 32) | hp_lo_;			      \
+    }									      \
+  while (0)
+
+/* Get current value of CLOCK and store it in TP.  Only
+   CLOCK_PROCESS_CPUTIME_ID is supported; its value is computed from the
+   time stamp counter and the clock frequency of the CPU.  Returns zero
+   on success.  On failure -1 is returned and errno is set to EINVAL.  */
+int
+clock_gettime (clockid_t clock_id, struct timespec *tp)
+{
+  int retval = -1;
+
+  switch (clock_id)
+    {
+    case CLOCK_PROCESS_CPUTIME_ID:
+      {
+
+	static hp_timing_t freq;
+	hp_timing_t tsc;
+
+	/* Get the current counter.  */
+	HP_TIMING_NOW (tsc);
+
+	if (freq == 0)
+	  {
+	    freq = get_clockfreq ();
+	    if (freq == 0)
+	      {
+		/* Report the failure the same way as for an unknown
+		   clock instead of returning the error number.  */
+		errno = EINVAL;
+		return -1;
+	      }
+	  }
+
+	/* Compute the seconds.  */
+	tp->tv_sec = tsc / freq;
+
+	/* And the nanoseconds.  This computation should be stable until
+	   we get machines with about 16GHz frequency.  */
+	tp->tv_nsec = ((tsc % freq) * UINT64_C (1000000000)) / freq;
+
+	retval = 0;
+      }
+    break;
+
+    default:
+      errno = EINVAL;
+      break;
+    }
+
+  return retval;
+}