about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- README.md 17
-rw-r--r-- ministat.1 12
-rw-r--r-- ministat.c 175
3 files changed, 127 insertions, 77 deletions
diff --git a/README.md b/README.md
index 9b94bf8..a9685cc 100644
--- a/README.md
+++ b/README.md
@@ -15,14 +15,15 @@ There should be no dependencies besides the standard libraries and a functional
 ## Usage
 The FreeBSD man page is very relevant, pursue it [here](http://www.freebsd.org/cgi/man.cgi?ministat).
 
-	Usage: ministat [-C column] [-c confidence] [-d delimiter(s)] [-ns] [-w width] [file [file ...]]
-		confidence = {80%, 90%, 95%, 98%, 99%, 99.5%}
-		-C : column number to extract (starts and defaults to 1)
-		-d : delimiter(s) string, default to " \t"
-		-n : print summary statistics only, no graph/test
-		-q : print summary statistics and test only, no graph
-		-s : print avg/median/stddev bars on separate lines
-		-w : width of graph/test output (default 74 or terminal width)
+        Usage: ministat [-C column] [-c confidence] [-d delimiter(s)] [-Anqs] [-w width] [file [file ...]]
+                confidence = {80%, 90%, 95%, 98%, 99%, 99.5%}
+                -A : print statistics only, suppress the graph
+                -C : column number to extract (starts and defaults to 1)
+                -d : delimiter(s) string, default to " \t"
+                -n : print summary statistics only, no graph/test
+                -q : suppress printing summary-statistics headers and data-set names
+                -s : print avg/median/stddev bars on separate lines
+                -w : width of graph/test output (default 74 or terminal width)
 
 ## Example
 From the FreeBSD [man page](http://www.freebsd.org/cgi/man.cgi?ministat)
diff --git a/ministat.1 b/ministat.1
index ea31c23..cef7caa 100644
--- a/ministat.1
+++ b/ministat.1
@@ -23,8 +23,6 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.\" $FreeBSD$
-.\"
 .Dd November 10, 2012
 .Dt MINISTAT 1
 .Os
@@ -33,7 +31,7 @@
 .Nd statistics utility
 .Sh SYNOPSIS
 .Nm
-.Op Fl Ans
+.Op Fl Anqs
 .Op Fl C Ar column
 .Op Fl c Ar confidence_level
 .Op Fl d Ar delimiter
@@ -53,6 +51,10 @@ suppress the ASCII-art plot.
 .It Fl n
 Just report the raw statistics of the input, suppress the ASCII-art plot
 and the relative comparisons.
+.It Fl q
+Suppress printing of summary-statistics headers and data-set names;
+typically for use alongside
+.Fl n .
 .It Fl s
 Print the average/median/stddev bars on separate lines in the ASCII-art
 plot, to avoid overlap.
@@ -68,7 +70,9 @@ See
 .Xr strtok 3
 for details.
 .It Fl w Ar width
-Width of ASCII-art plot in characters, default is 74.
+Width of ASCII-art plot in characters.
+The default is the terminal width, or 74 if standard output is not a
+terminal.
 .El
 .Pp
 A sample output could look like this:
diff --git a/ministat.c b/ministat.c
index 198bf53..c30a5f9 100644
--- a/ministat.c
+++ b/ministat.c
@@ -1,4 +1,6 @@
-/*
+/*-
+ * SPDX-License-Identifier: Beerware
+ *
  * ----------------------------------------------------------------------------
  * "THE BEER-WARE LICENSE" (Revision 42):
  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
@@ -8,19 +10,22 @@
  *
  */
 
-#include <stdio.h>
-#include <math.h>
+#include <assert.h>
+#include <ctype.h>
 #include <err.h>
-#include <string.h>
+#include <errno.h>
+#include <math.h>
+#include <stdio.h>
 #include <stdlib.h>
-#include <unistd.h>
+#include <string.h>
 #include <sys/ioctl.h>
+#include <unistd.h>
 #include "queue.h"
 
 #define NSTUDENT 100
 #define NCONF 6
 static double const studentpct[] = { 80, 90, 95, 98, 99, 99.5 };
-static double student[NSTUDENT + 1][NCONF] = {
+static double const student[NSTUDENT + 1][NCONF] = {
 /* inf */	{	1.282,	1.645,	1.960,	2.326,	2.576,	3.090  },
 /* 1. */	{	3.078,	6.314,	12.706,	31.821,	63.657,	318.313  },
 /* 2. */	{	1.886,	2.920,	4.303,	6.965,	9.925,	22.327  },
@@ -130,9 +135,9 @@ static char symbol[MAX_DS] = { ' ', 'x', '+', '*', '%', '#', '@', 'O' };
 struct dataset {
 	char *name;
 	double	*points;
-	unsigned lpoints;
+	size_t lpoints;
 	double sy, syy;
-	unsigned n;
+	size_t n;
 };
 
 static struct dataset *
@@ -141,8 +146,11 @@ NewSet(void)
 	struct dataset *ds;
 
 	ds = calloc(1, sizeof *ds);
+	assert(ds != NULL);
 	ds->lpoints = 100000;
-	ds->points = calloc(sizeof *ds->points, ds->lpoints);
+	ds->points = calloc(ds->lpoints, sizeof(*ds->points));
+	assert(ds->points != NULL);
+	ds->syy = NAN;
 	return(ds);
 }
 
@@ -154,48 +162,59 @@ AddPoint(struct dataset *ds, double a)
 	if (ds->n >= ds->lpoints) {
 		dp = ds->points;
 		ds->lpoints *= 4;
-		ds->points = calloc(sizeof *ds->points, ds->lpoints);
+		ds->points = calloc(ds->lpoints, sizeof(*ds->points));
+		assert(ds->points != NULL);
 		memcpy(ds->points, dp, sizeof *dp * ds->n);
 		free(dp);
 	}
 	ds->points[ds->n++] = a;
 	ds->sy += a;
-	ds->syy += a * a;
 }
 
 static double
-Min(struct dataset *ds)
+Min(const struct dataset *ds)
 {
 
 	return (ds->points[0]);
 }
 
 static double
-Max(struct dataset *ds)
+Max(const struct dataset *ds)
 {
 
 	return (ds->points[ds->n -1]);
 }
 
 static double
-Avg(struct dataset *ds)
+Avg(const struct dataset *ds)
 {
 
 	return(ds->sy / ds->n);
 }
 
 static double
-Median(struct dataset *ds)
+Median(const struct dataset *ds)
 {
+	const size_t m = ds->n / 2;
 
-	return (ds->points[ds->n / 2]);
+	if ((ds->n % 2) == 0)
+		return ((ds->points[m] + (ds->points[m - 1])) / 2);
+	return (ds->points[m]);
 }
 
 static double
 Var(struct dataset *ds)
 {
+	size_t z;
+	const double a = Avg(ds);
+
+	if (isnan(ds->syy)) {
+		ds->syy = 0.0;
+		for (z = 0; z < ds->n; z++)
+			ds->syy += (ds->points[z] - a) * (ds->points[z] - a);
+	}
 
-	return (ds->syy - ds->sy * ds->sy / ds->n) / (ds->n - 1.0);
+	return (ds->syy / (ds->n - 1.0));
 }
 
 static double
@@ -216,7 +235,7 @@ static void
 Vitals(struct dataset *ds, int flag)
 {
 
-	printf("%c %3d %13.8g %13.8g %13.8g %13.8g %13.8g", symbol[flag],
+	printf("%c %3zu %13.8g %13.8g %13.8g %13.8g %13.8g", symbol[flag],
 	    ds->n, Min(ds), Max(ds), Median(ds), Avg(ds), Stddev(ds));
 	printf("\n");
 }
@@ -225,13 +244,14 @@ static void
 Relative(struct dataset *ds, struct dataset *rs, int confidx)
 {
 	double spool, s, d, e, t;
-	int i;
+	double re;
+	size_t z;
 
-	i = ds->n + rs->n - 2;
-	if (i > NSTUDENT)
+	z = ds->n + rs->n - 2;
+	if (z > NSTUDENT)
 		t = student[0][confidx];
 	else
-		t = student[i][confidx];
+		t = student[z][confidx];
 	spool = (ds->n - 1) * Var(ds) + (rs->n - 1) * Var(rs);
 	spool /= ds->n + rs->n - 2;
 	spool = sqrt(spool);
@@ -239,11 +259,15 @@ Relative(struct dataset *ds, struct dataset *rs, int confidx)
 	d = Avg(ds) - Avg(rs);
 	e = t * s;
 
+	re = (ds->n - 1) * Var(ds) + (rs->n - 1) * Var(rs) *
+	    (Avg(ds) * Avg(ds)) / (Avg(rs) * Avg(rs));
+	re *= (ds->n + rs->n) / (ds->n * rs->n * (ds->n + rs->n - 2.0));
+	re = t * sqrt(re);
+
 	if (fabs(d) > e) {
-	
 		printf("Difference at %.1f%% confidence\n", studentpct[confidx]);
 		printf("	%g +/- %g\n", d, e);
-		printf("	%g%% +/- %g%%\n", d * 100 / Avg(rs), e * 100 / Avg(rs));
+		printf("	%g%% +/- %g%%\n", d * 100 / Avg(rs), re * 100 / Avg(rs));
 		printf("	(Student's t, pooled s = %g)\n", spool);
 	} else {
 		printf("No difference proven at %.1f%% confidence\n",
@@ -258,7 +282,7 @@ struct plot {
 	int		width;
 
 	double		x0, dx;
-	int		height;
+	size_t		height;
 	char		*data;
 	char		**bar;
 	int		separate_bars;
@@ -311,9 +335,11 @@ static void
 PlotSet(struct dataset *ds, int val)
 {
 	struct plot *pl;
-	int i, j, m, x;
-	unsigned n;
+	int i, x;
+	size_t m, j, z;
+	size_t n;
 	int bar;
+	double av, sd;
 
 	pl = &plot;
 	if (pl->span == 0)
@@ -325,17 +351,20 @@ PlotSet(struct dataset *ds, int val)
 		bar = 0;
 
 	if (pl->bar == NULL) {
-		pl->bar = malloc(sizeof(char *) * pl->num_datasets);
-		memset(pl->bar, 0, sizeof(char*) * pl->num_datasets);
+		pl->bar = calloc(pl->num_datasets, sizeof(char *));
+		assert(pl->bar != NULL);
 	}
+
 	if (pl->bar[bar] == NULL) {
 		pl->bar[bar] = malloc(pl->width);
+		assert(pl->bar[bar] != NULL);
 		memset(pl->bar[bar], 0, pl->width);
 	}
-	
+
 	m = 1;
 	i = -1;
 	j = 0;
+	/* Set m to max(j) + 1, to allocate required memory */
 	for (n = 0; n < ds->n; n++) {
 		x = (ds->points[n] - pl->x0) / pl->dx;
 		if (x == i) {
@@ -350,6 +379,7 @@ PlotSet(struct dataset *ds, int val)
 	m += 1;
 	if (m > pl->height) {
 		pl->data = realloc(pl->data, pl->width * m);
+		assert(pl->data != NULL);
 		memset(pl->data + pl->height * pl->width, 0,
 		    (m - pl->height) * pl->width);
 	}
@@ -365,18 +395,20 @@ PlotSet(struct dataset *ds, int val)
 		}
 		pl->data[j * pl->width + x] |= val;
 	}
-	if (!isnan(Stddev(ds))) {
-		x = ((Avg(ds) - Stddev(ds)) - pl->x0) / pl->dx;
-		m = ((Avg(ds) + Stddev(ds)) - pl->x0) / pl->dx;
+	av = Avg(ds);
+	sd = Stddev(ds);
+	if (!isnan(sd)) {
+		x = ((av - sd) - pl->x0) / pl->dx;
+		m = ((av + sd) - pl->x0) / pl->dx;
 		pl->bar[bar][m] = '|';
 		pl->bar[bar][x] = '|';
-		for (i = x + 1; i < m; i++)
-			if (pl->bar[bar][i] == 0)
-				pl->bar[bar][i] = '_';
+		for (z = x + 1; z < m; z++)
+			if (pl->bar[bar][z] == 0)
+				pl->bar[bar][z] = '_';
 	}
 	x = (Median(ds) - pl->x0) / pl->dx;
 	pl->bar[bar][x] = 'M';
-	x = (Avg(ds) - pl->x0) / pl->dx;
+	x = (av - pl->x0) / pl->dx;
 	pl->bar[bar][x] = 'A';
 }
 
@@ -385,6 +417,7 @@ DumpPlot(void)
 {
 	struct plot *pl;
 	int i, j, k;
+	size_t z;
 
 	pl = &plot;
 	if (pl->span == 0) {
@@ -397,10 +430,10 @@ DumpPlot(void)
 		putchar('-');
 	putchar('+');
 	putchar('\n');
-	for (i = 1; i < pl->height; i++) {
+	for (z = 1; z < pl->height; z++) {
 		putchar('|');
 		for (j = 0; j < pl->width; j++) {
-			k = pl->data[(pl->height - i) * pl->width + j];
+			k = pl->data[(pl->height - z) * pl->width + j];
 			if (k >= 0 && k < MAX_DS)
 				putchar(symbol[k]);
 			else
@@ -444,35 +477,24 @@ dbl_cmp(const void *a, const void *b)
 }
 
 static struct dataset *
-ReadSet(const char *n, int column, const char *delim)
+ReadSet(FILE *f, const char *n, int column, const char *delim)
 {
-	FILE *f;
 	char buf[BUFSIZ], *p, *t;
 	struct dataset *s;
 	double d;
 	int line;
 	int i;
 
-	if (n == NULL) {
-		f = stdin;
-		n = "<stdin>";
-	} else if (!strcmp(n, "-")) {
-		f = stdin;
-		n = "<stdin>";
-	} else {
-		f = fopen(n, "r");
-	}
-	if (f == NULL)
-		err(1, "Cannot open %s", n);
 	s = NewSet();
 	s->name = strdup(n);
+	assert(s->name != NULL);
 	line = 0;
 	while (fgets(buf, sizeof buf, f) != NULL) {
 		line++;
 
 		i = strlen(buf);
-		if (buf[i-1] == '\n')
-			buf[i-1] = '\0';
+		while (i > 0 && isspace(buf[i - 1]))
+			buf[--i] = '\0';
 		for (i = 1, t = strtok(buf, delim);
 		     t != NULL && *t != '#';
 		     i++, t = strtok(NULL, delim)) {
@@ -484,11 +506,10 @@ ReadSet(const char *n, int column, const char *delim)
 
 		d = strtod(t, &p);
 		if (p != NULL && *p != '\0')
-			err(2, "Invalid data on line %d in %s\n", line, n);
+			errx(2, "Invalid data on line %d in %s", line, n);
 		if (*buf != '\0')
 			AddPoint(s, d);
 	}
-	fclose(f);
 	if (s->n < 3) {
 		fprintf(stderr,
 		    "Dataset %s must contain at least 3 data points\n", n);
@@ -505,7 +526,7 @@ usage(char const *whine)
 
 	fprintf(stderr, "%s\n", whine);
 	fprintf(stderr,
-	    "Usage: ministat [-C column] [-c confidence] [-d delimiter(s)] [-Ans] [-w width] [file [file ...]]\n");
+	    "Usage: ministat [-C column] [-c confidence] [-d delimiter(s)] [-Anqs] [-w width] [file [file ...]]\n");
 	fprintf(stderr, "\tconfidence = {");
 	for (i = 0; i < NCONF; i++) {
 		fprintf(stderr, "%s%g%%",
@@ -517,6 +538,7 @@ usage(char const *whine)
 	fprintf(stderr, "\t-C : column number to extract (starts and defaults to 1)\n");
 	fprintf(stderr, "\t-d : delimiter(s) string, default to \" \\t\"\n");
 	fprintf(stderr, "\t-n : print summary statistics only, no graph/test\n");
+	fprintf(stderr, "\t-q : suppress printing summary-statistics headers and data-set names\n");
 	fprintf(stderr, "\t-s : print avg/median/stddev bars on separate lines\n");
 	fprintf(stderr, "\t-w : width of graph/test output (default 74 or terminal width)\n");
 	exit (2);
@@ -525,7 +547,9 @@ usage(char const *whine)
 int
 main(int argc, char **argv)
 {
-	struct dataset *ds[7];
+	const char *setfilenames[MAX_DS - 1];
+	struct dataset *ds[MAX_DS - 1];
+	FILE *setfiles[MAX_DS - 1];
 	int nds;
 	double a;
 	const char *delim = " \t";
@@ -534,6 +558,7 @@ main(int argc, char **argv)
 	int column = 1;
 	int flag_s = 0;
 	int flag_n = 0;
+	int flag_q = 0;
 	int termwidth = 74;
 	int suppress_plot = 0;
 
@@ -548,7 +573,7 @@ main(int argc, char **argv)
 	}
 
 	ci = -1;
-	while ((c = getopt(argc, argv, "AC:c:d:snw:")) != -1)
+	while ((c = getopt(argc, argv, "AC:c:d:snqw:")) != -1)
 		switch (c) {
 		case 'A':
 			suppress_plot = 1;
@@ -578,6 +603,9 @@ main(int argc, char **argv)
 		case 'n':
 			flag_n = 1;
 			break;
+		case 'q':
+			flag_q = 1;
+			break;
 		case 's':
 			flag_s = 1;
 			break;
@@ -598,18 +626,34 @@ main(int argc, char **argv)
 	argv += optind;
 
 	if (argc == 0) {
-		ds[0] = ReadSet("-", column, delim);
+		setfilenames[0] = "<stdin>";
+		setfiles[0] = stdin;
 		nds = 1;
 	} else {
 		if (argc > (MAX_DS - 1))
 			usage("Too many datasets.");
 		nds = argc;
-		for (i = 0; i < nds; i++)
-			ds[i] = ReadSet(argv[i], column, delim);
+		for (i = 0; i < nds; i++) {
+			setfilenames[i] = argv[i];
+			if (!strcmp(argv[i], "-"))
+				setfiles[0] = stdin;
+			else
+				setfiles[i] = fopen(argv[i], "r");
+			if (setfiles[i] == NULL)
+				err(2, "Cannot open %s", argv[i]);
+		}
+	}
+
+	for (i = 0; i < nds; i++) {
+		ds[i] = ReadSet(setfiles[i], setfilenames[i], column, delim);
+		if (setfiles[i] != stdin)
+			fclose(setfiles[i]);
 	}
 
-	for (i = 0; i < nds; i++) 
-		printf("%c %s\n", symbol[i+1], ds[i]->name);
+	if (!flag_q) {
+		for (i = 0; i < nds; i++)
+			printf("%c %s\n", symbol[i+1], ds[i]->name);
+	}
 
 	if (!flag_n && !suppress_plot) {
 		SetupPlot(termwidth, flag_s, nds);
@@ -619,7 +663,8 @@ main(int argc, char **argv)
 			PlotSet(ds[i], i + 1);
 		DumpPlot();
 	}
-	VitalsHead();
+	if (!flag_q)
+		VitalsHead();
 	Vitals(ds[0], 1);
 	for (i = 1; i < nds; i++) {
 		Vitals(ds[i], i + 1);