diff options
author | Leah Neukirchen <leah@vuxu.org> | 2020-05-07 22:16:57 +0200 |
---|---|---|
committer | Leah Neukirchen <leah@vuxu.org> | 2020-05-07 22:16:57 +0200 |
commit | d98c6e6bcb48bc7c910fe06dd9d94f2868bb1afb (patch) | |
tree | b4708c3f62b0958c7acc7a4ae8f643653a3ec1ba | |
download | hittpd-d98c6e6bcb48bc7c910fe06dd9d94f2868bb1afb.tar.gz hittpd-d98c6e6bcb48bc7c910fe06dd9d94f2868bb1afb.tar.xz hittpd-d98c6e6bcb48bc7c910fe06dd9d94f2868bb1afb.zip |
initial commit
-rw-r--r-- | LICENSE | 19 | ||||
-rw-r--r-- | Makefile | 27 | ||||
-rw-r--r-- | hittpd.c | 967 | ||||
-rw-r--r-- | http-parser/.gitignore | 30 | ||||
-rw-r--r-- | http-parser/.mailmap | 8 | ||||
-rw-r--r-- | http-parser/.travis.yml | 13 | ||||
-rw-r--r-- | http-parser/AUTHORS | 68 | ||||
-rw-r--r-- | http-parser/LICENSE-MIT | 19 | ||||
-rw-r--r-- | http-parser/Makefile | 160 | ||||
-rw-r--r-- | http-parser/README.md | 246 | ||||
-rw-r--r-- | http-parser/bench.c | 128 | ||||
-rw-r--r-- | http-parser/contrib/parsertrace.c | 157 | ||||
-rw-r--r-- | http-parser/contrib/url_parser.c | 47 | ||||
-rw-r--r-- | http-parser/fuzzers/fuzz_parser.c | 26 | ||||
-rw-r--r-- | http-parser/fuzzers/fuzz_url.c | 14 | ||||
-rw-r--r-- | http-parser/http_parser.c | 2568 | ||||
-rw-r--r-- | http-parser/http_parser.gyp | 111 | ||||
-rw-r--r-- | http-parser/http_parser.h | 445 | ||||
-rw-r--r-- | http-parser/test.c | 4600 |
19 files changed, 9653 insertions, 0 deletions
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..20471fb --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +Copyright 2020 Leah Neukirchen <leah@vuxu.org> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..fbe0a26 --- /dev/null +++ b/Makefile @@ -0,0 +1,27 @@ +ALL=hittpd +OBJ=hittpd.o http-parser/http_parser.o + +CFLAGS=-g -O2 -Wall -Wno-switch -Wextra -Wwrite-strings +CPPFLAGS=-DUSE_SENDFILE -Ihttp-parser + +DESTDIR= +PREFIX=/usr/local +BINDIR=$(PREFIX)/bin +MANDIR=$(PREFIX)/share/man + +hittpd: $(OBJ) + +all: $(ALL) + +clean: FRC + rm -f $(ALL) $(OBJ) + +install: FRC all + mkdir -p $(DESTDIR)$(BINDIR) $(DESTDIR)$(MANDIR)/man8 + install -m0755 $(ALL) $(DESTDIR)$(BINDIR) + install -m0644 $(ALL:=.1) $(DESTDIR)$(MANDIR)/man8 + +README: hittpd.8 + mandoc -Tutf8 $< | col -bx >$@ + +FRC: diff --git a/hittpd.c b/hittpd.c new file mode 100644 index 0000000..73c7f73 --- /dev/null +++ b/hittpd.c @@ -0,0 +1,967 @@ +/* hittpd - efficient, no-frills HTTP 1.1 daemon */ + +/* Copyright 2020 Leah Neukirchen <leah@vuxu.org> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#define TIMEOUT 60 + +#define _XOPEN_SOURCE 700 +#define _DEFAULT_SOURCE + +#ifdef USE_SENDFILE +#include <sys/sendfile.h> +#endif +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include <ctype.h> +#include <dirent.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <netdb.h> +#include <netinet/in.h> +#include <poll.h> +#include <pwd.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <time.h> +#include <unistd.h> + +#include "http_parser.h" + +struct conn_data { + enum { NONE, HOST, IMS, RANGE, OTHER, BAD_REQUEST, SENDING } state; + char *host; + char *ims; + char *path; + int fd; + + /* state needed: + - if serving file: fd, range and offset + - if serving string: string, range and offset + */ + + off_t off, first, last; + int stream_fd; + char *buf; + + time_t deadline; +}; + +char mimetypes[] = + ":.html=text/html" + ":.htm=text/html" + ":.gif=image/gif" + ":.txt=text/plain"; + +char default_mimetype[] = "text/plain"; // "application/octet-stream" + +char wwwroot[] = "/tmp"; +char default_vhost[] = "_default"; + +int tilde = 0; +int vhost = 0; + +static int +on_url(http_parser *p, const char *s, size_t l) +{ + struct conn_data *data = p->data; + + if (l == 0) + return 1; + + char *path = malloc(l + 1); + if (!path) + return 1; + + char *t = path; + + // XXX move decoding below, to not show up in access log + + for (size_t i = 0; i < l; i++) { + if (s[i] == '%') { + char c1 = s[i+1]; + + if (c1 >= '0' && c1 <= '9') + c1 = c1 - '0'; + else if (c1 >= 'A' && c1 <= 'F') + c1 = c1 - 'A' + 10; + else if (c1 >= 'a' && c1 <= 'f') + c1 = c1 - 'a' + 10; + else { + data->state = BAD_REQUEST; + return 0; + } + + char c2 = s[i+2]; + + if (c2 >= '0' && c2 <= '9') + c2 = c2 - '0'; + else if (c2 >= 'A' && c2 <= 'F') + c2 = c2 - 'A' + 10; + else if (c2 >= 'a' && c2 <= 'f') + c2 = c2 - 'a' + 10; + else { + data->state = BAD_REQUEST; + return 0; + } + + char d = 
(c1 << 4) | c2; + + if (d == 0 || d == '/') + data->state = BAD_REQUEST; + + *t++ = d; + i += 2; + } else if (s[i] == 0) { + data->state = BAD_REQUEST; + } else { + *t++ = s[i]; + } + } + *t = 0; + + data->path = path; + return 0; +} + +static int +on_header_field(http_parser *p, const char *s, size_t l) +{ + struct conn_data *data = p->data; + + if (data->state == BAD_REQUEST) + return 0; + + if (l == 4 && strncasecmp(s, "host", l) == 0) + data->state = HOST; + else if (l == 17 && strncasecmp(s, "if-modified-since", l) == 0) + data->state = IMS; + else if (l == 5 && strncasecmp(s, "range", l) == 0) + data->state = RANGE; + else + data->state = OTHER; // ignore others + + return 0; +} + +void +parse_range(struct conn_data *data, const char *s, size_t l) +{ + long n; + + if (sscanf(s, "bytes=%lu-%lu", &(data->first), &(data->last)) == 2) { + data->last++; // range counts inclusive + return; + } else if (sscanf(s, "bytes=-%lu", &n) == 1 && n > 0) { + data->first = -n; + data->last = -1; + return; + } else if (sscanf(s, "bytes=%lu-", &(data->first)) == 1 && s[l-1] == '-') { + data->last = -1; + return; + } else { + data->first = data->last = -666; + } +} + +static int +on_header_value(http_parser *p, const char *s, size_t l) +{ + struct conn_data *data = p->data; + + if (data->state == HOST) + data->host = strndup(s, l); + else if (data->state == IMS) + data->ims = strndup(s, l); + else if (data->state == RANGE) + parse_range(data, s, l); + + // ignore others + + return 0; +} + +void +httpdate(time_t t, char *buf) +{ + strftime(buf, 64, "%a, %d %b %Y %H:%M:%S %Z", gmtime(&t)); +} + +static time_t +parse_http_date(char *s) +{ + struct tm tm; + + if (strlen(s) != 29) + return 0; + + if (!strptime(s, "%a, %d %b %Y %T GMT", &tm)) + return 0; + + return timegm(&tm); +} + + +const char * +peername(http_parser *p) +{ + struct conn_data *data = p->data; + + struct sockaddr_storage ss; + socklen_t slen = sizeof ss; + static char addrbuf[NI_MAXHOST]; + + if 
(getpeername(data->fd, (struct sockaddr *)(void *)&ss, &slen) < 0) + return "0.0.0.0"; + if (getnameinfo((struct sockaddr *)(void *)&ss, slen, + addrbuf, sizeof addrbuf, 0, 0, NI_NUMERICHOST) < 0) + return "0.0.0.0"; + + if (strncmp("::ffff:", addrbuf, 7) == 0) + return addrbuf + 7; + + return addrbuf; +} + +void +accesslog(http_parser *p, int status) +{ + struct conn_data *data = p->data; + + char buf[64]; + time_t t = time(0); + strftime(buf, 64, "[%d/%b/%Y:%H:%M:%S %z]", localtime(&t)); + +// REMOTEHOST - - [DD/MON/YYYY:HH:MM:SS -TZ] "METHOD PATH" STATUS BYTES +// ? REFERER USER_AGENT + printf("%s - - %s \"%s %s\" %d %ld\n", + peername(p), + buf, + http_method_str(p->method), + data->path, + status, + p->method == HTTP_HEAD ? 0 : data->last - data->first); +} + +void +send_dir_redirect(http_parser *p) +{ + struct conn_data *data = p->data; + char buf[512]; + + char now[64]; + httpdate(time(0), now); + + int len = snprintf(buf, sizeof buf, + "HTTP/1.%d 301 Moved Permanently\r\n" + "Content-Length: 0\r\n" + "Date: %s\r\n" + "Location: %s/\r\n" + "\r\n", + p->http_minor, + now, + data->path); + + // XXX include redirect link? 
+ + write(data->fd, buf, len); + accesslog(p, 301); +} + +void +send_not_modified(http_parser *p, time_t modified) +{ + struct conn_data *data = p->data; + char buf[512]; + + char now[64], lastmod[64]; + httpdate(time(0), now); + httpdate(modified, lastmod); + + int len = snprintf(buf, sizeof buf, + "HTTP/1.%d 304 Not Modified\r\n" + "Date: %s\r\n" + "Last-Modified: %s\r\n" + "\r\n", + p->http_minor, + now, + lastmod); + + write(data->fd, buf, len); + accesslog(p, 304); +} + +void +send_error(http_parser *p, int status, const char *msg) +{ + struct conn_data *data = p->data; + char buf[512]; + + char now[64]; + httpdate(time(0), now); + + int len = snprintf(buf, sizeof buf, + "HTTP/1.%d %d %s\r\n" + "Content-Length: %ld\r\n" + "Date: %s\r\n" + "\r\n", + p->http_minor, + status, msg, + 4 + strlen(msg) + 2, + now); + + if (p->method != HTTP_HEAD) + len += snprintf(buf + len, sizeof buf - len, + "%03d %s\r\n", + status, msg); + + write(data->fd, buf, len); + accesslog(p, status); +} + +void +send_rns(http_parser *p, off_t filesize) +{ + struct conn_data *data = p->data; + char buf[512]; + + char now[64]; + httpdate(time(0), now); + + int len = snprintf(buf, sizeof buf, + "HTTP/1.%d 416 Requested Range Not Satisfiable\r\n" + "Content-Length: 0\r\n" + "Date: %s\r\n" + "Content-Range: bytes */%ld\r\n" + "\r\n", + p->http_minor, + now, + filesize); + + data->first = data->last = 0; + + write(data->fd, buf, len); + accesslog(p, 416); +} + +void +print_urlencoded(FILE *stream, char *s) +{ + while (*s) + switch(*s) { + case ';': + case '/': + case '?': + case ':': + case '@': + case '=': + case '&': + case '"': + case '<': + case '>': + case '%': + escape: + fprintf(stream, "%%%02x", (unsigned char)*s++); + break; + default: + if (*s <= 32 || (unsigned char)*s >= 127) + goto escape; + fputc(*s++, stream); + } +} + +void +print_htmlencoded(FILE *stream, char *s) +{ + while (*s) + switch(*s) { + case '&': + case '"': + case '<': + case '>': + fprintf(stream, "&#x%x;", *s++); + 
break; + default: + fputc(*s++, stream); + } +} + +void +send_ok(http_parser *p, time_t modified, const char *mimetype, off_t filesize) +{ + struct conn_data *data = p->data; + char buf[512]; + + char now[64], lastmod[64]; + httpdate(time(0), now); + httpdate(modified, lastmod); + + int len; + + if (data->first == 0 && data->last == filesize) { + len = snprintf(buf, sizeof buf, + "HTTP/1.%d 200 OK\r\n" + "Content-Type: %s\r\n" + "Content-Length: %ld\r\n" + "Last-Modified: %s\r\n" + "Date: %s\r\n" + "\r\n", + p->http_minor, + mimetype, + data->last - data->first, + lastmod, + now); + + write(data->fd, buf, len); + accesslog(p, 200); + } else { + len = snprintf(buf, sizeof buf, + "HTTP/1.%d 206 Partial content\r\n" + "Content-Type: %s\r\n" + "Content-Length: %ld\r\n" + "Last-Modified: %s\r\n" + "Date: %s\r\n" + "Content-Range: bytes %ld-%ld/%ld\r\n" + "\r\n", + p->http_minor, + mimetype, + data->last - data->first, + lastmod, + now, + data->first, data->last - 1, filesize); + + write(data->fd, buf, len); + accesslog(p, 206); + } +} + +char * +mimetype(char *ext) +{ + static char type[16]; + + if (!ext) + return default_mimetype; + + char *x = strstr(mimetypes, ext); + + if (x && x[-1] == ':' && x[strlen(ext)] == '=') { + char *t = type; + for (char *c = x + strlen(ext) + 1; *c && *c != ':'; ) + *t++ = *c++; + *t = 0; + return type; + } + + return default_mimetype; +} + +static int +on_message_complete(http_parser *p) { + struct conn_data *data = p->data; + printf("complete. 
host: %s path: %s\n", data->host, data->path); + + if (data->state == BAD_REQUEST) { + data->state = SENDING; + send_error(p, 400, "Bad Request"); + return 0; + } + + data->state = SENDING; + + if (!(p->method == HTTP_GET || p->method == HTTP_HEAD)) { + send_error(p, 405, "Method Not Allowed"); + return 0; + } + + if (data->path[0] != '/' || strstr(data->path, "/../")) { + send_error(p, 403, "Forbidden"); + return 0; + } + + char name[PATH_MAX]; + + if (tilde && data->path[1] == '~' && data->path[2]) { + char *e = strchr(data->path + 1, '/'); + if (e) + *e = 0; + + struct passwd *pw = getpwnam(data->path + 2); + if (!pw || pw->pw_uid < 1000) { + send_error(p, 404, "Not Found"); + return 0; + } + +// snprintf(name, sizeof name, "%s/tmp/%s", + snprintf(name, sizeof name, "%s/public_html/%s", + pw->pw_dir, e ? e + 1 : ""); + + if (e) + *e = '/'; + } else if (vhost) { + char *host = data->host; + if (!host) { + host = default_vhost; + } else { + char *s = host; + for (; *s && *s != ':' && *s != '/'; s++) + *s = tolower(*s); + *s = 0; + } + if (strstr(host, "..")) { + send_error(p, 403, "Forbidden"); + return 0; + } + + struct stat dst; + snprintf(name, sizeof name, "%s/%s", wwwroot, host); + if (stat(name, &dst) < 0 || !S_ISDIR(dst.st_mode)) + host = default_vhost; + + snprintf(name, sizeof name, "%s/%s%s", + wwwroot, host, data->path); + } else { + snprintf(name, sizeof name, "%s%s", + wwwroot, data->path); + } + + int stream_fd = open(name, O_RDONLY); + + if (stream_fd < 0) { + if (errno == EACCES || errno == EPERM) + send_error(p, 403, "Forbidden"); + else if (errno == ENOENT || errno == ENOTDIR) + send_error(p, 404, "Not Found"); + else { + perror("open"); + send_error(p, 500, "Internal Server Error"); + } + return 0; + } + + struct stat st; + if (fstat(stream_fd, &st) < 0) { + send_error(p, 500, "Internal Server Error"); + return 0; + } + + if (S_ISDIR(st.st_mode)) { + int x; + if (data->path[strlen(data->path)-1] == '/' && + (x = openat(stream_fd, "index.html", 
O_RDONLY)) >= 0) { + close(stream_fd); + stream_fd = x; + if (fstat(stream_fd, &st) < 0) { + send_error(p, 500, "Internal Server Error"); + return 0; + } + goto file; + } + + close(stream_fd); + data->stream_fd = -1; + + if (data->path[strlen(data->path)-1] != '/') { + send_dir_redirect(p); + return 0; + } + + char *buf; + size_t len; + + FILE *stream = open_memstream(&buf, &len); + if (!stream) + return 1; + + + fprintf(stream, "<!doctype html><meta charset=\"utf-8\">" + "<title>Index of "); + print_htmlencoded(stream, data->path); + fprintf(stream, "</title>" + "<h1>Index of "); + print_htmlencoded(stream, data->path); + fprintf(stream, "</h1>\n<ul>\n"); + + struct dirent **namelist; + int n = scandir(name, &namelist, 0, alphasort); + + for (int i = 0; i < n; i++) { + if (namelist[i]->d_name[0] == '.' && + namelist[i]->d_name[1] == 0) + continue; + + fprintf(stream, "<li><a href=\""); + print_urlencoded(stream, namelist[i]->d_name); + fprintf(stream, "%s\">", + namelist[i]->d_type == DT_DIR ? "/" : ""); + print_htmlencoded(stream, namelist[i]->d_name); + fprintf(stream, "%s</a></li>\n", + namelist[i]->d_type == DT_DIR ? 
"/" : ""); + } + + while (n--) + free(namelist[n]); + free(namelist); + + fprintf(stream, "</ul>\n"); + fclose(stream); + + data->buf = buf; + data->first = 0; + data->last = len; + send_ok(p, time(0), "text/html", len); + + return 0; + } + +file: + if (data->ims) { + time_t t = parse_http_date(data->ims); + if (t >= st.st_mtime) { + send_not_modified(p, st.st_mtime); + return 0; + } + } + + data->stream_fd = stream_fd; + + char *ext = strrchr(data->path, '.'); + if (ext && strchr(ext, '/')) + ext = 0; + + if (data->first == -666 && data->last == -666) { + send_rns(p, st.st_size); + return 0; + } + + if (data->first < 0) + data->first = st.st_size + data->first; + if (data->last == -1) + data->last = st.st_size; + + + if (data->first > data->last) { + send_rns(p, st.st_size); + return 0; + } + + if (data->first < 0) + data->first = 0; + if (data->last > st.st_size) + data->last = st.st_size; + + send_ok(p, st.st_mtime, mimetype(ext), st.st_size); + + // XXX send short file directly? + + return 0; +} + +static http_parser_settings settings = { + .on_message_complete = on_message_complete, + .on_header_field = on_header_field, + .on_header_value = on_header_value, + .on_url = on_url, +}; + +#define OPEN_MAX 1024 + +struct pollfd client[OPEN_MAX]; +struct http_parser parsers[OPEN_MAX]; +struct conn_data datas[OPEN_MAX]; + +void +close_connection(int i) +{ + if (client[i].fd >= 0) + close(client[i].fd); + client[i].fd = -1; + + free(datas[i].buf); + free(datas[i].path); + free(datas[i].ims); + free(datas[i].host); + + datas[i] = (struct conn_data){ 0 }; +} + +void +finish_response(int i) +{ + if (datas[i].stream_fd >= 0) + close(datas[i].stream_fd); + datas[i].stream_fd = -1; + + free(datas[i].buf); + free(datas[i].path); + free(datas[i].ims); + free(datas[i].host); + + datas[i].buf = 0; + datas[i].path = 0; + datas[i].ims = 0; + datas[i].host = 0; + + client[i].events = POLLRDNORM; + + // HTTP 1.0 needs to close connection by server + // XXX unless explicit keep-alive 
is set + if (parsers[i].http_major == 1 && parsers[i].http_minor == 0) + close_connection(i); +} + +void +accept_client(int i, int fd) +{ + fcntl(fd, F_SETFL, O_NONBLOCK); + + client[i].fd = fd; + + http_parser_init(&parsers[i], HTTP_REQUEST); + datas[i] = (struct conn_data){ 0 }; + datas[i].fd = fd; + datas[i].stream_fd = -1; + datas[i].last = -1; + datas[i].deadline = time(0) + TIMEOUT; + + parsers[i].data = &datas[i]; + + client[i].events = POLLRDNORM; +} + +void +write_client(int i) +{ + struct conn_data *data = &datas[i]; + int sockfd = client[i].fd; + ssize_t w = 0; + + if (data->stream_fd >= 0) { +#ifndef USE_SENDFILE + char buf[16*4096]; + size_t n = pread(data->stream_fd, buf, sizeof buf, data->off); + if (n < 0) + ; // XXX + else if (n == 0) { + finish_response(i); + } else if (n > 0) { + w = write(sockfd, buf, n); + if (w > 0) + data->off += w; + } +#else + w = sendfile(sockfd, data->stream_fd, + &(data->off), data->last - data->off); + if (w == 0 || data->off == data->last) + finish_response(i); +#endif + } else if (data->buf) { + if (data->off == data->last) { + finish_response(i); + } else { + w = write(sockfd, data->buf, data->last - data->off); + if (w > 0) + data->off += w; + } + } else { + finish_response(i); + w = 0; + } + + if (w < 0) { + if (errno == EPIPE) + close_connection(i); + // XXX other error handling + } +} + +void +read_client(int i) +{ + struct conn_data *data = &datas[i]; + int sockfd = client[i].fd; + ssize_t n; + char buf[1024]; + + if ((n = read(sockfd, buf, sizeof buf)) < 0) { + if (errno == ECONNRESET) { + close_connection(i); + } else if (errno == EAGAIN) { + // try again + } else { + perror("read error"); + close_connection(i); + } + } else if (n == 0) { + close_connection(i); + } else { + http_parser_execute(&parsers[i], &settings, buf, n); + + if (parsers[i].http_errno) { + printf("err=%s\n", + http_errno_name(parsers[i].http_errno)); + close_connection(i); + } else { + // switch to write mode when needed + if (data->state 
== SENDING) { + client[i].events = POLLRDNORM | POLLWRNORM; + data->off = data->first; + + if (parsers[i].method == HTTP_HEAD) + finish_response(i); + } + } + + } +} + +int +main() +{ + int i, maxi, listenfd, sockfd; + int nready; + int r = 0; + + signal(SIGPIPE, SIG_IGN); + + struct sockaddr_in6 cliaddr, servaddr = { 0 }; + + listenfd = socket(AF_INET6, SOCK_STREAM, 0); + if (r < 0) { + perror("socket"); + exit(111); + } + + if (setsockopt(listenfd, SOL_SOCKET, SO_REUSEADDR, + &(int){1}, sizeof (int)) < 0) { + perror("setsockopt(SO_REUSEADDR)"); + exit(111); + } + + servaddr.sin6_family = AF_INET6; + servaddr.sin6_port = htons(8081); + servaddr.sin6_addr = in6addr_any; + + r = bind(listenfd, (struct sockaddr *)&servaddr, sizeof servaddr); + if (r < 0) + perror("bind"); + + errno = 0; + r = listen(listenfd, 32); + if (r < 0) + perror("listen"); + + client[0].fd = listenfd; + client[0].events = POLLRDNORM; + + for (i = 1; i < OPEN_MAX; i++) + client[i].fd = -1; /* -1 indicates available entry */ + + maxi = 0; /* max index into client[] array */ + + while (1) { + nready = poll(client, maxi + 1, maxi ? 
TIMEOUT*1000 : -1); + + time_t now = time(0); + + if (nready == 0) { + // clear timeouted + for (i = 1; i <= maxi; i++) + if (client[i].fd >= 0) + if (now > datas[i].deadline) + close_connection(i); + + // compress + int i = 1, j = maxi; + + while (i <= j) { + while (i <= maxi && client[i].fd >= 0) + i++; + + if (i <= maxi) { + while (j >= 1 && client[i].fd == -1) + j--; + + if (i < j) { + client[i] = client[j]; + datas[i] = datas[j]; + parsers[i] = parsers[j]; + parsers[i].data = &datas[i]; + + client[j].fd = -1; + + j--; + } + } + } + + maxi = j; + } + + if (client[0].revents & POLLRDNORM) { + /* new client connection */ + for (i = 1; i < OPEN_MAX; i++) + if (client[i].fd < 0) { + socklen_t clilen = sizeof cliaddr; + int connfd = accept(listenfd, + (struct sockaddr *)&cliaddr, &clilen); + accept_client(i, connfd); + break; + } + if (i == OPEN_MAX) + printf("too many clients\n"); + if (i > maxi) + maxi = i; /* max index in client[] array */ + if (--nready <= 0) + continue; /* no more readable descriptors */ + } + for (i = 1; i <= maxi; i++) { /* check all clients for data */ + if ((sockfd = client[i].fd) < 0) + continue; + + if (client[i].revents & POLLWRNORM) { + if (datas[i].state != SENDING) { + client[i].events = POLLRDNORM; + continue; + } + + write_client(i); + datas[i].deadline = now + TIMEOUT; + + if (--nready <= 0) + break; /* no more readable descriptors */ + } + else if (client[i].revents & (POLLRDNORM | POLLERR)) { + read_client(i); + datas[i].deadline = now + TIMEOUT; + + if (--nready <= 0) + break; /* no more readable descriptors */ + } + } + } +} diff --git a/http-parser/.gitignore b/http-parser/.gitignore new file mode 100644 index 0000000..c122e76 --- /dev/null +++ b/http-parser/.gitignore @@ -0,0 +1,30 @@ +/out/ +core +tags +*.o +test +test_g +test_fast +bench +url_parser +parsertrace +parsertrace_g +*.mk +*.Makefile +*.so.* +*.exe.* +*.exe +*.a + + +# Visual Studio uglies +*.suo +*.sln +*.vcxproj +*.vcxproj.filters +*.vcxproj.user +*.opensdf 
+*.ncrunchsolution* +*.sdf +*.vsp +*.psess diff --git a/http-parser/.mailmap b/http-parser/.mailmap new file mode 100644 index 0000000..278d141 --- /dev/null +++ b/http-parser/.mailmap @@ -0,0 +1,8 @@ +# update AUTHORS with: +# git log --all --reverse --format='%aN <%aE>' | perl -ne 'BEGIN{print "# Authors ordered by first contribution.\n"} print unless $h{$_}; $h{$_} = 1' > AUTHORS +Ryan Dahl <ry@tinyclouds.org> +Salman Haq <salman.haq@asti-usa.com> +Simon Zimmermann <simonz05@gmail.com> +Thomas LE ROUX <thomas@november-eleven.fr> LE ROUX Thomas <thomas@procheo.fr> +Thomas LE ROUX <thomas@november-eleven.fr> Thomas LE ROUX <thomas@procheo.fr> +Fedor Indutny <fedor@indutny.com> diff --git a/http-parser/.travis.yml b/http-parser/.travis.yml new file mode 100644 index 0000000..4b038e6 --- /dev/null +++ b/http-parser/.travis.yml @@ -0,0 +1,13 @@ +language: c + +compiler: + - clang + - gcc + +script: + - "make" + +notifications: + email: false + irc: + - "irc.freenode.net#node-ci" diff --git a/http-parser/AUTHORS b/http-parser/AUTHORS new file mode 100644 index 0000000..5323b68 --- /dev/null +++ b/http-parser/AUTHORS @@ -0,0 +1,68 @@ +# Authors ordered by first contribution. 
+Ryan Dahl <ry@tinyclouds.org> +Jeremy Hinegardner <jeremy@hinegardner.org> +Sergey Shepelev <temotor@gmail.com> +Joe Damato <ice799@gmail.com> +tomika <tomika_nospam@freemail.hu> +Phoenix Sol <phoenix@burninglabs.com> +Cliff Frey <cliff@meraki.com> +Ewen Cheslack-Postava <ewencp@cs.stanford.edu> +Santiago Gala <sgala@apache.org> +Tim Becker <tim.becker@syngenio.de> +Jeff Terrace <jterrace@gmail.com> +Ben Noordhuis <info@bnoordhuis.nl> +Nathan Rajlich <nathan@tootallnate.net> +Mark Nottingham <mnot@mnot.net> +Aman Gupta <aman@tmm1.net> +Tim Becker <tim.becker@kuriositaet.de> +Sean Cunningham <sean.cunningham@mandiant.com> +Peter Griess <pg@std.in> +Salman Haq <salman.haq@asti-usa.com> +Cliff Frey <clifffrey@gmail.com> +Jon Kolb <jon@b0g.us> +Fouad Mardini <f.mardini@gmail.com> +Paul Querna <pquerna@apache.org> +Felix Geisendörfer <felix@debuggable.com> +koichik <koichik@improvement.jp> +Andre Caron <andre.l.caron@gmail.com> +Ivo Raisr <ivosh@ivosh.net> +James McLaughlin <jamie@lacewing-project.org> +David Gwynne <loki@animata.net> +Thomas LE ROUX <thomas@november-eleven.fr> +Randy Rizun <rrizun@ortivawireless.com> +Andre Louis Caron <andre.louis.caron@usherbrooke.ca> +Simon Zimmermann <simonz05@gmail.com> +Erik Dubbelboer <erik@dubbelboer.com> +Martell Malone <martellmalone@gmail.com> +Bertrand Paquet <bpaquet@octo.com> +BogDan Vatra <bogdan@kde.org> +Peter Faiman <peter@thepicard.org> +Corey Richardson <corey@octayn.net> +Tóth Tamás <tomika_nospam@freemail.hu> +Cam Swords <cam.swords@gmail.com> +Chris Dickinson <christopher.s.dickinson@gmail.com> +Uli Köhler <ukoehler@btronik.de> +Charlie Somerville <charlie@charliesomerville.com> +Patrik Stutz <patrik.stutz@gmail.com> +Fedor Indutny <fedor.indutny@gmail.com> +runner <runner.mei@gmail.com> +Alexis Campailla <alexis@janeasystems.com> +David Wragg <david@wragg.org> +Vinnie Falco <vinnie.falco@gmail.com> +Alex Butum <alexbutum@linux.com> +Rex Feng <rexfeng@gmail.com> +Alex Kocharin <alex@kocharin.ru> +Mark Koopman 
<markmontymark@yahoo.com> +Helge Heß <me@helgehess.eu> +Alexis La Goutte <alexis.lagoutte@gmail.com> +George Miroshnykov <george.miroshnykov@gmail.com> +Maciej Małecki <me@mmalecki.com> +Marc O'Morain <github.com@marcomorain.com> +Jeff Pinner <jpinner@twitter.com> +Timothy J Fontaine <tjfontaine@gmail.com> +Akagi201 <akagi201@gmail.com> +Romain Giraud <giraud.romain@gmail.com> +Jay Satiro <raysatiro@yahoo.com> +Arne Steen <Arne.Steen@gmx.de> +Kjell Schubert <kjell.schubert@gmail.com> +Olivier Mengué <dolmen@cpan.org> diff --git a/http-parser/LICENSE-MIT b/http-parser/LICENSE-MIT new file mode 100644 index 0000000..1ec0ab4 --- /dev/null +++ b/http-parser/LICENSE-MIT @@ -0,0 +1,19 @@ +Copyright Joyent, Inc. and other Node contributors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. diff --git a/http-parser/Makefile b/http-parser/Makefile new file mode 100644 index 0000000..5d21221 --- /dev/null +++ b/http-parser/Makefile @@ -0,0 +1,160 @@ +# Copyright Joyent, Inc. 
and other Node contributors. All rights reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. + +PLATFORM ?= $(shell sh -c 'uname -s | tr "[A-Z]" "[a-z]"') +HELPER ?= +BINEXT ?= +SOLIBNAME = libhttp_parser +SOMAJOR = 2 +SOMINOR = 9 +SOREV = 4 +ifeq (darwin,$(PLATFORM)) +SOEXT ?= dylib +SONAME ?= $(SOLIBNAME).$(SOMAJOR).$(SOMINOR).$(SOEXT) +LIBNAME ?= $(SOLIBNAME).$(SOMAJOR).$(SOMINOR).$(SOREV).$(SOEXT) +else ifeq (wine,$(PLATFORM)) +CC = winegcc +BINEXT = .exe.so +HELPER = wine +else +SOEXT ?= so +SONAME ?= $(SOLIBNAME).$(SOEXT).$(SOMAJOR).$(SOMINOR) +LIBNAME ?= $(SOLIBNAME).$(SOEXT).$(SOMAJOR).$(SOMINOR).$(SOREV) +endif + +CC?=gcc +AR?=ar + +CPPFLAGS ?= +LDFLAGS ?= + +CPPFLAGS += -I. 
+CPPFLAGS_DEBUG = $(CPPFLAGS) -DHTTP_PARSER_STRICT=1 +CPPFLAGS_DEBUG += $(CPPFLAGS_DEBUG_EXTRA) +CPPFLAGS_FAST = $(CPPFLAGS) -DHTTP_PARSER_STRICT=0 +CPPFLAGS_FAST += $(CPPFLAGS_FAST_EXTRA) +CPPFLAGS_BENCH = $(CPPFLAGS_FAST) + +CFLAGS += -Wall -Wextra -Werror +CFLAGS_DEBUG = $(CFLAGS) -O0 -g $(CFLAGS_DEBUG_EXTRA) +CFLAGS_FAST = $(CFLAGS) -O3 $(CFLAGS_FAST_EXTRA) +CFLAGS_BENCH = $(CFLAGS_FAST) -Wno-unused-parameter +CFLAGS_LIB = $(CFLAGS_FAST) -fPIC + +LDFLAGS_LIB = $(LDFLAGS) -shared + +INSTALL ?= install +PREFIX ?= /usr/local +LIBDIR = $(PREFIX)/lib +INCLUDEDIR = $(PREFIX)/include + +ifeq (darwin,$(PLATFORM)) +LDFLAGS_LIB += -Wl,-install_name,$(LIBDIR)/$(SONAME) +else +# TODO(bnoordhuis) The native SunOS linker expects -h rather than -soname... +LDFLAGS_LIB += -Wl,-soname=$(SONAME) +endif + +test: test_g test_fast + $(HELPER) ./test_g$(BINEXT) + $(HELPER) ./test_fast$(BINEXT) + +test_g: http_parser_g.o test_g.o + $(CC) $(CFLAGS_DEBUG) $(LDFLAGS) http_parser_g.o test_g.o -o $@ + +test_g.o: test.c http_parser.h Makefile + $(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) -c test.c -o $@ + +http_parser_g.o: http_parser.c http_parser.h Makefile + $(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) -c http_parser.c -o $@ + +test_fast: http_parser.o test.o http_parser.h + $(CC) $(CFLAGS_FAST) $(LDFLAGS) http_parser.o test.o -o $@ + +test.o: test.c http_parser.h Makefile + $(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) -c test.c -o $@ + +bench: http_parser.o bench.o + $(CC) $(CFLAGS_BENCH) $(LDFLAGS) http_parser.o bench.o -o $@ + +bench.o: bench.c http_parser.h Makefile + $(CC) $(CPPFLAGS_BENCH) $(CFLAGS_BENCH) -c bench.c -o $@ + +http_parser.o: http_parser.c http_parser.h Makefile + $(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) -c http_parser.c + +test-run-timed: test_fast + while(true) do time $(HELPER) ./test_fast$(BINEXT) > /dev/null; done + +test-valgrind: test_g + valgrind ./test_g + +libhttp_parser.o: http_parser.c http_parser.h Makefile + $(CC) $(CPPFLAGS_FAST) $(CFLAGS_LIB) -c http_parser.c -o 
libhttp_parser.o + +library: libhttp_parser.o + $(CC) $(LDFLAGS_LIB) -o $(LIBNAME) $< + +package: http_parser.o + $(AR) rcs libhttp_parser.a http_parser.o + +url_parser: http_parser.o contrib/url_parser.c + $(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) $^ -o $@ + +url_parser_g: http_parser_g.o contrib/url_parser.c + $(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) $^ -o $@ + +parsertrace: http_parser.o contrib/parsertrace.c + $(CC) $(CPPFLAGS_FAST) $(CFLAGS_FAST) $^ -o parsertrace$(BINEXT) + +parsertrace_g: http_parser_g.o contrib/parsertrace.c + $(CC) $(CPPFLAGS_DEBUG) $(CFLAGS_DEBUG) $^ -o parsertrace_g$(BINEXT) + +tags: http_parser.c http_parser.h test.c + ctags $^ + +install: library + $(INSTALL) -D http_parser.h $(DESTDIR)$(INCLUDEDIR)/http_parser.h + $(INSTALL) -D $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(LIBNAME) + ln -sf $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(SONAME) + ln -sf $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(SOLIBNAME).$(SOEXT) + +install-strip: library + $(INSTALL) -D http_parser.h $(DESTDIR)$(INCLUDEDIR)/http_parser.h + $(INSTALL) -D -s $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(LIBNAME) + ln -sf $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(SONAME) + ln -sf $(LIBNAME) $(DESTDIR)$(LIBDIR)/$(SOLIBNAME).$(SOEXT) + +uninstall: + rm $(DESTDIR)$(INCLUDEDIR)/http_parser.h + rm $(DESTDIR)$(LIBDIR)/$(SOLIBNAME).$(SOEXT) + rm $(DESTDIR)$(LIBDIR)/$(SONAME) + rm $(DESTDIR)$(LIBDIR)/$(LIBNAME) + +clean: + rm -f *.o *.a tags test test_fast test_g \ + http_parser.tar libhttp_parser.so.* \ + url_parser url_parser_g parsertrace parsertrace_g \ + *.exe *.exe.so + +contrib/url_parser.c: http_parser.h +contrib/parsertrace.c: http_parser.h + +.PHONY: clean package test-run test-run-timed test-valgrind install install-strip uninstall diff --git a/http-parser/README.md b/http-parser/README.md new file mode 100644 index 0000000..b265d71 --- /dev/null +++ b/http-parser/README.md @@ -0,0 +1,246 @@ +HTTP Parser +=========== + +[![Build 
Status](https://api.travis-ci.org/nodejs/http-parser.svg?branch=master)](https://travis-ci.org/nodejs/http-parser) + +This is a parser for HTTP messages written in C. It parses both requests and +responses. The parser is designed to be used in high-performance HTTP +applications. It makes no syscalls or allocations, does not +buffer data, and can be interrupted at any time. Depending on your +architecture, it only requires about 40 bytes of data per message +stream (in a web server that is per connection). + +Features: + + * No dependencies + * Handles persistent streams (keep-alive). + * Decodes chunked encoding. + * Upgrade support + * Defends against buffer overflow attacks. + +The parser extracts the following information from HTTP messages: + + * Header fields and values + * Content-Length + * Request method + * Response status code + * Transfer-Encoding + * HTTP version + * Request URL + * Message body + + +Usage +----- + +One `http_parser` object is used per TCP connection. Initialize the struct +using `http_parser_init()` and set the callbacks. That might look something +like this for a request parser: +```c +http_parser_settings settings; +settings.on_url = my_url_callback; +settings.on_header_field = my_header_field_callback; +/* ... */ + +http_parser *parser = malloc(sizeof(http_parser)); +http_parser_init(parser, HTTP_REQUEST); +parser->data = my_socket; +``` + +When data is received on the socket, execute the parser and check for errors. + +```c +size_t len = 80*1024, nparsed; +char buf[len]; +ssize_t recved; + +recved = recv(fd, buf, len, 0); + +if (recved < 0) { + /* Handle error. */ +} + +/* Start up / continue the parser. + * Note we pass recved==0 to signal that EOF has been received. + */ +nparsed = http_parser_execute(parser, &settings, buf, recved); + +if (parser->upgrade) { + /* handle new protocol */ +} else if (nparsed != recved) { + /* Handle error. Usually just close the connection. 
*/ +} +``` + +`http_parser` needs to know where the end of the stream is. For example, sometimes +servers send responses without Content-Length and expect the client to +consume input (for the body) until EOF. To tell `http_parser` about EOF, give +`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors +can still be encountered during an EOF, so one must still be prepared +to receive them. + +Scalar-valued message information such as `status_code`, `method`, and the +HTTP version is stored in the parser structure. This data is only +temporarily stored in `http_parser` and gets reset on each new message. If +this information is needed later, copy it out of the structure during the +`headers_complete` callback. + +The parser decodes the transfer-encoding for both requests and responses +transparently. That is, a chunked encoding is decoded before being sent to +the on_body callback. + + +The Special Problem of Upgrade +------------------------------ + +`http_parser` supports upgrading the connection to a different protocol. An +increasingly common example of this is the WebSocket protocol, which sends +a request like + + GET /demo HTTP/1.1 + Upgrade: WebSocket + Connection: Upgrade + Host: example.com + Origin: http://example.com + WebSocket-Protocol: sample + +followed by non-HTTP data. + +(See [RFC6455](https://tools.ietf.org/html/rfc6455) for more information on the +WebSocket protocol.) + +To support this, the parser will treat this as a normal HTTP message without a +body, issuing both on_headers_complete and on_message_complete callbacks. However, +`http_parser_execute()` will stop parsing at the end of the headers and return. + +The user is expected to check if `parser->upgrade` has been set to 1 after +`http_parser_execute()` returns. Non-HTTP data begins in the supplied buffer at the +offset given by the return value of `http_parser_execute()`. 
+ + +Callbacks +--------- + +During the `http_parser_execute()` call, the callbacks set in +`http_parser_settings` will be executed. The parser maintains state and +never looks behind, so buffering the data is not necessary. If you need to +save certain data for later usage, you can do that from the callbacks. + +There are two types of callbacks: + +* notification `typedef int (*http_cb) (http_parser*);` + Callbacks: on_message_begin, on_headers_complete, on_message_complete. +* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);` + Callbacks: (requests only) on_url, + (common) on_header_field, on_header_value, on_body. + +Callbacks must return 0 on success. Returning a non-zero value indicates +an error to the parser, making it exit immediately. + +For cases where it is necessary to pass local information to/from a callback, +the `http_parser` object's `data` field can be used. +An example of such a case is when using threads to handle a socket connection, +parse a request, and then give a response over that socket. By instantiating +a thread-local struct containing relevant data (e.g. accepted socket, +allocated memory for callbacks to write into, etc.), a parser's callbacks are +able to communicate data between the scope of the thread and the scope of the +callback in a thread-safe manner. This allows `http_parser` to be used in +multi-threaded contexts. + +Example: +```c + typedef struct { + socket_t sock; + void* buffer; + int buf_len; + } custom_data_t; + + +int my_url_callback(http_parser* parser, const char *at, size_t length) { + /* access to thread local custom_data_t struct. + Use this access to save parsed data for later use into thread local + buffer, or communicate over socket + */ + parser->data; + ... + return 0; +} + +... + +void http_parser_thread(socket_t sock) { + int nparsed = 0; + /* allocate memory for user data */ + custom_data_t *my_data = malloc(sizeof(custom_data_t)); + + /* some information for use by callbacks. 
+ * achieves thread -> callback information flow */ + my_data->sock = sock; + + /* instantiate a thread-local parser */ + http_parser *parser = malloc(sizeof(http_parser)); + http_parser_init(parser, HTTP_REQUEST); /* initialise parser */ + /* this custom data reference is accessible through the reference to the + parser supplied to callback functions */ + parser->data = my_data; + + http_parser_settings settings; /* set up callbacks */ + settings.on_url = my_url_callback; + + /* execute parser */ + nparsed = http_parser_execute(parser, &settings, buf, recved); + + ... + /* parsed information copied from callback. + can now perform action on data copied into thread-local memory from callbacks. + achieves callback -> thread information flow */ + my_data->buffer; + ... +} + +``` + +If you parse an HTTP message in chunks (i.e. `read()` request line +from socket, parse, read half headers, parse, etc.), your data callbacks +may be called more than once. `http_parser` guarantees only that the data pointer is +valid for the lifetime of the callback. You can also `read()` into a heap-allocated +buffer to avoid copying memory around if this fits your application. + +Reading headers may be a tricky task if you read/parse headers partially. +Basically, you need to remember whether the last header callback was a field or a value +and apply the following logic: + + (on_header_field and on_header_value shortened to on_h_*) + ------------------------ ------------ -------------------------------------------- + | State (prev. callback) | Callback | Description/action | + ------------------------ ------------ -------------------------------------------- + | nothing (first call) | on_h_field | Allocate new buffer and copy callback data | + | | | into it | + ------------------------ ------------ -------------------------------------------- + | value | on_h_field | New header started. 
| + | | | Copy current name,value buffers to headers | + | | | list and allocate new buffer for new name | + ------------------------ ------------ -------------------------------------------- + | field | on_h_field | Previous name continues. Reallocate name | + | | | buffer and append callback data to it | + ------------------------ ------------ -------------------------------------------- + | field | on_h_value | Value for current header started. Allocate | + | | | new buffer and copy callback data to it | + ------------------------ ------------ -------------------------------------------- + | value | on_h_value | Value continues. Reallocate value buffer | + | | | and append callback data to it | + ------------------------ ------------ -------------------------------------------- + + +Parsing URLs +------------ + +A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`. +Users of this library may wish to use it to parse URLs constructed from +consecutive `on_url` callbacks. + +See examples of reading in headers: + +* [partial example](http://gist.github.com/155877) in C +* [from http-parser tests](http://github.com/joyent/http-parser/blob/37a0ff8/test.c#L403) in C +* [from Node library](http://github.com/joyent/node/blob/842eaf4/src/http.js#L284) in Javascript diff --git a/http-parser/bench.c b/http-parser/bench.c new file mode 100644 index 0000000..678f555 --- /dev/null +++ b/http-parser/bench.c @@ -0,0 +1,128 @@ +/* Copyright Fedor Indutny. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ +#include "http_parser.h" +#include <assert.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <sys/time.h> + +/* 8 gb */ +static const int64_t kBytes = 8LL << 30; + +static const char data[] = + "POST /joyent/http-parser HTTP/1.1\r\n" + "Host: github.com\r\n" + "DNT: 1\r\n" + "Accept-Encoding: gzip, deflate, sdch\r\n" + "Accept-Language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4\r\n" + "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/39.0.2171.65 Safari/537.36\r\n" + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9," + "image/webp,*/*;q=0.8\r\n" + "Referer: https://github.com/joyent/http-parser\r\n" + "Connection: keep-alive\r\n" + "Transfer-Encoding: chunked\r\n" + "Cache-Control: max-age=0\r\n\r\nb\r\nhello world\r\n0\r\n"; +static const size_t data_len = sizeof(data) - 1; + +static int on_info(http_parser* p) { + return 0; +} + + +static int on_data(http_parser* p, const char *at, size_t length) { + return 0; +} + +static http_parser_settings settings = { + .on_message_begin = on_info, + .on_headers_complete = on_info, + .on_message_complete = on_info, + .on_header_field = on_data, + .on_header_value = on_data, + .on_url = on_data, + .on_status = on_data, + .on_body = on_data +}; + +int bench(int iter_count, int silent) { + struct http_parser parser; + int i; + int err; + struct timeval start; + struct timeval end; + + if (!silent) { + err = gettimeofday(&start, NULL); + assert(err == 0); + } + + fprintf(stderr, "req_len=%d\n", (int) data_len); + for (i = 0; i < iter_count; i++) { + size_t parsed; + http_parser_init(&parser, HTTP_REQUEST); + + parsed = http_parser_execute(&parser, &settings, data, data_len); + assert(parsed == data_len); + } + + if (!silent) { + double elapsed; + double bw; + double total; + + err = gettimeofday(&end, NULL); + assert(err == 0); + + fprintf(stdout, "Benchmark result:\n"); + + elapsed = (double) (end.tv_sec - start.tv_sec) + + 
(end.tv_usec - start.tv_usec) * 1e-6f; + + total = (double) iter_count * data_len; + bw = (double) total / elapsed; + + fprintf(stdout, "%.2f mb | %.2f mb/s | %.2f req/sec | %.2f s\n", + (double) total / (1024 * 1024), + bw / (1024 * 1024), + (double) iter_count / elapsed, + elapsed); + + fflush(stdout); + } + + return 0; +} + +int main(int argc, char** argv) { + int64_t iterations; + + iterations = kBytes / (int64_t) data_len; + if (argc == 2 && strcmp(argv[1], "infinite") == 0) { + for (;;) + bench(iterations, 1); + return 0; + } else { + return bench(iterations, 0); + } +} diff --git a/http-parser/contrib/parsertrace.c b/http-parser/contrib/parsertrace.c new file mode 100644 index 0000000..3daa7f4 --- /dev/null +++ b/http-parser/contrib/parsertrace.c @@ -0,0 +1,157 @@ +/* Copyright Joyent, Inc. and other Node contributors. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +/* Dump what the parser finds to stdout as it happen */ + +#include "http_parser.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +int on_message_begin(http_parser* _) { + (void)_; + printf("\n***MESSAGE BEGIN***\n\n"); + return 0; +} + +int on_headers_complete(http_parser* _) { + (void)_; + printf("\n***HEADERS COMPLETE***\n\n"); + return 0; +} + +int on_message_complete(http_parser* _) { + (void)_; + printf("\n***MESSAGE COMPLETE***\n\n"); + return 0; +} + +int on_url(http_parser* _, const char* at, size_t length) { + (void)_; + printf("Url: %.*s\n", (int)length, at); + return 0; +} + +int on_header_field(http_parser* _, const char* at, size_t length) { + (void)_; + printf("Header field: %.*s\n", (int)length, at); + return 0; +} + +int on_header_value(http_parser* _, const char* at, size_t length) { + (void)_; + printf("Header value: %.*s\n", (int)length, at); + return 0; +} + +int on_body(http_parser* _, const char* at, size_t length) { + (void)_; + printf("Body: %.*s\n", (int)length, at); + return 0; +} + +void usage(const char* name) { + fprintf(stderr, + "Usage: %s $type $filename\n" + " type: -x, where x is one of {r,b,q}\n" + " parses file as a Response, reQuest, or Both\n", + name); + exit(EXIT_FAILURE); +} + +int main(int argc, char* argv[]) { + enum http_parser_type file_type; + + if (argc != 3) { + usage(argv[0]); + } + + char* type = argv[1]; + if (type[0] != '-') { + usage(argv[0]); + } + + switch (type[1]) { + /* in the case of "-", type[1] will be NUL */ + case 'r': + file_type = HTTP_RESPONSE; + break; + case 'q': + file_type = HTTP_REQUEST; + break; + case 'b': + file_type = HTTP_BOTH; + break; + default: + usage(argv[0]); + } + + char* filename = argv[2]; + FILE* file = fopen(filename, "r"); + if (file == NULL) { + perror("fopen"); + goto fail; + } + + fseek(file, 0, SEEK_END); + long file_length = ftell(file); + if (file_length == -1) { + perror("ftell"); + goto fail; + } + fseek(file, 0, SEEK_SET); + + char* data = 
malloc(file_length); + if (fread(data, 1, file_length, file) != (size_t)file_length) { + fprintf(stderr, "couldn't read entire file\n"); + free(data); + goto fail; + } + + http_parser_settings settings; + memset(&settings, 0, sizeof(settings)); + settings.on_message_begin = on_message_begin; + settings.on_url = on_url; + settings.on_header_field = on_header_field; + settings.on_header_value = on_header_value; + settings.on_headers_complete = on_headers_complete; + settings.on_body = on_body; + settings.on_message_complete = on_message_complete; + + http_parser parser; + http_parser_init(&parser, file_type); + size_t nparsed = http_parser_execute(&parser, &settings, data, file_length); + free(data); + + if (nparsed != (size_t)file_length) { + fprintf(stderr, + "Error: %s (%s)\n", + http_errno_description(HTTP_PARSER_ERRNO(&parser)), + http_errno_name(HTTP_PARSER_ERRNO(&parser))); + goto fail; + } + + return EXIT_SUCCESS; + +fail: + fclose(file); + return EXIT_FAILURE; +} diff --git a/http-parser/contrib/url_parser.c b/http-parser/contrib/url_parser.c new file mode 100644 index 0000000..f235bed --- /dev/null +++ b/http-parser/contrib/url_parser.c @@ -0,0 +1,47 @@ +#include "http_parser.h" +#include <stdio.h> +#include <string.h> + +void +dump_url (const char *url, const struct http_parser_url *u) +{ + unsigned int i; + + printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port); + for (i = 0; i < UF_MAX; i++) { + if ((u->field_set & (1 << i)) == 0) { + printf("\tfield_data[%u]: unset\n", i); + continue; + } + + printf("\tfield_data[%u]: off: %u, len: %u, part: %.*s\n", + i, + u->field_data[i].off, + u->field_data[i].len, + u->field_data[i].len, + url + u->field_data[i].off); + } +} + +int main(int argc, char ** argv) { + struct http_parser_url u; + int len, connect, result; + + if (argc != 3) { + printf("Syntax : %s connect|get url\n", argv[0]); + return 1; + } + len = strlen(argv[2]); + connect = strcmp("connect", argv[1]) == 0 ? 
1 : 0; + printf("Parsing %s, connect %d\n", argv[2], connect); + + http_parser_url_init(&u); + result = http_parser_parse_url(argv[2], len, connect, &u); + if (result != 0) { + printf("Parse error : %d\n", result); + return result; + } + printf("Parse ok, result : \n"); + dump_url(argv[2], &u); + return 0; +} diff --git a/http-parser/fuzzers/fuzz_parser.c b/http-parser/fuzzers/fuzz_parser.c new file mode 100644 index 0000000..1a8442c --- /dev/null +++ b/http-parser/fuzzers/fuzz_parser.c @@ -0,0 +1,26 @@ +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include "http_parser.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + static const http_parser_settings settings_null = { + .on_message_begin = 0 + , .on_header_field = 0 + ,.on_header_value = 0 + ,.on_url = 0 + ,.on_status = 0 + ,.on_body = 0 + ,.on_headers_complete = 0 + ,.on_message_complete = 0 + ,.on_chunk_header = 0 + ,.on_chunk_complete = 0 + }; + + http_parser parser; + http_parser_init(&parser, HTTP_BOTH); + http_parser_execute(&parser, &settings_null, (char*)data, size); + + return 0; +} diff --git a/http-parser/fuzzers/fuzz_url.c b/http-parser/fuzzers/fuzz_url.c new file mode 100644 index 0000000..eca11a2 --- /dev/null +++ b/http-parser/fuzzers/fuzz_url.c @@ -0,0 +1,14 @@ +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include "http_parser.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + struct http_parser_url u; + http_parser_url_init(&u); + http_parser_parse_url((char*)data, size, 0, &u); + http_parser_parse_url((char*)data, size, 1, &u); + + return 0; +} diff --git a/http-parser/http_parser.c b/http-parser/http_parser.c new file mode 100644 index 0000000..95ff42f --- /dev/null +++ b/http-parser/http_parser.c @@ -0,0 +1,2568 @@ +/* Copyright Joyent, Inc. and other Node contributors. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "http_parser.h" +#include <assert.h> +#include <stddef.h> +#include <ctype.h> +#include <string.h> +#include <limits.h> + +static uint32_t max_header_size = HTTP_MAX_HEADER_SIZE; + +#ifndef ULLONG_MAX +# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */ +#endif + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) +#endif + +#ifndef BIT_AT +# define BIT_AT(a, i) \ + (!!((unsigned int) (a)[(unsigned int) (i) >> 3] & \ + (1 << ((unsigned int) (i) & 7)))) +#endif + +#ifndef ELEM_AT +# define ELEM_AT(a, i, v) ((unsigned int) (i) < ARRAY_SIZE(a) ? 
(a)[(i)] : (v)) +#endif + +#define SET_ERRNO(e) \ +do { \ + parser->nread = nread; \ + parser->http_errno = (e); \ +} while(0) + +#define CURRENT_STATE() p_state +#define UPDATE_STATE(V) p_state = (enum state) (V); +#define RETURN(V) \ +do { \ + parser->nread = nread; \ + parser->state = CURRENT_STATE(); \ + return (V); \ +} while (0); +#define REEXECUTE() \ + goto reexecute; \ + + +#ifdef __GNUC__ +# define LIKELY(X) __builtin_expect(!!(X), 1) +# define UNLIKELY(X) __builtin_expect(!!(X), 0) +#else +# define LIKELY(X) (X) +# define UNLIKELY(X) (X) +#endif + + +/* Run the notify callback FOR, returning ER if it fails */ +#define CALLBACK_NOTIFY_(FOR, ER) \ +do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ + if (LIKELY(settings->on_##FOR)) { \ + parser->state = CURRENT_STATE(); \ + if (UNLIKELY(0 != settings->on_##FOR(parser))) { \ + SET_ERRNO(HPE_CB_##FOR); \ + } \ + UPDATE_STATE(parser->state); \ + \ + /* We either errored above or got paused; get out */ \ + if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ + return (ER); \ + } \ + } \ +} while (0) + +/* Run the notify callback FOR and consume the current byte */ +#define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) + +/* Run the notify callback FOR and don't consume the current byte */ +#define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) + +/* Run data callback FOR with LEN bytes, returning ER if it fails */ +#define CALLBACK_DATA_(FOR, LEN, ER) \ +do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ + if (FOR##_mark) { \ + if (LIKELY(settings->on_##FOR)) { \ + parser->state = CURRENT_STATE(); \ + if (UNLIKELY(0 != \ + settings->on_##FOR(parser, FOR##_mark, (LEN)))) { \ + SET_ERRNO(HPE_CB_##FOR); \ + } \ + UPDATE_STATE(parser->state); \ + \ + /* We either errored above or got paused; get out */ \ + if (UNLIKELY(HTTP_PARSER_ERRNO(parser) != HPE_OK)) { \ + return (ER); \ + } \ + } \ + FOR##_mark = NULL; \ + } \ +} while (0) + +/* Run the data callback FOR and 
consume the current byte */ +#define CALLBACK_DATA(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) + +/* Run the data callback FOR and don't consume the current byte */ +#define CALLBACK_DATA_NOADVANCE(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) + +/* Set the mark FOR; non-destructive if mark is already set */ +#define MARK(FOR) \ +do { \ + if (!FOR##_mark) { \ + FOR##_mark = p; \ + } \ +} while (0) + +/* Don't allow the total size of the HTTP headers (including the status + * line) to exceed max_header_size. This check is here to protect + * embedders against denial-of-service attacks where the attacker feeds + * us a never-ending header that the embedder keeps buffering. + * + * This check is arguably the responsibility of embedders but we're doing + * it on the embedder's behalf because most won't bother and this way we + * make the web a little safer. max_header_size is still far bigger + * than any reasonable request or response so this should never affect + * day-to-day operation. + */ +#define COUNT_HEADER_SIZE(V) \ +do { \ + nread += (uint32_t)(V); \ + if (UNLIKELY(nread > max_header_size)) { \ + SET_ERRNO(HPE_HEADER_OVERFLOW); \ + goto error; \ + } \ +} while (0) + + +#define PROXY_CONNECTION "proxy-connection" +#define CONNECTION "connection" +#define CONTENT_LENGTH "content-length" +#define TRANSFER_ENCODING "transfer-encoding" +#define UPGRADE "upgrade" +#define CHUNKED "chunked" +#define KEEP_ALIVE "keep-alive" +#define CLOSE "close" + + +static const char *method_strings[] = + { +#define XX(num, name, string) #string, + HTTP_METHOD_MAP(XX) +#undef XX + }; + + +/* Tokens as defined by rfc 2616. Also lowercases them. + * token = 1*<any CHAR except CTLs or separators> + * separators = "(" | ")" | "<" | ">" | "@" + * | "," | ";" | ":" | "\" | <"> + * | "/" | "[" | "]" | "?" 
| "=" + * | "{" | "}" | SP | HT + */ +static const char tokens[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + ' ', '!', 0, '#', '$', '%', '&', '\'', +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 0, 0, '*', '+', 0, '-', '.', 0, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + '0', '1', '2', '3', '4', '5', '6', '7', +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ + '8', '9', 0, 0, 0, 0, 0, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 'x', 'y', 'z', 0, 0, 0, '^', '_', +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 'x', 'y', 'z', 0, '|', 0, '~', 0 }; + + +static const int8_t unhex[256] = + {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + }; + + +#if HTTP_PARSER_STRICT +# define T(v) 0 +#else +# define T(v) v +#endif 
+ + +static const uint8_t normal_url_char[32] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0 | T(2) | 0 | 0 | T(16) | 0 | 0 | 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0 | 2 | 4 | 0 | 16 | 32 | 64 | 128, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 128, +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 1 | 2 | 4 | 8 | 16 | 32 | 64 | 0, }; + +#undef T + +enum state + { s_dead = 1 /* important that this is > 0 */ + + , s_start_req_or_res + , s_res_or_resp_H + , s_start_res + , s_res_H + , s_res_HT + , s_res_HTT + , s_res_HTTP + , s_res_http_major + , s_res_http_dot + , s_res_http_minor + , s_res_http_end + , s_res_first_status_code + , s_res_status_code + , s_res_status_start + , s_res_status + , s_res_line_almost_done + + , s_start_req + + , s_req_method + , s_req_spaces_before_url + , s_req_schema + , s_req_schema_slash + , s_req_schema_slash_slash + , 
s_req_server_start + , s_req_server + , s_req_server_with_at + , s_req_path + , s_req_query_string_start + , s_req_query_string + , s_req_fragment_start + , s_req_fragment + , s_req_http_start + , s_req_http_H + , s_req_http_HT + , s_req_http_HTT + , s_req_http_HTTP + , s_req_http_I + , s_req_http_IC + , s_req_http_major + , s_req_http_dot + , s_req_http_minor + , s_req_http_end + , s_req_line_almost_done + + , s_header_field_start + , s_header_field + , s_header_value_discard_ws + , s_header_value_discard_ws_almost_done + , s_header_value_discard_lws + , s_header_value_start + , s_header_value + , s_header_value_lws + + , s_header_almost_done + + , s_chunk_size_start + , s_chunk_size + , s_chunk_parameters + , s_chunk_size_almost_done + + , s_headers_almost_done + , s_headers_done + + /* Important: 's_headers_done' must be the last 'header' state. All + * states beyond this must be 'body' states. It is used for overflow + * checking. See the PARSING_HEADER() macro. + */ + + , s_chunk_data + , s_chunk_data_almost_done + , s_chunk_data_done + + , s_body_identity + , s_body_identity_eof + + , s_message_done + }; + + /* PARSING_HEADER(state) is true while the state has not moved past + * s_headers_done; the parse loop in http_parser_execute() uses it to decide + * whether the current byte is charged via COUNT_HEADER_SIZE(1). + */ +#define PARSING_HEADER(state) (state <= s_headers_done) + + /* Sub-state kept in parser->header_state while a header field name or value + * is scanned. It tracks progress through the header names and values that + * http_parser_execute() treats specially: Connection, Proxy-Connection, + * Content-Length, Transfer-Encoding and Upgrade. Everything else collapses + * to h_general. + */ +enum header_states + { h_general = 0 + , h_C + , h_CO + , h_CON + + , h_matching_connection + , h_matching_proxy_connection + , h_matching_content_length + , h_matching_transfer_encoding + , h_matching_upgrade + + , h_connection + , h_content_length + , h_content_length_num + , h_content_length_ws + , h_transfer_encoding + , h_upgrade + + , h_matching_transfer_encoding_token_start + , h_matching_transfer_encoding_chunked + , h_matching_transfer_encoding_token + + , h_matching_connection_token_start + , h_matching_connection_keep_alive + , h_matching_connection_close + , h_matching_connection_upgrade + , h_matching_connection_token + + , h_transfer_encoding_chunked + , h_connection_keep_alive + , h_connection_close + , h_connection_upgrade + }; + /* NOTE(review): presumably the states of a host/authority sub-parser + * (userinfo, host, bracketed v6 host, zone, port); the code that consumes + * them is outside this chunk, so confirm against it. + */ +enum http_host_state + { + s_http_host_dead = 1 
+ , s_http_userinfo_start + , s_http_userinfo + , s_http_host_start + , s_http_host_v6_start + , s_http_host + , s_http_host_v6 + , s_http_host_v6_end + , s_http_host_v6_zone_start + , s_http_host_v6_zone + , s_http_host_port_start + , s_http_host_port +}; + +/* Macros for character classes. TOKEN(), IS_URL_CHAR() and IS_HOST_CHAR() + * have separate strict and non-strict definitions selected by + * HTTP_PARSER_STRICT. LOWER() only ORs in bit 0x20, so it yields a real + * lowercase letter for ASCII letters only; IS_ALPHA() and IS_HEX() rely on + * range checks of that result. + */ +#define CR '\r' +#define LF '\n' +#define LOWER(c) (unsigned char)(c | 0x20) +#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') +#define IS_NUM(c) ((c) >= '0' && (c) <= '9') +#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) +#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) +#define IS_MARK(c) ((c) == '-' || (c) == '_' || (c) == '.' || \ + (c) == '!' || (c) == '~' || (c) == '*' || (c) == '\'' || (c) == '(' || \ + (c) == ')') +#define IS_USERINFO_CHAR(c) (IS_ALPHANUM(c) || IS_MARK(c) || (c) == '%' || \ + (c) == ';' || (c) == ':' || (c) == '&' || (c) == '=' || (c) == '+' || \ + (c) == '$' || (c) == ',') + +#define STRICT_TOKEN(c) ((c == ' ') ? 0 : tokens[(unsigned char)c]) + +#if HTTP_PARSER_STRICT +#define TOKEN(c) STRICT_TOKEN(c) +#define IS_URL_CHAR(c) (BIT_AT(normal_url_char, (unsigned char)c)) +#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') +#else +#define TOKEN(c) tokens[(unsigned char)c] +#define IS_URL_CHAR(c) \ + (BIT_AT(normal_url_char, (unsigned char)c) || ((c) & 0x80)) +#define IS_HOST_CHAR(c) \ + (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') +#endif + +/** + * Verify that a char is acceptable inside a header value: CR, LF, HTAB (9), + * or any byte above 31 other than DEL (127), i.e. space, visible + * (printable) US-ASCII, or %x80-FF + **/ +#define IS_HEADER_CHAR(ch) \ + (ch == CR || ch == LF || ch == 9 || ((unsigned char)ch > 31 && ch != 127)) + +#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) + + /* In strict builds STRICT_CHECK(cond) records HPE_STRICT and jumps to the + * error label when cond is true, and NEW_MESSAGE() yields s_dead unless + * http_should_keep_alive(parser) holds. In non-strict builds STRICT_CHECK() + * expands to nothing and NEW_MESSAGE() is always start_state. + */ +#if HTTP_PARSER_STRICT +# define STRICT_CHECK(cond) \ +do { \ + if (cond) { \ + SET_ERRNO(HPE_STRICT); \ + goto error; \ + } \ +} while (0) +# define NEW_MESSAGE() (http_should_keep_alive(parser) ? 
start_state : s_dead) +#else +# define STRICT_CHECK(cond) +# define NEW_MESSAGE() start_state +#endif + + +/* Map errno values to strings for human-readable output: one + * { name, description } entry per HTTP_ERRNO_MAP item, where the name is the + * stringified item with an HPE_ prefix. + */ +#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s }, +static struct { + const char *name; + const char *description; +} http_strerror_tab[] = { + HTTP_ERRNO_MAP(HTTP_STRERROR_GEN) +}; +#undef HTTP_STRERROR_GEN + +int http_message_needs_eof(const http_parser *parser); + +/* Our URL parser. + * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions into URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. + * + * s is the current URL state and ch the next input byte. The return value + * is the state to continue in, or s_dead when ch is not acceptable in state + * s. Space, CR and LF (plus TAB and FF when HTTP_PARSER_STRICT is set) + * always yield s_dead, as does any state the switch below does not handle. + */ +static enum state +parse_url_char(enum state s, const char ch) +{ + if (ch == ' ' || ch == '\r' || ch == '\n') { + return s_dead; + } + +#if HTTP_PARSER_STRICT + if (ch == '\t' || ch == '\f') { + return s_dead; + } +#endif + + switch (s) { + case s_req_spaces_before_url: + /* Proxied requests are followed by scheme of an absolute URI (alpha). + * All methods except CONNECT are followed by '/' or '*'. 
+ */ + + if (ch == '/' || ch == '*') { + return s_req_path; + } + + if (IS_ALPHA(ch)) { + return s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHA(ch)) { + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_server_start; + } + + break; + + case s_req_server_with_at: /* an '@' directly after another '@' is rejected */ + if (ch == '@') { + return s_dead; + } + + /* fall through */ + case s_req_server_start: + case s_req_server: + if (ch == '/') { + return s_req_path; + } + + if (ch == '?') { + return s_req_query_string_start; + } + + if (ch == '@') { + return s_req_server_with_at; + } + + /* brackets are let through unchecked here: this function does no balance or position test on them */ if (IS_USERINFO_CHAR(ch) || ch == '[' || ch == ']') { + return s_req_server; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + case s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* allow extra '?' 
in query string */ + return s_req_query_string; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's an error: + * every accepted character returns from inside its case, so reaching this + * point means ch was not valid in state s. + */ + return s_dead; +} + +size_t http_parser_execute (http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len) +{ + char c, ch; + int8_t unhex_val; + const char *p = data; + const char *header_field_mark = 0; + const char *header_value_mark = 0; + const char *url_mark = 0; + const char *body_mark = 0; + const char *status_mark = 0; + enum state p_state = (enum state) parser->state; + const unsigned int lenient = parser->lenient_http_headers; + uint32_t nread = parser->nread; + + /* We're in an error state. Don't bother doing anything. */ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + return 0; + } + + if (len == 0) { + switch (CURRENT_STATE()) { + case s_body_identity_eof: + /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if + * we got paused. 
+ */ + CALLBACK_NOTIFY_NOADVANCE(message_complete); + return 0; + + case s_dead: + case s_start_req_or_res: + case s_start_res: + case s_start_req: + return 0; + + default: + SET_ERRNO(HPE_INVALID_EOF_STATE); + return 1; + } + } + + + if (CURRENT_STATE() == s_header_field) + header_field_mark = data; + if (CURRENT_STATE() == s_header_value) + header_value_mark = data; + switch (CURRENT_STATE()) { + case s_req_path: + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_server: + case s_req_server_with_at: + case s_req_query_string_start: + case s_req_query_string: + case s_req_fragment_start: + case s_req_fragment: + url_mark = data; + break; + case s_res_status: + status_mark = data; + break; + default: + break; + } + + for (p=data; p != data + len; p++) { + ch = *p; + + if (PARSING_HEADER(CURRENT_STATE())) + COUNT_HEADER_SIZE(1); + +reexecute: + switch (CURRENT_STATE()) { + + case s_dead: + /* this state is used after a 'Connection: close' message + * the parser will error out if it reads another message + */ + if (LIKELY(ch == CR || ch == LF)) + break; + + SET_ERRNO(HPE_CLOSED_CONNECTION); + goto error; + + case s_start_req_or_res: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->extra_flags = 0; + parser->content_length = ULLONG_MAX; + + if (ch == 'H') { + UPDATE_STATE(s_res_or_resp_H); + + CALLBACK_NOTIFY(message_begin); + } else { + parser->type = HTTP_REQUEST; + UPDATE_STATE(s_start_req); + REEXECUTE(); + } + + break; + } + + case s_res_or_resp_H: + if (ch == 'T') { + parser->type = HTTP_RESPONSE; + UPDATE_STATE(s_res_HT); + } else { + if (UNLIKELY(ch != 'E')) { + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + + parser->type = HTTP_REQUEST; + parser->method = HTTP_HEAD; + parser->index = 2; + UPDATE_STATE(s_req_method); + } + break; + + case s_start_res: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->extra_flags = 0; + 
parser->content_length = ULLONG_MAX; + + if (ch == 'H') { + UPDATE_STATE(s_res_H); + } else { + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + + CALLBACK_NOTIFY(message_begin); + break; + } + + case s_res_H: + STRICT_CHECK(ch != 'T'); + UPDATE_STATE(s_res_HT); + break; + + case s_res_HT: + STRICT_CHECK(ch != 'T'); + UPDATE_STATE(s_res_HTT); + break; + + case s_res_HTT: + STRICT_CHECK(ch != 'P'); + UPDATE_STATE(s_res_HTTP); + break; + + case s_res_HTTP: + STRICT_CHECK(ch != '/'); + UPDATE_STATE(s_res_http_major); + break; + + case s_res_http_major: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major = ch - '0'; + UPDATE_STATE(s_res_http_dot); + break; + + case s_res_http_dot: + { + if (UNLIKELY(ch != '.')) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + UPDATE_STATE(s_res_http_minor); + break; + } + + case s_res_http_minor: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor = ch - '0'; + UPDATE_STATE(s_res_http_end); + break; + + case s_res_http_end: + { + if (UNLIKELY(ch != ' ')) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + UPDATE_STATE(s_res_first_status_code); + break; + } + + case s_res_first_status_code: + { + if (!IS_NUM(ch)) { + if (ch == ' ') { + break; + } + + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + parser->status_code = ch - '0'; + UPDATE_STATE(s_res_status_code); + break; + } + + case s_res_status_code: + { + if (!IS_NUM(ch)) { + switch (ch) { + case ' ': + UPDATE_STATE(s_res_status_start); + break; + case CR: + case LF: + UPDATE_STATE(s_res_status_start); + REEXECUTE(); + break; + default: + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + break; + } + + parser->status_code *= 10; + parser->status_code += ch - '0'; + + if (UNLIKELY(parser->status_code > 999)) { + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + + break; + } + + case s_res_status_start: + { + MARK(status); + UPDATE_STATE(s_res_status); + 
parser->index = 0; + + if (ch == CR || ch == LF) + REEXECUTE(); + + break; + } + + case s_res_status: + if (ch == CR) { + UPDATE_STATE(s_res_line_almost_done); + CALLBACK_DATA(status); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_field_start); + CALLBACK_DATA(status); + break; + } + + break; + + case s_res_line_almost_done: + STRICT_CHECK(ch != LF); + UPDATE_STATE(s_header_field_start); + break; + + case s_start_req: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->extra_flags = 0; + parser->content_length = ULLONG_MAX; + + if (UNLIKELY(!IS_ALPHA(ch))) { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + parser->method = (enum http_method) 0; + parser->index = 1; + switch (ch) { + case 'A': parser->method = HTTP_ACL; break; + case 'B': parser->method = HTTP_BIND; break; + case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; + case 'D': parser->method = HTTP_DELETE; break; + case 'G': parser->method = HTTP_GET; break; + case 'H': parser->method = HTTP_HEAD; break; + case 'L': parser->method = HTTP_LOCK; /* or LINK */ break; + case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break; + case 'N': parser->method = HTTP_NOTIFY; break; + case 'O': parser->method = HTTP_OPTIONS; break; + case 'P': parser->method = HTTP_POST; + /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ + break; + case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break; + case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH, SOURCE */ break; + case 'T': parser->method = HTTP_TRACE; break; + case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break; + default: + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + UPDATE_STATE(s_req_method); + + CALLBACK_NOTIFY(message_begin); + + break; + } + + case s_req_method: + { + const char *matcher; + if (UNLIKELY(ch == '\0')) { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + matcher = method_strings[parser->method]; + if (ch 
== ' ' && matcher[parser->index] == '\0') { + UPDATE_STATE(s_req_spaces_before_url); + } else if (ch == matcher[parser->index]) { + ; /* nada */ + } else if ((ch >= 'A' && ch <= 'Z') || ch == '-') { + + switch (parser->method << 16 | parser->index << 8 | ch) { +#define XX(meth, pos, ch, new_meth) \ + case (HTTP_##meth << 16 | pos << 8 | ch): \ + parser->method = HTTP_##new_meth; break; + + XX(POST, 1, 'U', PUT) + XX(POST, 1, 'A', PATCH) + XX(POST, 1, 'R', PROPFIND) + XX(PUT, 2, 'R', PURGE) + XX(CONNECT, 1, 'H', CHECKOUT) + XX(CONNECT, 2, 'P', COPY) + XX(MKCOL, 1, 'O', MOVE) + XX(MKCOL, 1, 'E', MERGE) + XX(MKCOL, 1, '-', MSEARCH) + XX(MKCOL, 2, 'A', MKACTIVITY) + XX(MKCOL, 3, 'A', MKCALENDAR) + XX(SUBSCRIBE, 1, 'E', SEARCH) + XX(SUBSCRIBE, 1, 'O', SOURCE) + XX(REPORT, 2, 'B', REBIND) + XX(PROPFIND, 4, 'P', PROPPATCH) + XX(LOCK, 1, 'I', LINK) + XX(UNLOCK, 2, 'S', UNSUBSCRIBE) + XX(UNLOCK, 2, 'B', UNBIND) + XX(UNLOCK, 3, 'I', UNLINK) +#undef XX + default: + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + ++parser->index; + break; + } + + case s_req_spaces_before_url: + { + if (ch == ' ') break; + + MARK(url); + if (parser->method == HTTP_CONNECT) { + UPDATE_STATE(s_req_server_start); + } + + UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); + if (UNLIKELY(CURRENT_STATE() == s_dead)) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + + break; + } + + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + { + switch (ch) { + /* No whitespace allowed here */ + case ' ': + case CR: + case LF: + SET_ERRNO(HPE_INVALID_URL); + goto error; + default: + UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); + if (UNLIKELY(CURRENT_STATE() == s_dead)) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + } + + break; + } + + case s_req_server: + case s_req_server_with_at: + case s_req_path: + case s_req_query_string_start: + case s_req_query_string: + case 
s_req_fragment_start: + case s_req_fragment: + { + switch (ch) { + case ' ': + UPDATE_STATE(s_req_http_start); + CALLBACK_DATA(url); + break; + case CR: + case LF: + parser->http_major = 0; + parser->http_minor = 9; + UPDATE_STATE((ch == CR) ? + s_req_line_almost_done : + s_header_field_start); + CALLBACK_DATA(url); + break; + default: + UPDATE_STATE(parse_url_char(CURRENT_STATE(), ch)); + if (UNLIKELY(CURRENT_STATE() == s_dead)) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + } + break; + } + + case s_req_http_start: + switch (ch) { + case ' ': + break; + case 'H': + UPDATE_STATE(s_req_http_H); + break; + case 'I': + if (parser->method == HTTP_SOURCE) { + UPDATE_STATE(s_req_http_I); + break; + } + /* fall through */ + default: + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + break; + + case s_req_http_H: + STRICT_CHECK(ch != 'T'); + UPDATE_STATE(s_req_http_HT); + break; + + case s_req_http_HT: + STRICT_CHECK(ch != 'T'); + UPDATE_STATE(s_req_http_HTT); + break; + + case s_req_http_HTT: + STRICT_CHECK(ch != 'P'); + UPDATE_STATE(s_req_http_HTTP); + break; + + case s_req_http_I: + STRICT_CHECK(ch != 'C'); + UPDATE_STATE(s_req_http_IC); + break; + + case s_req_http_IC: + STRICT_CHECK(ch != 'E'); + UPDATE_STATE(s_req_http_HTTP); /* Treat "ICE" as "HTTP". 
*/ + break; + + case s_req_http_HTTP: + STRICT_CHECK(ch != '/'); + UPDATE_STATE(s_req_http_major); + break; + + case s_req_http_major: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major = ch - '0'; + UPDATE_STATE(s_req_http_dot); + break; + + case s_req_http_dot: + { + if (UNLIKELY(ch != '.')) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + UPDATE_STATE(s_req_http_minor); + break; + } + + case s_req_http_minor: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor = ch - '0'; + UPDATE_STATE(s_req_http_end); + break; + + case s_req_http_end: + { + if (ch == CR) { + UPDATE_STATE(s_req_line_almost_done); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_field_start); + break; + } + + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + break; + } + + /* end of request line */ + case s_req_line_almost_done: + { + if (UNLIKELY(ch != LF)) { + SET_ERRNO(HPE_LF_EXPECTED); + goto error; + } + + UPDATE_STATE(s_header_field_start); + break; + } + + case s_header_field_start: + { + if (ch == CR) { + UPDATE_STATE(s_headers_almost_done); + break; + } + + if (ch == LF) { + /* they might be just sending \n instead of \r\n so this would be + * the second \n to denote the end of headers*/ + UPDATE_STATE(s_headers_almost_done); + REEXECUTE(); + } + + c = TOKEN(ch); + + if (UNLIKELY(!c)) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + MARK(header_field); + + parser->index = 0; + UPDATE_STATE(s_header_field); + + switch (c) { + case 'c': + parser->header_state = h_C; + break; + + case 'p': + parser->header_state = h_matching_proxy_connection; + break; + + case 't': + parser->header_state = h_matching_transfer_encoding; + break; + + case 'u': + parser->header_state = h_matching_upgrade; + break; + + default: + parser->header_state = h_general; + break; + } + break; + } + + case s_header_field: + { + const char* start = p; + for (; p != data + len; p++) { 
+ ch = *p; + c = TOKEN(ch); + + if (!c) + break; + + switch (parser->header_state) { + case h_general: { + size_t left = data + len - p; + const char* pe = p + MIN(left, max_header_size); + while (p+1 < pe && TOKEN(p[1])) { + p++; + } + break; + } + + case h_C: + parser->index++; + parser->header_state = (c == 'o' ? h_CO : h_general); + break; + + case h_CO: + parser->index++; + parser->header_state = (c == 'n' ? h_CON : h_general); + break; + + case h_CON: + parser->index++; + switch (c) { + case 'n': + parser->header_state = h_matching_connection; + break; + case 't': + parser->header_state = h_matching_content_length; + break; + default: + parser->header_state = h_general; + break; + } + break; + + /* connection */ + + case h_matching_connection: + parser->index++; + if (parser->index > sizeof(CONNECTION)-1 + || c != CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONNECTION)-2) { + parser->header_state = h_connection; + } + break; + + /* proxy-connection */ + + case h_matching_proxy_connection: + parser->index++; + if (parser->index > sizeof(PROXY_CONNECTION)-1 + || c != PROXY_CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { + parser->header_state = h_connection; + } + break; + + /* content-length */ + + case h_matching_content_length: + parser->index++; + if (parser->index > sizeof(CONTENT_LENGTH)-1 + || c != CONTENT_LENGTH[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { + parser->header_state = h_content_length; + } + break; + + /* transfer-encoding */ + + case h_matching_transfer_encoding: + parser->index++; + if (parser->index > sizeof(TRANSFER_ENCODING)-1 + || c != TRANSFER_ENCODING[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { + parser->header_state = h_transfer_encoding; + parser->extra_flags |= 
F_TRANSFER_ENCODING >> 8; + } + break; + + /* upgrade */ + + case h_matching_upgrade: + parser->index++; + if (parser->index > sizeof(UPGRADE)-1 + || c != UPGRADE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(UPGRADE)-2) { + parser->header_state = h_upgrade; + } + break; + + case h_connection: + case h_content_length: + case h_transfer_encoding: + case h_upgrade: + if (ch != ' ') parser->header_state = h_general; + break; + + default: + assert(0 && "Unknown header_state"); + break; + } + } + + if (p == data + len) { + --p; + COUNT_HEADER_SIZE(p - start); + break; + } + + COUNT_HEADER_SIZE(p - start); + + if (ch == ':') { + UPDATE_STATE(s_header_value_discard_ws); + CALLBACK_DATA(header_field); + break; + } + + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + case s_header_value_discard_ws: + if (ch == ' ' || ch == '\t') break; + + if (ch == CR) { + UPDATE_STATE(s_header_value_discard_ws_almost_done); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_value_discard_lws); + break; + } + + /* fall through */ + + case s_header_value_start: + { + MARK(header_value); + + UPDATE_STATE(s_header_value); + parser->index = 0; + + c = LOWER(ch); + + switch (parser->header_state) { + case h_upgrade: + parser->flags |= F_UPGRADE; + parser->header_state = h_general; + break; + + case h_transfer_encoding: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + parser->header_state = h_matching_transfer_encoding_chunked; + } else { + parser->header_state = h_matching_transfer_encoding_token; + } + break; + + /* Multi-value `Transfer-Encoding` header */ + case h_matching_transfer_encoding_token_start: + break; + + case h_content_length: + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + if (parser->flags & F_CONTENTLENGTH) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; + } + + parser->flags |= F_CONTENTLENGTH; + parser->content_length = ch - '0'; + 
parser->header_state = h_content_length_num; + break; + + /* when obsolete line folding is encountered for content length + * continue to the s_header_value state */ + case h_content_length_ws: + break; + + case h_connection: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + parser->header_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + parser->header_state = h_matching_connection_close; + } else if (c == 'u') { + parser->header_state = h_matching_connection_upgrade; + } else { + parser->header_state = h_matching_connection_token; + } + break; + + /* Multi-value `Connection` header */ + case h_matching_connection_token_start: + break; + + default: + parser->header_state = h_general; + break; + } + break; + } + + case s_header_value: + { + const char* start = p; + enum header_states h_state = (enum header_states) parser->header_state; + for (; p != data + len; p++) { + ch = *p; + if (ch == CR) { + UPDATE_STATE(s_header_almost_done); + parser->header_state = h_state; + CALLBACK_DATA(header_value); + break; + } + + if (ch == LF) { + UPDATE_STATE(s_header_almost_done); + COUNT_HEADER_SIZE(p - start); + parser->header_state = h_state; + CALLBACK_DATA_NOADVANCE(header_value); + REEXECUTE(); + } + + if (!lenient && !IS_HEADER_CHAR(ch)) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + c = LOWER(ch); + + switch (h_state) { + case h_general: + { + size_t left = data + len - p; + const char* pe = p + MIN(left, max_header_size); + + for (; p != pe; p++) { + ch = *p; + if (ch == CR || ch == LF) { + --p; + break; + } + if (!lenient && !IS_HEADER_CHAR(ch)) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + } + if (p == data + len) + --p; + break; + } + + case h_connection: + case h_transfer_encoding: + assert(0 && "Shouldn't get here."); + break; + + case h_content_length: + if (ch == ' ') break; + h_state = h_content_length_num; + /* fall through */ + + case h_content_length_num: 
+ { + uint64_t t; + + if (ch == ' ') { + h_state = h_content_length_ws; + break; + } + + if (UNLIKELY(!IS_NUM(ch))) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + parser->header_state = h_state; + goto error; + } + + t = parser->content_length; + t *= 10; + t += ch - '0'; + + /* Overflow? Test against a conservative limit for simplicity. */ + if (UNLIKELY((ULLONG_MAX - 10) / 10 < parser->content_length)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + parser->header_state = h_state; + goto error; + } + + parser->content_length = t; + break; + } + + case h_content_length_ws: + if (ch == ' ') break; + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + parser->header_state = h_state; + goto error; + + /* Transfer-Encoding: chunked */ + case h_matching_transfer_encoding_token_start: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + h_state = h_matching_transfer_encoding_chunked; + } else if (STRICT_TOKEN(c)) { + /* TODO(indutny): similar code below does this, but why? + * At the very least it seems to be inconsistent given that + * h_matching_transfer_encoding_token does not check for + * `STRICT_TOKEN` + */ + h_state = h_matching_transfer_encoding_token; + } else if (c == ' ' || c == '\t') { + /* Skip lws */ + } else { + h_state = h_general; + } + break; + + case h_matching_transfer_encoding_chunked: + parser->index++; + if (parser->index > sizeof(CHUNKED)-1 + || c != CHUNKED[parser->index]) { + h_state = h_matching_transfer_encoding_token; + } else if (parser->index == sizeof(CHUNKED)-2) { + h_state = h_transfer_encoding_chunked; + } + break; + + case h_matching_transfer_encoding_token: + if (ch == ',') { + h_state = h_matching_transfer_encoding_token_start; + parser->index = 0; + } + break; + + case h_matching_connection_token_start: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + h_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + h_state = h_matching_connection_close; + } else if (c == 
'u') { + h_state = h_matching_connection_upgrade; + } else if (STRICT_TOKEN(c)) { + h_state = h_matching_connection_token; + } else if (c == ' ' || c == '\t') { + /* Skip lws */ + } else { + h_state = h_general; + } + break; + + /* looking for 'Connection: keep-alive' */ + case h_matching_connection_keep_alive: + parser->index++; + if (parser->index > sizeof(KEEP_ALIVE)-1 + || c != KEEP_ALIVE[parser->index]) { + h_state = h_matching_connection_token; + } else if (parser->index == sizeof(KEEP_ALIVE)-2) { + h_state = h_connection_keep_alive; + } + break; + + /* looking for 'Connection: close' */ + case h_matching_connection_close: + parser->index++; + if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { + h_state = h_matching_connection_token; + } else if (parser->index == sizeof(CLOSE)-2) { + h_state = h_connection_close; + } + break; + + /* looking for 'Connection: upgrade' */ + case h_matching_connection_upgrade: + parser->index++; + if (parser->index > sizeof(UPGRADE) - 1 || + c != UPGRADE[parser->index]) { + h_state = h_matching_connection_token; + } else if (parser->index == sizeof(UPGRADE)-2) { + h_state = h_connection_upgrade; + } + break; + + case h_matching_connection_token: + if (ch == ',') { + h_state = h_matching_connection_token_start; + parser->index = 0; + } + break; + + case h_transfer_encoding_chunked: + if (ch != ' ') h_state = h_matching_transfer_encoding_token; + break; + + case h_connection_keep_alive: + case h_connection_close: + case h_connection_upgrade: + if (ch == ',') { + if (h_state == h_connection_keep_alive) { + parser->flags |= F_CONNECTION_KEEP_ALIVE; + } else if (h_state == h_connection_close) { + parser->flags |= F_CONNECTION_CLOSE; + } else if (h_state == h_connection_upgrade) { + parser->flags |= F_CONNECTION_UPGRADE; + } + h_state = h_matching_connection_token_start; + parser->index = 0; + } else if (ch != ' ') { + h_state = h_matching_connection_token; + } + break; + + default: + UPDATE_STATE(s_header_value); + 
h_state = h_general; + break; + } + } + parser->header_state = h_state; + + if (p == data + len) + --p; + + COUNT_HEADER_SIZE(p - start); + break; + } + + case s_header_almost_done: + { + if (UNLIKELY(ch != LF)) { + SET_ERRNO(HPE_LF_EXPECTED); + goto error; + } + + UPDATE_STATE(s_header_value_lws); + break; + } + + case s_header_value_lws: + { + if (ch == ' ' || ch == '\t') { + if (parser->header_state == h_content_length_num) { + /* treat obsolete line folding as space */ + parser->header_state = h_content_length_ws; + } + UPDATE_STATE(s_header_value_start); + REEXECUTE(); + } + + /* finished the header */ + switch (parser->header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + case h_connection_upgrade: + parser->flags |= F_CONNECTION_UPGRADE; + break; + default: + break; + } + + UPDATE_STATE(s_header_field_start); + REEXECUTE(); + } + + case s_header_value_discard_ws_almost_done: + { + STRICT_CHECK(ch != LF); + UPDATE_STATE(s_header_value_discard_lws); + break; + } + + case s_header_value_discard_lws: + { + if (ch == ' ' || ch == '\t') { + UPDATE_STATE(s_header_value_discard_ws); + break; + } else { + switch (parser->header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_connection_upgrade: + parser->flags |= F_CONNECTION_UPGRADE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + case h_content_length: + /* do not allow empty content length */ + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + break; + default: + break; + } + + /* header value was empty */ + MARK(header_value); + UPDATE_STATE(s_header_field_start); + CALLBACK_DATA_NOADVANCE(header_value); + REEXECUTE(); + } + } + + case 
s_headers_almost_done: + { + STRICT_CHECK(ch != LF); + + if (parser->flags & F_TRAILING) { + /* End of a chunked request */ + UPDATE_STATE(s_message_done); + CALLBACK_NOTIFY_NOADVANCE(chunk_complete); + REEXECUTE(); + } + + /* Cannot us transfer-encoding and a content-length header together + per the HTTP specification. (RFC 7230 Section 3.3.3) */ + if ((parser->extra_flags & (F_TRANSFER_ENCODING >> 8)) && + (parser->flags & F_CONTENTLENGTH)) { + /* Allow it for lenient parsing as long as `Transfer-Encoding` is + * not `chunked` + */ + if (!lenient || (parser->flags & F_CHUNKED)) { + SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH); + goto error; + } + } + + UPDATE_STATE(s_headers_done); + + /* Set this here so that on_headers_complete() callbacks can see it */ + if ((parser->flags & F_UPGRADE) && + (parser->flags & F_CONNECTION_UPGRADE)) { + /* For responses, "Upgrade: foo" and "Connection: upgrade" are + * mandatory only when it is a 101 Switching Protocols response, + * otherwise it is purely informational, to announce support. + */ + parser->upgrade = + (parser->type == HTTP_REQUEST || parser->status_code == 101); + } else { + parser->upgrade = (parser->method == HTTP_CONNECT); + } + + /* Here we call the headers_complete callback. This is somewhat + * different than other callbacks because if the user returns 1, we + * will interpret that as saying that this message has no body. This + * is needed for the annoying case of recieving a response to a HEAD + * request. + * + * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so + * we have to simulate it by handling a change in errno below. 
+ */ + if (settings->on_headers_complete) { + switch (settings->on_headers_complete(parser)) { + case 0: + break; + + case 2: + parser->upgrade = 1; + + /* fall through */ + case 1: + parser->flags |= F_SKIPBODY; + break; + + default: + SET_ERRNO(HPE_CB_headers_complete); + RETURN(p - data); /* Error */ + } + } + + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + RETURN(p - data); + } + + REEXECUTE(); + } + + case s_headers_done: + { + int hasBody; + STRICT_CHECK(ch != LF); + + parser->nread = 0; + nread = 0; + + hasBody = parser->flags & F_CHUNKED || + (parser->content_length > 0 && parser->content_length != ULLONG_MAX); + if (parser->upgrade && (parser->method == HTTP_CONNECT || + (parser->flags & F_SKIPBODY) || !hasBody)) { + /* Exit, the rest of the message is in a different protocol. */ + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + RETURN((p - data) + 1); + } + + if (parser->flags & F_SKIPBODY) { + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + } else if (parser->flags & F_CHUNKED) { + /* chunked encoding - ignore Content-Length header, + * prepare for a chunk */ + UPDATE_STATE(s_chunk_size_start); + } else if (parser->extra_flags & (F_TRANSFER_ENCODING >> 8)) { + if (parser->type == HTTP_REQUEST && !lenient) { + /* RFC 7230 3.3.3 */ + + /* If a Transfer-Encoding header field + * is present in a request and the chunked transfer coding is not + * the final encoding, the message body length cannot be determined + * reliably; the server MUST respond with the 400 (Bad Request) + * status code and then close the connection. + */ + SET_ERRNO(HPE_INVALID_TRANSFER_ENCODING); + RETURN(p - data); /* Error */ + } else { + /* RFC 7230 3.3.3 */ + + /* If a Transfer-Encoding header field is present in a response and + * the chunked transfer coding is not the final encoding, the + * message body length is determined by reading the connection until + * it is closed by the server. 
+ */ + UPDATE_STATE(s_body_identity_eof); + } + } else { + if (parser->content_length == 0) { + /* Content-Length header given but zero: Content-Length: 0\r\n */ + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + } else if (parser->content_length != ULLONG_MAX) { + /* Content-Length header given and non-zero */ + UPDATE_STATE(s_body_identity); + } else { + if (!http_message_needs_eof(parser)) { + /* Assume content-length 0 - read the next */ + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + } else { + /* Read body until EOF */ + UPDATE_STATE(s_body_identity_eof); + } + } + } + + break; + } + + case s_body_identity: + { + uint64_t to_read = MIN(parser->content_length, + (uint64_t) ((data + len) - p)); + + assert(parser->content_length != 0 + && parser->content_length != ULLONG_MAX); + + /* The difference between advancing content_length and p is because + * the latter will automaticaly advance on the next loop iteration. + * Further, if content_length ends up at 0, we want to see the last + * byte again for our message complete callback. + */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + UPDATE_STATE(s_message_done); + + /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. + * + * The alternative to doing this is to wait for the next byte to + * trigger the data callback, just as in every other case. The + * problem with this is that this makes it difficult for the test + * harness to distinguish between complete-on-EOF and + * complete-on-length. It's not clear that this distinction is + * important for applications, but let's keep it for now. 
+ */ + CALLBACK_DATA_(body, p - body_mark + 1, p - data); + REEXECUTE(); + } + + break; + } + + /* read until EOF */ + case s_body_identity_eof: + MARK(body); + p = data + len - 1; + + break; + + case s_message_done: + UPDATE_STATE(NEW_MESSAGE()); + CALLBACK_NOTIFY(message_complete); + if (parser->upgrade) { + /* Exit, the rest of the message is in a different protocol. */ + RETURN((p - data) + 1); + } + break; + + case s_chunk_size_start: + { + assert(nread == 1); + assert(parser->flags & F_CHUNKED); + + unhex_val = unhex[(unsigned char)ch]; + if (UNLIKELY(unhex_val == -1)) { + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + + parser->content_length = unhex_val; + UPDATE_STATE(s_chunk_size); + break; + } + + case s_chunk_size: + { + uint64_t t; + + assert(parser->flags & F_CHUNKED); + + if (ch == CR) { + UPDATE_STATE(s_chunk_size_almost_done); + break; + } + + unhex_val = unhex[(unsigned char)ch]; + + if (unhex_val == -1) { + if (ch == ';' || ch == ' ') { + UPDATE_STATE(s_chunk_parameters); + break; + } + + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + + t = parser->content_length; + t *= 16; + t += unhex_val; + + /* Overflow? Test against a conservative limit for simplicity. */ + if (UNLIKELY((ULLONG_MAX - 16) / 16 < parser->content_length)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + parser->content_length = t; + break; + } + + case s_chunk_parameters: + { + assert(parser->flags & F_CHUNKED); + /* just ignore this shit. 
TODO check for overflow */ + if (ch == CR) { + UPDATE_STATE(s_chunk_size_almost_done); + break; + } + break; + } + + case s_chunk_size_almost_done: + { + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + + parser->nread = 0; + nread = 0; + + if (parser->content_length == 0) { + parser->flags |= F_TRAILING; + UPDATE_STATE(s_header_field_start); + } else { + UPDATE_STATE(s_chunk_data); + } + CALLBACK_NOTIFY(chunk_header); + break; + } + + case s_chunk_data: + { + uint64_t to_read = MIN(parser->content_length, + (uint64_t) ((data + len) - p)); + + assert(parser->flags & F_CHUNKED); + assert(parser->content_length != 0 + && parser->content_length != ULLONG_MAX); + + /* See the explanation in s_body_identity for why the content + * length and data pointers are managed this way. + */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + UPDATE_STATE(s_chunk_data_almost_done); + } + + break; + } + + case s_chunk_data_almost_done: + assert(parser->flags & F_CHUNKED); + assert(parser->content_length == 0); + STRICT_CHECK(ch != CR); + UPDATE_STATE(s_chunk_data_done); + CALLBACK_DATA(body); + break; + + case s_chunk_data_done: + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + parser->nread = 0; + nread = 0; + UPDATE_STATE(s_chunk_size_start); + CALLBACK_NOTIFY(chunk_complete); + break; + + default: + assert(0 && "unhandled state"); + SET_ERRNO(HPE_INVALID_INTERNAL_STATE); + goto error; + } + } + + /* Run callbacks for any marks that we have leftover after we ran out of + * bytes. There should be at most one of these set, so it's OK to invoke + * them in series (unset marks will not result in callbacks). + * + * We use the NOADVANCE() variety of callbacks here because 'p' has already + * overflowed 'data' and this allows us to correct for the off-by-one that + * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' + * value that's in-bounds). 
+ */ + + assert(((header_field_mark ? 1 : 0) + + (header_value_mark ? 1 : 0) + + (url_mark ? 1 : 0) + + (body_mark ? 1 : 0) + + (status_mark ? 1 : 0)) <= 1); + + CALLBACK_DATA_NOADVANCE(header_field); + CALLBACK_DATA_NOADVANCE(header_value); + CALLBACK_DATA_NOADVANCE(url); + CALLBACK_DATA_NOADVANCE(body); + CALLBACK_DATA_NOADVANCE(status); + + RETURN(len); + +error: + if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { + SET_ERRNO(HPE_UNKNOWN); + } + + RETURN(p - data); +} + + +/* Does the parser need to see an EOF to find the end of the message? */ +int +http_message_needs_eof (const http_parser *parser) +{ + if (parser->type == HTTP_REQUEST) { + return 0; + } + + /* See RFC 2616 section 4.4 */ + if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */ + parser->status_code == 204 || /* No Content */ + parser->status_code == 304 || /* Not Modified */ + parser->flags & F_SKIPBODY) { /* response to a HEAD request */ + return 0; + } + + /* RFC 7230 3.3.3, see `s_headers_almost_done` */ + if ((parser->extra_flags & (F_TRANSFER_ENCODING >> 8)) && + (parser->flags & F_CHUNKED) == 0) { + return 1; + } + + if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { + return 0; + } + + return 1; +} + + +int +http_should_keep_alive (const http_parser *parser) +{ + if (parser->http_major > 0 && parser->http_minor > 0) { + /* HTTP/1.1 */ + if (parser->flags & F_CONNECTION_CLOSE) { + return 0; + } + } else { + /* HTTP/1.0 or earlier */ + if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { + return 0; + } + } + + return !http_message_needs_eof(parser); +} + + +const char * +http_method_str (enum http_method m) +{ + return ELEM_AT(method_strings, m, "<unknown>"); +} + +const char * +http_status_str (enum http_status s) +{ + switch (s) { +#define XX(num, name, string) case HTTP_STATUS_##name: return #string; + HTTP_STATUS_MAP(XX) +#undef XX + default: return "<unknown>"; + } +} + +void +http_parser_init (http_parser *parser, enum http_parser_type t) +{ + void *data = 
parser->data; /* preserve application data */ + memset(parser, 0, sizeof(*parser)); + parser->data = data; + parser->type = t; + parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_req_or_res)); + parser->http_errno = HPE_OK; +} + +void +http_parser_settings_init(http_parser_settings *settings) +{ + memset(settings, 0, sizeof(*settings)); +} + +const char * +http_errno_name(enum http_errno err) { + assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); + return http_strerror_tab[err].name; +} + +const char * +http_errno_description(enum http_errno err) { + assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab)); + return http_strerror_tab[err].description; +} + +static enum http_host_state +http_parse_host_char(enum http_host_state s, const char ch) { + switch(s) { + case s_http_userinfo: + case s_http_userinfo_start: + if (ch == '@') { + return s_http_host_start; + } + + if (IS_USERINFO_CHAR(ch)) { + return s_http_userinfo; + } + break; + + case s_http_host_start: + if (ch == '[') { + return s_http_host_v6_start; + } + + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + break; + + case s_http_host: + if (IS_HOST_CHAR(ch)) { + return s_http_host; + } + + /* fall through */ + case s_http_host_v6_end: + if (ch == ':') { + return s_http_host_port_start; + } + + break; + + case s_http_host_v6: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* fall through */ + case s_http_host_v6_start: + if (IS_HEX(ch) || ch == ':' || ch == '.') { + return s_http_host_v6; + } + + if (s == s_http_host_v6 && ch == '%') { + return s_http_host_v6_zone_start; + } + break; + + case s_http_host_v6_zone: + if (ch == ']') { + return s_http_host_v6_end; + } + + /* fall through */ + case s_http_host_v6_zone_start: + /* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */ + if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' 
|| ch == '-' || ch == '_' || + ch == '~') { + return s_http_host_v6_zone; + } + break; + + case s_http_host_port: + case s_http_host_port_start: + if (IS_NUM(ch)) { + return s_http_host_port; + } + + break; + + default: + break; + } + return s_http_host_dead; +} + +static int +http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { + enum http_host_state s; + + const char *p; + size_t buflen = u->field_data[UF_HOST].off + u->field_data[UF_HOST].len; + + assert(u->field_set & (1 << UF_HOST)); + + u->field_data[UF_HOST].len = 0; + + s = found_at ? s_http_userinfo_start : s_http_host_start; + + for (p = buf + u->field_data[UF_HOST].off; p < buf + buflen; p++) { + enum http_host_state new_s = http_parse_host_char(s, *p); + + if (new_s == s_http_host_dead) { + return 1; + } + + switch(new_s) { + case s_http_host: + if (s != s_http_host) { + u->field_data[UF_HOST].off = (uint16_t)(p - buf); + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6: + if (s != s_http_host_v6) { + u->field_data[UF_HOST].off = (uint16_t)(p - buf); + } + u->field_data[UF_HOST].len++; + break; + + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + u->field_data[UF_HOST].len++; + break; + + case s_http_host_port: + if (s != s_http_host_port) { + u->field_data[UF_PORT].off = (uint16_t)(p - buf); + u->field_data[UF_PORT].len = 0; + u->field_set |= (1 << UF_PORT); + } + u->field_data[UF_PORT].len++; + break; + + case s_http_userinfo: + if (s != s_http_userinfo) { + u->field_data[UF_USERINFO].off = (uint16_t)(p - buf); + u->field_data[UF_USERINFO].len = 0; + u->field_set |= (1 << UF_USERINFO); + } + u->field_data[UF_USERINFO].len++; + break; + + default: + break; + } + s = new_s; + } + + /* Make sure we don't end somewhere unexpected */ + switch (s) { + case s_http_host_start: + case s_http_host_v6_start: + case s_http_host_v6: + case s_http_host_v6_zone_start: + case s_http_host_v6_zone: + case s_http_host_port_start: + case s_http_userinfo: + case 
s_http_userinfo_start: + return 1; + default: + break; + } + + return 0; +} + +void +http_parser_url_init(struct http_parser_url *u) { + memset(u, 0, sizeof(*u)); +} + +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum state s; + const char *p; + enum http_parser_url_fields uf, old_uf; + int found_at = 0; + + if (buflen == 0) { + return 1; + } + + u->port = u->field_set = 0; + s = is_connect ? s_req_server_start : s_req_spaces_before_url; + old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + s = parse_url_char(s, *p); + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_dead: + return 1; + + /* Skip delimeters */ + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_server_start: + case s_req_query_string_start: + case s_req_fragment_start: + continue; + + case s_req_schema: + uf = UF_SCHEMA; + break; + + case s_req_server_with_at: + found_at = 1; + + /* fall through */ + case s_req_server: + uf = UF_HOST; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + u->field_data[uf].off = (uint16_t)(p - buf); + u->field_data[uf].len = 1; + + u->field_set |= (1 << uf); + old_uf = uf; + } + + /* host must be present if there is a schema */ + /* parsing http:///toto will fail */ + if ((u->field_set & (1 << UF_SCHEMA)) && + (u->field_set & (1 << UF_HOST)) == 0) { + return 1; + } + + if (u->field_set & (1 << UF_HOST)) { + if (http_parse_host(buf, u, found_at) != 0) { + return 1; + } + } + + /* CONNECT requests can only contain "hostname:port" */ + if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { + return 1; + } + + if (u->field_set & (1 << UF_PORT)) { + 
uint16_t off; + uint16_t len; + const char* p; + const char* end; + unsigned long v; + + off = u->field_data[UF_PORT].off; + len = u->field_data[UF_PORT].len; + end = buf + off + len; + + /* NOTE: The characters are already validated and are in the [0-9] range */ + assert(off + len <= buflen && "Port number overflow"); + v = 0; + for (p = buf + off; p < end; p++) { + v *= 10; + v += *p - '0'; + + /* Ports have a max value of 2^16 */ + if (v > 0xffff) { + return 1; + } + } + + u->port = (uint16_t) v; + } + + return 0; +} + +void +http_parser_pause(http_parser *parser, int paused) { + /* Users should only be pausing/unpausing a parser that is not in an error + * state. In non-debug builds, there's not much that we can do about this + * other than ignore it. + */ + if (HTTP_PARSER_ERRNO(parser) == HPE_OK || + HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { + uint32_t nread = parser->nread; /* used by the SET_ERRNO macro */ + SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK); + } else { + assert(0 && "Attempting to pause parser in error state"); + } +} + +int +http_body_is_final(const struct http_parser *parser) { + return parser->state == s_message_done; +} + +unsigned long +http_parser_version(void) { + return HTTP_PARSER_VERSION_MAJOR * 0x10000 | + HTTP_PARSER_VERSION_MINOR * 0x00100 | + HTTP_PARSER_VERSION_PATCH * 0x00001; +} + +void +http_parser_set_max_header_size(uint32_t size) { + max_header_size = size; +} diff --git a/http-parser/http_parser.gyp b/http-parser/http_parser.gyp new file mode 100644 index 0000000..ef34eca --- /dev/null +++ b/http-parser/http_parser.gyp @@ -0,0 +1,111 @@ +# This file is used with the GYP meta build system. 
+# http://code.google.com/p/gyp/ +# To build try this: +# svn co http://gyp.googlecode.com/svn/trunk gyp +# ./gyp/gyp -f make --depth=`pwd` http_parser.gyp +# ./out/Debug/test +{ + 'target_defaults': { + 'default_configuration': 'Debug', + 'configurations': { + # TODO: hoist these out and put them somewhere common, because + # RuntimeLibrary MUST MATCH across the entire project + 'Debug': { + 'defines': [ 'DEBUG', '_DEBUG' ], + 'cflags': [ '-Wall', '-Wextra', '-O0', '-g', '-ftrapv' ], + 'msvs_settings': { + 'VCCLCompilerTool': { + 'RuntimeLibrary': 1, # static debug + }, + }, + }, + 'Release': { + 'defines': [ 'NDEBUG' ], + 'cflags': [ '-Wall', '-Wextra', '-O3' ], + 'msvs_settings': { + 'VCCLCompilerTool': { + 'RuntimeLibrary': 0, # static release + }, + }, + } + }, + 'msvs_settings': { + 'VCCLCompilerTool': { + }, + 'VCLibrarianTool': { + }, + 'VCLinkerTool': { + 'GenerateDebugInformation': 'true', + }, + }, + 'conditions': [ + ['OS == "win"', { + 'defines': [ + 'WIN32' + ], + }] + ], + }, + + 'targets': [ + { + 'target_name': 'http_parser', + 'type': 'static_library', + 'include_dirs': [ '.' ], + 'direct_dependent_settings': { + 'defines': [ 'HTTP_PARSER_STRICT=0' ], + 'include_dirs': [ '.' ], + }, + 'defines': [ 'HTTP_PARSER_STRICT=0' ], + 'sources': [ './http_parser.c', ], + 'conditions': [ + ['OS=="win"', { + 'msvs_settings': { + 'VCCLCompilerTool': { + # Compile as C++. http_parser.c is actually C99, but C++ is + # close enough in this case. + 'CompileAs': 2, + }, + }, + }] + ], + }, + + { + 'target_name': 'http_parser_strict', + 'type': 'static_library', + 'include_dirs': [ '.' ], + 'direct_dependent_settings': { + 'defines': [ 'HTTP_PARSER_STRICT=1' ], + 'include_dirs': [ '.' ], + }, + 'defines': [ 'HTTP_PARSER_STRICT=1' ], + 'sources': [ './http_parser.c', ], + 'conditions': [ + ['OS=="win"', { + 'msvs_settings': { + 'VCCLCompilerTool': { + # Compile as C++. http_parser.c is actually C99, but C++ is + # close enough in this case. 
+ 'CompileAs': 2, + }, + }, + }] + ], + }, + + { + 'target_name': 'test-nonstrict', + 'type': 'executable', + 'dependencies': [ 'http_parser' ], + 'sources': [ 'test.c' ] + }, + + { + 'target_name': 'test-strict', + 'type': 'executable', + 'dependencies': [ 'http_parser_strict' ], + 'sources': [ 'test.c' ] + } + ] +} diff --git a/http-parser/http_parser.h b/http-parser/http_parser.h new file mode 100644 index 0000000..60dc85d --- /dev/null +++ b/http-parser/http_parser.h @@ -0,0 +1,445 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef http_parser_h +#define http_parser_h +#ifdef __cplusplus +extern "C" { +#endif + +/* Also update SONAME in the Makefile whenever you change these. 
*/ +#define HTTP_PARSER_VERSION_MAJOR 2 +#define HTTP_PARSER_VERSION_MINOR 9 +#define HTTP_PARSER_VERSION_PATCH 4 + +#include <stddef.h> +#if defined(_WIN32) && !defined(__MINGW32__) && \ + (!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__) +#include <BaseTsd.h> +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#elif (defined(__sun) || defined(__sun__)) && defined(__SunOS_5_9) +#include <sys/inttypes.h> +#else +#include <stdint.h> +#endif + +/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run + * faster + */ +#ifndef HTTP_PARSER_STRICT +# define HTTP_PARSER_STRICT 1 +#endif + +/* Maximium header size allowed. If the macro is not defined + * before including this header then the default is used. To + * change the maximum header size, define the macro in the build + * environment (e.g. -DHTTP_MAX_HEADER_SIZE=<value>). To remove + * the effective limit on the size of the header, define the macro + * to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff) + */ +#ifndef HTTP_MAX_HEADER_SIZE +# define HTTP_MAX_HEADER_SIZE (80*1024) +#endif + +typedef struct http_parser http_parser; +typedef struct http_parser_settings http_parser_settings; + + +/* Callbacks should return non-zero to indicate an error. The parser will + * then halt execution. + * + * The one exception is on_headers_complete. In a HTTP_RESPONSE parser + * returning '1' from on_headers_complete will tell the parser that it + * should not expect a body. This is used when receiving a response to a + * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: + * chunked' headers that indicate the presence of a body. + * + * Returning `2` from on_headers_complete will tell parser that it should not + * expect neither a body nor any futher responses on this connection. 
This is + * useful for handling responses to a CONNECT request which may not contain + * `Upgrade` or `Connection: upgrade` headers. + * + * http_data_cb does not return data chunks. It will be called arbitrarily + * many times for each string. E.G. you might get 10 callbacks for "on_url" + * each providing just a few characters more data. + */ +typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); +typedef int (*http_cb) (http_parser*); + + +/* Status Codes */ +#define HTTP_STATUS_MAP(XX) \ + XX(100, CONTINUE, Continue) \ + XX(101, SWITCHING_PROTOCOLS, Switching Protocols) \ + XX(102, PROCESSING, Processing) \ + XX(200, OK, OK) \ + XX(201, CREATED, Created) \ + XX(202, ACCEPTED, Accepted) \ + XX(203, NON_AUTHORITATIVE_INFORMATION, Non-Authoritative Information) \ + XX(204, NO_CONTENT, No Content) \ + XX(205, RESET_CONTENT, Reset Content) \ + XX(206, PARTIAL_CONTENT, Partial Content) \ + XX(207, MULTI_STATUS, Multi-Status) \ + XX(208, ALREADY_REPORTED, Already Reported) \ + XX(226, IM_USED, IM Used) \ + XX(300, MULTIPLE_CHOICES, Multiple Choices) \ + XX(301, MOVED_PERMANENTLY, Moved Permanently) \ + XX(302, FOUND, Found) \ + XX(303, SEE_OTHER, See Other) \ + XX(304, NOT_MODIFIED, Not Modified) \ + XX(305, USE_PROXY, Use Proxy) \ + XX(307, TEMPORARY_REDIRECT, Temporary Redirect) \ + XX(308, PERMANENT_REDIRECT, Permanent Redirect) \ + XX(400, BAD_REQUEST, Bad Request) \ + XX(401, UNAUTHORIZED, Unauthorized) \ + XX(402, PAYMENT_REQUIRED, Payment Required) \ + XX(403, FORBIDDEN, Forbidden) \ + XX(404, NOT_FOUND, Not Found) \ + XX(405, METHOD_NOT_ALLOWED, Method Not Allowed) \ + XX(406, NOT_ACCEPTABLE, Not Acceptable) \ + XX(407, PROXY_AUTHENTICATION_REQUIRED, Proxy Authentication Required) \ + XX(408, REQUEST_TIMEOUT, Request Timeout) \ + XX(409, CONFLICT, Conflict) \ + XX(410, GONE, Gone) \ + XX(411, LENGTH_REQUIRED, Length Required) \ + XX(412, PRECONDITION_FAILED, Precondition Failed) \ + XX(413, PAYLOAD_TOO_LARGE, Payload Too Large) \ + XX(414, 
URI_TOO_LONG, URI Too Long) \ + XX(415, UNSUPPORTED_MEDIA_TYPE, Unsupported Media Type) \ + XX(416, RANGE_NOT_SATISFIABLE, Range Not Satisfiable) \ + XX(417, EXPECTATION_FAILED, Expectation Failed) \ + XX(421, MISDIRECTED_REQUEST, Misdirected Request) \ + XX(422, UNPROCESSABLE_ENTITY, Unprocessable Entity) \ + XX(423, LOCKED, Locked) \ + XX(424, FAILED_DEPENDENCY, Failed Dependency) \ + XX(426, UPGRADE_REQUIRED, Upgrade Required) \ + XX(428, PRECONDITION_REQUIRED, Precondition Required) \ + XX(429, TOO_MANY_REQUESTS, Too Many Requests) \ + XX(431, REQUEST_HEADER_FIELDS_TOO_LARGE, Request Header Fields Too Large) \ + XX(451, UNAVAILABLE_FOR_LEGAL_REASONS, Unavailable For Legal Reasons) \ + XX(500, INTERNAL_SERVER_ERROR, Internal Server Error) \ + XX(501, NOT_IMPLEMENTED, Not Implemented) \ + XX(502, BAD_GATEWAY, Bad Gateway) \ + XX(503, SERVICE_UNAVAILABLE, Service Unavailable) \ + XX(504, GATEWAY_TIMEOUT, Gateway Timeout) \ + XX(505, HTTP_VERSION_NOT_SUPPORTED, HTTP Version Not Supported) \ + XX(506, VARIANT_ALSO_NEGOTIATES, Variant Also Negotiates) \ + XX(507, INSUFFICIENT_STORAGE, Insufficient Storage) \ + XX(508, LOOP_DETECTED, Loop Detected) \ + XX(510, NOT_EXTENDED, Not Extended) \ + XX(511, NETWORK_AUTHENTICATION_REQUIRED, Network Authentication Required) \ + +enum http_status + { +#define XX(num, name, string) HTTP_STATUS_##name = num, + HTTP_STATUS_MAP(XX) +#undef XX + }; + + +/* Request Methods */ +#define HTTP_METHOD_MAP(XX) \ + XX(0, DELETE, DELETE) \ + XX(1, GET, GET) \ + XX(2, HEAD, HEAD) \ + XX(3, POST, POST) \ + XX(4, PUT, PUT) \ + /* pathological */ \ + XX(5, CONNECT, CONNECT) \ + XX(6, OPTIONS, OPTIONS) \ + XX(7, TRACE, TRACE) \ + /* WebDAV */ \ + XX(8, COPY, COPY) \ + XX(9, LOCK, LOCK) \ + XX(10, MKCOL, MKCOL) \ + XX(11, MOVE, MOVE) \ + XX(12, PROPFIND, PROPFIND) \ + XX(13, PROPPATCH, PROPPATCH) \ + XX(14, SEARCH, SEARCH) \ + XX(15, UNLOCK, UNLOCK) \ + XX(16, BIND, BIND) \ + XX(17, REBIND, REBIND) \ + XX(18, UNBIND, UNBIND) \ + XX(19, ACL, ACL) \ 
+ /* subversion */ \ + XX(20, REPORT, REPORT) \ + XX(21, MKACTIVITY, MKACTIVITY) \ + XX(22, CHECKOUT, CHECKOUT) \ + XX(23, MERGE, MERGE) \ + /* upnp */ \ + XX(24, MSEARCH, M-SEARCH) \ + XX(25, NOTIFY, NOTIFY) \ + XX(26, SUBSCRIBE, SUBSCRIBE) \ + XX(27, UNSUBSCRIBE, UNSUBSCRIBE) \ + /* RFC-5789 */ \ + XX(28, PATCH, PATCH) \ + XX(29, PURGE, PURGE) \ + /* CalDAV */ \ + XX(30, MKCALENDAR, MKCALENDAR) \ + /* RFC-2068, section 19.6.1.2 */ \ + XX(31, LINK, LINK) \ + XX(32, UNLINK, UNLINK) \ + /* icecast */ \ + XX(33, SOURCE, SOURCE) \ + +enum http_method + { +#define XX(num, name, string) HTTP_##name = num, + HTTP_METHOD_MAP(XX) +#undef XX + }; + + +enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; + + +/* Flag values for http_parser.flags field */ +enum flags + { F_CHUNKED = 1 << 0 + , F_CONNECTION_KEEP_ALIVE = 1 << 1 + , F_CONNECTION_CLOSE = 1 << 2 + , F_CONNECTION_UPGRADE = 1 << 3 + , F_TRAILING = 1 << 4 + , F_UPGRADE = 1 << 5 + , F_SKIPBODY = 1 << 6 + , F_CONTENTLENGTH = 1 << 7 + , F_TRANSFER_ENCODING = 1 << 8 /* Never set in http_parser.flags */ + }; + + +/* Map for errno-related constants + * + * The provided argument should be a macro that takes 2 arguments. 
+ */ +#define HTTP_ERRNO_MAP(XX) \ + /* No error */ \ + XX(OK, "success") \ + \ + /* Callback-related errors */ \ + XX(CB_message_begin, "the on_message_begin callback failed") \ + XX(CB_url, "the on_url callback failed") \ + XX(CB_header_field, "the on_header_field callback failed") \ + XX(CB_header_value, "the on_header_value callback failed") \ + XX(CB_headers_complete, "the on_headers_complete callback failed") \ + XX(CB_body, "the on_body callback failed") \ + XX(CB_message_complete, "the on_message_complete callback failed") \ + XX(CB_status, "the on_status callback failed") \ + XX(CB_chunk_header, "the on_chunk_header callback failed") \ + XX(CB_chunk_complete, "the on_chunk_complete callback failed") \ + \ + /* Parsing-related errors */ \ + XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ + XX(HEADER_OVERFLOW, \ + "too many header bytes seen; overflow detected") \ + XX(CLOSED_CONNECTION, \ + "data received after completed connection: close message") \ + XX(INVALID_VERSION, "invalid HTTP version") \ + XX(INVALID_STATUS, "invalid HTTP status code") \ + XX(INVALID_METHOD, "invalid HTTP method") \ + XX(INVALID_URL, "invalid URL") \ + XX(INVALID_HOST, "invalid host") \ + XX(INVALID_PORT, "invalid port") \ + XX(INVALID_PATH, "invalid path") \ + XX(INVALID_QUERY_STRING, "invalid query string") \ + XX(INVALID_FRAGMENT, "invalid fragment") \ + XX(LF_EXPECTED, "LF character expected") \ + XX(INVALID_HEADER_TOKEN, "invalid character in header") \ + XX(INVALID_CONTENT_LENGTH, \ + "invalid character in content-length header") \ + XX(UNEXPECTED_CONTENT_LENGTH, \ + "unexpected content-length header") \ + XX(INVALID_CHUNK_SIZE, \ + "invalid character in chunk size header") \ + XX(INVALID_CONSTANT, "invalid constant string") \ + XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ + XX(STRICT, "strict mode assertion failed") \ + XX(PAUSED, "parser is paused") \ + XX(UNKNOWN, "an unknown error occurred") \ + XX(INVALID_TRANSFER_ENCODING, \ + 
"request has invalid transfer-encoding") \ + + +/* Define HPE_* values for each errno value above */ +#define HTTP_ERRNO_GEN(n, s) HPE_##n, +enum http_errno { + HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) +}; +#undef HTTP_ERRNO_GEN + + +/* Get an http_errno value from an http_parser */ +#define HTTP_PARSER_ERRNO(p) ((enum http_errno) (p)->http_errno) + + +struct http_parser { + /** PRIVATE **/ + unsigned int type : 2; /* enum http_parser_type */ + unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */ + unsigned int state : 7; /* enum state from http_parser.c */ + unsigned int header_state : 7; /* enum header_state from http_parser.c */ + unsigned int index : 5; /* index into current matcher */ + unsigned int extra_flags : 2; + unsigned int lenient_http_headers : 1; + + uint32_t nread; /* # bytes read in various scenarios */ + uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */ + + /** READ-ONLY **/ + unsigned short http_major; + unsigned short http_minor; + unsigned int status_code : 16; /* responses only */ + unsigned int method : 8; /* requests only */ + unsigned int http_errno : 7; + + /* 1 = Upgrade header was present and the parser has exited because of that. + * 0 = No upgrade header present. + * Should be checked when http_parser_execute() returns in addition to + * error checking. + */ + unsigned int upgrade : 1; + + /** PUBLIC **/ + void *data; /* A pointer to get hook to the "connection" or "socket" object */ +}; + + +struct http_parser_settings { + http_cb on_message_begin; + http_data_cb on_url; + http_data_cb on_status; + http_data_cb on_header_field; + http_data_cb on_header_value; + http_cb on_headers_complete; + http_data_cb on_body; + http_cb on_message_complete; + /* When on_chunk_header is called, the current chunk length is stored + * in parser->content_length. 
+ */ + http_cb on_chunk_header; + http_cb on_chunk_complete; +}; + + +enum http_parser_url_fields + { UF_SCHEMA = 0 + , UF_HOST = 1 + , UF_PORT = 2 + , UF_PATH = 3 + , UF_QUERY = 4 + , UF_FRAGMENT = 5 + , UF_USERINFO = 6 + , UF_MAX = 7 + }; + + +/* Result structure for http_parser_parse_url(). + * + * Callers should index into field_data[] with UF_* values iff field_set + * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and + * because we probably have padding left over), we convert any port to + * a uint16_t. + */ +struct http_parser_url { + uint16_t field_set; /* Bitmask of (1 << UF_*) values */ + uint16_t port; /* Converted UF_PORT string */ + + struct { + uint16_t off; /* Offset into buffer in which field starts */ + uint16_t len; /* Length of run in buffer */ + } field_data[UF_MAX]; +}; + + +/* Returns the library version. Bits 16-23 contain the major version number, + * bits 8-15 the minor version number and bits 0-7 the patch level. + * Usage example: + * + * unsigned long version = http_parser_version(); + * unsigned major = (version >> 16) & 255; + * unsigned minor = (version >> 8) & 255; + * unsigned patch = version & 255; + * printf("http_parser v%u.%u.%u\n", major, minor, patch); + */ +unsigned long http_parser_version(void); + +void http_parser_init(http_parser *parser, enum http_parser_type type); + + +/* Initialize http_parser_settings members to 0 + */ +void http_parser_settings_init(http_parser_settings *settings); + + +/* Executes the parser. Returns number of parsed bytes. Sets + * `parser->http_errno` on error. */ +size_t http_parser_execute(http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len); + + +/* If http_should_keep_alive() in the on_headers_complete or + * on_message_complete callback returns 0, then this should be + * the last message on the connection. + * If you are the server, respond with the "Connection: close" header. + * If you are the client, close the connection. 
+ */ +int http_should_keep_alive(const http_parser *parser); + +/* Returns a string version of the HTTP method. */ +const char *http_method_str(enum http_method m); + +/* Returns a string version of the HTTP status code. */ +const char *http_status_str(enum http_status s); + +/* Return a string name of the given error */ +const char *http_errno_name(enum http_errno err); + +/* Return a string description of the given error */ +const char *http_errno_description(enum http_errno err); + +/* Initialize all http_parser_url members to 0 */ +void http_parser_url_init(struct http_parser_url *u); + +/* Parse a URL; return nonzero on failure */ +int http_parser_parse_url(const char *buf, size_t buflen, + int is_connect, + struct http_parser_url *u); + +/* Pause or un-pause the parser; a nonzero value pauses */ +void http_parser_pause(http_parser *parser, int paused); + +/* Checks if this is the final chunk of the body. */ +int http_body_is_final(const http_parser *parser); + +/* Change the maximum header size provided at compile time. */ +void http_parser_set_max_header_size(uint32_t size); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/http-parser/test.c b/http-parser/test.c new file mode 100644 index 0000000..0026a7f --- /dev/null +++ b/http-parser/test.c @@ -0,0 +1,4600 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "http_parser.h" +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> /* rand */ +#include <string.h> +#include <stdarg.h> + +#if defined(__APPLE__) +# undef strlncpy +#endif /* defined(__APPLE__) */ + +#undef TRUE +#define TRUE 1 +#undef FALSE +#define FALSE 0 + +#define MAX_HEADERS 13 +#define MAX_ELEMENT_SIZE 2048 +#define MAX_CHUNKS 16 + +#define MIN(a,b) ((a) < (b) ? (a) : (b)) + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*x)) + +static http_parser parser; + +struct message { + const char *name; // for debugging purposes + const char *raw; + enum http_parser_type type; + enum http_method method; + int status_code; + char response_status[MAX_ELEMENT_SIZE]; + char request_path[MAX_ELEMENT_SIZE]; + char request_url[MAX_ELEMENT_SIZE]; + char fragment[MAX_ELEMENT_SIZE]; + char query_string[MAX_ELEMENT_SIZE]; + char body[MAX_ELEMENT_SIZE]; + size_t body_size; + const char *host; + const char *userinfo; + uint16_t port; + int num_headers; + enum { NONE=0, FIELD, VALUE } last_header_element; + char headers [MAX_HEADERS][2][MAX_ELEMENT_SIZE]; + int should_keep_alive; + + int num_chunks; + int num_chunks_complete; + int chunk_lengths[MAX_CHUNKS]; + + const char *upgrade; // upgraded body + + unsigned short http_major; + unsigned short http_minor; + + int message_begin_cb_called; + int headers_complete_cb_called; + int message_complete_cb_called; + int status_cb_called; + int message_complete_on_eof; + int body_is_final; +}; + +static int 
currently_parsing_eof; + +static struct message messages[5]; +static int num_messages; +static http_parser_settings *current_pause_parser; + +/* * R E Q U E S T S * */ +const struct message requests[] = +#define CURL_GET 0 +{ {.name= "curl get" + ,.type= HTTP_REQUEST + ,.raw= "GET /test HTTP/1.1\r\n" + "User-Agent: curl/7.18.0 (i486-pc-linux-gnu) libcurl/7.18.0 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.1\r\n" + "Host: 0.0.0.0=5000\r\n" + "Accept: */*\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" + ,.request_url= "/test" + ,.num_headers= 3 + ,.headers= + { { "User-Agent", "curl/7.18.0 (i486-pc-linux-gnu) libcurl/7.18.0 OpenSSL/0.9.8g zlib/1.2.3.3 libidn/1.1" } + , { "Host", "0.0.0.0=5000" } + , { "Accept", "*/*" } + } + ,.body= "" + } + +#define FIREFOX_GET 1 +, {.name= "firefox get" + ,.type= HTTP_REQUEST + ,.raw= "GET /favicon.ico HTTP/1.1\r\n" + "Host: 0.0.0.0=5000\r\n" + "User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0\r\n" + "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8\r\n" + "Accept-Language: en-us,en;q=0.5\r\n" + "Accept-Encoding: gzip,deflate\r\n" + "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7\r\n" + "Keep-Alive: 300\r\n" + "Connection: keep-alive\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/favicon.ico" + ,.request_url= "/favicon.ico" + ,.num_headers= 8 + ,.headers= + { { "Host", "0.0.0.0=5000" } + , { "User-Agent", "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9) Gecko/2008061015 Firefox/3.0" } + , { "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" } + , { "Accept-Language", "en-us,en;q=0.5" } + , { "Accept-Encoding", "gzip,deflate" } + , { "Accept-Charset", 
"ISO-8859-1,utf-8;q=0.7,*;q=0.7" } + , { "Keep-Alive", "300" } + , { "Connection", "keep-alive" } + } + ,.body= "" + } + +#define DUMBLUCK 2 +, {.name= "dumbluck" + ,.type= HTTP_REQUEST + ,.raw= "GET /dumbluck HTTP/1.1\r\n" + "aaaaaaaaaaaaa:++++++++++\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/dumbluck" + ,.request_url= "/dumbluck" + ,.num_headers= 1 + ,.headers= + { { "aaaaaaaaaaaaa", "++++++++++" } + } + ,.body= "" + } + +#define FRAGMENT_IN_URI 3 +, {.name= "fragment in url" + ,.type= HTTP_REQUEST + ,.raw= "GET /forums/1/topics/2375?page=1#posts-17408 HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "page=1" + ,.fragment= "posts-17408" + ,.request_path= "/forums/1/topics/2375" + /* XXX request url does include fragment? */ + ,.request_url= "/forums/1/topics/2375?page=1#posts-17408" + ,.num_headers= 0 + ,.body= "" + } + +#define GET_NO_HEADERS_NO_BODY 4 +, {.name= "get no headers no body" + ,.type= HTTP_REQUEST + ,.raw= "GET /get_no_headers_no_body/world HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE /* would need Connection: close */ + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/get_no_headers_no_body/world" + ,.request_url= "/get_no_headers_no_body/world" + ,.num_headers= 0 + ,.body= "" + } + +#define GET_ONE_HEADER_NO_BODY 5 +, {.name= "get one header no body" + ,.type= HTTP_REQUEST + ,.raw= "GET /get_one_header_no_body HTTP/1.1\r\n" + "Accept: */*\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE /* would need Connection: close */ + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/get_one_header_no_body" + 
,.request_url= "/get_one_header_no_body" + ,.num_headers= 1 + ,.headers= + { { "Accept" , "*/*" } + } + ,.body= "" + } + +#define GET_FUNKY_CONTENT_LENGTH 6 +, {.name= "get funky content length body hello" + ,.type= HTTP_REQUEST + ,.raw= "GET /get_funky_content_length_body_hello HTTP/1.0\r\n" + "conTENT-Length: 5\r\n" + "\r\n" + "HELLO" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/get_funky_content_length_body_hello" + ,.request_url= "/get_funky_content_length_body_hello" + ,.num_headers= 1 + ,.headers= + { { "conTENT-Length" , "5" } + } + ,.body= "HELLO" + } + +#define POST_IDENTITY_BODY_WORLD 7 +, {.name= "post identity body world" + ,.type= HTTP_REQUEST + ,.raw= "POST /post_identity_body_world?q=search#hey HTTP/1.1\r\n" + "Accept: */*\r\n" + "Content-Length: 5\r\n" + "\r\n" + "World" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "q=search" + ,.fragment= "hey" + ,.request_path= "/post_identity_body_world" + ,.request_url= "/post_identity_body_world?q=search#hey" + ,.num_headers= 2 + ,.headers= + { { "Accept", "*/*" } + , { "Content-Length", "5" } + } + ,.body= "World" + } + +#define POST_CHUNKED_ALL_YOUR_BASE 8 +, {.name= "post - chunked body: all your base are belong to us" + ,.type= HTTP_REQUEST + ,.raw= "POST /post_chunked_all_your_base HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "1e\r\nall your base are belong to us\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/post_chunked_all_your_base" + ,.request_url= "/post_chunked_all_your_base" + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding" , "chunked" } + } + ,.body= "all your base are belong to us" + 
,.num_chunks_complete= 2 + ,.chunk_lengths= { 0x1e } + } + +#define TWO_CHUNKS_MULT_ZERO_END 9 +, {.name= "two chunks ; triple zero ending" + ,.type= HTTP_REQUEST + ,.raw= "POST /two_chunks_mult_zero_end HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "5\r\nhello\r\n" + "6\r\n world\r\n" + "000\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/two_chunks_mult_zero_end" + ,.request_url= "/two_chunks_mult_zero_end" + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding", "chunked" } + } + ,.body= "hello world" + ,.num_chunks_complete= 3 + ,.chunk_lengths= { 5, 6 } + } + +#define CHUNKED_W_TRAILING_HEADERS 10 +, {.name= "chunked with trailing headers. blech." + ,.type= HTTP_REQUEST + ,.raw= "POST /chunked_w_trailing_headers HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "5\r\nhello\r\n" + "6\r\n world\r\n" + "0\r\n" + "Vary: *\r\n" + "Content-Type: text/plain\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/chunked_w_trailing_headers" + ,.request_url= "/chunked_w_trailing_headers" + ,.num_headers= 3 + ,.headers= + { { "Transfer-Encoding", "chunked" } + , { "Vary", "*" } + , { "Content-Type", "text/plain" } + } + ,.body= "hello world" + ,.num_chunks_complete= 3 + ,.chunk_lengths= { 5, 6 } + } + +#define CHUNKED_W_NONSENSE_AFTER_LENGTH 11 +, {.name= "with nonsense after the length" + ,.type= HTTP_REQUEST + ,.raw= "POST /chunked_w_nonsense_after_length HTTP/1.1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "5; ilovew3;whattheluck=aretheseparametersfor\r\nhello\r\n" + "6; blahblah; blah\r\n world\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" 
+ ,.fragment= "" + ,.request_path= "/chunked_w_nonsense_after_length" + ,.request_url= "/chunked_w_nonsense_after_length" + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding", "chunked" } + } + ,.body= "hello world" + ,.num_chunks_complete= 3 + ,.chunk_lengths= { 5, 6 } + } + +#define WITH_QUOTES 12 +, {.name= "with quotes" + ,.type= HTTP_REQUEST + ,.raw= "GET /with_\"stupid\"_quotes?foo=\"bar\" HTTP/1.1\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "foo=\"bar\"" + ,.fragment= "" + ,.request_path= "/with_\"stupid\"_quotes" + ,.request_url= "/with_\"stupid\"_quotes?foo=\"bar\"" + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define APACHEBENCH_GET 13 +/* The server receiving this request SHOULD NOT wait for EOF + * to know that content-length == 0. + * How to represent this in a unit test? message_complete_on_eof + * Compare with NO_CONTENT_LENGTH_RESPONSE. + */ +, {.name = "apachebench get" + ,.type= HTTP_REQUEST + ,.raw= "GET /test HTTP/1.0\r\n" + "Host: 0.0.0.0:5000\r\n" + "User-Agent: ApacheBench/2.3\r\n" + "Accept: */*\r\n\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" + ,.request_url= "/test" + ,.num_headers= 3 + ,.headers= { { "Host", "0.0.0.0:5000" } + , { "User-Agent", "ApacheBench/2.3" } + , { "Accept", "*/*" } + } + ,.body= "" + } + +#define QUERY_URL_WITH_QUESTION_MARK_GET 14 +/* Some clients include '?' characters in query strings. 
+ */ +, {.name = "query url with question mark" + ,.type= HTTP_REQUEST + ,.raw= "GET /test.cgi?foo=bar?baz HTTP/1.1\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "foo=bar?baz" + ,.fragment= "" + ,.request_path= "/test.cgi" + ,.request_url= "/test.cgi?foo=bar?baz" + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define PREFIX_NEWLINE_GET 15 +/* Some clients, especially after a POST in a keep-alive connection, + * will send an extra CRLF before the next request + */ +, {.name = "newline prefix get" + ,.type= HTTP_REQUEST + ,.raw= "\r\nGET /test HTTP/1.1\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" + ,.request_url= "/test" + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define UPGRADE_REQUEST 16 +, {.name = "upgrade request" + ,.type= HTTP_REQUEST + ,.raw= "GET /demo HTTP/1.1\r\n" + "Host: example.com\r\n" + "Connection: Upgrade\r\n" + "Sec-WebSocket-Key2: 12998 5 Y3 1 .P00\r\n" + "Sec-WebSocket-Protocol: sample\r\n" + "Upgrade: WebSocket\r\n" + "Sec-WebSocket-Key1: 4 @1 46546xW%0l 1 5\r\n" + "Origin: http://example.com\r\n" + "\r\n" + "Hot diggity dogg" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/demo" + ,.request_url= "/demo" + ,.num_headers= 7 + ,.upgrade="Hot diggity dogg" + ,.headers= { { "Host", "example.com" } + , { "Connection", "Upgrade" } + , { "Sec-WebSocket-Key2", "12998 5 Y3 1 .P00" } + , { "Sec-WebSocket-Protocol", "sample" } + , { "Upgrade", "WebSocket" } + , { "Sec-WebSocket-Key1", "4 @1 46546xW%0l 1 5" } + , { "Origin", "http://example.com" } + } + ,.body= "" + } + +#define CONNECT_REQUEST 17 +, {.name = "connect request" + ,.type= HTTP_REQUEST + ,.raw= 
"CONNECT 0-home0.netscape.com:443 HTTP/1.0\r\n" + "User-agent: Mozilla/1.1N\r\n" + "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n" + "\r\n" + "some data\r\n" + "and yet even more data" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_CONNECT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "0-home0.netscape.com:443" + ,.num_headers= 2 + ,.upgrade="some data\r\nand yet even more data" + ,.headers= { { "User-agent", "Mozilla/1.1N" } + , { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" } + } + ,.body= "" + } + +#define REPORT_REQ 18 +, {.name= "report request" + ,.type= HTTP_REQUEST + ,.raw= "REPORT /test HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_REPORT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/test" + ,.request_url= "/test" + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define NO_HTTP_VERSION 19 +, {.name= "request with no http version" + ,.type= HTTP_REQUEST + ,.raw= "GET /\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 0 + ,.http_minor= 9 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define MSEARCH_REQ 20 +, {.name= "m-search request" + ,.type= HTTP_REQUEST + ,.raw= "M-SEARCH * HTTP/1.1\r\n" + "HOST: 239.255.255.250:1900\r\n" + "MAN: \"ssdp:discover\"\r\n" + "ST: \"ssdp:all\"\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_MSEARCH + ,.query_string= "" + ,.fragment= "" + ,.request_path= "*" + ,.request_url= "*" + ,.num_headers= 3 + ,.headers= { { "HOST", "239.255.255.250:1900" } + , { "MAN", "\"ssdp:discover\"" } + , { "ST", "\"ssdp:all\"" } + } + ,.body= "" + } + +#define LINE_FOLDING_IN_HEADER 21 +, {.name= 
"line folding in header value" + ,.type= HTTP_REQUEST + ,.raw= "GET / HTTP/1.1\r\n" + "Line1: abc\r\n" + "\tdef\r\n" + " ghi\r\n" + "\t\tjkl\r\n" + " mno \r\n" + "\t \tqrs\r\n" + "Line2: \t line2\t\r\n" + "Line3:\r\n" + " line3\r\n" + "Line4: \r\n" + " \r\n" + "Connection:\r\n" + " close\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 5 + ,.headers= { { "Line1", "abc\tdef ghi\t\tjkl mno \t \tqrs" } + , { "Line2", "line2\t" } + , { "Line3", "line3" } + , { "Line4", "" } + , { "Connection", "close" }, + } + ,.body= "" + } + + +#define QUERY_TERMINATED_HOST 22 +, {.name= "host terminated by a query string" + ,.type= HTTP_REQUEST + ,.raw= "GET http://hypnotoad.org?hail=all HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "hail=all" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "http://hypnotoad.org?hail=all" + ,.host= "hypnotoad.org" + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define QUERY_TERMINATED_HOSTPORT 23 +, {.name= "host:port terminated by a query string" + ,.type= HTTP_REQUEST + ,.raw= "GET http://hypnotoad.org:1234?hail=all HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "hail=all" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "http://hypnotoad.org:1234?hail=all" + ,.host= "hypnotoad.org" + ,.port= 1234 + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define SPACE_TERMINATED_HOSTPORT 24 +, {.name= "host:port terminated by a space" + ,.type= HTTP_REQUEST + ,.raw= "GET http://hypnotoad.org:1234 HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + 
,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "http://hypnotoad.org:1234" + ,.host= "hypnotoad.org" + ,.port= 1234 + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define PATCH_REQ 25 +, {.name = "PATCH request" + ,.type= HTTP_REQUEST + ,.raw= "PATCH /file.txt HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "Content-Type: application/example\r\n" + "If-Match: \"e0023aa4e\"\r\n" + "Content-Length: 10\r\n" + "\r\n" + "cccccccccc" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_PATCH + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/file.txt" + ,.request_url= "/file.txt" + ,.num_headers= 4 + ,.headers= { { "Host", "www.example.com" } + , { "Content-Type", "application/example" } + , { "If-Match", "\"e0023aa4e\"" } + , { "Content-Length", "10" } + } + ,.body= "cccccccccc" + } + +#define CONNECT_CAPS_REQUEST 26 +, {.name = "connect caps request" + ,.type= HTTP_REQUEST + ,.raw= "CONNECT HOME0.NETSCAPE.COM:443 HTTP/1.0\r\n" + "User-agent: Mozilla/1.1N\r\n" + "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_CONNECT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "HOME0.NETSCAPE.COM:443" + ,.num_headers= 2 + ,.upgrade="" + ,.headers= { { "User-agent", "Mozilla/1.1N" } + , { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" } + } + ,.body= "" + } + +#if !HTTP_PARSER_STRICT +#define UTF8_PATH_REQ 27 +, {.name= "utf-8 path request" + ,.type= HTTP_REQUEST + ,.raw= "GET /δ¶/δt/pope?q=1#narf HTTP/1.1\r\n" + "Host: github.com\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "q=1" + ,.fragment= "narf" + ,.request_path= "/δ¶/δt/pope" + ,.request_url= "/δ¶/δt/pope?q=1#narf" + ,.num_headers= 1 + 
,.headers= { {"Host", "github.com" } + } + ,.body= "" + } + +#define HOSTNAME_UNDERSCORE 28 +, {.name = "hostname underscore" + ,.type= HTTP_REQUEST + ,.raw= "CONNECT home_0.netscape.com:443 HTTP/1.0\r\n" + "User-agent: Mozilla/1.1N\r\n" + "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_CONNECT + ,.query_string= "" + ,.fragment= "" + ,.request_path= "" + ,.request_url= "home_0.netscape.com:443" + ,.num_headers= 2 + ,.upgrade="" + ,.headers= { { "User-agent", "Mozilla/1.1N" } + , { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" } + } + ,.body= "" + } +#endif /* !HTTP_PARSER_STRICT */ + +/* see https://github.com/ry/http-parser/issues/47 */ +#define EAT_TRAILING_CRLF_NO_CONNECTION_CLOSE 29 +, {.name = "eat CRLF between requests, no \"Connection: close\" header" + ,.raw= "POST / HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "Content-Type: application/x-www-form-urlencoded\r\n" + "Content-Length: 4\r\n" + "\r\n" + "q=42\r\n" /* note the trailing CRLF */ + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 3 + ,.upgrade= 0 + ,.headers= { { "Host", "www.example.com" } + , { "Content-Type", "application/x-www-form-urlencoded" } + , { "Content-Length", "4" } + } + ,.body= "q=42" + } + +/* see https://github.com/ry/http-parser/issues/47 */ +#define EAT_TRAILING_CRLF_WITH_CONNECTION_CLOSE 30 +, {.name = "eat CRLF between requests even if \"Connection: close\" is set" + ,.raw= "POST / HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "Content-Type: application/x-www-form-urlencoded\r\n" + "Content-Length: 4\r\n" + "Connection: close\r\n" + "\r\n" + "q=42\r\n" /* note the trailing CRLF */ + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE /* input buffer isn't empty when 
on_message_complete is called */ + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 4 + ,.upgrade= 0 + ,.headers= { { "Host", "www.example.com" } + , { "Content-Type", "application/x-www-form-urlencoded" } + , { "Content-Length", "4" } + , { "Connection", "close" } + } + ,.body= "q=42" + } + +#define PURGE_REQ 31 +, {.name = "PURGE request" + ,.type= HTTP_REQUEST + ,.raw= "PURGE /file.txt HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_PURGE + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/file.txt" + ,.request_url= "/file.txt" + ,.num_headers= 1 + ,.headers= { { "Host", "www.example.com" } } + ,.body= "" + } + +#define SEARCH_REQ 32 +, {.name = "SEARCH request" + ,.type= HTTP_REQUEST + ,.raw= "SEARCH / HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_SEARCH + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 1 + ,.headers= { { "Host", "www.example.com" } } + ,.body= "" + } + +#define PROXY_WITH_BASIC_AUTH 33 +, {.name= "host:port and basic_auth" + ,.type= HTTP_REQUEST + ,.raw= "GET http://a%12:b!&*$@hypnotoad.org:1234/toto HTTP/1.1\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.fragment= "" + ,.request_path= "/toto" + ,.request_url= "http://a%12:b!&*$@hypnotoad.org:1234/toto" + ,.host= "hypnotoad.org" + ,.userinfo= "a%12:b!&*$" + ,.port= 1234 + ,.num_headers= 0 + ,.headers= { } + ,.body= "" + } + +#define LINE_FOLDING_IN_HEADER_WITH_LF 34 +, {.name= "line folding in header value" + ,.type= HTTP_REQUEST + ,.raw= "GET / HTTP/1.1\n" + "Line1: abc\n" + "\tdef\n" + " ghi\n" + 
"\t\tjkl\n" + " mno \n" + "\t \tqrs\n" + "Line2: \t line2\t\n" + "Line3:\n" + " line3\n" + "Line4: \n" + " \n" + "Connection:\n" + " close\n" + "\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 5 + ,.headers= { { "Line1", "abc\tdef ghi\t\tjkl mno \t \tqrs" } + , { "Line2", "line2\t" } + , { "Line3", "line3" } + , { "Line4", "" } + , { "Connection", "close" }, + } + ,.body= "" + } + +#define CONNECTION_MULTI 35 +, {.name = "multiple connection header values with folding" + ,.type= HTTP_REQUEST + ,.raw= "GET /demo HTTP/1.1\r\n" + "Host: example.com\r\n" + "Connection: Something,\r\n" + " Upgrade, ,Keep-Alive\r\n" + "Sec-WebSocket-Key2: 12998 5 Y3 1 .P00\r\n" + "Sec-WebSocket-Protocol: sample\r\n" + "Upgrade: WebSocket\r\n" + "Sec-WebSocket-Key1: 4 @1 46546xW%0l 1 5\r\n" + "Origin: http://example.com\r\n" + "\r\n" + "Hot diggity dogg" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/demo" + ,.request_url= "/demo" + ,.num_headers= 7 + ,.upgrade="Hot diggity dogg" + ,.headers= { { "Host", "example.com" } + , { "Connection", "Something, Upgrade, ,Keep-Alive" } + , { "Sec-WebSocket-Key2", "12998 5 Y3 1 .P00" } + , { "Sec-WebSocket-Protocol", "sample" } + , { "Upgrade", "WebSocket" } + , { "Sec-WebSocket-Key1", "4 @1 46546xW%0l 1 5" } + , { "Origin", "http://example.com" } + } + ,.body= "" + } + +#define CONNECTION_MULTI_LWS 36 +, {.name = "multiple connection header values with folding and lws" + ,.type= HTTP_REQUEST + ,.raw= "GET /demo HTTP/1.1\r\n" + "Connection: keep-alive, upgrade\r\n" + "Upgrade: WebSocket\r\n" + "\r\n" + "Hot diggity dogg" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + 
,.query_string= "" + ,.fragment= "" + ,.request_path= "/demo" + ,.request_url= "/demo" + ,.num_headers= 2 + ,.upgrade="Hot diggity dogg" + ,.headers= { { "Connection", "keep-alive, upgrade" } + , { "Upgrade", "WebSocket" } + } + ,.body= "" + } + +#define CONNECTION_MULTI_LWS_CRLF 37 +, {.name = "multiple connection header values with folding and lws" + ,.type= HTTP_REQUEST + ,.raw= "GET /demo HTTP/1.1\r\n" + "Connection: keep-alive, \r\n upgrade\r\n" + "Upgrade: WebSocket\r\n" + "\r\n" + "Hot diggity dogg" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_GET + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/demo" + ,.request_url= "/demo" + ,.num_headers= 2 + ,.upgrade="Hot diggity dogg" + ,.headers= { { "Connection", "keep-alive, upgrade" } + , { "Upgrade", "WebSocket" } + } + ,.body= "" + } + +#define UPGRADE_POST_REQUEST 38 +, {.name = "upgrade post request" + ,.type= HTTP_REQUEST + ,.raw= "POST /demo HTTP/1.1\r\n" + "Host: example.com\r\n" + "Connection: Upgrade\r\n" + "Upgrade: HTTP/2.0\r\n" + "Content-Length: 15\r\n" + "\r\n" + "sweet post body" + "Hot diggity dogg" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.request_path= "/demo" + ,.request_url= "/demo" + ,.num_headers= 4 + ,.upgrade="Hot diggity dogg" + ,.headers= { { "Host", "example.com" } + , { "Connection", "Upgrade" } + , { "Upgrade", "HTTP/2.0" } + , { "Content-Length", "15" } + } + ,.body= "sweet post body" + } + +#define CONNECT_WITH_BODY_REQUEST 39 +, {.name = "connect with body request" + ,.type= HTTP_REQUEST + ,.raw= "CONNECT foo.bar.com:443 HTTP/1.0\r\n" + "User-agent: Mozilla/1.1N\r\n" + "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n" + "Content-Length: 10\r\n" + "\r\n" + "blarfcicle" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_CONNECT + ,.request_url= 
"foo.bar.com:443" + ,.num_headers= 3 + ,.upgrade="blarfcicle" + ,.headers= { { "User-agent", "Mozilla/1.1N" } + , { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" } + , { "Content-Length", "10" } + } + ,.body= "" + } + +/* Examples from the Internet draft for LINK/UNLINK methods: + * https://tools.ietf.org/id/draft-snell-link-method-01.html#rfc.section.5 + */ + +#define LINK_REQUEST 40 +, {.name = "link request" + ,.type= HTTP_REQUEST + ,.raw= "LINK /images/my_dog.jpg HTTP/1.1\r\n" + "Host: example.com\r\n" + "Link: <http://example.com/profiles/joe>; rel=\"tag\"\r\n" + "Link: <http://example.com/profiles/sally>; rel=\"tag\"\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_LINK + ,.request_path= "/images/my_dog.jpg" + ,.request_url= "/images/my_dog.jpg" + ,.query_string= "" + ,.fragment= "" + ,.num_headers= 3 + ,.headers= { { "Host", "example.com" } + , { "Link", "<http://example.com/profiles/joe>; rel=\"tag\"" } + , { "Link", "<http://example.com/profiles/sally>; rel=\"tag\"" } + } + ,.body= "" + } + +#define UNLINK_REQUEST 41 +, {.name = "unlink request" + ,.type= HTTP_REQUEST + ,.raw= "UNLINK /images/my_dog.jpg HTTP/1.1\r\n" + "Host: example.com\r\n" + "Link: <http://example.com/profiles/sally>; rel=\"tag\"\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_UNLINK + ,.request_path= "/images/my_dog.jpg" + ,.request_url= "/images/my_dog.jpg" + ,.query_string= "" + ,.fragment= "" + ,.num_headers= 2 + ,.headers= { { "Host", "example.com" } + , { "Link", "<http://example.com/profiles/sally>; rel=\"tag\"" } + } + ,.body= "" + } + +#define SOURCE_REQUEST 42 +, {.name = "source request" + ,.type= HTTP_REQUEST + ,.raw= "SOURCE /music/sweet/music HTTP/1.1\r\n" + "Host: example.com\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_SOURCE 
+ ,.request_path= "/music/sweet/music" + ,.request_url= "/music/sweet/music" + ,.query_string= "" + ,.fragment= "" + ,.num_headers= 1 + ,.headers= { { "Host", "example.com" } } + ,.body= "" + } + +#define SOURCE_ICE_REQUEST 43 +, {.name = "source request" + ,.type= HTTP_REQUEST + ,.raw= "SOURCE /music/sweet/music ICE/1.0\r\n" + "Host: example.com\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.method= HTTP_SOURCE + ,.request_path= "/music/sweet/music" + ,.request_url= "/music/sweet/music" + ,.query_string= "" + ,.fragment= "" + ,.num_headers= 1 + ,.headers= { { "Host", "example.com" } } + ,.body= "" + } + +#define POST_MULTI_TE_LAST_CHUNKED 44 +, {.name= "post - multi coding transfer-encoding chunked body" + ,.type= HTTP_REQUEST + ,.raw= "POST / HTTP/1.1\r\n" + "Transfer-Encoding: deflate, chunked\r\n" + "\r\n" + "1e\r\nall your base are belong to us\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding" , "deflate, chunked" } + } + ,.body= "all your base are belong to us" + ,.num_chunks_complete= 2 + ,.chunk_lengths= { 0x1e } + } + +#define POST_MULTI_LINE_TE_LAST_CHUNKED 45 +, {.name= "post - multi line coding transfer-encoding chunked body" + ,.type= HTTP_REQUEST + ,.raw= "POST / HTTP/1.1\r\n" + "Transfer-Encoding: deflate,\r\n" + " chunked\r\n" + "\r\n" + "1e\r\nall your base are belong to us\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.method= HTTP_POST + ,.query_string= "" + ,.fragment= "" + ,.request_path= "/" + ,.request_url= "/" + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding" , "deflate, chunked" } + } + ,.body= "all your base are belong to us" + ,.num_chunks_complete= 2 + 
,.chunk_lengths= { 0x1e } + } +}; + +/* * R E S P O N S E S * */ +const struct message responses[] = +#define GOOGLE_301 0 +{ {.name= "google 301" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 301 Moved Permanently\r\n" + "Location: http://www.google.com/\r\n" + "Content-Type: text/html; charset=UTF-8\r\n" + "Date: Sun, 26 Apr 2009 11:11:49 GMT\r\n" + "Expires: Tue, 26 May 2009 11:11:49 GMT\r\n" + "X-$PrototypeBI-Version: 1.6.0.3\r\n" /* $ char in header field */ + "Cache-Control: public, max-age=2592000\r\n" + "Server: gws\r\n" + "Content-Length: 219 \r\n" + "\r\n" + "<HTML><HEAD><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n" + "<TITLE>301 Moved</TITLE></HEAD><BODY>\n" + "<H1>301 Moved</H1>\n" + "The document has moved\n" + "<A HREF=\"http://www.google.com/\">here</A>.\r\n" + "</BODY></HTML>\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 301 + ,.response_status= "Moved Permanently" + ,.num_headers= 8 + ,.headers= + { { "Location", "http://www.google.com/" } + , { "Content-Type", "text/html; charset=UTF-8" } + , { "Date", "Sun, 26 Apr 2009 11:11:49 GMT" } + , { "Expires", "Tue, 26 May 2009 11:11:49 GMT" } + , { "X-$PrototypeBI-Version", "1.6.0.3" } + , { "Cache-Control", "public, max-age=2592000" } + , { "Server", "gws" } + , { "Content-Length", "219 " } + } + ,.body= "<HTML><HEAD><meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n" + "<TITLE>301 Moved</TITLE></HEAD><BODY>\n" + "<H1>301 Moved</H1>\n" + "The document has moved\n" + "<A HREF=\"http://www.google.com/\">here</A>.\r\n" + "</BODY></HTML>\r\n" + } + +#define NO_CONTENT_LENGTH_RESPONSE 1 +/* The client should wait for the server's EOF. That is, when content-length + * is not specified, and "Connection: close", the end of body is specified + * by the EOF. 
+ * Compare with APACHEBENCH_GET + */ +, {.name= "no content-length response" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Date: Tue, 04 Aug 2009 07:59:32 GMT\r\n" + "Server: Apache\r\n" + "X-Powered-By: Servlet/2.5 JSP/2.1\r\n" + "Content-Type: text/xml; charset=utf-8\r\n" + "Connection: close\r\n" + "\r\n" + "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<SOAP-ENV:Envelope xmlns:SOAP-ENV=\"http://schemas.xmlsoap.org/soap/envelope/\">\n" + " <SOAP-ENV:Body>\n" + " <SOAP-ENV:Fault>\n" + " <faultcode>SOAP-ENV:Client</faultcode>\n" + " <faultstring>Client Error</faultstring>\n" + " </SOAP-ENV:Fault>\n" + " </SOAP-ENV:Body>\n" + "</SOAP-ENV:Envelope>" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 5 + ,.headers= + { { "Date", "Tue, 04 Aug 2009 07:59:32 GMT" } + , { "Server", "Apache" } + , { "X-Powered-By", "Servlet/2.5 JSP/2.1" } + , { "Content-Type", "text/xml; charset=utf-8" } + , { "Connection", "close" } + } + ,.body= "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<SOAP-ENV:Envelope xmlns:SOAP-ENV=\"http://schemas.xmlsoap.org/soap/envelope/\">\n" + " <SOAP-ENV:Body>\n" + " <SOAP-ENV:Fault>\n" + " <faultcode>SOAP-ENV:Client</faultcode>\n" + " <faultstring>Client Error</faultstring>\n" + " </SOAP-ENV:Fault>\n" + " </SOAP-ENV:Body>\n" + "</SOAP-ENV:Envelope>" + } + +#define NO_HEADERS_NO_BODY_404 2 +, {.name= "404 no headers no body" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 404 Not Found\r\n\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 404 + ,.response_status= "Not Found" + ,.num_headers= 0 + ,.headers= {} + ,.body_size= 0 + ,.body= "" + } + +#define NO_REASON_PHRASE 3 +, {.name= "301 no response phrase" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 301\r\n\r\n" + ,.should_keep_alive = FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 
+ ,.status_code= 301 + ,.response_status= "" + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define TRAILING_SPACE_ON_CHUNKED_BODY 4 +, {.name="200 trailing space on chunked body" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Content-Type: text/plain\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "25 \r\n" + "This is the data in the first chunk\r\n" + "\r\n" + "1C\r\n" + "and this is the second one\r\n" + "\r\n" + "0 \r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 2 + ,.headers= + { {"Content-Type", "text/plain" } + , {"Transfer-Encoding", "chunked" } + } + ,.body_size = 37+28 + ,.body = + "This is the data in the first chunk\r\n" + "and this is the second one\r\n" + ,.num_chunks_complete= 3 + ,.chunk_lengths= { 0x25, 0x1c } + } + +#define NO_CARRIAGE_RET 5 +, {.name="no carriage ret" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\n" + "Content-Type: text/html; charset=utf-8\n" + "Connection: close\n" + "\n" + "these headers are from http://news.ycombinator.com/" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 2 + ,.headers= + { {"Content-Type", "text/html; charset=utf-8" } + , {"Connection", "close" } + } + ,.body= "these headers are from http://news.ycombinator.com/" + } + +#define PROXY_CONNECTION 6 +, {.name="proxy connection" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Content-Type: text/html; charset=UTF-8\r\n" + "Content-Length: 11\r\n" + "Proxy-Connection: close\r\n" + "Date: Thu, 31 Dec 2009 20:55:48 +0000\r\n" + "\r\n" + "hello world" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 4 + ,.headers= + { {"Content-Type", "text/html; charset=UTF-8" } + , 
{"Content-Length", "11" } + , {"Proxy-Connection", "close" } + , {"Date", "Thu, 31 Dec 2009 20:55:48 +0000"} + } + ,.body= "hello world" + } + +#define UNDERSTORE_HEADER_KEY 7 + // shown by + // curl -o /dev/null -v "http://ad.doubleclick.net/pfadx/DARTSHELLCONFIGXML;dcmt=text/xml;" +, {.name="underscore header key" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Server: DCLK-AdSvr\r\n" + "Content-Type: text/xml\r\n" + "Content-Length: 0\r\n" + "DCLK_imp: v7;x;114750856;0-0;0;17820020;0/0;21603567/21621457/1;;~okv=;dcmt=text/xml;;~cs=o\r\n\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 4 + ,.headers= + { {"Server", "DCLK-AdSvr" } + , {"Content-Type", "text/xml" } + , {"Content-Length", "0" } + , {"DCLK_imp", "v7;x;114750856;0-0;0;17820020;0/0;21603567/21621457/1;;~okv=;dcmt=text/xml;;~cs=o" } + } + ,.body= "" + } + +#define BONJOUR_MADAME_FR 8 +/* The client should not merge two headers fields when the first one doesn't + * have a value. 
+ */ +, {.name= "bonjourmadame.fr" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.0 301 Moved Permanently\r\n" + "Date: Thu, 03 Jun 2010 09:56:32 GMT\r\n" + "Server: Apache/2.2.3 (Red Hat)\r\n" + "Cache-Control: public\r\n" + "Pragma: \r\n" + "Location: http://www.bonjourmadame.fr/\r\n" + "Vary: Accept-Encoding\r\n" + "Content-Length: 0\r\n" + "Content-Type: text/html; charset=UTF-8\r\n" + "Connection: keep-alive\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.status_code= 301 + ,.response_status= "Moved Permanently" + ,.num_headers= 9 + ,.headers= + { { "Date", "Thu, 03 Jun 2010 09:56:32 GMT" } + , { "Server", "Apache/2.2.3 (Red Hat)" } + , { "Cache-Control", "public" } + , { "Pragma", "" } + , { "Location", "http://www.bonjourmadame.fr/" } + , { "Vary", "Accept-Encoding" } + , { "Content-Length", "0" } + , { "Content-Type", "text/html; charset=UTF-8" } + , { "Connection", "keep-alive" } + } + ,.body= "" + } + +#define RES_FIELD_UNDERSCORE 9 +/* Should handle spaces in header fields */ +, {.name= "field underscore" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Date: Tue, 28 Sep 2010 01:14:13 GMT\r\n" + "Server: Apache\r\n" + "Cache-Control: no-cache, must-revalidate\r\n" + "Expires: Mon, 26 Jul 1997 05:00:00 GMT\r\n" + ".et-Cookie: PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com\r\n" + "Vary: Accept-Encoding\r\n" + "_eep-Alive: timeout=45\r\n" /* semantic value ignored */ + "_onnection: Keep-Alive\r\n" /* semantic value ignored */ + "Transfer-Encoding: chunked\r\n" + "Content-Type: text/html\r\n" + "Connection: close\r\n" + "\r\n" + "0\r\n\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 11 + ,.headers= + { { "Date", "Tue, 28 Sep 2010 01:14:13 GMT" } + , { "Server", "Apache" } + , { "Cache-Control", "no-cache, must-revalidate" } + , { "Expires", "Mon, 26 Jul 1997 
05:00:00 GMT" } + , { ".et-Cookie", "PlaxoCS=1274804622353690521; path=/; domain=.plaxo.com" } + , { "Vary", "Accept-Encoding" } + , { "_eep-Alive", "timeout=45" } + , { "_onnection", "Keep-Alive" } + , { "Transfer-Encoding", "chunked" } + , { "Content-Type", "text/html" } + , { "Connection", "close" } + } + ,.body= "" + ,.num_chunks_complete= 1 + ,.chunk_lengths= {} + } + +#define NON_ASCII_IN_STATUS_LINE 10 +/* Should handle non-ASCII in status line */ +, {.name= "non-ASCII in status line" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 500 Oriëntatieprobleem\r\n" + "Date: Fri, 5 Nov 2010 23:07:12 GMT+2\r\n" + "Content-Length: 0\r\n" + "Connection: close\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 500 + ,.response_status= "Oriëntatieprobleem" + ,.num_headers= 3 + ,.headers= + { { "Date", "Fri, 5 Nov 2010 23:07:12 GMT+2" } + , { "Content-Length", "0" } + , { "Connection", "close" } + } + ,.body= "" + } + +#define HTTP_VERSION_0_9 11 +/* Should handle HTTP/0.9 */ +, {.name= "http version 0.9" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/0.9 200 OK\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 0 + ,.http_minor= 9 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 0 + ,.headers= + {} + ,.body= "" + } + +#define NO_CONTENT_LENGTH_NO_TRANSFER_ENCODING_RESPONSE 12 +/* The client should wait for the server's EOF. That is, when neither + * content-length nor transfer-encoding is specified, the end of body + * is specified by the EOF. 
+ */ +, {.name= "neither content-length nor transfer-encoding response" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Content-Type: text/plain\r\n" + "\r\n" + "hello world" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 1 + ,.headers= + { { "Content-Type", "text/plain" } + } + ,.body= "hello world" + } + +#define NO_BODY_HTTP10_KA_200 13 +, {.name= "HTTP/1.0 with keep-alive and EOF-terminated 200 status" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.0 200 OK\r\n" + "Connection: keep-alive\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 0 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 1 + ,.headers= + { { "Connection", "keep-alive" } + } + ,.body_size= 0 + ,.body= "" + } + +#define NO_BODY_HTTP10_KA_204 14 +, {.name= "HTTP/1.0 with keep-alive and a 204 status" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.0 204 No content\r\n" + "Connection: keep-alive\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.status_code= 204 + ,.response_status= "No content" + ,.num_headers= 1 + ,.headers= + { { "Connection", "keep-alive" } + } + ,.body_size= 0 + ,.body= "" + } + +#define NO_BODY_HTTP11_KA_200 15 +, {.name= "HTTP/1.1 with an EOF-terminated 200 status" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 0 + ,.headers={} + ,.body_size= 0 + ,.body= "" + } + +#define NO_BODY_HTTP11_KA_204 16 +, {.name= "HTTP/1.1 with a 204 status" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 204 No content\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 204 + ,.response_status= "No 
content" + ,.num_headers= 0 + ,.headers={} + ,.body_size= 0 + ,.body= "" + } + +#define NO_BODY_HTTP11_NOKA_204 17 +, {.name= "HTTP/1.1 with a 204 status and keep-alive disabled" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 204 No content\r\n" + "Connection: close\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 204 + ,.response_status= "No content" + ,.num_headers= 1 + ,.headers= + { { "Connection", "close" } + } + ,.body_size= 0 + ,.body= "" + } + +#define NO_BODY_HTTP11_KA_CHUNKED_200 18 +, {.name= "HTTP/1.1 with chunked endocing and a 200 response" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 1 + ,.headers= + { { "Transfer-Encoding", "chunked" } + } + ,.body_size= 0 + ,.body= "" + ,.num_chunks_complete= 1 + } + +#if !HTTP_PARSER_STRICT +#define SPACE_IN_FIELD_RES 19 +/* Should handle spaces in header fields */ +, {.name= "field space" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Server: Microsoft-IIS/6.0\r\n" + "X-Powered-By: ASP.NET\r\n" + "en-US Content-Type: text/xml\r\n" /* this is the problem */ + "Content-Type: text/xml\r\n" + "Content-Length: 16\r\n" + "Date: Fri, 23 Jul 2010 18:45:38 GMT\r\n" + "Connection: keep-alive\r\n" + "\r\n" + "<xml>hello</xml>" /* fake body */ + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 7 + ,.headers= + { { "Server", "Microsoft-IIS/6.0" } + , { "X-Powered-By", "ASP.NET" } + , { "en-US Content-Type", "text/xml" } + , { "Content-Type", "text/xml" } + , { "Content-Length", "16" } + , { "Date", "Fri, 23 Jul 2010 18:45:38 GMT" } + , { "Connection", "keep-alive" } + } + ,.body= 
"<xml>hello</xml>" + } +#endif /* !HTTP_PARSER_STRICT */ + +#define AMAZON_COM 20 +, {.name= "amazon.com" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 301 MovedPermanently\r\n" + "Date: Wed, 15 May 2013 17:06:33 GMT\r\n" + "Server: Server\r\n" + "x-amz-id-1: 0GPHKXSJQ826RK7GZEB2\r\n" + "p3p: policyref=\"http://www.amazon.com/w3c/p3p.xml\",CP=\"CAO DSP LAW CUR ADM IVAo IVDo CONo OTPo OUR DELi PUBi OTRi BUS PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA HEA PRE LOC GOV OTC \"\r\n" + "x-amz-id-2: STN69VZxIFSz9YJLbz1GDbxpbjG6Qjmmq5E3DxRhOUw+Et0p4hr7c/Q8qNcx4oAD\r\n" + "Location: http://www.amazon.com/Dan-Brown/e/B000AP9DSU/ref=s9_pop_gw_al1?_encoding=UTF8&refinementId=618073011&pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0SHYY5BZXN3KR20BNFAY&pf_rd_t=101&pf_rd_p=1263340922&pf_rd_i=507846\r\n" + "Vary: Accept-Encoding,User-Agent\r\n" + "Content-Type: text/html; charset=ISO-8859-1\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "1\r\n" + "\n\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 301 + ,.response_status= "MovedPermanently" + ,.num_headers= 9 + ,.headers= { { "Date", "Wed, 15 May 2013 17:06:33 GMT" } + , { "Server", "Server" } + , { "x-amz-id-1", "0GPHKXSJQ826RK7GZEB2" } + , { "p3p", "policyref=\"http://www.amazon.com/w3c/p3p.xml\",CP=\"CAO DSP LAW CUR ADM IVAo IVDo CONo OTPo OUR DELi PUBi OTRi BUS PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA HEA PRE LOC GOV OTC \"" } + , { "x-amz-id-2", "STN69VZxIFSz9YJLbz1GDbxpbjG6Qjmmq5E3DxRhOUw+Et0p4hr7c/Q8qNcx4oAD" } + , { "Location", "http://www.amazon.com/Dan-Brown/e/B000AP9DSU/ref=s9_pop_gw_al1?_encoding=UTF8&refinementId=618073011&pf_rd_m=ATVPDKIKX0DER&pf_rd_s=center-2&pf_rd_r=0SHYY5BZXN3KR20BNFAY&pf_rd_t=101&pf_rd_p=1263340922&pf_rd_i=507846" } + , { "Vary", "Accept-Encoding,User-Agent" } + , { "Content-Type", "text/html; charset=ISO-8859-1" } + , { "Transfer-Encoding", "chunked" } + } + ,.body= "\n" + ,.num_chunks_complete= 2 + 
,.chunk_lengths= { 1 } + } + +#define EMPTY_REASON_PHRASE_AFTER_SPACE 21 +, {.name= "empty reason phrase after space" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 \r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "" + ,.num_headers= 0 + ,.headers= {} + ,.body= "" + } + +#define CONTENT_LENGTH_X 22 +, {.name= "Content-Length-X" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Content-Length-X: 0\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "2\r\n" + "OK\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 2 + ,.headers= { { "Content-Length-X", "0" } + , { "Transfer-Encoding", "chunked" } + } + ,.body= "OK" + ,.num_chunks_complete= 2 + ,.chunk_lengths= { 2 } + } + +#define HTTP_101_RESPONSE_WITH_UPGRADE_HEADER 23 +, {.name= "HTTP 101 response with Upgrade header" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 101 Switching Protocols\r\n" + "Connection: upgrade\r\n" + "Upgrade: h2c\r\n" + "\r\n" + "proto" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 101 + ,.response_status= "Switching Protocols" + ,.upgrade= "proto" + ,.num_headers= 2 + ,.headers= + { { "Connection", "upgrade" } + , { "Upgrade", "h2c" } + } + } + +#define HTTP_101_RESPONSE_WITH_UPGRADE_HEADER_AND_CONTENT_LENGTH 24 +, {.name= "HTTP 101 response with Upgrade and Content-Length header" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 101 Switching Protocols\r\n" + "Connection: upgrade\r\n" + "Upgrade: h2c\r\n" + "Content-Length: 4\r\n" + "\r\n" + "body" + "proto" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 101 + ,.response_status= "Switching Protocols" + ,.body= "body" + ,.upgrade= "proto" + ,.num_headers= 3 + ,.headers= 
+ { { "Connection", "upgrade" } + , { "Upgrade", "h2c" } + , { "Content-Length", "4" } + } + } + +#define HTTP_101_RESPONSE_WITH_UPGRADE_HEADER_AND_TRANSFER_ENCODING 25 +, {.name= "HTTP 101 response with Upgrade and Transfer-Encoding header" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 101 Switching Protocols\r\n" + "Connection: upgrade\r\n" + "Upgrade: h2c\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "2\r\n" + "bo\r\n" + "2\r\n" + "dy\r\n" + "0\r\n" + "\r\n" + "proto" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 101 + ,.response_status= "Switching Protocols" + ,.body= "body" + ,.upgrade= "proto" + ,.num_headers= 3 + ,.headers= + { { "Connection", "upgrade" } + , { "Upgrade", "h2c" } + , { "Transfer-Encoding", "chunked" } + } + ,.num_chunks_complete= 3 + ,.chunk_lengths= { 2, 2 } + } + +#define HTTP_200_RESPONSE_WITH_UPGRADE_HEADER 26 +, {.name= "HTTP 200 response with Upgrade header" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Connection: upgrade\r\n" + "Upgrade: h2c\r\n" + "\r\n" + "body" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.body= "body" + ,.upgrade= NULL + ,.num_headers= 2 + ,.headers= + { { "Connection", "upgrade" } + , { "Upgrade", "h2c" } + } + } + +#define HTTP_200_RESPONSE_WITH_UPGRADE_HEADER_AND_CONTENT_LENGTH 27 +, {.name= "HTTP 200 response with Upgrade and Content-Length header" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Connection: upgrade\r\n" + "Upgrade: h2c\r\n" + "Content-Length: 4\r\n" + "\r\n" + "body" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 3 + ,.body= "body" + ,.upgrade= NULL + ,.headers= + { { "Connection", "upgrade" } + , { "Upgrade", "h2c" } + , { "Content-Length", "4" } + } + } + +#define 
HTTP_200_RESPONSE_WITH_UPGRADE_HEADER_AND_TRANSFER_ENCODING 28 +, {.name= "HTTP 200 response with Upgrade and Transfer-Encoding header" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Connection: upgrade\r\n" + "Upgrade: h2c\r\n" + "Transfer-Encoding: chunked\r\n" + "\r\n" + "2\r\n" + "bo\r\n" + "2\r\n" + "dy\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= TRUE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 3 + ,.body= "body" + ,.upgrade= NULL + ,.headers= + { { "Connection", "upgrade" } + , { "Upgrade", "h2c" } + , { "Transfer-Encoding", "chunked" } + } + ,.num_chunks_complete= 3 + ,.chunk_lengths= { 2, 2 } + } +#define HTTP_200_MULTI_TE_NOT_LAST_CHUNKED 29 +, {.name= "HTTP 200 response with `chunked` being *not last* Transfer-Encoding" + ,.type= HTTP_RESPONSE + ,.raw= "HTTP/1.1 200 OK\r\n" + "Transfer-Encoding: chunked, identity\r\n" + "\r\n" + "2\r\n" + "OK\r\n" + "0\r\n" + "\r\n" + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= TRUE + ,.http_major= 1 + ,.http_minor= 1 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 1 + ,.headers= { { "Transfer-Encoding", "chunked, identity" } + } + ,.body= "2\r\nOK\r\n0\r\n\r\n" + ,.num_chunks_complete= 0 + } +}; + +/* strnlen() is a POSIX.2008 addition. Can't rely on it being available so + * define it ourselves. + */ +size_t +strnlen(const char *s, size_t maxlen) +{ + const char *p; + + p = memchr(s, '\0', maxlen); + if (p == NULL) + return maxlen; + + return p - s; +} + +size_t +strlncat(char *dst, size_t len, const char *src, size_t n) +{ + size_t slen; + size_t dlen; + size_t rlen; + size_t ncpy; + + slen = strnlen(src, n); + dlen = strnlen(dst, len); + + if (dlen < len) { + rlen = len - dlen; + ncpy = slen < rlen ? 
slen : (rlen - 1); + memcpy(dst + dlen, src, ncpy); + dst[dlen + ncpy] = '\0'; + } + + assert(len > slen + dlen); + return slen + dlen; +} + +size_t +strlncpy(char *dst, size_t len, const char *src, size_t n) +{ + size_t slen; + size_t ncpy; + + slen = strnlen(src, n); + + if (len > 0) { + ncpy = slen < len ? slen : (len - 1); + memcpy(dst, src, ncpy); + dst[ncpy] = '\0'; + } + + assert(len > slen); + return slen; +} + +int +request_url_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == &parser); + strlncat(messages[num_messages].request_url, + sizeof(messages[num_messages].request_url), + buf, + len); + return 0; +} + +int +header_field_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == &parser); + struct message *m = &messages[num_messages]; + + if (m->last_header_element != FIELD) + m->num_headers++; + + strlncat(m->headers[m->num_headers-1][0], + sizeof(m->headers[m->num_headers-1][0]), + buf, + len); + + m->last_header_element = FIELD; + + return 0; +} + +int +header_value_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == &parser); + struct message *m = &messages[num_messages]; + + strlncat(m->headers[m->num_headers-1][1], + sizeof(m->headers[m->num_headers-1][1]), + buf, + len); + + m->last_header_element = VALUE; + + return 0; +} + +void +check_body_is_final (const http_parser *p) +{ + if (messages[num_messages].body_is_final) { + fprintf(stderr, "\n\n *** Error http_body_is_final() should return 1 " + "on last on_body callback call " + "but it doesn't! 
***\n\n"); + assert(0); + abort(); + } + messages[num_messages].body_is_final = http_body_is_final(p); +} + +int +body_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == &parser); + strlncat(messages[num_messages].body, + sizeof(messages[num_messages].body), + buf, + len); + messages[num_messages].body_size += len; + check_body_is_final(p); + // printf("body_cb: '%s'\n", requests[num_messages].body); + return 0; +} + +int +count_body_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == &parser); + assert(buf); + messages[num_messages].body_size += len; + check_body_is_final(p); + return 0; +} + +int +message_begin_cb (http_parser *p) +{ + assert(p == &parser); + assert(!messages[num_messages].message_begin_cb_called); + messages[num_messages].message_begin_cb_called = TRUE; + return 0; +} + +int +headers_complete_cb (http_parser *p) +{ + assert(p == &parser); + messages[num_messages].method = parser.method; + messages[num_messages].status_code = parser.status_code; + messages[num_messages].http_major = parser.http_major; + messages[num_messages].http_minor = parser.http_minor; + messages[num_messages].headers_complete_cb_called = TRUE; + messages[num_messages].should_keep_alive = http_should_keep_alive(&parser); + return 0; +} + +int +message_complete_cb (http_parser *p) +{ + assert(p == &parser); + if (messages[num_messages].should_keep_alive != + http_should_keep_alive(&parser)) + { + fprintf(stderr, "\n\n *** Error http_should_keep_alive() should have same " + "value in both on_message_complete and on_headers_complete " + "but it doesn't! ***\n\n"); + assert(0); + abort(); + } + + if (messages[num_messages].body_size && + http_body_is_final(p) && + !messages[num_messages].body_is_final) + { + fprintf(stderr, "\n\n *** Error http_body_is_final() should return 1 " + "on last on_body callback call " + "but it doesn't! 
***\n\n"); + assert(0); + abort(); + } + + messages[num_messages].message_complete_cb_called = TRUE; + + messages[num_messages].message_complete_on_eof = currently_parsing_eof; + + num_messages++; + return 0; +} + +int +response_status_cb (http_parser *p, const char *buf, size_t len) +{ + assert(p == &parser); + + messages[num_messages].status_cb_called = TRUE; + + strlncat(messages[num_messages].response_status, + sizeof(messages[num_messages].response_status), + buf, + len); + return 0; +} + +int +chunk_header_cb (http_parser *p) +{ + assert(p == &parser); + int chunk_idx = messages[num_messages].num_chunks; + messages[num_messages].num_chunks++; + if (chunk_idx < MAX_CHUNKS) { + messages[num_messages].chunk_lengths[chunk_idx] = p->content_length; + } + + return 0; +} + +int +chunk_complete_cb (http_parser *p) +{ + assert(p == &parser); + + /* Here we want to verify that each chunk_header_cb is matched by a + * chunk_complete_cb, so not only should the total number of calls to + * both callbacks be the same, but they also should be interleaved + * properly */ + assert(messages[num_messages].num_chunks == + messages[num_messages].num_chunks_complete + 1); + + messages[num_messages].num_chunks_complete++; + return 0; +} + +/* These dontcall_* callbacks exist so that we can verify that when we're + * paused, no additional callbacks are invoked */ +int +dontcall_message_begin_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_message_begin() called on paused parser ***\n\n"); + abort(); +} + +int +dontcall_header_field_cb (http_parser *p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_header_field() called on paused parser ***\n\n"); + abort(); +} + +int +dontcall_header_value_cb (http_parser *p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_header_value() called on paused parser ***\n\n"); + abort(); +} + +int +dontcall_request_url_cb (http_parser 
*p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_request_url() called on paused parser ***\n\n"); + abort(); +} + +int +dontcall_body_cb (http_parser *p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_body_cb() called on paused parser ***\n\n"); + abort(); +} + +int +dontcall_headers_complete_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_headers_complete() called on paused " + "parser ***\n\n"); + abort(); +} + +int +dontcall_message_complete_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_message_complete() called on paused " + "parser ***\n\n"); + abort(); +} + +int +dontcall_response_status_cb (http_parser *p, const char *buf, size_t len) +{ + if (p || buf || len) { } // gcc + fprintf(stderr, "\n\n*** on_status() called on paused parser ***\n\n"); + abort(); +} + +int +dontcall_chunk_header_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_chunk_header() called on paused parser ***\n\n"); + exit(1); +} + +int +dontcall_chunk_complete_cb (http_parser *p) +{ + if (p) { } // gcc + fprintf(stderr, "\n\n*** on_chunk_complete() " + "called on paused parser ***\n\n"); + exit(1); +} + +static http_parser_settings settings_dontcall = + {.on_message_begin = dontcall_message_begin_cb + ,.on_header_field = dontcall_header_field_cb + ,.on_header_value = dontcall_header_value_cb + ,.on_url = dontcall_request_url_cb + ,.on_status = dontcall_response_status_cb + ,.on_body = dontcall_body_cb + ,.on_headers_complete = dontcall_headers_complete_cb + ,.on_message_complete = dontcall_message_complete_cb + ,.on_chunk_header = dontcall_chunk_header_cb + ,.on_chunk_complete = dontcall_chunk_complete_cb + }; + +/* These pause_* callbacks always pause the parser and just invoke the regular + * callback that tracks content. 
Before returning, we overwrite the parser + * settings to point to the _dontcall variety so that we can verify that + * the pause actually did, you know, pause. */ +int +pause_message_begin_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return message_begin_cb(p); +} + +int +pause_header_field_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return header_field_cb(p, buf, len); +} + +int +pause_header_value_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return header_value_cb(p, buf, len); +} + +int +pause_request_url_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return request_url_cb(p, buf, len); +} + +int +pause_body_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return body_cb(p, buf, len); +} + +int +pause_headers_complete_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return headers_complete_cb(p); +} + +int +pause_message_complete_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return message_complete_cb(p); +} + +int +pause_response_status_cb (http_parser *p, const char *buf, size_t len) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return response_status_cb(p, buf, len); +} + +int +pause_chunk_header_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return chunk_header_cb(p); +} + +int +pause_chunk_complete_cb (http_parser *p) +{ + http_parser_pause(p, 1); + *current_pause_parser = settings_dontcall; + return chunk_complete_cb(p); +} + +int +connect_headers_complete_cb (http_parser *p) +{ + headers_complete_cb(p); + return 
1; +} + +int +connect_message_complete_cb (http_parser *p) +{ + messages[num_messages].should_keep_alive = http_should_keep_alive(&parser); + return message_complete_cb(p); +} + +static http_parser_settings settings_pause = + {.on_message_begin = pause_message_begin_cb + ,.on_header_field = pause_header_field_cb + ,.on_header_value = pause_header_value_cb + ,.on_url = pause_request_url_cb + ,.on_status = pause_response_status_cb + ,.on_body = pause_body_cb + ,.on_headers_complete = pause_headers_complete_cb + ,.on_message_complete = pause_message_complete_cb + ,.on_chunk_header = pause_chunk_header_cb + ,.on_chunk_complete = pause_chunk_complete_cb + }; + +static http_parser_settings settings = + {.on_message_begin = message_begin_cb + ,.on_header_field = header_field_cb + ,.on_header_value = header_value_cb + ,.on_url = request_url_cb + ,.on_status = response_status_cb + ,.on_body = body_cb + ,.on_headers_complete = headers_complete_cb + ,.on_message_complete = message_complete_cb + ,.on_chunk_header = chunk_header_cb + ,.on_chunk_complete = chunk_complete_cb + }; + +static http_parser_settings settings_count_body = + {.on_message_begin = message_begin_cb + ,.on_header_field = header_field_cb + ,.on_header_value = header_value_cb + ,.on_url = request_url_cb + ,.on_status = response_status_cb + ,.on_body = count_body_cb + ,.on_headers_complete = headers_complete_cb + ,.on_message_complete = message_complete_cb + ,.on_chunk_header = chunk_header_cb + ,.on_chunk_complete = chunk_complete_cb + }; + +static http_parser_settings settings_connect = + {.on_message_begin = message_begin_cb + ,.on_header_field = header_field_cb + ,.on_header_value = header_value_cb + ,.on_url = request_url_cb + ,.on_status = response_status_cb + ,.on_body = dontcall_body_cb + ,.on_headers_complete = connect_headers_complete_cb + ,.on_message_complete = connect_message_complete_cb + ,.on_chunk_header = chunk_header_cb + ,.on_chunk_complete = chunk_complete_cb + }; + +static 
http_parser_settings settings_null = + {.on_message_begin = 0 + ,.on_header_field = 0 + ,.on_header_value = 0 + ,.on_url = 0 + ,.on_status = 0 + ,.on_body = 0 + ,.on_headers_complete = 0 + ,.on_message_complete = 0 + ,.on_chunk_header = 0 + ,.on_chunk_complete = 0 + }; + +void +parser_init (enum http_parser_type type) +{ + num_messages = 0; + http_parser_init(&parser, type); + memset(&messages, 0, sizeof messages); +} + +size_t parse (const char *buf, size_t len) +{ + size_t nparsed; + currently_parsing_eof = (len == 0); + nparsed = http_parser_execute(&parser, &settings, buf, len); + return nparsed; +} + +size_t parse_count_body (const char *buf, size_t len) +{ + size_t nparsed; + currently_parsing_eof = (len == 0); + nparsed = http_parser_execute(&parser, &settings_count_body, buf, len); + return nparsed; +} + +size_t parse_pause (const char *buf, size_t len) +{ + size_t nparsed; + http_parser_settings s = settings_pause; + + currently_parsing_eof = (len == 0); + current_pause_parser = &s; + nparsed = http_parser_execute(&parser, current_pause_parser, buf, len); + return nparsed; +} + +size_t parse_connect (const char *buf, size_t len) +{ + size_t nparsed; + currently_parsing_eof = (len == 0); + nparsed = http_parser_execute(&parser, &settings_connect, buf, len); + return nparsed; +} + +static inline int +check_str_eq (const struct message *m, + const char *prop, + const char *expected, + const char *found) { + if ((expected == NULL) != (found == NULL)) { + printf("\n*** Error: %s in '%s' ***\n\n", prop, m->name); + printf("expected %s\n", (expected == NULL) ? "NULL" : expected); + printf(" found %s\n", (found == NULL) ? 
"NULL" : found); + return 0; + } + if (expected != NULL && 0 != strcmp(expected, found)) { + printf("\n*** Error: %s in '%s' ***\n\n", prop, m->name); + printf("expected '%s'\n", expected); + printf(" found '%s'\n", found); + return 0; + } + return 1; +} + +static inline int +check_num_eq (const struct message *m, + const char *prop, + int expected, + int found) { + if (expected != found) { + printf("\n*** Error: %s in '%s' ***\n\n", prop, m->name); + printf("expected %d\n", expected); + printf(" found %d\n", found); + return 0; + } + return 1; +} + +#define MESSAGE_CHECK_STR_EQ(expected, found, prop) \ + if (!check_str_eq(expected, #prop, expected->prop, found->prop)) return 0 + +#define MESSAGE_CHECK_NUM_EQ(expected, found, prop) \ + if (!check_num_eq(expected, #prop, expected->prop, found->prop)) return 0 + +#define MESSAGE_CHECK_URL_EQ(u, expected, found, prop, fn) \ +do { \ + char ubuf[256]; \ + \ + if ((u)->field_set & (1 << (fn))) { \ + memcpy(ubuf, (found)->request_url + (u)->field_data[(fn)].off, \ + (u)->field_data[(fn)].len); \ + ubuf[(u)->field_data[(fn)].len] = '\0'; \ + } else { \ + ubuf[0] = '\0'; \ + } \ + \ + check_str_eq(expected, #prop, expected->prop, ubuf); \ +} while(0) + +int +message_eq (int index, int connect, const struct message *expected) +{ + int i; + struct message *m = &messages[index]; + + MESSAGE_CHECK_NUM_EQ(expected, m, http_major); + MESSAGE_CHECK_NUM_EQ(expected, m, http_minor); + + if (expected->type == HTTP_REQUEST) { + MESSAGE_CHECK_NUM_EQ(expected, m, method); + } else { + MESSAGE_CHECK_NUM_EQ(expected, m, status_code); + MESSAGE_CHECK_STR_EQ(expected, m, response_status); + assert(m->status_cb_called); + } + + if (!connect) { + MESSAGE_CHECK_NUM_EQ(expected, m, should_keep_alive); + MESSAGE_CHECK_NUM_EQ(expected, m, message_complete_on_eof); + } + + assert(m->message_begin_cb_called); + assert(m->headers_complete_cb_called); + assert(m->message_complete_cb_called); + + + MESSAGE_CHECK_STR_EQ(expected, m, request_url); + + 
/* Check URL components; we can't do this w/ CONNECT since it doesn't + * send us a well-formed URL. + */ + if (*m->request_url && m->method != HTTP_CONNECT) { + struct http_parser_url u; + + if (http_parser_parse_url(m->request_url, strlen(m->request_url), 0, &u)) { + fprintf(stderr, "\n\n*** failed to parse URL %s ***\n\n", + m->request_url); + abort(); + } + + if (expected->host) { + MESSAGE_CHECK_URL_EQ(&u, expected, m, host, UF_HOST); + } + + if (expected->userinfo) { + MESSAGE_CHECK_URL_EQ(&u, expected, m, userinfo, UF_USERINFO); + } + + m->port = (u.field_set & (1 << UF_PORT)) ? + u.port : 0; + + MESSAGE_CHECK_URL_EQ(&u, expected, m, query_string, UF_QUERY); + MESSAGE_CHECK_URL_EQ(&u, expected, m, fragment, UF_FRAGMENT); + MESSAGE_CHECK_URL_EQ(&u, expected, m, request_path, UF_PATH); + MESSAGE_CHECK_NUM_EQ(expected, m, port); + } + + if (connect) { + check_num_eq(m, "body_size", 0, m->body_size); + } else if (expected->body_size) { + MESSAGE_CHECK_NUM_EQ(expected, m, body_size); + } else { + MESSAGE_CHECK_STR_EQ(expected, m, body); + } + + if (connect) { + check_num_eq(m, "num_chunks_complete", 0, m->num_chunks_complete); + } else { + assert(m->num_chunks == m->num_chunks_complete); + MESSAGE_CHECK_NUM_EQ(expected, m, num_chunks_complete); + for (i = 0; i < m->num_chunks && i < MAX_CHUNKS; i++) { + MESSAGE_CHECK_NUM_EQ(expected, m, chunk_lengths[i]); + } + } + + MESSAGE_CHECK_NUM_EQ(expected, m, num_headers); + + int r; + for (i = 0; i < m->num_headers; i++) { + r = check_str_eq(expected, "header field", expected->headers[i][0], m->headers[i][0]); + if (!r) return 0; + r = check_str_eq(expected, "header value", expected->headers[i][1], m->headers[i][1]); + if (!r) return 0; + } + + if (!connect) { + MESSAGE_CHECK_STR_EQ(expected, m, upgrade); + } + + return 1; +} + +/* Given a sequence of varargs messages, return the number of them that the + * parser should successfully parse, taking into account that upgraded + * messages prevent all subsequent messages 
from being parsed. + */ +size_t +count_parsed_messages(const size_t nmsgs, ...) { + size_t i; + va_list ap; + + va_start(ap, nmsgs); + + for (i = 0; i < nmsgs; i++) { + struct message *m = va_arg(ap, struct message *); + + if (m->upgrade) { + va_end(ap); + return i + 1; + } + } + + va_end(ap); + return nmsgs; +} + +/* Given a sequence of bytes and the number of these that we were able to + * parse, verify that upgrade bodies are correct. + */ +void +upgrade_message_fix(char *body, const size_t nread, const size_t nmsgs, ...) { + va_list ap; + size_t i; + size_t off = 0; + + va_start(ap, nmsgs); + + for (i = 0; i < nmsgs; i++) { + struct message *m = va_arg(ap, struct message *); + + off += strlen(m->raw); + + if (m->upgrade) { + off -= strlen(m->upgrade); + + /* Check the portion of the response after its specified upgrade */ + if (!check_str_eq(m, "upgrade", body + off, body + nread)) { + abort(); + } + + /* Fix up the response so that message_eq() will verify the beginning + * of the upgrade */ + *(body + nread + strlen(m->upgrade)) = '\0'; + messages[num_messages -1 ].upgrade = body + nread; + + va_end(ap); + return; + } + } + + va_end(ap); + printf("\n\n*** Error: expected a message with upgrade ***\n"); + + abort(); +} + +static void +print_error (const char *raw, size_t error_location) +{ + fprintf(stderr, "\n*** %s ***\n\n", + http_errno_description(HTTP_PARSER_ERRNO(&parser))); + + int this_line = 0, char_len = 0; + size_t i, j, len = strlen(raw), error_location_line = 0; + for (i = 0; i < len; i++) { + if (i == error_location) this_line = 1; + switch (raw[i]) { + case '\r': + char_len = 2; + fprintf(stderr, "\\r"); + break; + + case '\n': + fprintf(stderr, "\\n\n"); + + if (this_line) goto print; + + error_location_line = 0; + continue; + + default: + char_len = 1; + fputc(raw[i], stderr); + break; + } + if (!this_line) error_location_line += char_len; + } + + fprintf(stderr, "[eof]\n"); + + print: + for (j = 0; j < error_location_line; j++) { + fputc(' ', 
stderr); + } + fprintf(stderr, "^\n\nerror location: %u\n", (unsigned int)error_location); +} + +void +test_preserve_data (void) +{ + char my_data[] = "application-specific data"; + http_parser parser; + parser.data = my_data; + http_parser_init(&parser, HTTP_REQUEST); + if (parser.data != my_data) { + printf("\n*** parser.data not preserved accross http_parser_init ***\n\n"); + abort(); + } +} + +struct url_test { + const char *name; + const char *url; + int is_connect; + struct http_parser_url u; + int rv; +}; + +const struct url_test url_tests[] = +{ {.name="proxy request" + ,.url="http://hostname/" + ,.is_connect=0 + ,.u= + {.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PATH) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 7, 8 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 15, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="proxy request with port" + ,.url="http://hostname:444/" + ,.is_connect=0 + ,.u= + {.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PORT) | (1 << UF_PATH) + ,.port=444 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 7, 8 } /* UF_HOST */ + ,{ 16, 3 } /* UF_PORT */ + ,{ 19, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="CONNECT request" + ,.url="hostname:443" + ,.is_connect=1 + ,.u= + {.field_set=(1 << UF_HOST) | (1 << UF_PORT) + ,.port=443 + ,.field_data= + {{ 0, 0 } /* UF_SCHEMA */ + ,{ 0, 8 } /* UF_HOST */ + ,{ 9, 3 } /* UF_PORT */ + ,{ 0, 0 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="CONNECT request but not connect" + ,.url="hostname:443" + ,.is_connect=0 + ,.rv=1 + } + +, {.name="proxy ipv6 request" + ,.url="http://[1:2::3:4]/" + ,.is_connect=0 + ,.u= + {.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PATH) + ,.port=0 + 
,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 8, 8 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 17, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="proxy ipv6 request with port" + ,.url="http://[1:2::3:4]:67/" + ,.is_connect=0 + ,.u= + {.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PORT) | (1 << UF_PATH) + ,.port=67 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 8, 8 } /* UF_HOST */ + ,{ 18, 2 } /* UF_PORT */ + ,{ 20, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="CONNECT ipv6 address" + ,.url="[1:2::3:4]:443" + ,.is_connect=1 + ,.u= + {.field_set=(1 << UF_HOST) | (1 << UF_PORT) + ,.port=443 + ,.field_data= + {{ 0, 0 } /* UF_SCHEMA */ + ,{ 1, 8 } /* UF_HOST */ + ,{ 11, 3 } /* UF_PORT */ + ,{ 0, 0 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="ipv4 in ipv6 address" + ,.url="http://[2001:0000:0000:0000:0000:0000:1.9.1.1]/" + ,.is_connect=0 + ,.u= + {.field_set=(1 << UF_SCHEMA) | (1 << UF_HOST) | (1 << UF_PATH) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 8, 37 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 46, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="extra ? 
in query string" + ,.url="http://a.tbcdn.cn/p/fp/2010c/??fp-header-min.css,fp-base-min.css," + "fp-channel-min.css,fp-product-min.css,fp-mall-min.css,fp-category-min.css," + "fp-sub-min.css,fp-gdp4p-min.css,fp-css3-min.css,fp-misc-min.css?t=20101022.css" + ,.is_connect=0 + ,.u= + {.field_set=(1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) | (1<<UF_QUERY) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 7, 10 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 17, 12 } /* UF_PATH */ + ,{ 30,187 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="space URL encoded" + ,.url="/toto.html?toto=a%20b" + ,.is_connect=0 + ,.u= + {.field_set= (1<<UF_PATH) | (1<<UF_QUERY) + ,.port=0 + ,.field_data= + {{ 0, 0 } /* UF_SCHEMA */ + ,{ 0, 0 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 0, 10 } /* UF_PATH */ + ,{ 11, 10 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + + +, {.name="URL fragment" + ,.url="/toto.html#titi" + ,.is_connect=0 + ,.u= + {.field_set= (1<<UF_PATH) | (1<<UF_FRAGMENT) + ,.port=0 + ,.field_data= + {{ 0, 0 } /* UF_SCHEMA */ + ,{ 0, 0 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 0, 10 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 11, 4 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="complex URL fragment" + ,.url="http://www.webmasterworld.com/r.cgi?f=21&d=8405&url=" + "http://www.example.com/index.html?foo=bar&hello=world#midpage" + ,.is_connect=0 + ,.u= + {.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) | (1<<UF_QUERY) |\ + (1<<UF_FRAGMENT) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 7, 22 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 29, 6 } /* UF_PATH */ + ,{ 36, 69 } /* UF_QUERY */ + ,{106, 7 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="complex URL from node js url parser doc" + ,.url="http://host.com:8080/p/a/t/h?query=string#hash" + 
,.is_connect=0
  ,.u=
    {.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PORT) | (1<<UF_PATH) |
      (1<<UF_QUERY) | (1<<UF_FRAGMENT)
    ,.port=8080
    ,.field_data=
      {{  0,  4 } /* UF_SCHEMA */
      ,{  7,  8 } /* UF_HOST */
      ,{ 16,  4 } /* UF_PORT */
      ,{ 20,  8 } /* UF_PATH */
      ,{ 29, 12 } /* UF_QUERY */
      ,{ 42,  4 } /* UF_FRAGMENT */
      ,{  0,  0 } /* UF_USERINFO */
      }
    }
  ,.rv=0
  }

, {.name="complex URL with basic auth from node js url parser doc"
  ,.url="http://a:b@host.com:8080/p/a/t/h?query=string#hash"
  ,.is_connect=0
  ,.u=
    {.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PORT) | (1<<UF_PATH) |
      (1<<UF_QUERY) | (1<<UF_FRAGMENT) | (1<<UF_USERINFO)
    ,.port=8080
    ,.field_data=
      {{  0,  4 } /* UF_SCHEMA */
      ,{ 11,  8 } /* UF_HOST */
      ,{ 20,  4 } /* UF_PORT */
      ,{ 24,  8 } /* UF_PATH */
      ,{ 33, 12 } /* UF_QUERY */
      ,{ 46,  4 } /* UF_FRAGMENT */
      ,{  7,  3 } /* UF_USERINFO */
      }
    }
  ,.rv=0
  }

, {.name="double @"
  ,.url="http://a:b@@hostname:443/"
  ,.is_connect=0
  ,.rv=1
  }

, {.name="proxy empty host"
  ,.url="http://:443/"
  ,.is_connect=0
  ,.rv=1
  }

, {.name="proxy empty port"
  ,.url="http://hostname:/"
  ,.is_connect=0
  ,.rv=1
  }

, {.name="CONNECT with basic auth"
  ,.url="a:b@hostname:443"
  ,.is_connect=1
  ,.rv=1
  }

, {.name="CONNECT empty host"
  ,.url=":443"
  ,.is_connect=1
  ,.rv=1
  }

, {.name="CONNECT empty port"
  ,.url="hostname:"
  ,.is_connect=1
  ,.rv=1
  }

, {.name="CONNECT with extra bits"
  ,.url="hostname:443/"
  ,.is_connect=1
  ,.rv=1
  }

, {.name="space in URL"
  ,.url="/foo bar/"
  ,.rv=1 /* s_dead */
  }

, {.name="proxy basic auth with space url encoded"
  ,.url="http://a%20:b@host.com/"
  ,.is_connect=0
  ,.u=
    {.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) | (1<<UF_USERINFO)
    ,.port=0
    ,.field_data=
      {{  0,  4 } /* UF_SCHEMA */
      ,{ 14,  8 } /* UF_HOST */
      ,{  0,  0 } /* UF_PORT */
      ,{ 22,  1 } /* UF_PATH */
      ,{  0,  0 } /* UF_QUERY */
      ,{  0,  0 } /* UF_FRAGMENT */
      ,{  7,  6 }
/* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="carriage return in URL" + ,.url="/foo\rbar/" + ,.rv=1 /* s_dead */ + } + +, {.name="proxy double : in URL" + ,.url="http://hostname::443/" + ,.rv=1 /* s_dead */ + } + +, {.name="proxy basic auth with double :" + ,.url="http://a::b@host.com/" + ,.is_connect=0 + ,.u= + {.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) | (1<<UF_USERINFO) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 12, 8 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 20, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 7, 4 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="line feed in URL" + ,.url="/foo\nbar/" + ,.rv=1 /* s_dead */ + } + +, {.name="proxy empty basic auth" + ,.url="http://@hostname/fo" + ,.u= + {.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 8, 8 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 16, 3 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } +, {.name="proxy line feed in hostname" + ,.url="http://host\name/fo" + ,.rv=1 /* s_dead */ + } + +, {.name="proxy % in hostname" + ,.url="http://host%name/fo" + ,.rv=1 /* s_dead */ + } + +, {.name="proxy ; in hostname" + ,.url="http://host;ame/fo" + ,.rv=1 /* s_dead */ + } + +, {.name="proxy basic auth with unreservedchars" + ,.url="http://a!;-_!=+$@host.com/" + ,.is_connect=0 + ,.u= + {.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) | (1<<UF_USERINFO) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 17, 8 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 25, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 7, 9 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="proxy only empty basic auth" + ,.url="http://@/fo" + ,.rv=1 /* s_dead */ + } + +, {.name="proxy only basic auth" + ,.url="http://toto@/fo" + ,.rv=1 /* s_dead */ + } + +, 
{.name="proxy emtpy hostname" + ,.url="http:///fo" + ,.rv=1 /* s_dead */ + } + +, {.name="proxy = in URL" + ,.url="http://host=ame/fo" + ,.rv=1 /* s_dead */ + } + +, {.name="ipv6 address with Zone ID" + ,.url="http://[fe80::a%25eth0]/" + ,.is_connect=0 + ,.u= + {.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 8, 14 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 23, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="ipv6 address with Zone ID, but '%' is not percent-encoded" + ,.url="http://[fe80::a%eth0]/" + ,.is_connect=0 + ,.u= + {.field_set= (1<<UF_SCHEMA) | (1<<UF_HOST) | (1<<UF_PATH) + ,.port=0 + ,.field_data= + {{ 0, 4 } /* UF_SCHEMA */ + ,{ 8, 12 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 21, 1 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="ipv6 address ending with '%'" + ,.url="http://[fe80::a%]/" + ,.rv=1 /* s_dead */ + } + +, {.name="ipv6 address with Zone ID including bad character" + ,.url="http://[fe80::a%$HOME]/" + ,.rv=1 /* s_dead */ + } + +, {.name="just ipv6 Zone ID" + ,.url="http://[%eth0]/" + ,.rv=1 /* s_dead */ + } + +, {.name="empty url" + ,.url="" + ,.is_connect=0 + ,.rv=1 + } + +, {.name="NULL url" + ,.url=NULL + ,.is_connect=0 + ,.rv=1 + } + +, {.name="full of spaces url" + ,.url=" " + ,.is_connect=0 + ,.rv=1 + } + +#if HTTP_PARSER_STRICT + +, {.name="tab in URL" + ,.url="/foo\tbar/" + ,.rv=1 /* s_dead */ + } + +, {.name="form feed in URL" + ,.url="/foo\fbar/" + ,.rv=1 /* s_dead */ + } + +#else /* !HTTP_PARSER_STRICT */ + +, {.name="tab in URL" + ,.url="/foo\tbar/" + ,.u= + {.field_set=(1 << UF_PATH) + ,.field_data= + {{ 0, 0 } /* UF_SCHEMA */ + ,{ 0, 0 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 0, 9 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* 
UF_USERINFO */ + } + } + ,.rv=0 + } + +, {.name="form feed in URL" + ,.url="/foo\fbar/" + ,.u= + {.field_set=(1 << UF_PATH) + ,.field_data= + {{ 0, 0 } /* UF_SCHEMA */ + ,{ 0, 0 } /* UF_HOST */ + ,{ 0, 0 } /* UF_PORT */ + ,{ 0, 9 } /* UF_PATH */ + ,{ 0, 0 } /* UF_QUERY */ + ,{ 0, 0 } /* UF_FRAGMENT */ + ,{ 0, 0 } /* UF_USERINFO */ + } + } + ,.rv=0 + } +#endif +}; + +void +dump_url (const char *url, const struct http_parser_url *u) +{ + unsigned int i; + + printf("\tfield_set: 0x%x, port: %u\n", u->field_set, u->port); + for (i = 0; i < UF_MAX; i++) { + if ((u->field_set & (1 << i)) == 0) { + printf("\tfield_data[%u]: unset\n", i); + continue; + } + + printf("\tfield_data[%u]: off: %u len: %u part: \"%.*s\n\"", + i, + u->field_data[i].off, + u->field_data[i].len, + u->field_data[i].len, + url + u->field_data[i].off); + } +} + +void +test_parse_url (void) +{ + struct http_parser_url u; + const struct url_test *test; + unsigned int i; + int rv; + + for (i = 0; i < (sizeof(url_tests) / sizeof(url_tests[0])); i++) { + test = &url_tests[i]; + memset(&u, 0, sizeof(u)); + + rv = http_parser_parse_url(test->url, + test->url ? 
strlen(test->url) : 0, + test->is_connect, + &u); + + if (test->rv == 0) { + if (rv != 0) { + printf("\n*** http_parser_parse_url(\"%s\") \"%s\" test failed, " + "unexpected rv %d ***\n\n", test->url, test->name, rv); + abort(); + } + + if (memcmp(&u, &test->u, sizeof(u)) != 0) { + printf("\n*** http_parser_parse_url(\"%s\") \"%s\" failed ***\n", + test->url, test->name); + + printf("target http_parser_url:\n"); + dump_url(test->url, &test->u); + printf("result http_parser_url:\n"); + dump_url(test->url, &u); + + abort(); + } + } else { + /* test->rv != 0 */ + if (rv == 0) { + printf("\n*** http_parser_parse_url(\"%s\") \"%s\" test failed, " + "unexpected rv %d ***\n\n", test->url, test->name, rv); + abort(); + } + } + } +} + +void +test_method_str (void) +{ + assert(0 == strcmp("GET", http_method_str(HTTP_GET))); + assert(0 == strcmp("<unknown>", http_method_str(1337))); +} + +void +test_status_str (void) +{ + assert(0 == strcmp("OK", http_status_str(HTTP_STATUS_OK))); + assert(0 == strcmp("Not Found", http_status_str(HTTP_STATUS_NOT_FOUND))); + assert(0 == strcmp("<unknown>", http_status_str(1337))); +} + +void +test_message (const struct message *message) +{ + size_t raw_len = strlen(message->raw); + size_t msg1len; + for (msg1len = 0; msg1len < raw_len; msg1len++) { + parser_init(message->type); + + size_t read; + const char *msg1 = message->raw; + const char *msg2 = msg1 + msg1len; + size_t msg2len = raw_len - msg1len; + + if (msg1len) { + assert(num_messages == 0); + messages[0].headers_complete_cb_called = FALSE; + + read = parse(msg1, msg1len); + + if (!messages[0].headers_complete_cb_called && parser.nread != read) { + assert(parser.nread == read); + print_error(msg1, read); + abort(); + } + + if (message->upgrade && parser.upgrade && num_messages > 0) { + messages[num_messages - 1].upgrade = msg1 + read; + goto test; + } + + if (read != msg1len) { + print_error(msg1, read); + abort(); + } + } + + + read = parse(msg2, msg2len); + + if (message->upgrade && 
parser.upgrade) { + messages[num_messages - 1].upgrade = msg2 + read; + goto test; + } + + if (read != msg2len) { + print_error(msg2, read); + abort(); + } + + read = parse(NULL, 0); + + if (read != 0) { + print_error(message->raw, read); + abort(); + } + + test: + + if (num_messages != 1) { + printf("\n*** num_messages != 1 after testing '%s' ***\n\n", message->name); + abort(); + } + + if(!message_eq(0, 0, message)) abort(); + } +} + +void +test_message_count_body (const struct message *message) +{ + parser_init(message->type); + + size_t read; + size_t l = strlen(message->raw); + size_t i, toread; + size_t chunk = 4024; + + for (i = 0; i < l; i+= chunk) { + toread = MIN(l-i, chunk); + read = parse_count_body(message->raw + i, toread); + if (read != toread) { + print_error(message->raw, read); + abort(); + } + } + + + read = parse_count_body(NULL, 0); + if (read != 0) { + print_error(message->raw, read); + abort(); + } + + if (num_messages != 1) { + printf("\n*** num_messages != 1 after testing '%s' ***\n\n", message->name); + abort(); + } + + if(!message_eq(0, 0, message)) abort(); +} + +void +test_simple_type (const char *buf, + enum http_errno err_expected, + enum http_parser_type type) +{ + parser_init(type); + + enum http_errno err; + + parse(buf, strlen(buf)); + err = HTTP_PARSER_ERRNO(&parser); + parse(NULL, 0); + + /* In strict mode, allow us to pass with an unexpected HPE_STRICT as + * long as the caller isn't expecting success. 
+ */ +#if HTTP_PARSER_STRICT + if (err_expected != err && err_expected != HPE_OK && err != HPE_STRICT) { +#else + if (err_expected != err) { +#endif + fprintf(stderr, "\n*** test_simple expected %s, but saw %s ***\n\n%s\n", + http_errno_name(err_expected), http_errno_name(err), buf); + abort(); + } +} + +void +test_simple (const char *buf, enum http_errno err_expected) +{ + test_simple_type(buf, err_expected, HTTP_REQUEST); +} + +void +test_invalid_header_content (int req, const char* str) +{ + http_parser parser; + http_parser_init(&parser, req ? HTTP_REQUEST : HTTP_RESPONSE); + size_t parsed; + const char *buf; + buf = req ? + "GET / HTTP/1.1\r\n" : + "HTTP/1.1 200 OK\r\n"; + parsed = http_parser_execute(&parser, &settings_null, buf, strlen(buf)); + assert(parsed == strlen(buf)); + + buf = str; + size_t buflen = strlen(buf); + + parsed = http_parser_execute(&parser, &settings_null, buf, buflen); + if (parsed != buflen) { + assert(HTTP_PARSER_ERRNO(&parser) == HPE_INVALID_HEADER_TOKEN); + return; + } + + fprintf(stderr, + "\n*** Error expected but none in invalid header content test ***\n"); + abort(); +} + +void +test_invalid_header_field_content_error (int req) +{ + test_invalid_header_content(req, "Foo: F\01ailure"); + test_invalid_header_content(req, "Foo: B\02ar"); +} + +void +test_invalid_header_field (int req, const char* str) +{ + http_parser parser; + http_parser_init(&parser, req ? HTTP_REQUEST : HTTP_RESPONSE); + size_t parsed; + const char *buf; + buf = req ? 
+ "GET / HTTP/1.1\r\n" : + "HTTP/1.1 200 OK\r\n"; + parsed = http_parser_execute(&parser, &settings_null, buf, strlen(buf)); + assert(parsed == strlen(buf)); + + buf = str; + size_t buflen = strlen(buf); + + parsed = http_parser_execute(&parser, &settings_null, buf, buflen); + if (parsed != buflen) { + assert(HTTP_PARSER_ERRNO(&parser) == HPE_INVALID_HEADER_TOKEN); + return; + } + + fprintf(stderr, + "\n*** Error expected but none in invalid header token test ***\n"); + abort(); +} + +void +test_invalid_header_field_token_error (int req) +{ + test_invalid_header_field(req, "Fo@: Failure"); + test_invalid_header_field(req, "Foo\01\test: Bar"); +} + +void +test_double_content_length_error (int req) +{ + http_parser parser; + http_parser_init(&parser, req ? HTTP_REQUEST : HTTP_RESPONSE); + size_t parsed; + const char *buf; + buf = req ? + "GET / HTTP/1.1\r\n" : + "HTTP/1.1 200 OK\r\n"; + parsed = http_parser_execute(&parser, &settings_null, buf, strlen(buf)); + assert(parsed == strlen(buf)); + + buf = "Content-Length: 0\r\nContent-Length: 1\r\n\r\n"; + size_t buflen = strlen(buf); + + parsed = http_parser_execute(&parser, &settings_null, buf, buflen); + if (parsed != buflen) { + assert(HTTP_PARSER_ERRNO(&parser) == HPE_UNEXPECTED_CONTENT_LENGTH); + return; + } + + fprintf(stderr, + "\n*** Error expected but none in double content-length test ***\n"); + abort(); +} + +void +test_chunked_content_length_error (int req) +{ + http_parser parser; + http_parser_init(&parser, req ? HTTP_REQUEST : HTTP_RESPONSE); + size_t parsed; + const char *buf; + buf = req ? 
+ "GET / HTTP/1.1\r\n" : + "HTTP/1.1 200 OK\r\n"; + parsed = http_parser_execute(&parser, &settings_null, buf, strlen(buf)); + assert(parsed == strlen(buf)); + + buf = "Transfer-Encoding: anything\r\nContent-Length: 1\r\n\r\n"; + size_t buflen = strlen(buf); + + parsed = http_parser_execute(&parser, &settings_null, buf, buflen); + if (parsed != buflen) { + assert(HTTP_PARSER_ERRNO(&parser) == HPE_UNEXPECTED_CONTENT_LENGTH); + return; + } + + fprintf(stderr, + "\n*** Error expected but none in chunked content-length test ***\n"); + abort(); +} + +void +test_header_cr_no_lf_error (int req) +{ + http_parser parser; + http_parser_init(&parser, req ? HTTP_REQUEST : HTTP_RESPONSE); + size_t parsed; + const char *buf; + buf = req ? + "GET / HTTP/1.1\r\n" : + "HTTP/1.1 200 OK\r\n"; + parsed = http_parser_execute(&parser, &settings_null, buf, strlen(buf)); + assert(parsed == strlen(buf)); + + buf = "Foo: 1\rBar: 1\r\n\r\n"; + size_t buflen = strlen(buf); + + parsed = http_parser_execute(&parser, &settings_null, buf, buflen); + if (parsed != buflen) { + assert(HTTP_PARSER_ERRNO(&parser) == HPE_LF_EXPECTED); + return; + } + + fprintf(stderr, + "\n*** Error expected but none in header whitespace test ***\n"); + abort(); +} + +void +test_no_overflow_parse_url (void) +{ + int rv; + struct http_parser_url u; + + http_parser_url_init(&u); + rv = http_parser_parse_url("http://example.com:8001", 22, 0, &u); + + if (rv != 0) { + fprintf(stderr, + "\n*** test_no_overflow_parse_url invalid return value=%d\n", + rv); + abort(); + } + + if (u.port != 800) { + fprintf(stderr, + "\n*** test_no_overflow_parse_url invalid port number=%d\n", + u.port); + abort(); + } +} + +void +test_header_overflow_error (int req) +{ + http_parser parser; + http_parser_init(&parser, req ? HTTP_REQUEST : HTTP_RESPONSE); + size_t parsed; + const char *buf; + buf = req ? 
"GET / HTTP/1.1\r\n" : "HTTP/1.0 200 OK\r\n"; + parsed = http_parser_execute(&parser, &settings_null, buf, strlen(buf)); + assert(parsed == strlen(buf)); + + buf = "header-key: header-value\r\n"; + size_t buflen = strlen(buf); + + int i; + for (i = 0; i < 10000; i++) { + parsed = http_parser_execute(&parser, &settings_null, buf, buflen); + if (parsed != buflen) { + //fprintf(stderr, "error found on iter %d\n", i); + assert(HTTP_PARSER_ERRNO(&parser) == HPE_HEADER_OVERFLOW); + return; + } + } + + fprintf(stderr, "\n*** Error expected but none in header overflow test ***\n"); + abort(); +} + + +void +test_header_nread_value () +{ + http_parser parser; + http_parser_init(&parser, HTTP_REQUEST); + size_t parsed; + const char *buf; + buf = "GET / HTTP/1.1\r\nheader: value\nhdr: value\r\n"; + parsed = http_parser_execute(&parser, &settings_null, buf, strlen(buf)); + assert(parsed == strlen(buf)); + + assert(parser.nread == strlen(buf)); +} + + +static void +test_content_length_overflow (const char *buf, size_t buflen, int expect_ok) +{ + http_parser parser; + http_parser_init(&parser, HTTP_RESPONSE); + http_parser_execute(&parser, &settings_null, buf, buflen); + + if (expect_ok) + assert(HTTP_PARSER_ERRNO(&parser) == HPE_OK); + else + assert(HTTP_PARSER_ERRNO(&parser) == HPE_INVALID_CONTENT_LENGTH); +} + +void +test_header_content_length_overflow_error (void) +{ +#define X(size) \ + "HTTP/1.1 200 OK\r\n" \ + "Content-Length: " #size "\r\n" \ + "\r\n" + const char a[] = X(1844674407370955160); /* 2^64 / 10 - 1 */ + const char b[] = X(18446744073709551615); /* 2^64-1 */ + const char c[] = X(18446744073709551616); /* 2^64 */ +#undef X + test_content_length_overflow(a, sizeof(a) - 1, 1); /* expect ok */ + test_content_length_overflow(b, sizeof(b) - 1, 0); /* expect failure */ + test_content_length_overflow(c, sizeof(c) - 1, 0); /* expect failure */ +} + +void +test_chunk_content_length_overflow_error (void) +{ +#define X(size) \ + "HTTP/1.1 200 OK\r\n" \ + 
"Transfer-Encoding: chunked\r\n" \ + "\r\n" \ + #size "\r\n" \ + "..." + const char a[] = X(FFFFFFFFFFFFFFE); /* 2^64 / 16 - 1 */ + const char b[] = X(FFFFFFFFFFFFFFFF); /* 2^64-1 */ + const char c[] = X(10000000000000000); /* 2^64 */ +#undef X + test_content_length_overflow(a, sizeof(a) - 1, 1); /* expect ok */ + test_content_length_overflow(b, sizeof(b) - 1, 0); /* expect failure */ + test_content_length_overflow(c, sizeof(c) - 1, 0); /* expect failure */ +} + +void +test_no_overflow_long_body (int req, size_t length) +{ + http_parser parser; + http_parser_init(&parser, req ? HTTP_REQUEST : HTTP_RESPONSE); + size_t parsed; + size_t i; + char buf1[3000]; + size_t buf1len = sprintf(buf1, "%s\r\nConnection: Keep-Alive\r\nContent-Length: %lu\r\n\r\n", + req ? "POST / HTTP/1.0" : "HTTP/1.0 200 OK", (unsigned long)length); + parsed = http_parser_execute(&parser, &settings_null, buf1, buf1len); + if (parsed != buf1len) + goto err; + + for (i = 0; i < length; i++) { + char foo = 'a'; + parsed = http_parser_execute(&parser, &settings_null, &foo, 1); + if (parsed != 1) + goto err; + } + + parsed = http_parser_execute(&parser, &settings_null, buf1, buf1len); + if (parsed != buf1len) goto err; + return; + + err: + fprintf(stderr, + "\n*** error in test_no_overflow_long_body %s of length %lu ***\n", + req ? 
"REQUEST" : "RESPONSE", + (unsigned long)length); + abort(); +} + +void +test_multiple3 (const struct message *r1, const struct message *r2, const struct message *r3) +{ + int message_count = count_parsed_messages(3, r1, r2, r3); + + char total[ strlen(r1->raw) + + strlen(r2->raw) + + strlen(r3->raw) + + 1 + ]; + total[0] = '\0'; + + strcat(total, r1->raw); + strcat(total, r2->raw); + strcat(total, r3->raw); + + parser_init(r1->type); + + size_t read; + + read = parse(total, strlen(total)); + + if (parser.upgrade) { + upgrade_message_fix(total, read, 3, r1, r2, r3); + goto test; + } + + if (read != strlen(total)) { + print_error(total, read); + abort(); + } + + read = parse(NULL, 0); + + if (read != 0) { + print_error(total, read); + abort(); + } + +test: + + if (message_count != num_messages) { + fprintf(stderr, "\n\n*** Parser didn't see 3 messages only %d *** \n", num_messages); + abort(); + } + + if (!message_eq(0, 0, r1)) abort(); + if (message_count > 1 && !message_eq(1, 0, r2)) abort(); + if (message_count > 2 && !message_eq(2, 0, r3)) abort(); +} + +/* SCAN through every possible breaking to make sure the + * parser can handle getting the content in any chunks that + * might come from the socket + */ +void +test_scan (const struct message *r1, const struct message *r2, const struct message *r3) +{ + char total[80*1024] = "\0"; + char buf1[80*1024] = "\0"; + char buf2[80*1024] = "\0"; + char buf3[80*1024] = "\0"; + + strcat(total, r1->raw); + strcat(total, r2->raw); + strcat(total, r3->raw); + + size_t read; + + int total_len = strlen(total); + + int total_ops = 2 * (total_len - 1) * (total_len - 2) / 2; + int ops = 0 ; + + size_t buf1_len, buf2_len, buf3_len; + int message_count = count_parsed_messages(3, r1, r2, r3); + + int i,j,type_both; + for (type_both = 0; type_both < 2; type_both ++ ) { + for (j = 2; j < total_len; j ++ ) { + for (i = 1; i < j; i ++ ) { + + if (ops % 1000 == 0) { + printf("\b\b\b\b%3.0f%%", 100 * (float)ops /(float)total_ops); + 
fflush(stdout); + } + ops += 1; + + parser_init(type_both ? HTTP_BOTH : r1->type); + + buf1_len = i; + strlncpy(buf1, sizeof(buf1), total, buf1_len); + buf1[buf1_len] = 0; + + buf2_len = j - i; + strlncpy(buf2, sizeof(buf1), total+i, buf2_len); + buf2[buf2_len] = 0; + + buf3_len = total_len - j; + strlncpy(buf3, sizeof(buf1), total+j, buf3_len); + buf3[buf3_len] = 0; + + assert(num_messages == 0); + messages[0].headers_complete_cb_called = FALSE; + + read = parse(buf1, buf1_len); + + if (!messages[0].headers_complete_cb_called && parser.nread != read) { + print_error(buf1, read); + goto error; + } + + if (parser.upgrade) goto test; + + if (read != buf1_len) { + print_error(buf1, read); + goto error; + } + + read += parse(buf2, buf2_len); + + if (parser.upgrade) goto test; + + if (read != buf1_len + buf2_len) { + print_error(buf2, read); + goto error; + } + + read += parse(buf3, buf3_len); + + if (parser.upgrade) goto test; + + if (read != buf1_len + buf2_len + buf3_len) { + print_error(buf3, read); + goto error; + } + + parse(NULL, 0); + +test: + if (parser.upgrade) { + upgrade_message_fix(total, read, 3, r1, r2, r3); + } + + if (message_count != num_messages) { + fprintf(stderr, "\n\nParser didn't see %d messages only %d\n", + message_count, num_messages); + goto error; + } + + if (!message_eq(0, 0, r1)) { + fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n"); + goto error; + } + + if (message_count > 1 && !message_eq(1, 0, r2)) { + fprintf(stderr, "\n\nError matching messages[1] in test_scan.\n"); + goto error; + } + + if (message_count > 2 && !message_eq(2, 0, r3)) { + fprintf(stderr, "\n\nError matching messages[2] in test_scan.\n"); + goto error; + } + } + } + } + puts("\b\b\b\b100%"); + return; + + error: + fprintf(stderr, "i=%d j=%d\n", i, j); + fprintf(stderr, "buf1 (%u) %s\n\n", (unsigned int)buf1_len, buf1); + fprintf(stderr, "buf2 (%u) %s\n\n", (unsigned int)buf2_len , buf2); + fprintf(stderr, "buf3 (%u) %s\n", (unsigned int)buf3_len, 
buf3); + abort(); +} + +// user required to free the result +// string terminated by \0 +char * +create_large_chunked_message (int body_size_in_kb, const char* headers) +{ + int i; + size_t wrote = 0; + size_t headers_len = strlen(headers); + size_t bufsize = headers_len + (5+1024+2)*body_size_in_kb + 6; + char * buf = malloc(bufsize); + + memcpy(buf, headers, headers_len); + wrote += headers_len; + + for (i = 0; i < body_size_in_kb; i++) { + // write 1kb chunk into the body. + memcpy(buf + wrote, "400\r\n", 5); + wrote += 5; + memset(buf + wrote, 'C', 1024); + wrote += 1024; + strcpy(buf + wrote, "\r\n"); + wrote += 2; + } + + memcpy(buf + wrote, "0\r\n\r\n", 6); + wrote += 6; + assert(wrote == bufsize); + + return buf; +} + +/* Verify that we can pause parsing at any of the bytes in the + * message and still get the result that we're expecting. */ +void +test_message_pause (const struct message *msg) +{ + char *buf = (char*) msg->raw; + size_t buflen = strlen(msg->raw); + size_t nread; + + parser_init(msg->type); + + do { + nread = parse_pause(buf, buflen); + + // We can only set the upgrade buffer once we've gotten our message + // completion callback. 
+ if (messages[0].message_complete_cb_called && + msg->upgrade && + parser.upgrade) { + messages[0].upgrade = buf + nread; + goto test; + } + + if (nread < buflen) { + + // Not much do to if we failed a strict-mode check + if (HTTP_PARSER_ERRNO(&parser) == HPE_STRICT) { + return; + } + + assert (HTTP_PARSER_ERRNO(&parser) == HPE_PAUSED); + } + + buf += nread; + buflen -= nread; + http_parser_pause(&parser, 0); + } while (buflen > 0); + + nread = parse_pause(NULL, 0); + assert (nread == 0); + +test: + if (num_messages != 1) { + printf("\n*** num_messages != 1 after testing '%s' ***\n\n", msg->name); + abort(); + } + + if(!message_eq(0, 0, msg)) abort(); +} + +/* Verify that body and next message won't be parsed in responses to CONNECT */ +void +test_message_connect (const struct message *msg) +{ + char *buf = (char*) msg->raw; + size_t buflen = strlen(msg->raw); + + parser_init(msg->type); + + parse_connect(buf, buflen); + + if (num_messages != 1) { + printf("\n*** num_messages != 1 after testing '%s' ***\n\n", msg->name); + abort(); + } + + if(!message_eq(0, 1, msg)) abort(); +} + +int +main (void) +{ + unsigned i, j, k; + unsigned long version; + unsigned major; + unsigned minor; + unsigned patch; + + version = http_parser_version(); + major = (version >> 16) & 255; + minor = (version >> 8) & 255; + patch = version & 255; + printf("http_parser v%u.%u.%u (0x%06lx)\n", major, minor, patch, version); + + printf("sizeof(http_parser) = %u\n", (unsigned int)sizeof(http_parser)); + assert(sizeof(http_parser) == 4 + 4 + 8 + 2 + 2 + 4 + sizeof(void *)); + + //// API + test_preserve_data(); + test_parse_url(); + test_method_str(); + test_status_str(); + + //// NREAD + test_header_nread_value(); + + //// OVERFLOW CONDITIONS + test_no_overflow_parse_url(); + + test_header_overflow_error(HTTP_REQUEST); + test_no_overflow_long_body(HTTP_REQUEST, 1000); + test_no_overflow_long_body(HTTP_REQUEST, 100000); + + test_header_overflow_error(HTTP_RESPONSE); + 
test_no_overflow_long_body(HTTP_RESPONSE, 1000); + test_no_overflow_long_body(HTTP_RESPONSE, 100000); + + test_header_content_length_overflow_error(); + test_chunk_content_length_overflow_error(); + + //// HEADER FIELD CONDITIONS + test_double_content_length_error(HTTP_REQUEST); + test_chunked_content_length_error(HTTP_REQUEST); + test_header_cr_no_lf_error(HTTP_REQUEST); + test_invalid_header_field_token_error(HTTP_REQUEST); + test_invalid_header_field_content_error(HTTP_REQUEST); + test_double_content_length_error(HTTP_RESPONSE); + test_chunked_content_length_error(HTTP_RESPONSE); + test_header_cr_no_lf_error(HTTP_RESPONSE); + test_invalid_header_field_token_error(HTTP_RESPONSE); + test_invalid_header_field_content_error(HTTP_RESPONSE); + + test_simple_type( + "POST / HTTP/1.1\r\n" + "Content-Length:\r\n" // empty + "\r\n", + HPE_INVALID_CONTENT_LENGTH, + HTTP_REQUEST); + + test_simple_type( + "POST / HTTP/1.1\r\n" + "Content-Length: 42 \r\n" // Note the surrounding whitespace. + "\r\n", + HPE_OK, + HTTP_REQUEST); + + test_simple_type( + "POST / HTTP/1.1\r\n" + "Content-Length: 4 2\r\n" + "\r\n", + HPE_INVALID_CONTENT_LENGTH, + HTTP_REQUEST); + + test_simple_type( + "POST / HTTP/1.1\r\n" + "Content-Length: 13 37\r\n" + "\r\n", + HPE_INVALID_CONTENT_LENGTH, + HTTP_REQUEST); + + test_simple_type( + "POST / HTTP/1.1\r\n" + "Content-Length: 42\r\n" + " Hello world!\r\n", + HPE_INVALID_CONTENT_LENGTH, + HTTP_REQUEST); + + test_simple_type( + "POST / HTTP/1.1\r\n" + "Content-Length: 42\r\n" + " \r\n", + HPE_OK, + HTTP_REQUEST); + + //// RESPONSES + + test_simple_type("HTP/1.1 200 OK\r\n\r\n", HPE_INVALID_VERSION, HTTP_RESPONSE); + test_simple_type("HTTP/01.1 200 OK\r\n\r\n", HPE_INVALID_VERSION, HTTP_RESPONSE); + test_simple_type("HTTP/11.1 200 OK\r\n\r\n", HPE_INVALID_VERSION, HTTP_RESPONSE); + test_simple_type("HTTP/1.01 200 OK\r\n\r\n", HPE_INVALID_VERSION, HTTP_RESPONSE); + test_simple_type("HTTP/1.1\t200 OK\r\n\r\n", HPE_INVALID_VERSION, HTTP_RESPONSE); + 
test_simple_type("\rHTTP/1.1\t200 OK\r\n\r\n", HPE_INVALID_VERSION, HTTP_RESPONSE); + + for (i = 0; i < ARRAY_SIZE(responses); i++) { + test_message(&responses[i]); + } + + for (i = 0; i < ARRAY_SIZE(responses); i++) { + test_message_pause(&responses[i]); + } + + for (i = 0; i < ARRAY_SIZE(responses); i++) { + test_message_connect(&responses[i]); + } + + for (i = 0; i < ARRAY_SIZE(responses); i++) { + if (!responses[i].should_keep_alive) continue; + for (j = 0; j < ARRAY_SIZE(responses); j++) { + if (!responses[j].should_keep_alive) continue; + for (k = 0; k < ARRAY_SIZE(responses); k++) { + test_multiple3(&responses[i], &responses[j], &responses[k]); + } + } + } + + test_message_count_body(&responses[NO_HEADERS_NO_BODY_404]); + test_message_count_body(&responses[TRAILING_SPACE_ON_CHUNKED_BODY]); + + // test very large chunked response + { + char * msg = create_large_chunked_message(31337, + "HTTP/1.0 200 OK\r\n" + "Transfer-Encoding: chunked\r\n" + "Content-Type: text/plain\r\n" + "\r\n"); + struct message large_chunked = + {.name= "large chunked" + ,.type= HTTP_RESPONSE + ,.raw= msg + ,.should_keep_alive= FALSE + ,.message_complete_on_eof= FALSE + ,.http_major= 1 + ,.http_minor= 0 + ,.status_code= 200 + ,.response_status= "OK" + ,.num_headers= 2 + ,.headers= + { { "Transfer-Encoding", "chunked" } + , { "Content-Type", "text/plain" } + } + ,.body_size= 31337*1024 + ,.num_chunks_complete= 31338 + }; + for (i = 0; i < MAX_CHUNKS; i++) { + large_chunked.chunk_lengths[i] = 1024; + } + test_message_count_body(&large_chunked); + free(msg); + } + + + + printf("response scan 1/2 "); + test_scan( &responses[TRAILING_SPACE_ON_CHUNKED_BODY] + , &responses[NO_BODY_HTTP10_KA_204] + , &responses[NO_REASON_PHRASE] + ); + + printf("response scan 2/2 "); + test_scan( &responses[BONJOUR_MADAME_FR] + , &responses[UNDERSTORE_HEADER_KEY] + , &responses[NO_CARRIAGE_RET] + ); + + puts("responses okay"); + + + /// REQUESTS + + test_simple("GET / IHTTP/1.0\r\n\r\n", HPE_INVALID_CONSTANT); 
+ test_simple("GET / ICE/1.0\r\n\r\n", HPE_INVALID_CONSTANT); + test_simple("GET / HTP/1.1\r\n\r\n", HPE_INVALID_VERSION); + test_simple("GET / HTTP/01.1\r\n\r\n", HPE_INVALID_VERSION); + test_simple("GET / HTTP/11.1\r\n\r\n", HPE_INVALID_VERSION); + test_simple("GET / HTTP/1.01\r\n\r\n", HPE_INVALID_VERSION); + + test_simple("GET / HTTP/1.0\r\nHello: w\1rld\r\n\r\n", HPE_INVALID_HEADER_TOKEN); + test_simple("GET / HTTP/1.0\r\nHello: woooo\2rld\r\n\r\n", HPE_INVALID_HEADER_TOKEN); + + // Extended characters - see nodejs/test/parallel/test-http-headers-obstext.js + test_simple("GET / HTTP/1.1\r\n" + "Test: Düsseldorf\r\n", + HPE_OK); + + // Well-formed but incomplete + test_simple("GET / HTTP/1.1\r\n" + "Content-Type: text/plain\r\n" + "Content-Length: 6\r\n" + "\r\n" + "fooba", + HPE_OK); + + // Unknown Transfer-Encoding in request + test_simple("GET / HTTP/1.1\r\n" + "Transfer-Encoding: unknown\r\n" + "\r\n", + HPE_INVALID_TRANSFER_ENCODING); + + static const char *all_methods[] = { + "DELETE", + "GET", + "HEAD", + "POST", + "PUT", + //"CONNECT", //CONNECT can't be tested like other methods, it's a tunnel + "OPTIONS", + "TRACE", + "COPY", + "LOCK", + "MKCOL", + "MOVE", + "PROPFIND", + "PROPPATCH", + "SEARCH", + "UNLOCK", + "BIND", + "REBIND", + "UNBIND", + "ACL", + "REPORT", + "MKACTIVITY", + "CHECKOUT", + "MERGE", + "M-SEARCH", + "NOTIFY", + "SUBSCRIBE", + "UNSUBSCRIBE", + "PATCH", + "PURGE", + "MKCALENDAR", + "LINK", + "UNLINK", + 0 }; + const char **this_method; + for (this_method = all_methods; *this_method; this_method++) { + char buf[200]; + sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method); + test_simple(buf, HPE_OK); + } + + static const char *bad_methods[] = { + "ASDF", + "C******", + "COLA", + "GEM", + "GETA", + "M****", + "MKCOLA", + "PROPPATCHA", + "PUN", + "PX", + "SA", + "hello world", + 0 }; + for (this_method = bad_methods; *this_method; this_method++) { + char buf[200]; + sprintf(buf, "%s / HTTP/1.1\r\n\r\n", *this_method); + test_simple(buf, 
HPE_INVALID_METHOD); + } + + // illegal header field name line folding + test_simple("GET / HTTP/1.1\r\n" + "name\r\n" + " : value\r\n" + "\r\n", + HPE_INVALID_HEADER_TOKEN); + + const char *dumbluck2 = + "GET / HTTP/1.1\r\n" + "X-SSL-Nonsense: -----BEGIN CERTIFICATE-----\r\n" + "\tMIIFbTCCBFWgAwIBAgICH4cwDQYJKoZIhvcNAQEFBQAwcDELMAkGA1UEBhMCVUsx\r\n" + "\tETAPBgNVBAoTCGVTY2llbmNlMRIwEAYDVQQLEwlBdXRob3JpdHkxCzAJBgNVBAMT\r\n" + "\tAkNBMS0wKwYJKoZIhvcNAQkBFh5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMu\r\n" + "\tdWswHhcNMDYwNzI3MTQxMzI4WhcNMDcwNzI3MTQxMzI4WjBbMQswCQYDVQQGEwJV\r\n" + "\tSzERMA8GA1UEChMIZVNjaWVuY2UxEzARBgNVBAsTCk1hbmNoZXN0ZXIxCzAJBgNV\r\n" + "\tBAcTmrsogriqMWLAk1DMRcwFQYDVQQDEw5taWNoYWVsIHBhcmQYJKoZIhvcNAQEB\r\n" + "\tBQADggEPADCCAQoCggEBANPEQBgl1IaKdSS1TbhF3hEXSl72G9J+WC/1R64fAcEF\r\n" + "\tW51rEyFYiIeZGx/BVzwXbeBoNUK41OK65sxGuflMo5gLflbwJtHBRIEKAfVVp3YR\r\n" + "\tgW7cMA/s/XKgL1GEC7rQw8lIZT8RApukCGqOVHSi/F1SiFlPDxuDfmdiNzL31+sL\r\n" + "\t0iwHDdNkGjy5pyBSB8Y79dsSJtCW/iaLB0/n8Sj7HgvvZJ7x0fr+RQjYOUUfrePP\r\n" + "\tu2MSpFyf+9BbC/aXgaZuiCvSR+8Snv3xApQY+fULK/xY8h8Ua51iXoQ5jrgu2SqR\r\n" + "\twgA7BUi3G8LFzMBl8FRCDYGUDy7M6QaHXx1ZWIPWNKsCAwEAAaOCAiQwggIgMAwG\r\n" + "\tA1UdEwEB/wQCMAAwEQYJYIZIAYb4QgHTTPAQDAgWgMA4GA1UdDwEB/wQEAwID6DAs\r\n" + "\tBglghkgBhvhCAQ0EHxYdVUsgZS1TY2llbmNlIFVzZXIgQ2VydGlmaWNhdGUwHQYD\r\n" + "\tVR0OBBYEFDTt/sf9PeMaZDHkUIldrDYMNTBZMIGaBgNVHSMEgZIwgY+AFAI4qxGj\r\n" + "\tloCLDdMVKwiljjDastqooXSkcjBwMQswCQYDVQQGEwJVSzERMA8GA1UEChMIZVNj\r\n" + "\taWVuY2UxEjAQBgNVBAsTCUF1dGhvcml0eTELMAkGA1UEAxMCQ0ExLTArBgkqhkiG\r\n" + "\t9w0BCQEWHmNhLW9wZXJhdG9yQGdyaWQtc3VwcG9ydC5hYy51a4IBADApBgNVHRIE\r\n" + "\tIjAggR5jYS1vcGVyYXRvckBncmlkLXN1cHBvcnQuYWMudWswGQYDVR0gBBIwEDAO\r\n" + "\tBgwrBgEEAdkvAQEBAQYwPQYJYIZIAYb4QgEEBDAWLmh0dHA6Ly9jYS5ncmlkLXN1\r\n" + "\tcHBvcnQuYWMudmT4sopwqlBWsvcHViL2NybC9jYWNybC5jcmwwPQYJYIZIAYb4QgEDBDAWLmh0\r\n" + "\tdHA6Ly9jYS5ncmlkLXN1cHBvcnQuYWMudWsvcHViL2NybC9jYWNybC5jcmwwPwYD\r\n" + 
"\tVR0fBDgwNjA0oDKgMIYuaHR0cDovL2NhLmdyaWQt5hYy51ay9wdWIv\r\n" + "\tY3JsL2NhY3JsLmNybDANBgkqhkiG9w0BAQUFAAOCAQEAS/U4iiooBENGW/Hwmmd3\r\n" + "\tXCy6Zrt08YjKCzGNjorT98g8uGsqYjSxv/hmi0qlnlHs+k/3Iobc3LjS5AMYr5L8\r\n" + "\tUO7OSkgFFlLHQyC9JzPfmLCAugvzEbyv4Olnsr8hbxF1MbKZoQxUZtMVu29wjfXk\r\n" + "\thTeApBv7eaKCWpSp7MCbvgzm74izKhu3vlDk9w6qVrxePfGgpKPqfHiOoGhFnbTK\r\n" + "\twTC6o2xq5y0qZ03JonF7OJspEd3I5zKY3E+ov7/ZhW6DqT8UFvsAdjvQbXyhV8Eu\r\n" + "\tYhixw1aKEPzNjNowuIseVogKOLXxWI5vAi5HgXdS0/ES5gDGsABo4fqovUKlgop3\r\n" + "\tRA==\r\n" + "\t-----END CERTIFICATE-----\r\n" + "\r\n"; + test_simple(dumbluck2, HPE_OK); + + const char *corrupted_connection = + "GET / HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "Connection\r\033\065\325eep-Alive\r\n" + "Accept-Encoding: gzip\r\n" + "\r\n"; + test_simple(corrupted_connection, HPE_INVALID_HEADER_TOKEN); + + const char *corrupted_header_name = + "GET / HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "X-Some-Header\r\033\065\325eep-Alive\r\n" + "Accept-Encoding: gzip\r\n" + "\r\n"; + test_simple(corrupted_header_name, HPE_INVALID_HEADER_TOKEN); + +#if 0 + // NOTE(Wed Nov 18 11:57:27 CET 2009) this seems okay. we just read body + // until EOF. 
+ // + // no content-length + // error if there is a body without content length + const char *bad_get_no_headers_no_body = "GET /bad_get_no_headers_no_body/world HTTP/1.1\r\n" + "Accept: */*\r\n" + "\r\n" + "HELLO"; + test_simple(bad_get_no_headers_no_body, 0); +#endif + /* TODO sending junk and large headers gets rejected */ + + + /* check to make sure our predefined requests are okay */ + for (i = 0; i < ARRAY_SIZE(requests); i++) { + test_message(&requests[i]); + } + + for (i = 0; i < ARRAY_SIZE(requests); i++) { + test_message_pause(&requests[i]); + } + + for (i = 0; i < ARRAY_SIZE(requests); i++) { + if (!requests[i].should_keep_alive) continue; + for (j = 0; j < ARRAY_SIZE(requests); j++) { + if (!requests[j].should_keep_alive) continue; + for (k = 0; k < ARRAY_SIZE(requests); k++) { + test_multiple3(&requests[i], &requests[j], &requests[k]); + } + } + } + + printf("request scan 1/4 "); + test_scan( &requests[GET_NO_HEADERS_NO_BODY] + , &requests[GET_ONE_HEADER_NO_BODY] + , &requests[GET_NO_HEADERS_NO_BODY] + ); + + printf("request scan 2/4 "); + test_scan( &requests[POST_CHUNKED_ALL_YOUR_BASE] + , &requests[POST_IDENTITY_BODY_WORLD] + , &requests[GET_FUNKY_CONTENT_LENGTH] + ); + + printf("request scan 3/4 "); + test_scan( &requests[TWO_CHUNKS_MULT_ZERO_END] + , &requests[CHUNKED_W_TRAILING_HEADERS] + , &requests[CHUNKED_W_NONSENSE_AFTER_LENGTH] + ); + + printf("request scan 4/4 "); + test_scan( &requests[QUERY_URL_WITH_QUESTION_MARK_GET] + , &requests[PREFIX_NEWLINE_GET ] + , &requests[CONNECT_REQUEST] + ); + + puts("requests okay"); + + return 0; +} |