From 870f16cebdf2aac9e9a5e0c8d50c4e632528ae54 Mon Sep 17 00:00:00 2001
From: Nils Goroll
Date: Fri, 24 Jun 2022 16:56:10 +0200
Subject: [PATCH 1/8] Avoid panic in VRT_CacheReqBody if called from vcl_init{}

---
 bin/varnishd/cache/cache_vrt.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/bin/varnishd/cache/cache_vrt.c b/bin/varnishd/cache/cache_vrt.c
index ef43a3adee1..be3d56768f9 100644
--- a/bin/varnishd/cache/cache_vrt.c
+++ b/bin/varnishd/cache/cache_vrt.c
@@ -971,11 +971,16 @@ VRT_ban_string(VRT_CTX, VCL_STRING str)
 VCL_BYTES
 VRT_CacheReqBody(VRT_CTX, VCL_BYTES maxsize)
 {
+    const char * const err = "req.body can only be cached in vcl_recv{}";
 
     CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
     if (ctx->method != VCL_MET_RECV) {
-        VSLb(ctx->vsl, SLT_VCL_Error,
-            "req.body can only be cached in vcl_recv{}");
+        if (ctx->vsl != NULL) {
+            VSLb(ctx->vsl, SLT_VCL_Error, "%s", err);
+        } else {
+            AN(ctx->msg);
+            VSB_printf(ctx->msg, "%s\n", err);
+        }
         return (-1);
     }
     CHECK_OBJ_NOTNULL(ctx->req, REQ_MAGIC);

From 0c85ddc2a9098b1909e3f46c0f04d7796b80b06a Mon Sep 17 00:00:00 2001
From: Nils Goroll
Date: Mon, 30 May 2022 13:09:11 +0200
Subject: [PATCH 2/8] V1F: Read end-of-chunk as part of the chunk

Until now, we read the (CR)?LF at the end of a chunk as part of the
next chunk header (see: /* Skip leading whitespace */).

For a follow up commit, we are going to want to know if the next chunk
header is available for read, so we now consume the chunk end as part
of the chunk itself.

This also fixes a corner case: We previously accepted chunks with a
missing end-of-chunk (see fix of r01729.vtc).

Ref: https://datatracker.ietf.org/doc/html/rfc7230#section-4.1

---
 bin/varnishd/http1/cache_http1_vfp.c | 36 ++++++++++++++++++++--------
 bin/varnishtest/tests/r01184.vtc     |  2 ++
 bin/varnishtest/tests/r01729.vtc     |  6 ++---
 3 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/bin/varnishd/http1/cache_http1_vfp.c b/bin/varnishd/http1/cache_http1_vfp.c
index 69c3099d059..fd1aebdf2a4 100644
--- a/bin/varnishd/http1/cache_http1_vfp.c
+++ b/bin/varnishd/http1/cache_http1_vfp.c
@@ -89,6 +89,24 @@ v1f_read(const struct vfp_ctx *vc, struct http_conn *htc, void *d, ssize_t len)
 }
 
 
+/*--------------------------------------------------------------------
+ * read (CR)?LF at the end of a chunk
+ */
+static enum vfp_status
+v1f_chunk_end(struct vfp_ctx *vc, struct http_conn *htc)
+{
+    char c;
+
+    if (v1f_read(vc, htc, &c, 1) <= 0)
+        return (VFP_Error(vc, "chunked read err"));
+    if (c == '\r' && v1f_read(vc, htc, &c, 1) <= 0)
+        return (VFP_Error(vc, "chunked read err"));
+    if (c != '\n')
+        return (VFP_Error(vc, "chunked tail no NL"));
+    return (VFP_OK);
+}
+
+
 /*--------------------------------------------------------------------
  * Read a chunked HTTP object.
 
* @@ -99,6 +117,7 @@ static enum vfp_status v_matchproto_(vfp_pull_f) v1f_chunked_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, ssize_t *lp) { + static enum vfp_status vfps; struct http_conn *htc; char buf[20]; /* XXX: 20 is arbitrary */ char *q; @@ -169,18 +188,15 @@ v1f_chunked_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, return (VFP_Error(vc, "chunked insufficient bytes")); *lp = lr; vfe->priv2 -= lr; - if (vfe->priv2 == 0) - vfe->priv2 = -1; - return (VFP_OK); + if (vfe->priv2 != 0) + return (VFP_OK); + + vfe->priv2 = -1; + return (v1f_chunk_end(vc, htc)); } AZ(vfe->priv2); - if (v1f_read(vc, htc, buf, 1) <= 0) - return (VFP_Error(vc, "chunked read err")); - if (buf[0] == '\r' && v1f_read(vc, htc, buf, 1) <= 0) - return (VFP_Error(vc, "chunked read err")); - if (buf[0] != '\n') - return (VFP_Error(vc, "chunked tail no NL")); - return (VFP_END); + vfps = v1f_chunk_end(vc, htc); + return (vfps == VFP_OK ? VFP_END : vfps); } static const struct vfp v1f_chunked = { diff --git a/bin/varnishtest/tests/r01184.vtc b/bin/varnishtest/tests/r01184.vtc index d5aba32518e..f7cd8d778d5 100644 --- a/bin/varnishtest/tests/r01184.vtc +++ b/bin/varnishtest/tests/r01184.vtc @@ -62,6 +62,7 @@ server s1 { sendhex " 10 45 f3 a9 83 b8 18 1c 7b c2 30 55 04 17 13 c4" sendhex " 0f 07 5f 7a 38 f4 8e 50 b3 37 d4 3a 32 4a 34 07" sendhex " FF FF FF FF FF FF FF FF 72 ea 06 5f b3 1c fa dd" + send "\n" expect_close } -start @@ -93,6 +94,7 @@ server s1 { sendhex " 10 45 f3 a9 83 b8 18 1c 7b c2 30 55 04 17 13 c4" sendhex " 0f 07 5f 7a 38 f4 8e 50 b3 37 d4 3a 32 4a 34 07" sendhex " FF FF FF FF FF FF FF FF 72 ea 06 5f b3 1c fa dd" + send "\n" expect_close } -start diff --git a/bin/varnishtest/tests/r01729.vtc b/bin/varnishtest/tests/r01729.vtc index 883a60cc680..f6a01e97692 100644 --- a/bin/varnishtest/tests/r01729.vtc +++ b/bin/varnishtest/tests/r01729.vtc @@ -11,7 +11,7 @@ server s1 { send "\r\n" send "14\r\n" send "0123456789" - send "0123456789" + send "0123456789\n" send "0\r\n" send "\r\n" @@ -29,7 +29,7 @@ client c1 { send "\r\n" send "14\r\n" send "0123456789" - send "0123456789" + send "0123456789\n" send "0\r\n" send "\r\n" @@ -45,7 +45,7 @@ client c1 { send "\r\n" send "14\r\n" send "0123456789" - send "0123456789" + send "0123456789\n" send "0\r\n" send "\r\n" From 1a3f9fc7eb6cdc2d34dc398a0d2ee6d713af7544 Mon Sep 17 00:00:00 2001 From: Nils Goroll Date: Mon, 30 May 2022 13:42:58 +0200 Subject: [PATCH 3/8] V1F: pull chunk header parsing into an own function ... which we are going to need in a follow up commit. No functional changes, diff best viewed with -b --- bin/varnishd/http1/cache_http1_vfp.c | 123 ++++++++++++++++----------- 1 file changed, 72 insertions(+), 51 deletions(-) diff --git a/bin/varnishd/http1/cache_http1_vfp.c b/bin/varnishd/http1/cache_http1_vfp.c index fd1aebdf2a4..fbe7638eb69 100644 --- a/bin/varnishd/http1/cache_http1_vfp.c +++ b/bin/varnishd/http1/cache_http1_vfp.c @@ -108,77 +108,98 @@ v1f_chunk_end(struct vfp_ctx *vc, struct http_conn *htc) /*-------------------------------------------------------------------- - * Read a chunked HTTP object. + * Parse a chunk header and, for VFP_OK, return size in a pointer * * XXX: Reading one byte at a time is pretty pessimal. 
*/ -static enum vfp_status v_matchproto_(vfp_pull_f) -v1f_chunked_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, - ssize_t *lp) +static enum vfp_status +v1f_chunked_hdr(struct vfp_ctx *vc, struct http_conn *htc, ssize_t *szp) { - static enum vfp_status vfps; - struct http_conn *htc; char buf[20]; /* XXX: 20 is arbitrary */ - char *q; unsigned u; uintmax_t cll; - ssize_t cl, l, lr; + ssize_t cl, lr; + char *q; CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); - CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC); - CAST_OBJ_NOTNULL(htc, vfe->priv1, HTTP_CONN_MAGIC); - AN(ptr); - AN(lp); + CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC); + AN(szp); + assert(*szp == -1); - l = *lp; - *lp = 0; - if (vfe->priv2 == -1) { - /* Skip leading whitespace */ - do { - lr = v1f_read(vc, htc, buf, 1); - if (lr <= 0) - return (VFP_Error(vc, "chunked read err")); - } while (vct_islws(buf[0])); - - if (!vct_ishex(buf[0])) - return (VFP_Error(vc, "chunked header non-hex")); - - /* Collect hex digits, skipping leading zeros */ - for (u = 1; u < sizeof buf; u++) { - do { - lr = v1f_read(vc, htc, buf + u, 1); - if (lr <= 0) - return (VFP_Error(vc, - "chunked read err")); - } while (u == 1 && buf[0] == '0' && buf[u] == '0'); - if (!vct_ishex(buf[u])) - break; - } + /* Skip leading whitespace */ + do { + lr = v1f_read(vc, htc, buf, 1); + if (lr <= 0) + return (VFP_Error(vc, "chunked read err")); + } while (vct_islws(buf[0])); - if (u >= sizeof buf) - return (VFP_Error(vc, "chunked header too long")); + if (!vct_ishex(buf[0])) + return (VFP_Error(vc, "chunked header non-hex")); - /* Skip trailing white space */ - while (vct_islws(buf[u]) && buf[u] != '\n') { + /* Collect hex digits, skipping leading zeros */ + for (u = 1; u < sizeof buf; u++) { + do { lr = v1f_read(vc, htc, buf + u, 1); if (lr <= 0) return (VFP_Error(vc, "chunked read err")); - } + } while (u == 1 && buf[0] == '0' && buf[u] == '0'); + if (!vct_ishex(buf[u])) + break; + } - if (buf[u] != '\n') - return (VFP_Error(vc, "chunked header no NL")); + if (u >= sizeof buf) + return (VFP_Error(vc, "chunked header too long")); + + /* Skip trailing white space */ + while (vct_islws(buf[u]) && buf[u] != '\n') { + lr = v1f_read(vc, htc, buf + u, 1); + if (lr <= 0) + return (VFP_Error(vc, "chunked read err")); + } - buf[u] = '\0'; + if (buf[u] != '\n') + return (VFP_Error(vc, "chunked header no NL")); - cll = strtoumax(buf, &q, 16); - if (q == NULL || *q != '\0') - return (VFP_Error(vc, "chunked header number syntax")); - cl = (ssize_t)cll; - if (cl < 0 || (uintmax_t)cl != cll) - return (VFP_Error(vc, "bogusly large chunk size")); + buf[u] = '\0'; - vfe->priv2 = cl; + cll = strtoumax(buf, &q, 16); + if (q == NULL || *q != '\0') + return (VFP_Error(vc, "chunked header number syntax")); + cl = (ssize_t)cll; + if (cl < 0 || (uintmax_t)cl != cll) + return (VFP_Error(vc, "bogusly large chunk size")); + + *szp = cl; + return (VFP_OK); +} + + +/*-------------------------------------------------------------------- + * Read a chunked HTTP object. 
+ *
+ */
+
+static enum vfp_status v_matchproto_(vfp_pull_f)
+v1f_chunked_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr,
+    ssize_t *lp)
+{
+    static enum vfp_status vfps;
+    struct http_conn *htc;
+    ssize_t l, lr;
+
+    CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
+    CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC);
+    CAST_OBJ_NOTNULL(htc, vfe->priv1, HTTP_CONN_MAGIC);
+    AN(ptr);
+    AN(lp);
+
+    l = *lp;
+    *lp = 0;
+    if (vfe->priv2 == -1) {
+        vfps = v1f_chunked_hdr(vc, htc, &vfe->priv2);
+        if (vfps != VFP_OK)
+            return (vfps);
     }
     if (vfe->priv2 > 0) {
         if (vfe->priv2 < l)

From 0262661cb413fd52228584b921fa793ebdba9ba3 Mon Sep 17 00:00:00 2001
From: Nils Goroll
Date: Mon, 30 May 2022 13:51:26 +0200
Subject: [PATCH 4/8] V1F: Send VFP_END with last data for v1f chunked

While working on request body caching improvements, it was noticed
that, for chunked encoding, we did not know the request body size
before we attempted to read at least one additional byte because the
chunked fetch did not return VFP_END with the last chunk, but rather
only as a zero-length additional chunk.

This commit changes chunked encoding processing to return VFP_END
opportunistically if the next chunk header is available.

Implementation: Unless there is pipeline readahead data available, the
test for available data implies a poll(2) call. Relative to the
existing cost of the current implementation, this cost is not
considered relevant.

To improve efficiency, we should consider a generic readahead for
chunked encoding, probably by making the file descriptor non-blocking
and adding a readahead buffer.

---
 bin/varnishd/http1/cache_http1_vfp.c | 47 +++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/bin/varnishd/http1/cache_http1_vfp.c b/bin/varnishd/http1/cache_http1_vfp.c
index fbe7638eb69..4b5a03fec6c 100644
--- a/bin/varnishd/http1/cache_http1_vfp.c
+++ b/bin/varnishd/http1/cache_http1_vfp.c
@@ -38,6 +38,7 @@
 #include "config.h"
 
 #include <inttypes.h>
+#include <poll.h>
 
 #include "cache/cache_varnishd.h"
 #include "cache/cache_filter.h"
@@ -175,6 +176,37 @@ v1f_chunked_hdr(struct vfp_ctx *vc, struct http_conn *htc, ssize_t *szp)
 }
 
+/*--------------------------------------------------------------------
+ * Check if data is available
+ */
+
+static int
+v1f_poll(struct http_conn *htc)
+{
+    struct pollfd pfd[1];
+    int r;
+
+    CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
+
+    if (htc->pipeline_b)
+        return (1);
+
+    pfd->fd = *htc->rfd;
+    pfd->events = POLLIN;
+
+    r = poll(pfd, 1, 0);
+    if (r < 0) {
+        assert(errno == EINTR);
+        return (0);
+    }
+    if (r == 0)
+        return (0);
+    assert(r == 1);
+    assert(pfd->revents & POLLIN);
+    return (1);
+}
+
+
 /*--------------------------------------------------------------------
  * Read a chunked HTTP object.
  *
  */
@@ -213,7 +245,20 @@ v1f_chunked_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr,
             return (VFP_OK);
 
         vfe->priv2 = -1;
-        return (v1f_chunk_end(vc, htc));
+
+        vfps = v1f_chunk_end(vc, htc);
+        if (vfps != VFP_OK)
+            return (vfps);
+
+        /* only if some data of the next chunk header is available, read
+         * it to check if we can return VFP_END */
+        if (! 
v1f_poll(htc))
+            return (VFP_OK);
+        vfps = v1f_chunked_hdr(vc, htc, &vfe->priv2);
+        if (vfps != VFP_OK)
+            return (vfps);
+        if (vfe->priv2 != 0)
+            return (VFP_OK);
     }
     AZ(vfe->priv2);
     vfps = v1f_chunk_end(vc, htc);

From 12e06196b232b6a68a55bc98d849f98ef18f35d3 Mon Sep 17 00:00:00 2001
From: Nils Goroll
Date: Mon, 30 May 2022 14:02:40 +0200
Subject: [PATCH 5/8] Stabilize partial write -sdeprecated_persistent test

It seems this test now shows no data loss more frequently, which
should be fine for the purpose of the test.

---
 bin/varnishtest/tests/p00007.vtc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bin/varnishtest/tests/p00007.vtc b/bin/varnishtest/tests/p00007.vtc
index 67d00444e0c..1550200ae54 100644
--- a/bin/varnishtest/tests/p00007.vtc
+++ b/bin/varnishtest/tests/p00007.vtc
@@ -77,7 +77,8 @@ client c1 {
 	# Fetch the vampire object and see how that goes...
 	txreq -url "/1"
 	rxresp
-	expect resp.bodylen == 48
+	expect resp.bodylen >= 48
+	expect resp.bodylen <= 64
 } -run
 
 varnish v1 -expectexit 0x40

From 70fa9c97adf6c607da74658866d6006487dc1bf3 Mon Sep 17 00:00:00 2001
From: Nils Goroll
Date: Tue, 31 May 2022 12:28:48 +0200
Subject: [PATCH 6/8] V1F: Add state and readahead buffer for v1f chunked processing

This commit contains only the init/fini mechanics for the state/buffer;
it is not used yet.

The state is allocated on the heap to be prepared for partial caching
(see #3798), where the V1F layer is left and control returns to VCL:

- the worker workspace aws is not an option because it must be freed to
  its original state before returning to VCL.

- the task (req) workspace is not an option because of rollbacks.

While I would have preferred to avoid a heap allocation, it will be
paid off by the reduction of system calls in follow up commits.
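As an illustration of the intended memory layout, here is a minimal
sketch of the C flexible array member idiom the state uses (the names
state and state_new are hypothetical, not the code added below):

    #include <stdlib.h>

    struct state {
        unsigned    magic;
        unsigned    len;    /* usable bytes in buf */
        char        buf[];  /* readahead storage */
    };

    static struct state *
    state_new(unsigned bufsz)
    {
        struct state *s;

        /* a single heap allocation covers both header and buffer */
        s = calloc(1, sizeof *s + bufsz);
        if (s == NULL)
            return (NULL);
        s->len = bufsz;
        return (s);
    }

Keeping header and buffer in one allocation means the fini callback can
release everything with a single free(), independent of any workspace
state.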
---
 bin/varnishd/http1/cache_http1_vfp.c | 55 +++++++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/bin/varnishd/http1/cache_http1_vfp.c b/bin/varnishd/http1/cache_http1_vfp.c
index 4b5a03fec6c..326293e47d7 100644
--- a/bin/varnishd/http1/cache_http1_vfp.c
+++ b/bin/varnishd/http1/cache_http1_vfp.c
@@ -39,6 +39,7 @@
 
 #include <inttypes.h>
 #include <poll.h>
+#include <stdlib.h>
 
 #include "cache/cache_varnishd.h"
 #include "cache/cache_filter.h"
@@ -89,6 +90,15 @@ v1f_read(const struct vfp_ctx *vc, struct http_conn *htc, void *d, ssize_t len)
     return (i + l);
 }
 
+struct v1f_chunked_priv {
+    unsigned            magic;
+#define V1F_CHUNKED_PRIV_MAGIC  0xf5232e94
+    unsigned            len;
+    struct vfp_ctx      *vc;
+    struct http_conn    *htc;
+    txt                 avail;
+    unsigned char       buf[];
+};
 
 /*--------------------------------------------------------------------
  * read (CR)?LF at the end of a chunk
@@ -222,17 +232,46 @@ v1f_poll(struct http_conn *htc)
  *
  */
 
+static enum vfp_status v_matchproto_(vfp_init_f)
+v1f_chunked_init(VRT_CTX, struct vfp_ctx *vc, struct vfp_entry *vfe)
+{
+    struct http_conn *htc;
+    struct v1f_chunked_priv *v1fcp;
+    const unsigned sz = 24 + sizeof(struct v1f_chunked_priv);
+
+    CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
+    CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
+    CHECK_OBJ_NOTNULL(vc->wrk, WORKER_MAGIC);
+    CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC);
+    CAST_OBJ_NOTNULL(htc, vfe->priv1, HTTP_CONN_MAGIC);
+
+    v1fcp = calloc(1, sz);
+    AN(v1fcp);
+    v1fcp->magic = V1F_CHUNKED_PRIV_MAGIC;
+    v1fcp->len = sz - sizeof *v1fcp;
+    v1fcp->htc = htc;
+    v1fcp->vc = vc;
+    vfe->priv1 = v1fcp;
+
+    return (VFP_OK);
+}
+
+
 static enum vfp_status v_matchproto_(vfp_pull_f)
 v1f_chunked_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr,
     ssize_t *lp)
 {
     static enum vfp_status vfps;
+    struct v1f_chunked_priv *v1fcp;
     struct http_conn *htc;
     ssize_t l, lr;
 
     CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
     CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC);
-    CAST_OBJ_NOTNULL(htc, vfe->priv1, HTTP_CONN_MAGIC);
+    CAST_OBJ_NOTNULL(v1fcp, vfe->priv1, V1F_CHUNKED_PRIV_MAGIC);
+    htc = v1fcp->htc;
+    assert(vc == v1fcp->vc);
+    CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
     AN(ptr);
     AN(lp);
@@ -265,9 +304,23 @@ v1f_chunked_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr,
     return (vfps == VFP_OK ? VFP_END : vfps);
 }
 
+static void v_matchproto_(vfp_fini_f)
+v1f_chunked_fini(struct vfp_ctx *vc, struct vfp_entry *vfe)
+{
+    struct v1f_chunked_priv *v1fcp;
+
+    CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
+    CHECK_OBJ_NOTNULL(vc->wrk, WORKER_MAGIC);
+    CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC);
+    TAKE_OBJ_NOTNULL(v1fcp, &vfe->priv1, V1F_CHUNKED_PRIV_MAGIC);
+    free(v1fcp);
+}
+
 static const struct vfp v1f_chunked = {
     .name = "V1F_CHUNKED",
+    .init = v1f_chunked_init,
     .pull = v1f_chunked_pull,
+    .fini = v1f_chunked_fini
 };

From f564ab6262f50bb5faef3b4e0749a7b89ea3a005 Mon Sep 17 00:00:00 2001
From: Nils Goroll
Date: Tue, 31 May 2022 13:27:06 +0200
Subject: [PATCH 7/8] V1F: Refactor v1f_read() to use scatter/gather reads (readv())

This is to support optional readaheads: The first io vector is for
read data which the caller absolutely needs, any other io vectors are
for potential readahead.

The readahead is not used yet; for now, all v1f_read() calls use the
IOV() macro to just wrap the existing single buffer.
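To sketch the scatter/gather idea (read_with_readahead and its
parameters are hypothetical, not part of this change): the first io
vector receives the bytes the caller absolutely needs, the second one
opportunistically receives readahead, both in one system call:

    #include <sys/uio.h>

    /* read 'need' bytes into dst, up to 'ahead' extra bytes into ra */
    static ssize_t
    read_with_readahead(int fd, void *dst, size_t need, void *ra,
        size_t ahead)
    {
        struct iovec iov[2];

        iov[0].iov_base = dst;
        iov[0].iov_len = need;
        iov[1].iov_base = ra;
        iov[1].iov_len = ahead;

        /* readv() fills iov[0] completely before touching iov[1] */
        return (readv(fd, iov, 2));
    }

Because readv() fills the vectors in order, a short read may leave the
readahead vector empty, but it can never place readahead data before
the data the caller asked for.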
---
 bin/varnishd/http1/cache_http1_vfp.c | 92 ++++++++++++++++++----------
 1 file changed, 59 insertions(+), 33 deletions(-)

diff --git a/bin/varnishd/http1/cache_http1_vfp.c b/bin/varnishd/http1/cache_http1_vfp.c
index 326293e47d7..94c590e8ad3 100644
--- a/bin/varnishd/http1/cache_http1_vfp.c
+++ b/bin/varnishd/http1/cache_http1_vfp.c
@@ -40,6 +40,7 @@
 #include <inttypes.h>
 #include <poll.h>
 #include <stdlib.h>
+#include <sys/uio.h>
 
 #include "cache/cache_varnishd.h"
 #include "cache/cache_filter.h"
@@ -49,44 +50,69 @@
 #include "vtcp.h"
 
 /*--------------------------------------------------------------------
- * Read up to len bytes, returning pipelined data first.
+ * Read into the iovecs, returning pipelined data first.
+ *
+ * iova[1] and beyond are for readahead.
+ *
+ * v1f_read() returns with no actual read issued if pipelined data
+ * fills iova[0]
  */
 
+// for single io vector
+#define IOV(ptr,len) (struct iovec[1]){{.iov_base = (ptr), .iov_len = (len)}}, 1
+
 static ssize_t
-v1f_read(const struct vfp_ctx *vc, struct http_conn *htc, void *d, ssize_t len)
+v1f_read(const struct vfp_ctx *vc, struct http_conn *htc,
+    const struct iovec *iova, int iovcnt)
 {
-    ssize_t l;
-    unsigned char *p;
-    ssize_t i = 0;
+    ssize_t l, i = 0;
+    size_t ll;
+    int c;
+    struct iovec iov[iovcnt];
 
     CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
     CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
-    assert(len > 0);
+    AN(iova);
+    assert(iovcnt > 0);
+    memcpy(iov, iova, sizeof iov);
+    /* only check first iov */
+    AN(iov[0].iov_base);
+    assert(iov[0].iov_len > 0);
+
     l = 0;
-    p = d;
-    if (htc->pipeline_b) {
-        l = htc->pipeline_e - htc->pipeline_b;
-        assert(l > 0);
-        l = vmin(l, len);
-        memcpy(p, htc->pipeline_b, l);
-        p += l;
-        len -= l;
-        htc->pipeline_b += l;
+    c = 0;
+
+    while (htc->pipeline_b && c < iovcnt) {
+        ll = htc->pipeline_e - htc->pipeline_b;
+        assert(ll > 0);
+        ll = vmin(ll, iov[c].iov_len);
+        memcpy(iov[c].iov_base, htc->pipeline_b, ll);
+        iov[c].iov_base = (unsigned char *)iov[c].iov_base + ll;
+        iov[c].iov_len -= ll;
+        htc->pipeline_b += ll;
+        if (iov[c].iov_len == 0)
+            c++;
         if (htc->pipeline_b == htc->pipeline_e)
             htc->pipeline_b = htc->pipeline_e = NULL;
+        l += ll;
     }
-    if (len > 0) {
-        i = read(*htc->rfd, p, len);
-        if (i < 0) {
-            VTCP_Assert(i);
-            VSLb(vc->wrk->vsl, SLT_FetchError,
-                "%s", VAS_errtxt(errno));
-            return (i);
-        }
-        if (i == 0)
-            htc->doclose = SC_RESP_CLOSE;
+
+    // see comment above
+    if (c > 0)
+        return (l);
+    AZ(c);
+
+    i = readv(*htc->rfd, iov, iovcnt);
+
+    if (i < 0) {
+        VTCP_Assert(i);
+        VSLb(vc->wrk->vsl, SLT_FetchError, "%s", VAS_errtxt(errno));
+        return (i);
     }
+
+    if (i == 0)
+        htc->doclose = SC_RESP_CLOSE;
+
     return (i + l);
 }
@@ -108,9 +134,9 @@ v1f_chunk_end(struct vfp_ctx *vc, struct http_conn *htc)
 {
     char c;
 
-    if (v1f_read(vc, htc, &c, 1) <= 0)
+    if (v1f_read(vc, htc, IOV(&c, 1)) <= 0)
         return (VFP_Error(vc, "chunked read err"));
-    if (c == '\r' && v1f_read(vc, htc, &c, 1) <= 0)
+    if (c == '\r' && v1f_read(vc, htc, IOV(&c, 1)) <= 0)
         return (VFP_Error(vc, "chunked read err"));
     if (c != '\n')
         return (VFP_Error(vc, "chunked tail no NL"));
@@ -140,7 +166,7 @@ v1f_chunked_hdr(struct vfp_ctx *vc, struct http_conn *htc, ssize_t *szp)
 
     /* Skip leading whitespace */
     do {
-        lr = v1f_read(vc, htc, buf, 1);
+        lr = v1f_read(vc, htc, IOV(buf, 1));
         if (lr <= 0)
             return (VFP_Error(vc, "chunked read err"));
     } while (vct_islws(buf[0]));
@@ -151,7 +177,7 @@ v1f_chunked_hdr(struct vfp_ctx *vc, struct http_conn *htc, ssize_t *szp)
     /* Collect hex digits, skipping leading zeros */
     for (u = 1; u < sizeof buf; u++) {
         do {
-            lr = v1f_read(vc, htc, buf + u, 1);
+            lr = 
v1f_read(vc, htc, IOV(buf + u, 1));
             if (lr <= 0)
                 return (VFP_Error(vc, "chunked read err"));
         } while (u == 1 && buf[0] == '0' && buf[u] == '0');
         if (!vct_ishex(buf[u]))
             break;
@@ -164,7 +190,7 @@ v1f_chunked_hdr(struct vfp_ctx *vc, struct http_conn *htc, ssize_t *szp)
 
     /* Skip trailing white space */
     while (vct_islws(buf[u]) && buf[u] != '\n') {
-        lr = v1f_read(vc, htc, buf + u, 1);
+        lr = v1f_read(vc, htc, IOV(buf + u, 1));
         if (lr <= 0)
             return (VFP_Error(vc, "chunked read err"));
     }
@@ -275,7 +301,7 @@ v1f_chunked_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr,
     if (vfe->priv2 > 0) {
         if (vfe->priv2 < l)
             l = vfe->priv2;
-        lr = v1f_read(vc, htc, ptr, l);
+        lr = v1f_read(vc, htc, IOV(ptr, l));
         if (lr <= 0)
             return (VFP_Error(vc, "chunked insufficient bytes"));
         *lp = lr;
@@ -345,7 +371,7 @@ v1f_straight_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *p,
     if (vfe->priv2 == 0) // XXX: Optimize Content-Len: 0 out earlier
         return (VFP_END);
     l = vmin(l, vfe->priv2);
-    lr = v1f_read(vc, htc, p, l);
+    lr = v1f_read(vc, htc, IOV(p, l));
     if (lr <= 0)
         return (VFP_Error(vc, "straight insufficient bytes"));
     *lp = lr;
@@ -377,7 +403,7 @@ v1f_eof_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *p, ssize_t *lp)
 
     l = *lp;
     *lp = 0;
-    lr = v1f_read(vc, htc, p, l);
+    lr = v1f_read(vc, htc, IOV(p, l));
     if (lr < 0)
         return (VFP_Error(vc, "eof socket fail"));
     if (lr == 0)

From fdf390cd0969b3b878c94271188757c171b8223e Mon Sep 17 00:00:00 2001
From: Nils Goroll
Date: Tue, 31 May 2022 17:02:39 +0200
Subject: [PATCH 8/8] V1F: Readahead for v1f_chunked_*

We maintain a readahead buffer, which gets filled whenever we parse
chunk headers and read chunks.

The implementation works at the V1F layer only. Because we currently
have no way to return, from this layer, data to the HTC pipeline, we
must take great care to never over-read the current request.

The existing code treats the CR in the CRLF end sequence for chunked
encoding as optional, which, in general, leads to a lower safe
readahead than if we required CR.

For reading chunks, the readahead case is quite trivial: After each
chunk, at least one NL needs to follow, plus the last 0 bytes chunk
header (0 + NL), plus at least one NL for the end of trailers,
totaling four bytes of safe readahead. In practice, usually two bytes
will be read ahead, the CRLF ending the chunk.

For the final chunk, the safe readahead is too conservative to read
all of the usual CRLF 0 CRLF CRLF sequence, but the four-byte
readahead is sufficient to trigger reading the final zero chunk
header, such that we will return VFP_END properly.

For reading chunk headers, the minimal safe readahead assumption is
for the shortest possible end chunk, 0 NL NL. We very conservatively
update the readahead, such that we will usually process a chunk header
with two reads.

---
 bin/varnishd/http1/cache_http1_vfp.c | 310 ++++++++++++++++++++-------
 1 file changed, 231 insertions(+), 79 deletions(-)

diff --git a/bin/varnishd/http1/cache_http1_vfp.c b/bin/varnishd/http1/cache_http1_vfp.c
index 94c590e8ad3..e9caa2f1a92 100644
--- a/bin/varnishd/http1/cache_http1_vfp.c
+++ b/bin/varnishd/http1/cache_http1_vfp.c
@@ -116,97 +116,255 @@ v1f_read(const struct vfp_ctx *vc, struct http_conn *htc,
     return (i + l);
 }
 
+/* txt but not const */
+typedef struct {
+    char *b;
+    char *e;
+} wtxt;
+
 struct v1f_chunked_priv {
     unsigned            magic;
 #define V1F_CHUNKED_PRIV_MAGIC  0xf5232e94
     unsigned            len;
     struct vfp_ctx      *vc;
     struct http_conn    *htc;
-    txt                 avail;
-    unsigned char       buf[];
+    wtxt                avail;
+    char                buf[];
 };
 
+/* XXX reduce? 
*/
+#define ASSERT_V1F_CHUNKED_PRIV(x) \
+do { \
+    CHECK_OBJ_NOTNULL(x, V1F_CHUNKED_PRIV_MAGIC); \
+    assert((x->avail.b != NULL) == (x->avail.e != NULL)); \
+    assert(x->avail.b == NULL || x->avail.b >= x->buf); \
+    assert(x->avail.e == NULL || x->avail.e <= x->buf + x->len); \
+} while (0)
+
+/*
+ * ensure the readahead buffer has at least n bytes available and read
+ * ahead a maximum of ra bytes
+ *
+ * the caller must ensure that ra is below or at the maximum readahead
+ * possible at that point in order to not read into the next request
+ *
+ * (we can not return data to the next request's htc pipeline (yet))
+ */
+
+static inline enum vfp_status
+v1f_chunked_need(struct v1f_chunked_priv *v1fcp, size_t n, size_t ra)
+{
+    char *p;
+    ssize_t i;
+    size_t l = 0;
+
+    ASSERT_V1F_CHUNKED_PRIV(v1fcp);
+    AN(n);
+    assert(n <= v1fcp->len);
+    assert(n <= ra);
+
+    if (v1fcp->avail.b != NULL)
+        l = Tlen(v1fcp->avail);
+
+    if (l >= n)
+        return (VFP_OK);
+
+    if (l == 0) {
+        p = v1fcp->buf;
+    } else {
+        p = memmove(v1fcp->buf, v1fcp->avail.b, l);
+        p += l;
+    }
+
+    if (ra > v1fcp->len)
+        ra = v1fcp->len;
+
+    assert(ra >= l);
+    ra -= l;
+
+    while (ra > 0 && l < n) {
+        i = v1f_read(v1fcp->vc, v1fcp->htc, IOV(p, ra));
+        if (i <= 0)
+            return (VFP_Error(v1fcp->vc, "chunked read err"));
+
+//      VSLb(vc->wrk->vsl, SLT_Debug, "need read %zu/%zu", i, ra);
+
+        p += i;
+        l += i;
+        ra -= i;
+    }
+
+    v1fcp->avail.b = v1fcp->buf;
+    v1fcp->avail.e = v1fcp->buf + l;
+
+    assert(Tlen(v1fcp->avail) >= n);
+
+    return (VFP_OK);
+}
+
+/*
+ * read chunked data into the buffer, returning an existing readahead
+ * and reading ahead up to a maximum of ra
+ */
+
+static ssize_t
+v1f_chunked_data(struct v1f_chunked_priv *v1fcp, void *ptr, size_t len,
+    size_t ra)
+{
+    struct iovec iov[2];
+    size_t r = 0, l;
+    ssize_t i;
+    char *p;
+
+    ASSERT_V1F_CHUNKED_PRIV(v1fcp);
+    AN(ptr);
+    AN(len);
+    assert(ra <= v1fcp->len);
+
+    p = ptr;
+
+    if (v1fcp->avail.b != NULL) {
+        l = Tlen(v1fcp->avail);
+        l = vmin(l, len);
+
+        memcpy(p, v1fcp->avail.b, l);
+        p += l;
+        len -= l;
+        v1fcp->avail.b += l;
+        r += l;
+
+        if (Tlen(v1fcp->avail) == 0)
+            v1fcp->avail.b = v1fcp->avail.e = NULL;
+
+        if (len == 0)
+            return (r);
+    }
+
+    /* all readahead consumed */
+    assert(v1fcp->avail.b == NULL || v1fcp->avail.b == v1fcp->avail.e);
+
+    iov[0].iov_base = p;
+    iov[0].iov_len = len;
+
+    iov[1].iov_base = v1fcp->buf;
+    iov[1].iov_len = ra;
+
+    /* i is signed: a failed read must not be mistaken for readahead */
+    i = v1f_read(v1fcp->vc, v1fcp->htc, iov, 2);
+    if (i <= (ssize_t)len)
+        return (r + i);
+
+    /* we have readahead data */
+    v1fcp->avail.b = v1fcp->buf;
+    v1fcp->avail.e = v1fcp->buf + (i - len);
+
+//  VSLb(vc->wrk->vsl, SLT_Debug, "data readahead %zu", i - len);
+
+    return (r + len);
+}
+
+#define V1FNEED(priv, n, ra) \
+do { \
+    enum vfp_status vfps = v1f_chunked_need(priv, n, ra); \
+    if (vfps != VFP_OK) \
+        return (vfps); \
+} while (0)
+
 /*--------------------------------------------------------------------
  * read (CR)?LF at the end of a chunk
+ *
  */
 
 static enum vfp_status
-v1f_chunk_end(struct vfp_ctx *vc, struct http_conn *htc)
+v1f_chunk_end(struct v1f_chunked_priv *v1fcp)
 {
-    char c;
-
-    if (v1f_read(vc, htc, IOV(&c, 1)) <= 0)
-        return (VFP_Error(vc, "chunked read err"));
-    if (c == '\r' && v1f_read(vc, htc, IOV(&c, 1)) <= 0)
-        return (VFP_Error(vc, "chunked read err"));
-    if (c != '\n')
-        return (VFP_Error(vc, "chunked tail no NL"));
+
+    V1FNEED(v1fcp, 1, 1);
+    if (*v1fcp->avail.b == '\r') {
+        v1fcp->avail.b++;
+        V1FNEED(v1fcp, 1, 1);
+    }
+    if (*v1fcp->avail.b != '\n')
+        return (VFP_Error(v1fcp->vc, "chunk end no NL"));
+    v1fcp->avail.b++;
     return 
(VFP_OK);
 }
 
 /*--------------------------------------------------------------------
  * Parse a chunk header and, for VFP_OK, return size in a pointer
  *
- * XXX: Reading one byte at a time is pretty pessimal.
  */
 
 static enum vfp_status
-v1f_chunked_hdr(struct vfp_ctx *vc, struct http_conn *htc, ssize_t *szp)
+v1f_chunked_hdr(struct v1f_chunked_priv *v1fcp, ssize_t *szp)
 {
-    char buf[20];       /* XXX: 20 is arbitrary */
+    const size_t ra_safe = 3;
+    const unsigned maxdigits = 20;
     unsigned u;
     uintmax_t cll;
-    ssize_t cl, lr;
+    ssize_t cl, ra;
     char *q;
 
-    CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC);
-    CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC);
+    ASSERT_V1F_CHUNKED_PRIV(v1fcp);
    AN(szp);
    assert(*szp == -1);
 
-    /* Skip leading whitespace */
+    /* safe readahead is 0 digit + NL (end chunklen) + NL (end trailers)
+     *
+     * for the first non-zero digit, we can, conservatively, add 2: 1
+     * (minimum length) + 1 (NL).
+     *
+     * thereafter we just add 16 as a conservative lower bound
+     *
+     */
+    ra = ra_safe;
+
+    /* Skip leading whitespace. */
     do {
-        lr = v1f_read(vc, htc, IOV(buf, 1));
-        if (lr <= 0)
-            return (VFP_Error(vc, "chunked read err"));
-    } while (vct_islws(buf[0]));
-
-    if (!vct_ishex(buf[0]))
-        return (VFP_Error(vc, "chunked header non-hex"));
-
-    /* Collect hex digits, skipping leading zeros */
-    for (u = 1; u < sizeof buf; u++) {
-        do {
-            lr = v1f_read(vc, htc, IOV(buf + u, 1));
-            if (lr <= 0)
-                return (VFP_Error(vc, "chunked read err"));
-        } while (u == 1 && buf[0] == '0' && buf[u] == '0');
-        if (!vct_ishex(buf[u]))
+        V1FNEED(v1fcp, 1, ra);
+        while (Tlen(v1fcp->avail) && vct_islws(*v1fcp->avail.b))
+            v1fcp->avail.b++;
+    } while (Tlen(v1fcp->avail) == 0);
+
+    V1FNEED(v1fcp, 1, ra);
+    if (!vct_ishex(*v1fcp->avail.b))
+        return (VFP_Error(v1fcp->vc, "chunked header non-hex"));
+
+    /* Collect hex digits and trailing space */
+    assert(v1fcp->len >= maxdigits);
+    for (u = 0; u < maxdigits; u++) {
+        V1FNEED(v1fcp, u + 1, u + ra);
+        if (!vct_ishex(v1fcp->avail.b[u]))
             break;
+        /* extremely simple and conservative read-ahead adjustment */
+        if (ra > ra_safe)
+            ra += 16;
+        else if (v1fcp->avail.b[u] != '0')
+            ra += 2;
     }
-
-    if (u >= sizeof buf)
-        return (VFP_Error(vc, "chunked header too long"));
-
-    /* Skip trailing white space */
-    while (vct_islws(buf[u]) && buf[u] != '\n') {
-        lr = v1f_read(vc, htc, IOV(buf + u, 1));
-        if (lr <= 0)
-            return (VFP_Error(vc, "chunked read err"));
+    /* min readahead is one less now ( NL + NL ) */
+    ra--;
+    for (; u < maxdigits; u++) {
+        V1FNEED(v1fcp, u + 1, u + ra);
+        if (!vct_islws(v1fcp->avail.b[u]) || v1fcp->avail.b[u] == '\n')
+            break;
+        v1fcp->avail.b[u] = '\0';
     }
+    if (u >= maxdigits)
+        return (VFP_Error(v1fcp->vc, "chunked header too long"));
 
-    if (buf[u] != '\n')
-        return (VFP_Error(vc, "chunked header no NL"));
+    if (v1fcp->avail.b[u] != '\n')
+        return (VFP_Error(v1fcp->vc, "chunked header no NL"));
 
-    buf[u] = '\0';
+    v1fcp->avail.b[u] = '\0';
 
-    cll = strtoumax(buf, &q, 16);
+    cll = strtoumax(v1fcp->avail.b, &q, 16);
     if (q == NULL || *q != '\0')
-        return (VFP_Error(vc, "chunked header number syntax"));
+        return (VFP_Error(v1fcp->vc, "chunked header number syntax"));
     cl = (ssize_t)cll;
     if (cl < 0 || (uintmax_t)cl != cll)
-        return (VFP_Error(vc, "bogusly large chunk size"));
+        return (VFP_Error(v1fcp->vc, "bogusly large chunk size"));
 
+    v1fcp->avail.b += u + 1;
     *szp = cl;
     return (VFP_OK);
 }
@@ -217,29 +375,11 @@ v1f_chunked_hdr(struct vfp_ctx *vc, struct http_conn *htc, ssize_t *szp)
  */
 
 static int
-v1f_poll(struct http_conn *htc)
+v1f_hasreadahead(struct v1f_chunked_priv *v1fcp)
 {
- struct pollfd pfd[1]; - int r; - - CHECK_OBJ_NOTNULL(htc, HTTP_CONN_MAGIC); - - if (htc->pipeline_b) - return (1); - pfd->fd = *htc->rfd; - pfd->events = POLLIN; - - r = poll(pfd, 1, 0); - if (r < 0) { - assert(errno == EINTR); - return (0); - } - if (r == 0) - return (0); - assert(r == 1); - assert(pfd->revents & POLLIN); - return (1); + ASSERT_V1F_CHUNKED_PRIV(v1fcp); + return (v1fcp->avail.b != NULL && Tlen(v1fcp->avail)); } @@ -280,7 +420,7 @@ v1f_chunked_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, static enum vfp_status vfps; struct v1f_chunked_priv *v1fcp; struct http_conn *htc; - ssize_t l, lr; + ssize_t ra, l, lr; CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); CHECK_OBJ_NOTNULL(vfe, VFP_ENTRY_MAGIC); @@ -294,14 +434,26 @@ v1f_chunked_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, l = *lp; *lp = 0; if (vfe->priv2 == -1) { - vfps = v1f_chunked_hdr(vc, htc, &vfe->priv2); + vfps = v1f_chunked_hdr(v1fcp, &vfe->priv2); if (vfps != VFP_OK) return (vfps); } if (vfe->priv2 > 0) { - if (vfe->priv2 < l) + /* safe readahead is 4: + * + * NL (end chunk) + * 1 digit + * NL (end chunklen) + * NL (end trailers) + */ + ra = 0; + + if (vfe->priv2 <= l) { l = vfe->priv2; - lr = v1f_read(vc, htc, IOV(ptr, l)); + ra = 4; + } + + lr = v1f_chunked_data(v1fcp, ptr, l, ra); if (lr <= 0) return (VFP_Error(vc, "chunked insufficient bytes")); *lp = lr; @@ -311,22 +463,22 @@ v1f_chunked_pull(struct vfp_ctx *vc, struct vfp_entry *vfe, void *ptr, vfe->priv2 = -1; - vfps = v1f_chunk_end(vc, htc); + vfps = v1f_chunk_end(v1fcp); if (vfps != VFP_OK) return (vfps); /* only if some data of the next chunk header is available, read * it to check if we can return VFP_END */ - if (! v1f_poll(htc)) + if (! v1f_hasreadahead(v1fcp)) return (VFP_OK); - vfps = v1f_chunked_hdr(vc, htc, &vfe->priv2); + vfps = v1f_chunked_hdr(v1fcp, &vfe->priv2); if (vfps != VFP_OK) return (vfps); if (vfe->priv2 != 0) return (VFP_OK); } AZ(vfe->priv2); - vfps = v1f_chunk_end(vc, htc); + vfps = v1f_chunk_end(v1fcp); return (vfps == VFP_OK ? VFP_END : vfps); }
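
A worked example of the safe readahead bound from [PATCH 8/8] (the byte
strings below are illustrative, they are not part of the change): with
the CR treated as optional, the shortest legal remainder of a chunked
body after the last data chunk is one NL ending the chunk, the final
'0' chunk header with its NL, and the NL ending an empty trailer
section:

    /* shortest possible remainder after the last data chunk: 4 bytes */
    static const char minimal_tail[] = "\n" "0" "\n" "\n";

    /* remainder typically seen in practice: 7 bytes */
    static const char typical_tail[] = "\r\n" "0" "\r\n" "\r\n";

Reading ahead at most four bytes after a chunk therefore can never
cross into a following request on the same connection, yet it is always
enough to reach the '0' of a final chunk header and return VFP_END
without an extra pull.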