From 5276fb7f97b73ffe2ed4afce97272d15cbb43722 Mon Sep 17 00:00:00 2001 From: Patrick D'appollonio <930925+patrickdappollonio@users.noreply.github.com> Date: Mon, 7 Oct 2024 04:03:35 -0400 Subject: [PATCH] Ensure placeholder escaping works. --- docs/redirections.md | 211 ++++++++++++++++++++++------ internal/redirects/redirect.go | 96 ++++++++++--- internal/redirects/redirect_test.go | 28 ++++ 3 files changed, 274 insertions(+), 61 deletions(-) diff --git a/docs/redirections.md b/docs/redirections.md index 29b4f3a..f94da22 100644 --- a/docs/redirections.md +++ b/docs/redirections.md @@ -8,7 +8,11 @@ - [Path parameter match](#path-parameter-match) - [Querystring parameter match](#querystring-parameter-match) - [Maintaining querystring parameters](#maintaining-querystring-parameters) - - [Escaping colons in URLs](#escaping-colons-in-urls) + - [Regular expression match](#regular-expression-match) + - [Examples](#examples) + - [Validation of placeholders](#validation-of-placeholders) + - [Limitations](#limitations) + - [Escaping special characters](#escaping-special-characters) - [Inspecting redirections](#inspecting-redirections) > [!WARNING] @@ -18,75 +22,78 @@ * Redirect requests to other locations, either permanently (HTTP status code `301`) or temporarily (HTTP status code `302`). * Redirect using a rule system that allows: - * Splat matching, which allows you to match any path after a certain point. - * Exact matching, which allows you to match a specific path. - * Parameter matching, which allows you to match a path with a specific parameter. - * Querystring matching, which allows you to match a path with a specific querystring. - * Querystring conversion matching, which allows you to match a querystring and redirect to a path without querystrings. + * **Exact matching**, which allows you to match a specific path. + * **Splat matching**, which allows you to match any path after a certain point. 
+ * **Parameter matching**, which allows you to match a path with specific parameters. + * **Querystring matching**, which allows you to match a path with specific query parameters. + * **Regular expression matching**, which allows you to define complex matching patterns. + * **Querystring conversion matching**, which allows you to match a querystring and redirect to a path without querystrings. -The rule system is inspired by the solutions currently available at [Cloudflare](https://developers.cloudflare.com/pages/configuration/redirects/) and [Netlify](https://docs.netlify.com/routing/redirects/) but they don't work necessarily the same way. +The rule system is inspired by solutions currently available at [Cloudflare](https://developers.cloudflare.com/pages/configuration/redirects/) and [Netlify](https://docs.netlify.com/routing/redirects/), but they don't necessarily work the same way. + +Redirections are evaluated on startup, and any errors in the redirections file will prevent `http-server` from starting. This ensures that you can't accidentally introduce a broken redirection rule. This also means that **you can't change redirections without restarting `http-server`**. ## Redirections and path prefix > [!WARNING] -> The redirection system will ignore the value set on `--pathprefix` and will always redirect from the root of the server. This is intentional, to ensure that you can still serve content under a subpath, but you can still control the redirections from the root of the server. +> The redirection system will ignore the value set on `--pathprefix` and will always redirect from the root of the server. This is intentional, to ensure that you can still serve content under a subpath, but control the redirections from the root of the server. 
-Since `http-server` supports masking a path prefix for the folder you're in (for example, if you need to serve a folder in `example.com/blog`, you can start `http-server` with `--pathprefix=/blog` and the contents of the folder will be displayed only when accessing `example.com/blog`), **the redirections system does not take into account the path prefix**. +Since `http-server` supports masking a path prefix for the folder you're in (for example, if you need to serve a folder at `example.com/blog`, you can start `http-server` with `--pathprefix=/blog`, and the contents of the folder will be displayed only when accessing `example.com/blog`), **the redirection system does not take into account the path prefix**. -In other words, redirections work from the serving root of the server, not from the path prefix. This is to ensure that while you can still serve content under a subpath, you can still control the redirections from the root of the server. +In other words, redirections work from the serving root of the server, not from the path prefix. This ensures that while you can serve content under a subpath, you can still control the redirections from the root of the server. -By default, `http-server` when invoked with `--pathprefix=/blog` will still listen on the root of the server, but it will produce a redirection from `/` to `/blog` to ensure that the user is redirected to the correct path. +By default, when invoked with `--pathprefix=/blog`, `http-server` will still listen on the root of the server but will produce a redirection from `/` to `/blog` to ensure that the user is redirected to the correct path. ## Syntax -The syntax is quite simple, it follows the pattern: +The syntax is quite simple and follows the pattern: -```xml +```bash [old] [new] [permanent|temporary] ``` Where: -* `[old]` is the path, relative to the root of `http-server` where the redirection should happen. -* `[new]` is the path where the request should be redirected to. 
This path can be relative to `http-server` or absolute to a different URL. +* `[old]` is the path, relative to the root of `http-server`, where the redirection should happen. +* `[new]` is the path where the request should be redirected to. This path can be relative to `http-server` or an absolute URL. * `[permanent|temporary]` is the type of redirection. Use `permanent` for a `301` status code or `temporary` for a `302` status code. -Any value in the URL not covered by a match expression will be removed from the URL when redirecting. The same applies for querystring parameters. +Any value in the URL not covered by a match expression will be removed from the URL when redirecting. The same applies to querystring parameters. ### Exact match The following example will redirect an exact match from `/old` to `/new`: ```bash -# redirects example.com/old to example.com/new +# Redirects example.com/old to example.com/new /old /new permanent ``` ### Splat match -The following example will redirect any path that starts with `/old` to the same path in `/new`, so if you requested `/old/foo.txt`, you will land on `/new/foo.txt`: +The following example will redirect any path that starts with `/old` to the same path in `/new`. 
For instance, if you request `/old/foo.txt`, you will be redirected to `/new/foo.txt`: ```bash -# redirects example.com/old/* to example.com/new/* +# Redirects example.com/old/* to example.com/new/* /old/:splat /new/:splat permanent ``` -If instead, the new location does not contain the exact same files as the old location, you can use the following syntax to redirect any prefix to a new location **without maintaining the same path**: +If the new location does not contain the same path structure as the old location, you can redirect any prefix to a new location **without maintaining the same path**: ```bash -# redirects example.com/old/* to example.com/new +# Redirects example.com/old/* to example.com/new /old/* /new permanent ``` > [!TIP] -> You can use `:splat` when you want `http-server` to copy the path after the match to the new location, or `*` when you want to redirect to a new location without maintaining the same path. +> Use `:splat` when you want `http-server` to copy the path after the match to the new location, or `*` when you want to redirect to a new location without maintaining the same path. ### Path parameter match -To match a path parameter and redirect to a new location, you can name the given parameter like in many frameworks. The following example will redirect `/posts/:id` to `/articles/:id`: +To match a path parameter and redirect to a new location, you can name the given parameter as in many web frameworks. The following example will redirect `/posts/:id` to `/articles/:id`: ```bash -# redirects example.com/posts/123 to example.com/articles/123 +# Redirects example.com/posts/123 to example.com/articles/123 /posts/:id /articles/:id permanent ``` @@ -95,23 +102,23 @@ To match a path parameter and redirect to a new location, you can name the given ### Querystring parameter match -There are two things you can do with querystring parameters: first, you can redirect a querystring parameter to a path parameter. 
The following example will redirect `/posts?id=123` to `/articles/123`: +You can redirect a querystring parameter to a path parameter. The following example will redirect `/posts?id=123` to `/articles/123`: ```bash -# redirects example.com/posts?id=123 to example.com/articles/123 +# Redirects example.com/posts?id=123 to example.com/articles/123 /posts?id=:id /articles/:id permanent ``` -And second, you can redirect a querystring parameter to a new location with the same querystring parameter: +Alternatively, you can redirect a querystring parameter to a new location while maintaining the same querystring parameter: ```bash -# redirects example.com/posts?id=123 to example.com/articles?id=123 +# Redirects example.com/posts?id=123 to example.com/articles?id=123 /posts?id=:id /articles?id=:id permanent ``` ### Maintaining querystring parameters -By default, the redirection system will remove any querystring parameter unless matched. You can prevent this behaviour with the special syntax `?!`, which will maintain non-conflictive querystring parameters. +By default, the redirection system will remove any querystring parameter unless matched. You can prevent this behavior with the special syntax `?!`, which will maintain non-conflicting querystring parameters. To do so, append `?!` to the old path: @@ -125,54 +132,176 @@ This will produce the following redirect: /posts/25?category=tech&utm_source=github --> /articles/25?category=tech&utm_source=github ``` -There's one caveat: if the querystring parameter conflicts with the path parameter, **the unmatched querystring parameter will be removed**. Consider the following example: +**Note:** If a querystring parameter conflicts with a path parameter, **the unmatched querystring parameter will be removed**. For example: ```bash /posts/:id?! 
/posts?id=:id temporary ``` -This will redirect the following URL: +This will redirect: ``` /posts/23?category=tech&id=9999 --> /posts?id=23&category=tech ``` -Note how the duplicated parameter, `id=9999` was removed in favour of the parameter defined in the path. When maintaining querystring parameters with `?!`, only those parameters that don't conflict with the rules provided in the redirection matching will be maintained. +Notice how the duplicated parameter `id=9999` was removed in favor of the parameter defined in the path. When maintaining querystring parameters with `?!`, only those parameters that don't conflict with the rules provided in the redirection matching will be maintained. + +### Regular expression match + +In addition to the standard redirection rules, `http-server` supports **regex-based redirection rules**. This allows you to define more complex matching patterns using regular expressions. + +The syntax for regex-based rules is: + +```bash +regexp "<pattern>" "<replacement>" [permanent|temporary] +``` + +Where: + +- `regexp` indicates that this is a regex-based rule. +- `<pattern>` is a regular expression pattern, enclosed in double quotes (`"`). This pattern is applied to the entire request URI, which includes the path and the query string. +- `<replacement>` is the replacement string, enclosed in double quotes (`"`). It can include references to captured groups from the pattern. +- `[permanent|temporary]` is the type of redirection. Use `permanent` for a `301` status code or `temporary` for a `302` status code. + +**Important Notes:** + +- **Enclose patterns and replacements in double quotes.** If you need to include a double quote within the pattern or replacement, escape it with a backslash (`\"`). +- **Capture groups** in the pattern can be referenced in the replacement using: + - `$1`, `$2`, etc., for **positional groups**. + - `$name` for **named capture groups** defined with `(?P<name>...)`. 
+- **Regex-based rules are self-contained.** They do not mix with the placeholder-based logic (`:param` or `*`) or the `?!` syntax for maintaining query parameters. +- **Query parameters are handled entirely within the regex pattern and replacement.** If you need to match or include query parameters, include them in your regex pattern and replacement. + +#### Examples + +**Redirect with a positional capture group** + +The following example redirects any path starting with `/blog/` to `/articles/`, preserving the rest of the path: + +```bash +# Redirects example.com/blog/* to example.com/articles/* +regexp "^/blog/(.+)$" "/articles/$1" permanent +``` + +- Requesting `/blog/my-first-post` will redirect to `/articles/my-first-post`. + +**Redirect with a named capture group** + +You can use named capture groups in your regex pattern and reference them in the replacement: + +```bash +# Redirects example.com/user/:username to example.com/profile/:username +regexp "^/user/(?P<username>[^/]+)$" "/profile/$username" temporary +``` + +- Requesting `/user/johndoe` will redirect to `/profile/johndoe`. + +**Including query parameters** + +If you need to match or include query parameters, include them in your regex pattern and replacement: + +```bash +# Redirects example.com/search?q=term to example.com/find?q=term +regexp "^/search\\?q=(.+)$" "/find?q=$1" temporary +``` + +- Requesting `/search?q=golang` will redirect to `/find?q=golang`. -### Escaping colons in URLs +**Handling complex patterns** -Since colons (`:`) play such an important part of the redirection engine, using them in a URL in a place that you might not want to match a parameter can be tricky. 
To avoid this, you can escape colons with a backslash (`\`), like so: +You can define more complex patterns using regular expressions: ```bash -/tech\:articles/:id /articles/tech/:id permanent +# Redirects example.com/order/:orderId/item/:itemId to example.com/orders/:orderId/items/:itemId +regexp "^/order/(?P\\d+)/item/(?P\\d+)$" "/orders/$orderId/items/$itemId" permanent ``` -This will redirect `/tech:articles/123` to `/articles/tech/123`. +- Requesting `/order/123/item/456` will redirect to `/orders/123/items/456`. + +#### Validation of placeholders + +When using placeholders in the replacement string (e.g., `$1`, `$username`), ensure that they correspond to actual capture groups defined in your regex pattern. If a placeholder does not match any capture group, the redirection rule will be invalid, and `http-server` will report an error during startup. + +**Invalid example with unmatched named capture group:** + +```bash +# Invalid rule: $username is not defined in the pattern +regexp "^/user/(?P[^/]+)$" "/profile/$username" temporary +``` + +- This will result in an error: `undefined placeholder "$username" in replacement on line X`. + +**Invalid example with unmatched positional capture group:** + +```bash +# Invalid rule: There is no second positional group ($2) +regexp "^/user/(.+)$" "/profile/$2" temporary +``` + +- This will result in an error: `undefined placeholder "$2" in replacement on line X`. + +#### Limitations + +- The `?!` syntax for maintaining query parameters **does not apply** to regex-based rules. +- Regex-based rules are evaluated in the order they appear in your redirections file, just like non-regex rules. +- Regex patterns are applied to the **entire request URI**, including the path and query string. + +### Escaping special characters + +**In non-regex rules:** + +Since colons (`:`) play an important part in the redirection engine for defining parameters, using them in a URL where you don't want to match a parameter can be tricky. 
To avoid this, you can escape colons with a backslash (`\`): + +```bash +# Redirects example.com/tech:articles/123 to example.com/articles/tech/123 +/tech\:articles/:id /articles/tech/:id permanent +``` + +**In regex-based rules:** + +In regex patterns, you need to escape special regex characters according to regular expression syntax. For example, to match a literal question mark (`?`), you need to escape it with a backslash (`\\?`): + +```bash +# Redirects example.com/search?q=term to example.com/find?q=term +regexp "^/search\\?q=(.+)$" "/find?q=$1" temporary +``` + +If you need to include a backslash or a double quote in your pattern or replacement, escape it with another backslash: + +- To include a double quote (`"`), use `\"`. +- To include a backslash (`\`), use `\\`. + +**Example with escaped double quotes:** + +```bash +# Redirects example.com/say/"hello world" to example.com/quote/hello world +regexp "^/say/\"(.+)\"$" "/quote/$1" temporary +``` ## Inspecting redirections `http-server` logs will report redirections. Consider the following redirections file: ```bash -/:splat https://www.patrickdap.com/:splat temporary +/:splat https://www.example.com/:splat temporary ``` -Making a `HTTP GET` request will produce the redirection: +Making an HTTP GET request will produce the redirection: ```bash $ curl -i http://localhost:1234/foo/bar/baz HTTP/1.1 302 Found Content-Type: text/html; charset=utf-8 Etag: "427d467004d2337f70dac7618d9549b478dee0f3" -Location: https://www.patrickdap.com/foo/bar/baz +Location: https://www.example.com/foo/bar/baz Date: Sat, 28 Sep 2024 02:33:32 GMT Content-Length: 61 -<a href="https://www.patrickdap.com/foo/bar/baz">Found</a>. +<a href="https://www.example.com/foo/bar/baz">Found</a>. 
``` And the `http-server` logs will report the redirection: ```bash -2024/09/27 22:35:59 REDIR "/foo/bar/baz" -> "https://www.patrickdap.com/foo/bar/baz" (status: 302) +2024/09/27 22:35:59 REDIR "/foo/bar/baz" -> "https://www.example.com/foo/bar/baz" (status: 302) ``` diff --git a/internal/redirects/redirect.go b/internal/redirects/redirect.go index 5266b59..cbf0405 100644 --- a/internal/redirects/redirect.go +++ b/internal/redirects/redirect.go @@ -173,13 +173,8 @@ func parseRegexRuleLine(line string, lineNum int) (RedirectRule, error) { return rule, nil } -var rePlaceholder = regexp.MustCompile(`\$(\w+)`) - // validateRegexPlaceholders checks that all placeholders in the replacement string correspond to capture groups. func validateRegexPlaceholders(pattern *regexp.Regexp, replacement string, lineNum int) error { - // Extract placeholders from the replacement string - placeholders := rePlaceholder.FindAllStringSubmatch(replacement, -1) - // Build a set of valid group names and indices validGroups := make(map[string]struct{}) groupNames := pattern.SubexpNames() @@ -193,11 +188,35 @@ func validateRegexPlaceholders(pattern *regexp.Regexp, replacement string, lineN validGroups[fmt.Sprintf("%d", i)] = struct{}{} // Positional groups } - // Validate each placeholder - for _, match := range placeholders { - placeholder := match[1] // The group name or index without the leading '$' - if _, ok := validGroups[placeholder]; !ok { - return fmt.Errorf("undefined placeholder \"$%s\" in replacement on line %d", placeholder, lineNum) + // Process the replacement string to find placeholders + i := 0 + for i < len(replacement) { + c := replacement[i] + if c == '\\' && i+1 < len(replacement) { + // Skip escaped character + i += 2 + continue + } + if c == '$' { + // Possible placeholder + j := i + 1 + for j < len(replacement) && isWordChar(replacement[j]) { + j++ + } + if j > i+1 { + // Found a placeholder + key := replacement[i+1 : j] + if _, ok := validGroups[key]; !ok { + return 
fmt.Errorf("undefined placeholder \"$%s\" in replacement on line %d", key, lineNum) + } + i = j + continue + } else { + // No valid placeholder after '$', skip it + i++ + } + } else { + i++ } } @@ -337,17 +356,54 @@ func (rule *RedirectRule) MatchRegex(requestURI string) (string, bool) { } // replacePlaceholders replaces placeholders in the format $name or $1 with actual values from the groups map. +// It handles escaped dollar signs (\$) and escaped backslashes (\\) in the template string. func replacePlaceholders(template string, groups map[string]string) string { - // Regex to find placeholders like $1, $name - result := rePlaceholder.ReplaceAllStringFunc(template, func(m string) string { - key := m[1:] // Remove the leading $ - if val, ok := groups[key]; ok { - return val - } - // If the key is not found, leave the placeholder as is - return m - }) - return result + // Process the template string to handle escapes + var result strings.Builder + i := 0 + for i < len(template) { + c := template[i] + if c == '\\' && i+1 < len(template) { + nextChar := template[i+1] + if nextChar == '$' || nextChar == '\\' { + // Escaped dollar sign or backslash + result.WriteByte(nextChar) + i += 2 + continue + } + } + if c == '$' { + // Possible placeholder + j := i + 1 + for j < len(template) && (isWordChar(template[j])) { + j++ + } + if j > i+1 { + // Found a placeholder + key := template[i+1 : j] + if val, ok := groups[key]; ok { + result.WriteString(val) + i = j + continue + } + } + // Not a valid placeholder, keep as is + result.WriteByte(c) + i++ + } else { + result.WriteByte(c) + i++ + } + } + return result.String() +} + +// isWordChar checks if a byte is a valid word character (letter, digit, or underscore) +func isWordChar(c byte) bool { + return (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '9') || + c == '_' } // Match checks if the request path and query parameters match the rule. 
diff --git a/internal/redirects/redirect_test.go b/internal/redirects/redirect_test.go index 8b0b74b..36c71d6 100644 --- a/internal/redirects/redirect_test.go +++ b/internal/redirects/redirect_test.go @@ -438,6 +438,34 @@ func TestRedirectionEngine(t *testing.T) { rules: `regexp "^/user/(.+)$" "/profile/$2" temporary`, expectError: true, }, + { + name: "regex rule - escaped dollar sign in replacement", + rules: `regexp "^/product/(\d+)$" "/store/item/$1?price=\$20" temporary`, + visitedPath: "/product/123", + expectStatusCode: http.StatusFound, + expectLocation: "/store/item/123?price=$20", + }, + { + name: "regex rule - multiple escaped dollar signs", + rules: `regexp "^/donate$" "/contribute?amount=\$\$\$" permanent`, + visitedPath: "/donate", + expectStatusCode: http.StatusMovedPermanently, + expectLocation: "/contribute?amount=$$$", + }, + { + name: "regex rule - escaped dollar sign and placeholder", + rules: `regexp "^/user/(?P[^/]+)$" "/profile/\$$username" temporary`, + visitedPath: "/user/johndoe", + expectStatusCode: http.StatusFound, + expectLocation: "/profile/$johndoe", + }, + { + name: "regex rule - escaped backslash before dollar sign", + rules: `regexp "^/path$" "/new\\path\$" permanent`, + visitedPath: "/path", + expectStatusCode: http.StatusMovedPermanently, + expectLocation: `/new\path$`, + }, } for _, tt := range tests {