Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Decode the HTML before loading static assets #60

Merged
merged 1 commit into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ license = "Apache-2.0"

[dependencies]
goose = { version = "0.17", default-features = false }
html-escape = "0.2"
http = "0.2"
log = "0.4"
rand = "0.8"
Expand Down
4 changes: 2 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,7 @@ pub async fn get_src_elements(user: &mut GooseUser, html: &str) -> Vec<String> {
// @TODO: parse HTML5 srcset= also
let src_elements = Regex::new(r#"(?i)src="(.*?)""#).unwrap();
let mut elements: Vec<String> = Vec::new();
for url in src_elements.captures_iter(html) {
for url in src_elements.captures_iter(html_escape::decode_html_entities(html).as_ref()) {
if valid_local_uri(user, &url[1]) {
elements.push(url[1].to_string());
}
Expand All @@ -1010,7 +1010,7 @@ pub async fn get_css_elements(user: &mut GooseUser, html: &str) -> Vec<String> {
// <foo> is the URL to local css assets.
let css = Regex::new(r#"(?i)href="(.*?\.css.*?)""#).unwrap();
let mut elements: Vec<String> = Vec::new();
for url in css.captures_iter(html) {
for url in css.captures_iter(html_escape::decode_html_entities(html).as_ref()) {
if valid_local_uri(user, &url[1]) {
elements.push(url[1].to_string());
}
Expand Down
53 changes: 53 additions & 0 deletions tests/parse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
use gumdrop::Options;
use httpmock::{Method::GET, MockServer};

use goose::config::GooseConfiguration;
use goose::goose::get_base_url;
use goose::metrics::GooseCoordinatedOmissionMitigation::Disabled;
use goose::prelude::*;
use goose_eggs::load_static_elements;

/// Verifies that HTML entities inside asset URLs are decoded before the
/// static elements are requested.
///
/// The page references two scripts: one whose query string is written with
/// an encoded ampersand (`&amp;`) and one written with a plain `&`. Each
/// script has a mock endpoint that only matches when both `foo=1` and
/// `bar=2` arrive as separate query parameters, and each mock is expected to
/// be hit exactly once after `load_static_elements` runs.
#[tokio::test]
async fn test_html_decoding() {
    let html: &str = r#"
<!DOCTYPE html>
<head>
<!-- Check that encoded paths are decoded properly -->
<script type="text/javascript" src="/test1.js?foo=1&amp;bar=2"></script>
<!-- Check that decoded paths still work -->
<script type="text/javascript" src="/test2.js?foo=1&bar=2"></script>
<title>Title 1234ABCD</title>
</head>
<body>
<p>Test text on the page.</p>
</body>
"#;

    let server = MockServer::start();

    // One mock per script, registered in page order; both require the two
    // query parameters to be received individually.
    let script_mocks: Vec<_> = ["/test1.js", "/test2.js"]
        .iter()
        .map(|&script_path| {
            server.mock(|when, then| {
                when.method(GET)
                    .path(script_path)
                    .query_param("foo", "1")
                    .query_param("bar", "2");
                then.status(200).body("test");
            })
        })
        .collect();

    // Default configuration (no command line arguments), with coordinated
    // omission mitigation set to `Disabled`.
    let no_args: Vec<&str> = Vec::new();
    let mut configuration = GooseConfiguration::parse_args_default(&no_args).unwrap();
    configuration.co_mitigation = Some(Disabled);

    // A single user pointed at the mock server.
    let mock_base_url = get_base_url(Some(server.base_url()), None, None).unwrap();
    let mut user =
        GooseUser::new(0, String::new(), mock_base_url, &configuration, 0, None).unwrap();

    load_static_elements(&mut user, html).await;

    // Each script must have been requested exactly once.
    for script_mock in &script_mocks {
        assert_eq!(script_mock.hits(), 1);
    }
}
Loading