Skip to content

Commit

Permalink
feat: raise error for self closing script tags.
Browse files Browse the repository at this point in the history
  • Loading branch information
airblast-dev authored and ctron committed Apr 16, 2024
1 parent 1892944 commit f7ff300
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 45 deletions.
63 changes: 54 additions & 9 deletions src/common/html_rewrite.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use anyhow::Result;
use anyhow::{Error, Result};
use lol_html::{element, html_content::Element, HtmlRewriter, Settings};

/// A wrapper for Html modifications, and rewrites.
Expand All @@ -15,8 +15,38 @@ impl Document {
/// Create a new document
///
/// Note: if this is not a valid HTML document, it will fail later on.
pub fn new(data: impl Into<Vec<u8>>) -> Self {
Self(data.into())
// In case we want to add more things to check in the future, we can take in the config as an
// argument, and `anyhow::Result<()>` could be
// replaced with an `std::result::Result<(), MyErrorEnum>` in case we want to handle the error
// case differently depending on the error variant.
pub fn new(data: impl Into<Vec<u8>>, ignore_script_error: bool) -> Result<Self> {
let doc = Self(data.into());

// Check for self closed script tags such as "<script.../>"
doc.select("script[data-trunk]", |el| {
if el.is_self_closing() {
const SELF_CLOSED_SCRIPT: &str = concat!(
"Self closing script tag found. ",
r#"Replace the self closing script tag ("<script .../>") with a normally closed one such as "<script ...></script>"."#,
"\nFor more information please take a look at https://github.com/trunk-rs/trunk/discussions/771."
);

if ignore_script_error {
tracing::warn!("{}", SELF_CLOSED_SCRIPT);
}
else {
return Err(Error::msg(
format!("{}\n{}",
SELF_CLOSED_SCRIPT,
r#"In case this is a false positive the "--ignore-script-error" flag can be used to issue a warning instead."#
)
))
}
}
Ok(())
})?;

Ok(doc)
}

pub fn into_inner(self) -> Vec<u8> {
Expand All @@ -41,8 +71,8 @@ impl Document {
let mut buf = Vec::new();
HtmlRewriter::new(
Settings {
element_content_handlers: vec![element!(selector, |x| {
call(x)?;
element_content_handlers: vec![element!(selector, |el| {
call(el)?;
Ok(())
})],
..Self::default_settings()
Expand All @@ -59,11 +89,15 @@ impl Document {
/// Run a non-mutating handler for the provided selector
///
/// To perform modifications on the `Document` use `Document::select_mut`.
pub fn select(&self, selector: &str, mut call: impl FnMut(&Element<'_, '_>)) -> Result<()> {
pub fn select(
&self,
selector: &str,
mut call: impl FnMut(&Element<'_, '_>) -> Result<()>,
) -> Result<()> {
HtmlRewriter::new(
Settings {
element_content_handlers: vec![element!(selector, |el| {
call(el);
call(el)?;
Ok(())
})],
..Self::default_settings()
Expand Down Expand Up @@ -100,7 +134,10 @@ impl Document {

pub fn len(&mut self, selector: &str) -> Result<usize> {
let mut len = 0;
self.select(selector, |_| len += 1)?;
self.select(selector, |_| {
len += 1;
Ok(())
})?;

Ok(len)
}
Expand All @@ -122,7 +159,9 @@ mod test {
<body></body>
</html>
"#,
);
false,
)
.expect("this is valid HTML");

doc.append_html("script", r#"<span>here</span>"#)
.expect("not expected to fail");
Expand All @@ -142,4 +181,10 @@ mod test {
"#
);
}

#[test]
fn test_self_closing_script_tag() {
let doc = Document::new("<script data-trunk/>", false);
assert!(doc.is_err());
}
}
8 changes: 8 additions & 0 deletions src/config/models/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,4 +131,12 @@ pub struct ConfigOptsBuild {
#[serde(default)]
#[arg(long)]
pub no_sri: bool,

/// Ignore error's related to self closing script elements, and instead issue a warning.
///
/// Since this error can cause the HTML output to be truncated, only enable this in case you
/// are sure it is caused due to a false positive.
#[serde(default)]
#[arg(long)]
pub ignore_script_error: bool,
}
4 changes: 4 additions & 0 deletions src/config/rt/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ pub struct RtcBuild {
pub no_minification: bool,
/// Allow disabling SRI
pub no_sri: bool,
/// Ignore error's due to self closed script tags, instead will issue a warning.
pub ignore_script_error: bool,
}

impl RtcBuild {
Expand Down Expand Up @@ -174,6 +176,7 @@ impl RtcBuild {
accept_invalid_certs: opts.accept_invalid_certs,
no_minification: opts.no_minification,
no_sri: opts.no_sri,
ignore_script_error: opts.ignore_script_error,
})
}

Expand Down Expand Up @@ -216,6 +219,7 @@ impl RtcBuild {
accept_invalid_certs: None,
no_minification: false,
no_sri: false,
ignore_script_error: false,
})
}
}
70 changes: 34 additions & 36 deletions src/pipelines/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ impl HtmlPipeline {

// Open the source HTML file for processing.
let raw_html = fs::read(&self.target_html_path).await?;
let mut target_html = Document::new(raw_html);
let mut target_html = Document::new(raw_html, self.cfg.ignore_script_error)?;
let mut partial_assets = vec![];

// Since the `lol_html` doesn't provide an iterator for elements, we must use our own id.
Expand All @@ -95,41 +95,39 @@ impl HtmlPipeline {
//
// This is the first parsing of the HTML meaning it is pretty likely to receive
// invalid HTML at this stage.
target_html
.select_mut(r#"link[data-trunk], script[data-trunk]"#, |el| {
'l: {
el.set_attribute(TRUNK_ID, &id.to_string())?;

// Both are function pointers, no need to branch out.
let asset_constructor = match el.tag_name().as_str() {
"link" => TrunkAssetReference::Link,
"script" => TrunkAssetReference::Script,
// Just an early break since we won't do anything else.
_ => break 'l,
};

// Accumulate all attrs. The main reason we collect this as
// raw data instead of passing around the link itself, is the lifetime
// requirements of elements used in `lol_html::html_content::HtmlRewriter`.
let attrs = el.attributes().iter().fold(Attrs::new(), |mut acc, attr| {
acc.insert(attr.name(), attr.value());
acc
});

let asset = TrunkAsset::from_html(
self.cfg.clone(),
self.target_html_dir.clone(),
self.ignore_chan.clone(),
asset_constructor(attrs),
id,
);

partial_assets.push(asset);
}
id += 1;
Ok(())
})
.context("error parsing HTML, check HTML validity")?;
target_html.select_mut(r#"link[data-trunk], script[data-trunk]"#, |el| {
'l: {
el.set_attribute(TRUNK_ID, &id.to_string())?;

// Both are function pointers, no need to branch out.
let asset_constructor = match el.tag_name().as_str() {
"link" => TrunkAssetReference::Link,
"script" => TrunkAssetReference::Script,
// Just an early break since we won't do anything else.
_ => break 'l,
};

// Accumulate all attrs. The main reason we collect this as
// raw data instead of passing around the link itself, is the lifetime
// requirements of elements used in `lol_html::html_content::HtmlRewriter`.
let attrs = el.attributes().iter().fold(Attrs::new(), |mut acc, attr| {
acc.insert(attr.name(), attr.value());
acc
});

let asset = TrunkAsset::from_html(
self.cfg.clone(),
self.target_html_dir.clone(),
self.ignore_chan.clone(),
asset_constructor(attrs),
id,
);

partial_assets.push(asset);
}
id += 1;
Ok(())
})?;

let mut assets: Vec<TrunkAsset> = futures_util::future::join_all(partial_assets)
.await
Expand Down

0 comments on commit f7ff300

Please # to comment.