Skip to content

Implement smart punctuation #470

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Sep 1, 2020
Merged
24 changes: 24 additions & 0 deletions benches/html_rendering.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ use std::str::from_utf8;
static CRDT_BYTES: &[u8] = include_bytes!("../third_party/xi-editor/crdt.md");

fn criterion_benchmark(c: &mut Criterion) {
let mut full_opts = Options::empty();
full_opts.insert(Options::ENABLE_TABLES);
full_opts.insert(Options::ENABLE_FOOTNOTES);
full_opts.insert(Options::ENABLE_STRIKETHROUGH);
full_opts.insert(Options::ENABLE_TASKLISTS);
full_opts.insert(Options::ENABLE_SMART_PUNCTUATION);

c.bench_function("crdt_total", |b| {
let input = from_utf8(CRDT_BYTES).unwrap();
let mut buf = String::with_capacity(input.len() * 3 / 2);
Expand All @@ -30,12 +37,29 @@ fn criterion_benchmark(c: &mut Criterion) {
})
});

c.bench_function("crdt_all_options_parse", |b| {
let input = from_utf8(CRDT_BYTES).unwrap();

b.iter(|| Parser::new_ext(input, full_opts).count())
});

c.bench_function("crdt_parse", |b| {
let input = from_utf8(CRDT_BYTES).unwrap();

b.iter(|| Parser::new_ext(input, Options::empty()).count())
});

c.bench_function("smart_punctuation", |b| {
let input = r#"""'This here a real "quote"'

And -- if you're interested -- some em-dashes. Wait --- she actually said that?

Wow... Becky is so 'mean'!
"""#;

b.iter(|| Parser::new_ext(input, full_opts).count());
});

c.bench_function("links_n_emphasis", |b| {
let input = r#"""This is a [link](example.com). **Cool!**

Expand Down
51 changes: 25 additions & 26 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,12 @@ fn generate_tests_from_spec() {
// and make it easy to eventually add other hardcoded paths in the future if needed
let hardcoded = [
"./third_party/CommonMark/spec.txt",
"./third_party/CommonMark/smart_punct.txt",
"./third_party/GitHub/gfm_table.txt",
"./third_party/GitHub/gfm_strikethrough.txt",
"./third_party/GitHub/gfm_tasklist.txt",
];
let hardcoded_iter = hardcoded.into_iter().map(PathBuf::from);
let hardcoded_iter = hardcoded.iter().map(PathBuf::from);

// Create an iterator over the files in the specs/ directory that have a .txt extension
let spec_files = fs::read_dir("./specs")
Expand Down Expand Up @@ -82,13 +83,14 @@ fn {}_test_{i}() {{
let original = r##"{original}"##;
let expected = r##"{expected}"##;

test_markdown_html(original, expected);
test_markdown_html(original, expected, {smart_punct});
}}
"###,
spec_name,
i = i + 1,
original = testcase.original,
expected = testcase.expected
expected = testcase.expected,
smart_punct = testcase.smart_punct,
))
.unwrap();

Expand Down Expand Up @@ -133,14 +135,15 @@ pub struct Spec<'a> {
#[cfg(feature = "gen-tests")]
impl<'a> Spec<'a> {
pub fn new(spec: &'a str) -> Self {
Spec { spec: spec }
Spec { spec }
}
}

/// A single example extracted from a spec file by the `Spec` iterator.
///
/// Each example in the spec text is delimited by a 32-backtick fence followed by
/// ` example` (or ` example_smartpunct`), a `.` line separating the two halves,
/// and a closing 32-backtick fence.
#[cfg(feature = "gen-tests")]
pub struct TestCase {
/// Text between the opening fence line and the `.` separator line, with every
/// `→` replaced by a tab character. Passed as the first argument of the
/// generated `test_markdown_html` call.
pub original: String,
/// Text between the `.` separator line and the closing fence, with every `→`
/// replaced by a tab character. Passed as the second argument of the generated
/// `test_markdown_html` call.
pub expected: String,
/// `true` when the opening fence reads `example_smartpunct`, `false` for a plain
/// `example` fence. Forwarded as the third argument of the generated
/// `test_markdown_html` call.
// NOTE(review): presumably toggles smart punctuation in the parser under test;
// confirm against `test_markdown_html`, which is not visible here.
pub smart_punct: bool,
}

#[cfg(feature = "gen-tests")]
Expand All @@ -149,37 +152,33 @@ impl<'a> Iterator for Spec<'a> {

fn next(&mut self) -> Option<TestCase> {
let spec = self.spec;

let i_start = match self
.spec
.find("```````````````````````````````` example\n")
.map(|pos| pos + 41)
{
Some(pos) => pos,
None => return None,
};

let i_end = match self.spec[i_start..]
let prefix = "```````````````````````````````` example";

let (i_start, smart_punct) = self.spec.find(prefix).and_then(|pos| {
let suffix = "_smartpunct\n";
if spec[(pos + prefix.len())..].starts_with(suffix) {
Some((pos + prefix.len() + suffix.len(), true))
} else if spec[(pos + prefix.len())..].starts_with('\n') {
Some((pos + prefix.len() + 1, false))
} else {
None
}
})?;

let i_end = self.spec[i_start..]
.find("\n.\n")
.map(|pos| (pos + 1) + i_start)
{
Some(pos) => pos,
None => return None,
};
.map(|pos| (pos + 1) + i_start)?;

let e_end = match self.spec[i_end + 2..]
let e_end = self.spec[i_end + 2..]
.find("````````````````````````````````\n")
.map(|pos| pos + i_end + 2)
{
Some(pos) => pos,
None => return None,
};
.map(|pos| pos + i_end + 2)?;

self.spec = &self.spec[e_end + 33..];

let test_case = TestCase {
original: spec[i_start..i_end].to_string().replace("→", "\t"),
expected: spec[i_end + 2..e_end].to_string().replace("→", "\t"),
smart_punct,
};

Some(test_case)
Expand Down
Loading