Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add :character-info command #4000

Merged
merged 3 commits into from
Feb 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions book/src/generated/typable-cmd.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
| `:change-current-directory`, `:cd` | Change the current working directory. |
| `:show-directory`, `:pwd` | Show the current working directory. |
| `:encoding` | Set encoding. Based on `https://encoding.spec.whatwg.org`. |
| `:character-info`, `:char` | Get info about the character under the primary cursor. |
| `:reload` | Discard changes and reload from the source file. |
| `:reload-all` | Discard changes and reload all documents from the source files. |
| `:update` | Write changes only if the file has been modified. |
Expand Down
133 changes: 133 additions & 0 deletions helix-term/src/commands/typed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use crate::job::Job;

use super::*;

use helix_core::encoding;
use helix_view::editor::{Action, CloseError, ConfigEvent};
use ui::completers::{self, Completer};

Expand Down Expand Up @@ -1033,6 +1034,131 @@ fn set_encoding(
}
}

/// Shows info about the character under the primary cursor.
///
/// The grapheme starting at the primary cursor is reported on the status line
/// in the form `"<printable>"[ (U+XXXX ...)][ Dec N] Hex xx ...`, where:
///
/// * `<printable>` is the grapheme with `\0`, `\t`, `\n` and `\r` escaped,
/// * the `U+` code point list is only shown when the document encoding is
///   UTF-8,
/// * `Dec` is only shown when the encoding is ASCII-compatible and the
///   grapheme is a single byte long in UTF-8,
/// * `Hex` lists the bytes of each character as encoded in the document's
///   encoding, with the characters of the grapheme separated by ` +`.
///
/// Does nothing unless `event` is [`PromptEvent::Validate`], or when there is
/// no grapheme between the cursor and the next grapheme boundary.
///
/// # Errors
///
/// Returns an error if a character of the grapheme cannot be mapped to the
/// document's encoding.
///
/// # Panics
///
/// Panics if the encoder cannot report a maximum output length for the
/// grapheme (the `unwrap` on `max_buffer_length_from_utf8_without_replacement`).
fn get_character_info(
    cx: &mut compositor::Context,
    _args: &[Cow<str>],
    event: PromptEvent,
) -> anyhow::Result<()> {
    if event != PromptEvent::Validate {
        return Ok(());
    }

    let (view, doc) = current_ref!(cx.editor);
    let text = doc.text().slice(..);

    let grapheme_start = doc.selection(view.id).primary().cursor(text);
    let grapheme_end = graphemes::next_grapheme_boundary(text, grapheme_start);

    // Empty range: there is no grapheme to describe.
    if grapheme_start == grapheme_end {
        return Ok(());
    }

    let grapheme = text.slice(grapheme_start..grapheme_end).to_string();
    let encoding = doc.encoding();

    // Escape the control characters that would otherwise garble the status line.
    let printable = grapheme.chars().fold(String::new(), |mut s, c| {
        match c {
            '\0' => s.push_str("\\0"),
            '\t' => s.push_str("\\t"),
            '\n' => s.push_str("\\n"),
            '\r' => s.push_str("\\r"),
            _ => s.push(c),
        }

        s
    });

    // Convert to Unicode codepoints if in UTF-8
    let unicode = if encoding == encoding::UTF_8 {
        let mut unicode = " (".to_owned();

        for (i, ch) in grapheme.chars().enumerate() {
            if i != 0 {
                unicode.push(' ');
            }

            // A `char` is a Unicode scalar value, so converting it to `u32`
            // yields the code point directly; there is no need to decode the
            // UTF-8 bytes by hand.
            let codepoint = u32::from(ch);

            unicode.push_str(&format!("U+{codepoint:0>4x}"));
        }

        unicode.push(')');
        unicode
    } else {
        String::new()
    };

    // Give the decimal value for ascii characters. A grapheme that is exactly
    // one byte long in UTF-8 is a single ASCII character.
    let dec = if encoding.is_ascii_compatible() && grapheme.len() == 1 {
        format!(" Dec {}", grapheme.as_bytes()[0])
    } else {
        String::new()
    };

    let hex = {
        let mut encoder = encoding.new_encoder();
        let max_encoded_len = encoder
            .max_buffer_length_from_utf8_without_replacement(grapheme.len())
            .unwrap();
        let mut bytes = Vec::with_capacity(max_encoded_len);
        // Index of the first byte in `bytes` belonging to the current character.
        let mut current_byte = 0;
        let mut hex = String::new();

        for (i, ch) in grapheme.chars().enumerate() {
            if i != 0 {
                hex.push_str(" +");
            }

            // NOTE(review): only `Unmappable` is checked below; presumably the
            // pre-sized buffer keeps `OutputFull` from occurring — confirm.
            let (result, _input_bytes_read) = encoder.encode_from_utf8_to_vec_without_replacement(
                &ch.to_string(),
                &mut bytes,
                true,
            );

            if let encoding::EncoderResult::Unmappable(unmappable) = result {
                bail!("{unmappable:?} cannot be mapped to {}", encoding.name());
            }

            // Only print the bytes appended for this character.
            for byte in &bytes[current_byte..] {
                hex.push_str(&format!(" {byte:0>2x}"));
            }

            current_byte = bytes.len();
        }

        hex
    };

    cx.editor
        .set_status(format!("\"{printable}\"{unicode}{dec} Hex{hex}"));

    Ok(())
}

/// Reload the [`Document`] from its source file.
fn reload(
cx: &mut compositor::Context,
Expand Down Expand Up @@ -2131,6 +2257,13 @@ pub const TYPABLE_COMMAND_LIST: &[TypableCommand] = &[
fun: set_encoding,
completer: None,
},
TypableCommand {
name: "character-info",
aliases: &["char"],
doc: "Get info about the character under the primary cursor.",
fun: get_character_info,
completer: None,
},
TypableCommand {
name: "reload",
aliases: &[],
Expand Down
58 changes: 58 additions & 0 deletions helix-term/tests/test/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,3 +354,61 @@ async fn test_extend_line() -> anyhow::Result<()> {

Ok(())
}

#[tokio::test(flavor = "multi_thread")]
async fn test_character_info() -> anyhow::Result<()> {
    // Each case pairs the key sequence to replay in a fresh application with
    // the status line `:char` is expected to leave behind.
    let cases = [
        // UTF-8, single byte
        ("ih<esc>h:char<ret>", r#""h" (U+0068) Dec 104 Hex 68"#),
        // UTF-8, multi-byte: `e` followed by U+0308 (combining diaeresis),
        // spelled with an escape so the decomposed form is explicit.
        (
            "ie\u{308}<esc>h:char<ret>",
            "\"e\u{308}\" (U+0065 U+0308) Hex 65 + cc 88",
        ),
        // Multiple characters displayed as one, escaped characters
        (
            ":line<minus>ending crlf<ret>:char<ret>",
            r#""\r\n" (U+000d U+000a) Hex 0d + 0a"#,
        ),
        // Non-UTF-8
        (
            ":encoding ascii<ret>ih<esc>h:char<ret>",
            r#""h" Dec 104 Hex 68"#,
        ),
    ];

    for (keys, expected) in cases {
        let mut app = helpers::AppBuilder::new().build()?;

        test_key_sequence(
            &mut app,
            Some(keys),
            Some(&|app| {
                assert_eq!(expected, app.editor.get_status().unwrap().0);
            }),
            false,
        )
        .await?;
    }

    Ok(())
}