Skip to content

Commit

Permalink
Revert "feat: Character-level diffs (#273)" (#296)
Browse files Browse the repository at this point in the history
This reverts commit 879d21c.
  • Loading branch information
afnanenayet authored Feb 26, 2022
1 parent 879d21c commit e03cce1
Show file tree
Hide file tree
Showing 19 changed files with 171 additions and 3,426 deletions.
190 changes: 113 additions & 77 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ logging_timer = "1.1.0"
build-info = { version = "0.0.25", optional = true }
jemallocator = { version = "0.3.2", optional = true }
libloading = "0.7.3"
unicode-segmentation = "1.9.0"

[dev-dependencies]
test-case = "2.0.0"
Expand Down
121 changes: 17 additions & 104 deletions src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ use crate::diff::Myers;
use logging_timer::time;
use std::{cell::RefCell, ops::Index, path::PathBuf};
use tree_sitter::Node as TSNode;
use tree_sitter::Point;
use tree_sitter::Tree as TSTree;
use unicode_segmentation as us;

/// A mapping between a tree-sitter node and the text it corresponds to
#[derive(Debug, Clone, Copy)]
Expand All @@ -24,86 +22,6 @@ pub struct Entry<'a> {
/// This is different from the `source_text` that the [AstVector](AstVector) refers to, as the
/// entry only holds a reference to the specific range of text that the node covers.
pub text: &'a str,

/// The entry's start position in the document.
pub start_position: Point,

/// The entry's end position in the document.
pub end_position: Point,
}

impl<'a> Entry<'a> {
    /// Split an entry into a vector of entries per grapheme.
    ///
    /// Each grapheme gets its own [Entry] that shares `self.reference` and borrows the
    /// grapheme's slice of `self.text`. Positions are resolved from the node's start position:
    /// the row advances after every `"\n"` / `"\r\n"` grapheme, and the column is a byte offset
    /// measured from the start of the row the grapheme sits on.
    ///
    /// This assumes `self.text` begins at `self.reference.start_position()`.
    fn split_graphemes(self) -> Vec<Self> {
        let indices: Vec<(usize, &str)> =
            us::UnicodeSegmentation::grapheme_indices(self.text, true).collect();
        let mut entries = Vec::with_capacity(indices.len());

        let node_start = self.reference.start_position();
        let mut current_line = node_start.row;
        // Column at which the current row's portion of `self.text` begins. Only the first row
        // starts at the node's column; every row after a line break starts at column 0.
        let mut line_start_col = node_start.column;
        // Byte offset into `self.text` at which the current row's portion begins.
        let mut line_start_idx = 0;

        for (idx, grapheme) in indices {
            // Every grapheme has to be at least one byte
            debug_assert!(!grapheme.is_empty());

            let end_idx = idx + grapheme.len();
            let new_start_pos = Point {
                row: current_line,
                column: line_start_col + (idx - line_start_idx),
            };
            let new_end_pos = Point {
                row: current_line,
                column: new_start_pos.column + grapheme.len(),
            };

            // Both positions are on the same row, so the ending column *must be* greater than
            // the starting column.
            debug_assert!(new_start_pos.column < new_end_pos.column);

            entries.push(Entry {
                reference: self.reference,
                text: &self.text[idx..end_idx],
                start_position: new_start_pos,
                end_position: new_end_pos,
            });

            // If this grapheme was a line break, the next grapheme starts a new row, and its
            // column has to be counted from the beginning of that row rather than from the
            // node's original start column.
            if grapheme == "\n" || grapheme == "\r\n" {
                current_line += 1;
                line_start_col = 0;
                line_start_idx = end_idx;
            }
        }
        entries
    }

    /// Get the start position of an entry
    pub fn start_position(&self) -> Point {
        self.start_position
    }

    /// Get the end position of an entry
    pub fn end_position(&self) -> Point {
        self.end_position
    }
}

impl<'a> From<&'a AstVector<'a>> for Vec<Entry<'a>> {
    /// Flatten the leaves of an [AstVector] into one [Entry] per grapheme, preserving leaf
    /// order.
    fn from(ast_vector: &'a AstVector<'a>) -> Self {
        let leaves = &ast_vector.leaves;
        // There is at least room for one entry per leaf; the vector grows as needed when
        // leaves split into several graphemes.
        let mut graphemes = Vec::with_capacity(leaves.len());

        for leaf in leaves {
            graphemes.extend(leaf.split_graphemes());
        }
        graphemes
    }
}

/// A vector that allows for linear traversal through the leafs of an AST.
Expand Down Expand Up @@ -170,7 +88,7 @@ impl<'a> Index<usize> for AstVector<'a> {

impl<'a> PartialEq for Entry<'a> {
fn eq(&self, other: &Entry) -> bool {
self.reference.kind_id() == other.reference.kind_id() && self.text == other.text
self.text == other.text
}
}

Expand All @@ -180,15 +98,16 @@ impl<'a> PartialEq for AstVector<'a> {
return false;
}

for i in 0..self.leaves.len() {
let leaf = self.leaves[i];
let other_leaf = other.leaves[i];

if leaf != other_leaf {
return false;
}
}
true
// Zip through each entry to determine whether the elements are equal. We start with a
// `false` value for not equal and accumulate any inequalities along the way.
let not_equal = self
.leaves
.iter()
.zip(other.leaves.iter())
.fold(false, |not_equal, (entry_a, entry_b)| {
not_equal | (entry_a != entry_b)
});
!not_equal
}
}

Expand All @@ -208,8 +127,6 @@ fn build<'a>(vector: &RefCell<Vec<Entry<'a>>>, node: tree_sitter::Node<'a>, text
vector.borrow_mut().push(Entry {
reference: node,
text: node_text,
start_position: node.start_position(),
end_position: node.end_position(),
});
}
return;
Expand Down Expand Up @@ -247,19 +164,15 @@ pub enum EditType<T> {
#[time("info", "ast::{}")]
pub fn compute_edit_script<'a>(a: &'a AstVector, b: &'a AstVector) -> (Hunks<'a>, Hunks<'a>) {
let myers = Myers::default();
let a_graphemes: Vec<Entry> = a.into();
let b_graphemes: Vec<Entry> = b.into();
let edit_script = myers.diff(&a_graphemes[..], &b_graphemes[..]);
let edit_script_len = edit_script.len();

let mut old_edits = Vec::with_capacity(edit_script_len);
let mut new_edits = Vec::with_capacity(edit_script_len);
let edit_script = myers.diff(&a.leaves[..], &b.leaves[..]);
let mut old_edits = Vec::with_capacity(edit_script.len());
let mut new_edits = Vec::with_capacity(edit_script.len());

for edit in edit_script {
match edit {
EditType::Deletion(&e) => old_edits.push(e),
EditType::Addition(&e) => new_edits.push(e),
};
EditType::Deletion(&edit) => old_edits.push(edit),
EditType::Addition(&edit) => new_edits.push(edit),
}
}

// Convert the vectors of edits into hunks that can be displayed
Expand Down
64 changes: 9 additions & 55 deletions src/diff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,10 @@ pub enum HunkInsertionError {
last_line: usize,
},

#[error("Attempted to append an entry with a column ({incoming_col:?}, line: {incoming_line:?}) less than the first entry's column ({last_col:?}, line: {last_line:?})")]
#[error("Attempted to prepend an entry with a column ({incoming_col:?}) greater than the first entry's column ({last_col:?})")]
PriorColumn {
incoming_col: usize,
incoming_line: usize,
last_col: usize,
last_line: usize,
},
}

Expand Down Expand Up @@ -146,7 +144,7 @@ impl<'a> Hunk<'a> {
///
/// Entries can only be prepended in descending order (from last to first)
pub fn push_front(&mut self, entry: Entry<'a>) -> Result<(), HunkInsertionError> {
let incoming_line_idx = entry.start_position().row;
let incoming_line_idx = entry.reference.start_position().row;

// Add a new line vector if the entry has a greater line index, or if the vector is empty.
// We ensure that the last line has the same line index as the incoming entry.
Expand Down Expand Up @@ -188,10 +186,10 @@ impl<'a> Hunk<'a> {
//if let Some(&first_entry) = first_line.entries.front() {
// TODO(afnan) ^ this instead?
// TODO(afnan) should this be start_position() instead of end?
let first_col = first_entry.end_position().column;
//let first_col = first_entry.start_position().column;
let first_col = first_entry.reference.end_position().column;
//let first_col = first_entry.reference.start_position().column;
// TODO(afnan) ^ this instead?
let incoming_col = entry.end_position().column;
let incoming_col = entry.reference.end_position().column;

if incoming_col > first_col {
return Err(HunkInsertionError::LaterColumn {
Expand All @@ -210,7 +208,7 @@ impl<'a> Hunk<'a> {
/// entries out of order. For example, you can't insert an entry on line 1 after inserting an
/// entry on line 5.
pub fn push_back(&mut self, entry: Entry<'a>) -> Result<(), HunkInsertionError> {
let incoming_line_idx = entry.start_position().row;
let incoming_line_idx = entry.reference.start_position().row;

// Create a new line if the incoming entry is on the next line. This will throw an error
// if we have an entry on a non-adjacent line or an out-of-order insertion.
Expand All @@ -237,23 +235,19 @@ impl<'a> Hunk<'a> {
}
// The lines are empty, we need to add the first one
else {
self.0.push_back(Line::new(incoming_line_idx));
self.0.push_back(Line::new(incoming_line_idx))
}

let last_line = self.0.back_mut().unwrap();

if let Some(&last_entry) = last_line.entries.back() {
let last_col = last_entry.end_position().column;
let last_line = last_entry.end_position().row;
let incoming_col = entry.start_position().column;
let incoming_line = entry.end_position().row;
let last_col = last_entry.reference.end_position().column;
let incoming_col = entry.reference.start_position().column;

if incoming_col < last_col {
return Err(HunkInsertionError::PriorColumn {
incoming_col,
last_col,
incoming_line,
last_line,
});
}
}
Expand Down Expand Up @@ -325,46 +319,6 @@ impl<'a> Hunks<'a> {
}
}

/// Wrapper around [Hunks] that is collected by pushing entries onto the back.
pub struct HunkAppender<'a>(pub Hunks<'a>);

impl<'a> FromIterator<Entry<'a>> for HunkAppender<'a> {
    /// Build a `HunkAppender` by appending every [entry](Entry) of the iterator, in iteration
    /// order, to a fresh set of hunks.
    ///
    /// # Panics
    ///
    /// Panics with "Invalid iterator" if `push_back` rejects an entry, so the caller has to
    /// supply the entries in an order the hunks accept.
    fn from_iter<T>(iter: T) -> Self
    where
        T: IntoIterator<Item = Entry<'a>>,
    {
        let mut appended = Hunks::new();
        iter.into_iter().for_each(|entry| {
            appended.push_back(entry).expect("Invalid iterator");
        });
        HunkAppender(appended)
    }
}

/// Wrapper around [Hunks] that is collected by pushing entries onto the front.
pub struct HunkPrepender<'a>(pub Hunks<'a>);

impl<'a> FromIterator<Entry<'a>> for HunkPrepender<'a> {
    /// Build a `HunkPrepender` by prepending every [entry](Entry) of the iterator, in iteration
    /// order, to a fresh set of hunks.
    ///
    /// # Panics
    ///
    /// Panics with "Invalid iterator" if `push_front` rejects an entry, so the caller has to
    /// supply the entries in an order the hunks accept.
    fn from_iter<T>(iter: T) -> Self
    where
        T: IntoIterator<Item = Entry<'a>>,
    {
        let mut prepended = Hunks::new();
        iter.into_iter().for_each(|entry| {
            prepended.push_front(entry).expect("Invalid iterator");
        });
        HunkPrepender(prepended)
    }
}

impl<'a> FromIterator<Entry<'a>> for Hunks<'a> {
/// Create an instance of `Hunks` from an iterator over [entries](Entry).
///
Expand Down
11 changes: 6 additions & 5 deletions src/formatting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,14 +280,15 @@ impl DiffWriter {
old_fmt: &FormattingDirectives,
new_fmt: &FormattingDirectives,
) -> std::io::Result<()> {
let divider = " -> ";

// The different ways we can stack the title
#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, strum_macros::Display)]
#[strum(serialize_all = "snake_case")]
enum TitleStack {
Vertical,
Horizontal,
}
let divider = " -> ";

// We construct the fully horizontal title string. If wider than the terminal, then we
// format another title string that's vertically stacked
Expand Down Expand Up @@ -418,16 +419,16 @@ impl DiffWriter {
// First, we print the prefix to stdout
write!(term, "{}", regular.apply_to(fmt.prefix.as_ref()))?;

// The number of characters that have been printed out to stdout already. All indices are
// in raw byte offsets, as splitting on graphemes, etc was taken care of when processing
// the AST nodes.
// The number of characters that have been printed out to stdout already. These aren't
// *actually* chars because UTF-8, but you get the gist.
let mut printed_chars = 0;

// We keep printing ranges until we've covered the entire line
for entry in &line.entries {
// The range of text to emphasize
// TODO(afnan) deal with ranges spanning multiple rows
let emphasis_range = entry.start_position().column..entry.end_position().column;
let emphasis_range =
entry.reference.start_position().column..entry.reference.end_position().column;

// First we need to see if there's any regular text to cover. If the range has a len of
// zero this is a no-op
Expand Down
17 changes: 7 additions & 10 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,9 @@ fn generate_ast_vector_data(
Ok(AstVectorData { text, tree, path })
}

/// Generate an AST vector from the underlying data.
/// Generate an AST vector from the underlying data
///
/// This will break up the AST vector data into a list of AST nodes that correspond to graphemes.
/// This is split off into a function so we can handle things like logging and keep the code DRY
fn generate_ast_vector(data: &AstVectorData) -> AstVector<'_> {
let ast_vec = AstVector::from_ts_tree(&data.tree, &data.text);
info!(
Expand Down Expand Up @@ -181,11 +181,10 @@ fn run_diff(args: &Args, config: &Config) -> Result<()> {
text: &ast_data_b.text,
},
};
// Use a buffered terminal instead of a normal unbuffered terminal so we can amortize the cost
// of printing. It doesn't really matter how frequently the terminal prints to stdout because
// the user just cares about the output at the end, we don't care about how frequently the
// terminal does partial updates or anything like that. If the user is curious about progress,
// they can enable logging and see when hunks are processed and written to the buffer.
// Use a buffered terminal instead of a normal unbuffered terminal so we can amortize the cost of printing. It
// doesn't really matter how frequently the terminal prints to stdout because the user just cares about the output at the
// end, we don't care about how frequently the terminal does partial updates or anything like that. If the user is
// curious about progress, they can enable logging and see when hunks are processed and written to the buffer.
let mut buf_writer = BufWriter::new(Term::stdout());
config.formatting.print(&mut buf_writer, &params)?;
// Just in case we forgot to flush anything in the `print` function
Expand Down Expand Up @@ -288,6 +287,7 @@ fn main() -> Result<()> {
.filter_level(log_level)
.init();
set_term_colors(args.color_output);

// First check if the input files can be parsed with tree-sitter.
let files_supported = are_input_files_supported(&args, &config);

Expand Down Expand Up @@ -326,14 +326,11 @@ mod tests {
"test data path {} does not exist",
path_b.to_str().unwrap()
);

(path_a, path_b)
}

#[test_case("short", "rust", "rs")]
#[test_case("short", "python", "py")]
#[test_case("medium", "rust", "rs")]
#[test_case("medium", "cpp", "cpp")]
fn diff_hunks_snapshot(test_type: &str, name: &str, ext: &str) {
let (path_a, path_b) = get_test_paths(test_type, name, ext);
let config = GrammarConfig::default();
Expand Down
1 change: 1 addition & 0 deletions src/neg_idx_vec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ impl<T> NegIdxVec<T> {
/// so negative indices are relative to the end of the vector.
fn idx_helper(&self, idx: i32) -> usize {
let len = self.len;

let final_index = if idx >= 0 {
idx as usize
} else {
Expand Down
Loading

0 comments on commit e03cce1

Please # to comment.