Revert "feat: Character-level diffs (#273)" (#296)

This reverts commit 879d21c.
afnanenayet · Feb 26, 2022 · e03cce1 · e03cce1
1 parent 879d21c
commit e03cce1
Show file tree

Hide file tree

Showing 19 changed files with 171 additions and 3,426 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -56,7 +56,6 @@ logging_timer = "1.1.0"
 build-info = { version = "0.0.25", optional = true }
 jemallocator = { version = "0.3.2", optional = true }
 libloading = "0.7.3"
-unicode-segmentation = "1.9.0"
 
 [dev-dependencies]
 test-case = "2.0.0"

diff --git a/src/ast.rs b/src/ast.rs
@@ -6,9 +6,7 @@ use crate::diff::Myers;
 use logging_timer::time;
 use std::{cell::RefCell, ops::Index, path::PathBuf};
 use tree_sitter::Node as TSNode;
-use tree_sitter::Point;
 use tree_sitter::Tree as TSTree;
-use unicode_segmentation as us;
 
 /// A mapping between a tree-sitter node and the text it corresponds to
 #[derive(Debug, Clone, Copy)]
@@ -24,86 +22,6 @@ pub struct Entry<'a> {
     /// This is different from the `source_text` that the [AstVector](AstVector) refers to, as the
     /// entry only holds a reference to the specific range of text that the node covers.
     pub text: &'a str,
-
-    /// The entry's start position in the document.
-    pub start_position: Point,
-
-    /// The entry's end position in the document.
-    pub end_position: Point,
-}
-
-impl<'a> Entry<'a> {
-    /// Split an entry into a vector of entries per grapheme.
-    ///
-    /// Each grapheme will get its own [Entry] struct. This method will resolve the
-    /// indices/positioning of each grapheme from the `self.text` field.
-    fn split_graphemes(self) -> Vec<Self> {
-        let mut entries = Vec::new();
-        let indices: Vec<(usize, &str)> =
-            us::UnicodeSegmentation::grapheme_indices(self.text, true).collect();
-        entries.reserve(indices.len());
-        let mut current_line = self.reference.start_position().row;
-
-        for (idx, grapheme) in indices {
-            // Every grapheme has to be at least one byte
-            debug_assert!(!grapheme.is_empty());
-
-            let original_start_col = self.reference.start_position().column;
-            let new_start_pos = Point {
-                row: current_line,
-                column: original_start_col + idx,
-            };
-            let new_end_pos = Point {
-                row: current_line,
-                column: new_start_pos.column + grapheme.len(),
-            };
-
-            debug_assert!(new_start_pos.row <= new_end_pos.row);
-
-            // If the end position is on the next row, then the column index can be less than or
-            // equal to the the start column. If they are on the same line, then the ending column
-            // *must be* greater than the starting column.
-            debug_assert!(
-                new_start_pos.column < new_end_pos.column || new_start_pos.row < new_end_pos.row
-            );
-
-            let entry = Entry {
-                reference: self.reference,
-                text: &self.text[idx..idx + grapheme.len()],
-                start_position: new_start_pos,
-                end_position: new_end_pos,
-            };
-            entries.push(entry);
-
-            // If the last entry was a new line, iterate up for the next entry
-            if grapheme == "\n" || grapheme == "\r\n" {
-                current_line += 1;
-            }
-        }
-        entries
-    }
-
-    /// Get the start position of an entry
-    pub fn start_position(&self) -> Point {
-        self.start_position
-    }
-
-    /// Get the end position of an entry
-    pub fn end_position(&self) -> Point {
-        self.end_position
-    }
-}
-
-impl<'a> From<&'a AstVector<'a>> for Vec<Entry<'a>> {
-    fn from(ast_vector: &'a AstVector<'a>) -> Self {
-        let mut entries = Vec::new();
-        entries.reserve(ast_vector.leaves.len());
-
-        for entry in &ast_vector.leaves {
-            entries.extend(entry.split_graphemes().iter());
-        }
-        entries
-    }
 }
 
 /// A vector that allows for linear traversal through the leafs of an AST.
@@ -170,7 +88,7 @@ impl<'a> Index<usize> for AstVector<'a> {
 
 impl<'a> PartialEq for Entry<'a> {
     fn eq(&self, other: &Entry) -> bool {
-        self.reference.kind_id() == other.reference.kind_id() && self.text == other.text
+        self.text == other.text
     }
 }
 
@@ -180,15 +98,16 @@ impl<'a> PartialEq for AstVector<'a> {
             return false;
         }
 
-        for i in 0..self.leaves.len() {
-            let leaf = self.leaves[i];
-            let other_leaf = other.leaves[i];
-
-            if leaf != other_leaf {
-                return false;
-            }
-        }
-        true
+        // Zip through each entry to determine whether the elements are equal. We start with a
+        // `false` value for not equal and accumulate any inequalities along the way.
+        let not_equal = self
+            .leaves
+            .iter()
+            .zip(other.leaves.iter())
+            .fold(false, |not_equal, (entry_a, entry_b)| {
+                not_equal | (entry_a != entry_b)
+            });
+        !not_equal
     }
 }
 
@@ -208,8 +127,6 @@ fn build<'a>(vector: &RefCell<Vec<Entry<'a>>>, node: tree_sitter::Node<'a>, text
             vector.borrow_mut().push(Entry {
                 reference: node,
                 text: node_text,
-                start_position: node.start_position(),
-                end_position: node.end_position(),
             });
         }
         return;
@@ -247,19 +164,15 @@ pub enum EditType<T> {
 #[time("info", "ast::{}")]
 pub fn compute_edit_script<'a>(a: &'a AstVector, b: &'a AstVector) -> (Hunks<'a>, Hunks<'a>) {
     let myers = Myers::default();
-    let a_graphemes: Vec<Entry> = a.into();
-    let b_graphemes: Vec<Entry> = b.into();
-    let edit_script = myers.diff(&a_graphemes[..], &b_graphemes[..]);
-    let edit_script_len = edit_script.len();
-
-    let mut old_edits = Vec::with_capacity(edit_script_len);
-    let mut new_edits = Vec::with_capacity(edit_script_len);
+    let edit_script = myers.diff(&a.leaves[..], &b.leaves[..]);
+    let mut old_edits = Vec::with_capacity(edit_script.len());
+    let mut new_edits = Vec::with_capacity(edit_script.len());
 
     for edit in edit_script {
         match edit {
-            EditType::Deletion(&e) => old_edits.push(e),
-            EditType::Addition(&e) => new_edits.push(e),
-        };
+            EditType::Deletion(&edit) => old_edits.push(edit),
+            EditType::Addition(&edit) => new_edits.push(edit),
+        }
     }
 
     // Convert the vectors of edits into hunks that can be displayed

diff --git a/src/diff.rs b/src/diff.rs
@@ -113,12 +113,10 @@ pub enum HunkInsertionError {
         last_line: usize,
     },
 
-    #[error("Attempted to append an entry with a column ({incoming_col:?}, line: {incoming_line:?}) less than the first entry's column ({last_col:?}, line: {last_line:?})")]
+    #[error("Attempted to prepend an entry with a column ({incoming_col:?}) greater than the first entry's column ({last_col:?})")]
     PriorColumn {
         incoming_col: usize,
-        incoming_line: usize,
         last_col: usize,
-        last_line: usize,
     },
 }
 
@@ -146,7 +144,7 @@ impl<'a> Hunk<'a> {
     ///
     /// Entries can only be prepended in descending order (from last to first)
     pub fn push_front(&mut self, entry: Entry<'a>) -> Result<(), HunkInsertionError> {
-        let incoming_line_idx = entry.start_position().row;
+        let incoming_line_idx = entry.reference.start_position().row;
 
         // Add a new line vector if the entry has a greater line index, or if the vector is empty.
         // We ensure that the last line has the same line index as the incoming entry.
@@ -188,10 +186,10 @@ impl<'a> Hunk<'a> {
             //if let Some(&first_entry) = first_line.entries.front() {
             // TODO(afnan) ^ this instead?
             // TODO(afnan) should this be start_position() instead of end?
-            let first_col = first_entry.end_position().column;
-            //let first_col = first_entry.start_position().column;
+            let first_col = first_entry.reference.end_position().column;
+            //let first_col = first_entry.reference.start_position().column;
             // TODO(afnan) ^ this instead?
-            let incoming_col = entry.end_position().column;
+            let incoming_col = entry.reference.end_position().column;
 
             if incoming_col > first_col {
                 return Err(HunkInsertionError::LaterColumn {
@@ -210,7 +208,7 @@ impl<'a> Hunk<'a> {
     /// entries out of order. For example, you can't insert an entry on line 1 after inserting an
     /// entry on line 5.
     pub fn push_back(&mut self, entry: Entry<'a>) -> Result<(), HunkInsertionError> {
-        let incoming_line_idx = entry.start_position().row;
+        let incoming_line_idx = entry.reference.start_position().row;
 
         // Create a new line if the incoming entry is on the next line. This will throw an error
         // if we have an entry on a non-adjacent line or an out-of-order insertion.
@@ -237,23 +235,19 @@ impl<'a> Hunk<'a> {
         }
         // The lines are empty, we need to add the first one
         else {
-            self.0.push_back(Line::new(incoming_line_idx));
+            self.0.push_back(Line::new(incoming_line_idx))
         }
 
         let last_line = self.0.back_mut().unwrap();
 
         if let Some(&last_entry) = last_line.entries.back() {
-            let last_col = last_entry.end_position().column;
-            let last_line = last_entry.end_position().row;
-            let incoming_col = entry.start_position().column;
-            let incoming_line = entry.end_position().row;
+            let last_col = last_entry.reference.end_position().column;
+            let incoming_col = entry.reference.start_position().column;
 
             if incoming_col < last_col {
                 return Err(HunkInsertionError::PriorColumn {
                     incoming_col,
                     last_col,
-                    incoming_line,
-                    last_line,
                 });
             }
         }
@@ -325,46 +319,6 @@ impl<'a> Hunks<'a> {
     }
 }
 
-pub struct HunkAppender<'a>(pub Hunks<'a>);
-
-impl<'a> FromIterator<Entry<'a>> for HunkAppender<'a> {
-    /// Create an instance of `Hunks` from an iterator over [entries](Entry).
-    ///
-    /// The user is responsible for making sure that the hunks are in proper order, otherwise this
-    /// constructor may panic.
-    fn from_iter<T>(iter: T) -> Self
-    where
-        T: IntoIterator<Item = Entry<'a>>,
-    {
-        let mut hunks = Hunks::new();
-
-        for i in iter {
-            hunks.push_back(i).expect("Invalid iterator");
-        }
-        HunkAppender(hunks)
-    }
-}
-
-pub struct HunkPrepender<'a>(pub Hunks<'a>);
-
-impl<'a> FromIterator<Entry<'a>> for HunkPrepender<'a> {
-    /// Create an instance of `Hunks` from an iterator over [entries](Entry).
-    ///
-    /// The user is responsible for making sure that the hunks are in proper order, otherwise this
-    /// constructor may panic.
-    fn from_iter<T>(iter: T) -> Self
-    where
-        T: IntoIterator<Item = Entry<'a>>,
-    {
-        let mut hunks = Hunks::new();
-
-        for i in iter {
-            hunks.push_front(i).expect("Invalid iterator");
-        }
-        HunkPrepender(hunks)
-    }
-}
-
 impl<'a> FromIterator<Entry<'a>> for Hunks<'a> {
     /// Create an instance of `Hunks` from an iterator over [entries](Entry).
     ///

diff --git a/src/formatting.rs b/src/formatting.rs
@@ -280,14 +280,15 @@ impl DiffWriter {
         old_fmt: &FormattingDirectives,
         new_fmt: &FormattingDirectives,
     ) -> std::io::Result<()> {
+        let divider = " -> ";
+
         // The different ways we can stack the title
         #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, strum_macros::Display)]
         #[strum(serialize_all = "snake_case")]
         enum TitleStack {
             Vertical,
             Horizontal,
         }
-        let divider = " -> ";
 
         // We construct the fully horizontal title string. If wider than the terminal, then we
         // format another title string that's vertically stacked
@@ -418,16 +419,16 @@ impl DiffWriter {
         // First, we print the prefix to stdout
         write!(term, "{}", regular.apply_to(fmt.prefix.as_ref()))?;
 
-        // The number of characters that have been printed out to stdout already. All indices are
-        // in raw byte offsets, as splitting on graphemes, etc was taken care of when processing
-        // the AST nodes.
+        // The number of characters that have been printed out to stdout already. These aren't
+        // *actually* chars because UTF-8, but you get the gist.
         let mut printed_chars = 0;
 
         // We keep printing ranges until we've covered the entire line
         for entry in &line.entries {
             // The range of text to emphasize
             // TODO(afnan) deal with ranges spanning multiple rows
-            let emphasis_range = entry.start_position().column..entry.end_position().column;
+            let emphasis_range =
+                entry.reference.start_position().column..entry.reference.end_position().column;
 
             // First we need to see if there's any regular text to cover. If the range has a len of
             // zero this is a no-op

diff --git a/src/main.rs b/src/main.rs
@@ -96,9 +96,9 @@ fn generate_ast_vector_data(
     Ok(AstVectorData { text, tree, path })
 }
 
-/// Generate an AST vector from the underlying data.
+/// Generate an AST vector from the underlying data
 ///
-/// This will break up the AST vector data into a list of AST nodes that correspond to graphemes.
+/// This is split off into a function so we can handle things like logging and keep the code DRY
 fn generate_ast_vector(data: &AstVectorData) -> AstVector<'_> {
     let ast_vec = AstVector::from_ts_tree(&data.tree, &data.text);
     info!(
@@ -181,11 +181,10 @@ fn run_diff(args: &Args, config: &Config) -> Result<()> {
             text: &ast_data_b.text,
         },
     };
-    // Use a buffered terminal instead of a normal unbuffered terminal so we can amortize the cost
-    // of printing. It doesn't really matter how frequently the terminal prints to stdout because
-    // the user just cares about the output at the end, we don't care about how frequently the
-    // terminal does partial updates or anything like that. If the user is curious about progress,
-    // they can enable logging and see when hunks are processed and written to the buffer.
+    // Use a buffered terminal instead of a normal unbuffered terminal so we can amortize the cost of printing. It
+    // doesn't really matter how frequently the terminal prints to stdout because the user just cares about the output at the
+    // end, we don't care about how frequently the terminal does partial updates or anything like that. If the user is
+    // curious about progress, they can enable logging and see when hunks are processed and written to the buffer.
     let mut buf_writer = BufWriter::new(Term::stdout());
     config.formatting.print(&mut buf_writer, &params)?;
     // Just in case we forgot to flush anything in the `print` function
@@ -288,6 +287,7 @@ fn main() -> Result<()> {
             .filter_level(log_level)
             .init();
         set_term_colors(args.color_output);
+
         // First check if the input files can be parsed with tree-sitter.
         let files_supported = are_input_files_supported(&args, &config);
 
@@ -326,14 +326,11 @@ mod tests {
             "test data path {} does not exist",
             path_b.to_str().unwrap()
         );
-
         (path_a, path_b)
     }
 
     #[test_case("short", "rust", "rs")]
     #[test_case("short", "python", "py")]
-    #[test_case("medium", "rust", "rs")]
-    #[test_case("medium", "cpp", "cpp")]
     fn diff_hunks_snapshot(test_type: &str, name: &str, ext: &str) {
         let (path_a, path_b) = get_test_paths(test_type, name, ext);
         let config = GrammarConfig::default();

diff --git a/src/neg_idx_vec.rs b/src/neg_idx_vec.rs
@@ -52,6 +52,7 @@ impl<T> NegIdxVec<T> {
     /// so negative indices are relative to the end of the vector.
     fn idx_helper(&self, idx: i32) -> usize {
         let len = self.len;
+
         let final_index = if idx >= 0 {
             idx as usize
         } else {