diff --git a/.gitignore b/.gitignore
index 2040c29..8571fdf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,4 @@
+.zig-cache
zig-cache
+zig-out
+kcov-output
\ No newline at end of file
diff --git a/DiffMatchPatch.zig b/DiffMatchPatch.zig
index 3540518..8c4fb8f 100644
--- a/DiffMatchPatch.zig
+++ b/DiffMatchPatch.zig
@@ -2,12 +2,89 @@ const DiffMatchPatch = @This();
const std = @import("std");
const testing = std.testing;
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
const ArrayListUnmanaged = std.ArrayListUnmanaged;
const DiffList = ArrayListUnmanaged(Diff);
+const PatchList = ArrayListUnmanaged(Patch);
+
+pub const DiffError = error{
+ OutOfMemory,
+ BadPatchString,
+};
+
+const OutOfMemory = error.OutOfMemory;
+
+//| Fields
+
+/// Number of milliseconds to map a diff before giving up (0 for infinity).
+diff_timeout: u64 = 1000,
+/// Cost of an empty edit operation in terms of edit characters.
+diff_edit_cost: u16 = 4,
+
+/// Number of bytes in each string needed to trigger a line-based diff
+diff_check_lines_over: u64 = 100,
+
+/// At what point is no match declared (0.0 = perfection, 1.0 = very loose).
+/// This defaults to 0.05, on the premise that the library will mostly be
+/// used in cases where failure is better than a bad patch application.
+match_threshold: f64 = 0.05,
+
+/// How far to search for a match (0 = exact location, 1000+ = broad match).
+/// A match this many characters away from the expected location will add
+/// 1.0 to the score (0.0 is a perfect match).
+match_distance: u32 = 1000,
+
+/// The number of bits in a usize.
+match_max_bits: u8 = @bitSizeOf(usize),
+
+/// When deleting a large block of text (over ~64 characters), how close
+/// do the contents have to be to match the expected contents. (0.0 =
+/// perfection, 1.0 = very loose). Note that `match_threshold` controls
+/// how closely the end points of a delete need to match.
+patch_delete_threshold: f32 = 0.5,
+
+/// Chunk size for context length.
+patch_margin: u8 = 4,
+
+//| Allocation Management Helpers
+
+/// Release the text slice of every `Diff` in the list, then the list's
+/// own backing storage, all through `allocator`.
+pub fn deinitDiffList(allocator: Allocator, diffs: *DiffList) void {
+    // Texts first: the items slice is only valid until the list is freed.
+    for (diffs.items) |entry| allocator.free(entry.text);
+    // Then the storage of the list itself.
+    diffs.deinit(allocator);
+}
+
+/// Free the text of the `len` Diffs starting at index `start`. The
+/// entries themselves stay in the list; callers such as diffLineMode
+/// remove them afterwards. Used during cleanups and edits.
+fn freeRangeDiffList(
+    allocator: Allocator,
+    diffs: *DiffList,
+    start: usize,
+    len: usize,
+) void {
+    // Take the sub-slice first so the whole range is bounds-checked
+    // (in safe builds) before any text is released.
+    const doomed = diffs.items[start..][0..len];
+    for (doomed) |entry| allocator.free(entry.text);
+}
+
+/// Deinit a `PatchList`: the text of every `Diff`, each patch's diff
+/// list, and finally the list of patches itself.
+pub fn deinitPatchList(allocator: Allocator, patches: *PatchList) void {
+    for (patches.items) |*each| deinitDiffList(allocator, &each.diffs);
+    patches.deinit(allocator);
+}
/// DMP with default configuration options
pub const default = DiffMatchPatch{};
+/// Represents a single edit operation.
+/// TODO rename this Edit
pub const Diff = struct {
pub const Operation = enum {
insert,
@@ -35,34 +112,103 @@ pub const Diff = struct {
pub fn eql(a: Diff, b: Diff) bool {
return a.operation == b.operation and std.mem.eql(u8, a.text, b.text);
}
+
+    /// Duplicate this `Diff`. The copy's text is freshly allocated with
+    /// `allocator`, so the caller owns it independently of the original.
+    pub fn clone(self: Diff, allocator: Allocator) !Diff {
+        const text_copy = try allocator.dupe(u8, self.text);
+        return .{ .operation = self.operation, .text = text_copy };
+    }
};
-/// Number of milliseconds to map a diff before giving up (0 for infinity).
-diff_timeout: u64 = 1000,
-/// Cost of an empty edit operation in terms of edit characters.
-diff_edit_cost: u16 = 4,
+pub const Patch = struct {
+ /// Diffs to be applied
+ diffs: DiffList = DiffList{},
+ /// Start of patch in before text
+ start1: usize = 0,
+ length1: usize = 0,
+ /// Start of patch in after text
+ start2: usize = 0,
+ length2: usize = 0,
+
+    /// Deep-copy the Patch: every Diff is cloned, so the copy shares no
+    /// text with the original and must be released on its own.
+    pub fn clone(patch: Patch, allocator: Allocator) !Patch {
+        const originals = patch.diffs.items;
+        var copies = DiffList{};
+        try copies.ensureTotalCapacity(allocator, originals.len);
+        // From here on a failure must release the clones made so far.
+        errdefer deinitDiffList(allocator, &copies);
+        for (originals) |original| {
+            const duplicate = try original.clone(allocator);
+            // Capacity was reserved above, so this append cannot fail.
+            copies.appendAssumeCapacity(duplicate);
+        }
+        // Copy the coordinates wholesale, then swap in the cloned list.
+        var result = patch;
+        result.diffs = copies;
+        return result;
+    }
-/// At what point is no match declared (0.0 = perfection, 1.0 = very loose).
-match_threshold: f32 = 0.5,
-/// How far to search for a match (0 = exact location, 1000+ = broad match).
-/// A match this many characters away from the expected location will add
-/// 1.0 to the score (0.0 is a perfect match).
-match_distance: u32 = 1000,
-/// The number of bits in an int.
-match_max_bits: u16 = 32,
+    pub fn deinit(patch: *Patch, allocator: Allocator) void {
+        deinitDiffList(allocator, &patch.diffs); // frees every Diff's text, then the list
+    }
-/// When deleting a large block of text (over ~64 characters), how close
-/// do the contents have to be to match the expected contents. (0.0 =
-/// perfection, 1.0 = very loose). Note that Match_Threshold controls
-/// how closely the end points of a delete need to match.
-patch_delete_threshold: f32 = 0.5,
-/// Chunk size for context length.
-patch_margin: u16 = 4,
+    /// Render the patch as text in the Unidiff dialect specified here:
+    /// https://github.com/google/diff-match-patch/wiki/Unidiff
+    /// The format resembles GNU Unidiff but is not identical to it.
+    /// Header example: @@ -382,8 +481,9 @@
+    /// Non-empty ranges are printed with 1-based start indices.
+    /// Returns the patch text in a slice owned by the caller.
+    pub fn asText(patch: Patch, allocator: Allocator) ![]const u8 {
+        var buffer = std.ArrayList(u8).init(allocator);
+        // Frees the buffer on error; after toOwnedSlice the list is empty.
+        defer buffer.deinit();
+        try patch.writeText(buffer.writer());
+        return buffer.toOwnedSlice();
+    }
+
+ const format = std.fmt.format;
+
+    /// Stream the textual patch representation to `writer`. See `asText`
+    /// for the format. `writer` needs at least `writeAll` and `writeByte`.
+    pub fn writeText(patch: Patch, writer: anytype) !void {
+        // `writeAll`, not `write`: `write` may accept only part of the
+        // bytes, and its returned count was previously discarded.
+        try writer.writeAll(PATCH_HEAD);
+        // Coordinates: 1-based start, except empty ranges print it as-is.
+        if (patch.length1 == 0) {
+            try format(writer, "{d},0", .{patch.start1});
+        } else if (patch.length1 == 1) {
+            try format(writer, "{d}", .{patch.start1 + 1});
+        } else {
+            try format(writer, "{d},{d}", .{ patch.start1 + 1, patch.length1 });
+        }
+        try writer.writeAll(" +");
+        if (patch.length2 == 0) {
+            try format(writer, "{d},0", .{patch.start2});
+        } else if (patch.length2 == 1) {
+            try format(writer, "{d}", .{patch.start2 + 1});
+        } else {
+            try format(writer, "{d},{d}", .{ patch.start2 + 1, patch.length2 });
+        }
+        try writer.writeAll(PATCH_TAIL);
+        // Body: one line per diff, a sigil then the %xx-escaped text.
+        for (patch.diffs.items) |a_diff| {
+            switch (a_diff.operation) {
+                .insert => try writer.writeByte('+'),
+                .delete => try writer.writeByte('-'),
+                .equal => try writer.writeByte(' '),
+            }
+            _ = try writeUriEncoded(writer, a_diff.text);
+            try writer.writeByte('\n');
+        }
+    }
+};
-pub const DiffError = error{OutOfMemory};
+const PATCH_HEAD = "@@ -";
+const PATCH_TAIL = " @@\n";
-/// It is recommended that you use an Arena for this operation.
-///
/// Find the differences between two texts.
/// @param before Old string to be diffed.
/// @param after New string to be diffed.
@@ -79,7 +225,7 @@ pub fn diff(
/// to identify the changed areas. If true, then run
/// a faster slightly less optimal diff.
check_lines: bool,
-) DiffError!DiffList {
+) error{OutOfMemory}!DiffList {
const deadline = if (dmp.diff_timeout == 0)
std.math.maxInt(u64)
else
@@ -94,12 +240,17 @@ fn diffInternal(
after: []const u8,
check_lines: bool,
deadline: u64,
-) DiffError!DiffList {
+) error{OutOfMemory}!DiffList {
// Check for equality (speedup).
- var diffs = DiffList{};
if (std.mem.eql(u8, before, after)) {
+ var diffs = DiffList{};
+ errdefer deinitDiffList(allocator, &diffs);
if (before.len != 0) {
- try diffs.append(allocator, Diff.init(.equal, try allocator.dupe(u8, before)));
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ diffs.appendAssumeCapacity(Diff.init(
+ .equal,
+ try allocator.dupe(u8, before),
+ ));
}
return diffs;
}
@@ -117,40 +268,57 @@ fn diffInternal(
trimmed_after = trimmed_after[0 .. trimmed_after.len - common_length];
// Compute the diff on the middle block.
- diffs = try dmp.diffCompute(allocator, trimmed_before, trimmed_after, check_lines, deadline);
+ var diffs = try dmp.diffCompute(allocator, trimmed_before, trimmed_after, check_lines, deadline);
+ errdefer deinitDiffList(allocator, &diffs);
// Restore the prefix and suffix.
if (common_prefix.len != 0) {
- try diffs.insert(allocator, 0, Diff.init(.equal, try allocator.dupe(u8, common_prefix)));
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ diffs.insertAssumeCapacity(0, Diff.init(
+ .equal,
+ try allocator.dupe(u8, common_prefix),
+ ));
}
if (common_suffix.len != 0) {
- try diffs.append(allocator, Diff.init(.equal, try allocator.dupe(u8, common_suffix)));
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ diffs.appendAssumeCapacity(Diff.init(
+ .equal,
+ try allocator.dupe(u8, common_suffix),
+ ));
}
-
try diffCleanupMerge(allocator, &diffs);
return diffs;
}
+/// Report whether `byte` is a UTF-8 continuation byte (bits `10xxxxxx`).
+inline fn is_follow(byte: u8) bool {
+    return (byte >> 6) == 0b10;
+}
+
+/// Find a common prefix which respects UTF-8 code point boundaries.
fn diffCommonPrefix(before: []const u8, after: []const u8) usize {
const n = @min(before.len, after.len);
var i: usize = 0;
-
while (i < n) : (i += 1) {
- if (before[i] != after[i]) {
- return i;
+ const b = before[i];
+ const a = after[i];
+ if (a != b) {
+ return fixSplitBackward(before, i);
}
}
return n;
}
+/// Find a common suffix which respects UTF-8 code point boundaries
fn diffCommonSuffix(before: []const u8, after: []const u8) usize {
const n = @min(before.len, after.len);
var i: usize = 1;
-
while (i <= n) : (i += 1) {
- if (before[before.len - i] != after[after.len - i]) {
- return i - 1;
+ const b = before[before.len - i];
+ const a = after[after.len - i];
+ if (a != b) {
+ return before.len - fixSplitForward(before, before.len - i + 1);
}
}
@@ -173,18 +341,28 @@ fn diffCompute(
after: []const u8,
check_lines: bool,
deadline: u64,
-) DiffError!DiffList {
- var diffs = DiffList{};
-
+) error{OutOfMemory}!DiffList {
if (before.len == 0) {
// Just add some text (speedup).
- try diffs.append(allocator, Diff.init(.insert, try allocator.dupe(u8, after)));
+ var diffs = DiffList{};
+ errdefer deinitDiffList(allocator, &diffs);
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ diffs.appendAssumeCapacity(Diff.init(
+ .insert,
+ try allocator.dupe(u8, after),
+ ));
return diffs;
}
if (after.len == 0) {
// Just delete some text (speedup).
- try diffs.append(allocator, Diff.init(.delete, try allocator.dupe(u8, before)));
+ var diffs = DiffList{};
+ errdefer deinitDiffList(allocator, &diffs);
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ diffs.appendAssumeCapacity(Diff.init(
+ .delete,
+ try allocator.dupe(u8, before),
+ ));
return diffs;
}
@@ -193,36 +371,59 @@ fn diffCompute(
if (std.mem.indexOf(u8, long_text, short_text)) |index| {
// Shorter text is inside the longer text (speedup).
+ var diffs = DiffList{};
+ errdefer deinitDiffList(allocator, &diffs);
const op: Diff.Operation = if (before.len > after.len)
.delete
else
.insert;
- try diffs.append(allocator, Diff.init(op, try allocator.dupe(u8, long_text[0..index])));
- try diffs.append(allocator, Diff.init(.equal, try allocator.dupe(u8, short_text)));
- try diffs.append(allocator, Diff.init(op, try allocator.dupe(u8, long_text[index + short_text.len ..])));
+ try diffs.ensureUnusedCapacity(allocator, 3);
+ diffs.appendAssumeCapacity(Diff.init(
+ op,
+ try allocator.dupe(u8, long_text[0..index]),
+ ));
+ diffs.appendAssumeCapacity(Diff.init(
+ .equal,
+ try allocator.dupe(u8, short_text),
+ ));
+ diffs.appendAssumeCapacity(Diff.init(
+ op,
+ try allocator.dupe(u8, long_text[index + short_text.len ..]),
+ ));
return diffs;
}
if (short_text.len == 1) {
// Single character string.
// After the previous speedup, the character can't be an equality.
- try diffs.append(allocator, Diff.init(.delete, before));
- try diffs.append(allocator, Diff.init(.insert, after));
+ var diffs = DiffList{};
+ errdefer deinitDiffList(allocator, &diffs);
+ try diffs.ensureUnusedCapacity(allocator, 2);
+ diffs.appendAssumeCapacity(Diff.init(
+ .delete,
+ try allocator.dupe(u8, before),
+ ));
+ diffs.appendAssumeCapacity(Diff.init(
+ .insert,
+ try allocator.dupe(u8, after),
+ ));
return diffs;
}
// Check to see if the problem can be split in two.
- if (try dmp.diffHalfMatch(allocator, before, after)) |half_match| {
+ var maybe_half_match = try dmp.diffHalfMatch(allocator, before, after);
+ if (maybe_half_match) |*half_match| {
// A half-match was found, sort out the return data.
-
+ defer half_match.deinit(allocator);
// Send both pairs off for separate processing.
- const diffs_a = try dmp.diffInternal(
+ var diffs = try dmp.diffInternal(
allocator,
half_match.prefix_before,
half_match.prefix_after,
check_lines,
deadline,
);
+ errdefer deinitDiffList(allocator, &diffs);
var diffs_b = try dmp.diffInternal(
allocator,
half_match.suffix_before,
@@ -231,21 +432,27 @@ fn diffCompute(
deadline,
);
defer diffs_b.deinit(allocator);
-
- var tmp_diffs = diffs;
- defer tmp_diffs.deinit(allocator);
+ // we have to deinit regardless, so deinitDiffList would be
+ // a double free:
+ errdefer {
+ for (diffs_b.items) |d| {
+ allocator.free(d.text);
+ }
+ }
// Merge the results.
- diffs = diffs_a;
- try diffs.append(allocator, Diff.init(.equal, half_match.common_middle));
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ diffs.appendAssumeCapacity(
+ Diff.init(.equal, half_match.common_middle),
+ );
+ half_match.common_middle = "";
try diffs.appendSlice(allocator, diffs_b.items);
return diffs;
}
- if (check_lines and before.len > 100 and after.len > 100) {
+ if (check_lines and before.len > dmp.diff_check_lines_over and after.len > dmp.diff_check_lines_over) {
return dmp.diffLineMode(allocator, before, after, deadline);
}
-
return dmp.diffBisect(allocator, before, after, deadline);
}
@@ -255,6 +462,15 @@ const HalfMatchResult = struct {
prefix_after: []const u8,
suffix_after: []const u8,
common_middle: []const u8,
+
+    /// Free the five slices held by the HalfMatchResult with `allocator`.
+    pub fn deinit(hmr: HalfMatchResult, allocator: Allocator) void {
+        const owned = [_][]const u8{
+            hmr.prefix_before, hmr.suffix_before, hmr.prefix_after,
+            hmr.suffix_after,  hmr.common_middle,
+        };
+        for (owned) |slice| allocator.free(slice);
+    }
};
/// Do the two texts share a Substring which is at least half the length of
@@ -270,8 +486,8 @@ fn diffHalfMatch(
allocator: std.mem.Allocator,
before: []const u8,
after: []const u8,
-) DiffError!?HalfMatchResult {
- if (dmp.diff_timeout <= 0) {
+) error{OutOfMemory}!?HalfMatchResult {
+ if (dmp.diff_timeout == 0) {
// Don't risk returning a non-optimal diff if we have unlimited time.
return null;
}
@@ -284,8 +500,14 @@ fn diffHalfMatch(
// First check if the second quarter is the seed for a half-match.
const half_match_1 = try dmp.diffHalfMatchInternal(allocator, long_text, short_text, (long_text.len + 3) / 4);
+ errdefer {
+ if (half_match_1) |h_m| h_m.deinit(allocator);
+ }
// Check again based on the third quarter.
const half_match_2 = try dmp.diffHalfMatchInternal(allocator, long_text, short_text, (long_text.len + 1) / 2);
+ errdefer {
+ if (half_match_2) |h_m| h_m.deinit(allocator);
+ }
var half_match: ?HalfMatchResult = null;
if (half_match_1 == null and half_match_2 == null) {
@@ -296,16 +518,22 @@ fn diffHalfMatch(
half_match = half_match_2.?;
} else {
// Both matched. Select the longest.
- half_match = if (half_match_1.?.common_middle.len > half_match_2.?.common_middle.len)
- half_match_1
- else
- half_match_2;
+ half_match = half: {
+ if (half_match_1.?.common_middle.len > half_match_2.?.common_middle.len) {
+ half_match_2.?.deinit(allocator);
+ break :half half_match_1;
+ } else {
+ half_match_1.?.deinit(allocator);
+ break :half half_match_2;
+ }
+ };
}
// A half-match was found, sort out the return data.
if (before.len > after.len) {
- return half_match;
+ return half_match.?;
} else {
+ // Transfers ownership of all memory to new, permuted, half_match.
const half_match_yes = half_match.?;
return .{
.prefix_before = half_match_yes.prefix_after,
@@ -331,12 +559,13 @@ fn diffHalfMatchInternal(
long_text: []const u8,
short_text: []const u8,
i: usize,
-) DiffError!?HalfMatchResult {
+) error{OutOfMemory}!?HalfMatchResult {
// Start with a 1/4 length Substring at position i as a seed.
const seed = long_text[i .. i + long_text.len / 4];
var j: isize = -1;
var best_common = std.ArrayListUnmanaged(u8){};
+ defer best_common.deinit(allocator);
var best_long_text_a: []const u8 = "";
var best_long_text_b: []const u8 = "";
var best_short_text_a: []const u8 = "";
@@ -350,8 +579,10 @@ fn diffHalfMatchInternal(
const suffix_length = diffCommonSuffix(long_text[0..i], short_text[0..@as(usize, @intCast(j))]);
if (best_common.items.len < suffix_length + prefix_length) {
best_common.items.len = 0;
- try best_common.appendSlice(allocator, short_text[@as(usize, @intCast(j - @as(isize, @intCast(suffix_length)))) .. @as(usize, @intCast(j - @as(isize, @intCast(suffix_length)))) + suffix_length]);
- try best_common.appendSlice(allocator, short_text[@as(usize, @intCast(j)) .. @as(usize, @intCast(j)) + prefix_length]);
+ const a = short_text[@as(usize, @intCast(j - @as(isize, @intCast(suffix_length)))) .. @as(usize, @intCast(j - @as(isize, @intCast(suffix_length)))) + suffix_length];
+ try best_common.appendSlice(allocator, a);
+ const b = short_text[@as(usize, @intCast(j)) .. @as(usize, @intCast(j)) + prefix_length];
+ try best_common.appendSlice(allocator, b);
best_long_text_a = long_text[0 .. i - suffix_length];
best_long_text_b = long_text[i + prefix_length ..];
@@ -360,12 +591,21 @@ fn diffHalfMatchInternal(
}
}
if (best_common.items.len * 2 >= long_text.len) {
+        const prefix_before = try allocator.dupe(u8, best_long_text_a);
+        errdefer allocator.free(prefix_before);
+        const suffix_before = try allocator.dupe(u8, best_long_text_b);
+        errdefer allocator.free(suffix_before);
+        const prefix_after = try allocator.dupe(u8, best_short_text_a);
+        errdefer allocator.free(prefix_after);
+        const suffix_after = try allocator.dupe(u8, best_short_text_b);
+        errdefer allocator.free(suffix_after); // toOwnedSlice below can still fail.
+        const best_common_text = try best_common.toOwnedSlice(allocator);
return .{
- .prefix_before = best_long_text_a,
- .suffix_before = best_long_text_b,
- .prefix_after = best_short_text_a,
- .suffix_after = best_short_text_b,
- .common_middle = best_common.items,
+ .prefix_before = prefix_before,
+ .suffix_before = suffix_before,
+ .prefix_after = prefix_after,
+ .suffix_after = suffix_after,
+ .common_middle = best_common_text,
};
} else {
return null;
@@ -385,7 +625,7 @@ fn diffBisect(
before: []const u8,
after: []const u8,
deadline: u64,
-) DiffError!DiffList {
+) error{OutOfMemory}!DiffList {
const before_length: isize = @intCast(before.len);
const after_length: isize = @intCast(after.len);
const max_d: isize = @intCast((before.len + after.len + 1) / 2);
@@ -393,8 +633,10 @@ fn diffBisect(
const v_length = 2 * max_d;
var v1 = try ArrayListUnmanaged(isize).initCapacity(allocator, @as(usize, @intCast(v_length)));
+ defer v1.deinit(allocator);
v1.items.len = @intCast(v_length);
var v2 = try ArrayListUnmanaged(isize).initCapacity(allocator, @as(usize, @intCast(v_length)));
+ defer v2.deinit(allocator);
v2.items.len = @intCast(v_length);
var x: usize = 0;
@@ -435,11 +677,13 @@ fn diffBisect(
x1 = v1.items[@intCast(k1_offset - 1)] + 1;
}
var y1 = x1 - k1;
- while (x1 < before_length and
- y1 < after_length and before[@intCast(x1)] == after[@intCast(y1)])
- {
- x1 += 1;
- y1 += 1;
+ while (x1 < before_length and y1 < after_length) {
+ if (before[@intCast(x1)] == after[@intCast(y1)]) {
+ x1 += 1;
+ y1 += 1;
+ } else {
+ break;
+ }
}
v1.items[@intCast(k1_offset)] = x1;
if (x1 > before_length) {
@@ -474,12 +718,13 @@ fn diffBisect(
x2 = v2.items[@intCast(k2_offset - 1)] + 1;
}
var y2: isize = x2 - k2;
- while (x2 < before_length and y2 < after_length and
- before[@intCast(before_length - x2 - 1)] ==
- after[@intCast(after_length - y2 - 1)])
- {
- x2 += 1;
- y2 += 1;
+ while (x2 < before_length and y2 < after_length) {
+ if (before[@intCast(before_length - x2 - 1)] == after[@intCast(after_length - y2 - 1)]) {
+ x2 += 1;
+ y2 += 1;
+ } else {
+ break;
+ }
}
v2.items[@intCast(k2_offset)] = x2;
if (x2 > before_length) {
@@ -506,8 +751,16 @@ fn diffBisect(
// Diff took too long and hit the deadline or
// number of diffs equals number of characters, no commonality at all.
var diffs = DiffList{};
- try diffs.append(allocator, Diff.init(.delete, try allocator.dupe(u8, before)));
- try diffs.append(allocator, Diff.init(.insert, try allocator.dupe(u8, after)));
+ errdefer deinitDiffList(allocator, &diffs);
+ try diffs.ensureUnusedCapacity(allocator, 2);
+ diffs.appendAssumeCapacity(Diff.init(
+ .delete,
+ try allocator.dupe(u8, before),
+ ));
+ diffs.appendAssumeCapacity(Diff.init(
+ .insert,
+ try allocator.dupe(u8, after),
+ ));
return diffs;
}
@@ -527,18 +780,66 @@ fn diffBisectSplit(
x: isize,
y: isize,
deadline: u64,
-) DiffError!DiffList {
- const text1a = text1[0..@intCast(x)];
- const text2a = text2[0..@intCast(y)];
- const text1b = text1[@intCast(x)..];
- const text2b = text2[@intCast(y)..];
+) error{OutOfMemory}!DiffList {
+ const x1 = fixSplitForward(text1, @intCast(x));
+ const y1 = fixSplitBackward(text2, @intCast(y));
+ const text1a = text1[0..x1];
+ const text2a = text2[0..y1];
+ const text1b = text1[x1..];
+ const text2b = text2[y1..];
+
+    // Degenerate splits: if the text on one side of the split is empty
+    // in both inputs, the other side is the entire input, and recursing
+    // into diffInternal would be handed the same problem again. Answer
+    // those cases directly with a delete/insert pair instead.
+    if (text1a.len == 0 and text2a.len == 0) {
+        // Nothing before the split: the pair is the text after it.
+        var diffs = DiffList{};
+        errdefer deinitDiffList(allocator, &diffs);
+        try diffs.ensureUnusedCapacity(allocator, 2);
+        // Each Diff owns a fresh copy of its text.
+        diffs.appendAssumeCapacity(Diff.init(
+            .delete,
+            try allocator.dupe(u8, text1b),
+        ));
+        diffs.appendAssumeCapacity(Diff.init(
+            .insert,
+            try allocator.dupe(u8, text2b),
+        ));
+        return diffs;
+    } else if (text1b.len == 0 and text2b.len == 0) {
+        // Nothing after the split: the pair is the text before it.
+        // The deleted text is text1a. text2b is empty in this branch,
+        // so using it for the delete would silently drop the deleted
+        // text from the result.
+        var diffs = DiffList{};
+        errdefer deinitDiffList(allocator, &diffs);
+        try diffs.ensureUnusedCapacity(allocator, 2);
+        // Each Diff owns a fresh copy of its text.
+        diffs.appendAssumeCapacity(Diff.init(
+            .delete,
+            try allocator.dupe(u8, text1a),
+        ));
+        diffs.appendAssumeCapacity(Diff.init(
+            .insert,
+            try allocator.dupe(u8, text2a),
+        ));
+        return diffs;
+    }
+
// Compute both diffs serially.
var diffs = try dmp.diffInternal(allocator, text1a, text2a, false, deadline);
- var diffsb = try dmp.diffInternal(allocator, text1b, text2b, false, deadline);
- defer diffsb.deinit(allocator);
-
- try diffs.appendSlice(allocator, diffsb.items);
+ errdefer deinitDiffList(allocator, &diffs);
+ var diffs_b = try dmp.diffInternal(allocator, text1b, text2b, false, deadline);
+ // Free the list, but not the contents:
+ defer diffs_b.deinit(allocator);
+ errdefer {
+ for (diffs_b.items) |d| {
+ allocator.free(d.text);
+ }
+ }
+ try diffs.appendSlice(allocator, diffs_b.items);
return diffs;
}
@@ -555,17 +856,21 @@ fn diffLineMode(
text1_in: []const u8,
text2_in: []const u8,
deadline: u64,
-) DiffError!DiffList {
+) error{OutOfMemory}!DiffList {
// Scan the text on a line-by-line basis first.
- const a = try diffLinesToChars(allocator, text1_in, text2_in);
+ var a = try diffLinesToChars(allocator, text1_in, text2_in);
+ defer a.deinit(allocator);
const text1 = a.chars_1;
const text2 = a.chars_2;
const line_array = a.line_array;
-
- var diffs: DiffList = try dmp.diffInternal(allocator, text1, text2, false, deadline);
-
- // Convert the diff back to original text.
- try diffCharsToLines(allocator, diffs.items, line_array.items);
+ var diffs: DiffList = undefined;
+ {
+ var char_diffs: DiffList = try dmp.diffInternal(allocator, text1, text2, false, deadline);
+ defer deinitDiffList(allocator, &char_diffs);
+ // Convert the diff back to original text.
+ diffs = try diffCharsToLines(allocator, &char_diffs, line_array.items);
+ }
+ errdefer deinitDiffList(allocator, &diffs);
// Eliminate freak matches (e.g. blank lines)
try diffCleanupSemantic(allocator, &diffs);
@@ -587,19 +892,22 @@ fn diffLineMode(
switch (diffs.items[pointer].operation) {
.insert => {
count_insert += 1;
- // text_insert += diffs.items[pointer].text;
try text_insert.appendSlice(allocator, diffs.items[pointer].text);
},
.delete => {
count_delete += 1;
- // text_delete += diffs.items[pointer].text;
try text_delete.appendSlice(allocator, diffs.items[pointer].text);
},
.equal => {
// Upon reaching an equality, check for prior redundancies.
if (count_delete >= 1 and count_insert >= 1) {
// Delete the offending records and add the merged ones.
- // diffs.RemoveRange(pointer - count_delete - count_insert, count_delete + count_insert);
+ freeRangeDiffList(
+ allocator,
+ &diffs,
+ pointer - count_delete - count_insert,
+ count_delete + count_insert,
+ );
try diffs.replaceRange(
allocator,
pointer - count_delete - count_insert,
@@ -607,9 +915,20 @@ fn diffLineMode(
&.{},
);
pointer = pointer - count_delete - count_insert;
- const sub_diff = try dmp.diffInternal(allocator, text_delete.items, text_insert.items, false, deadline);
- // diffs.InsertRange(pointer, sub_diff);
- try diffs.insertSlice(allocator, pointer, sub_diff.items);
+ var sub_diff = try dmp.diffInternal(
+ allocator,
+ text_delete.items,
+ text_insert.items,
+ false,
+ deadline,
+ );
+ {
+ errdefer deinitDiffList(allocator, &sub_diff);
+ try diffs.ensureUnusedCapacity(allocator, sub_diff.items.len);
+ }
+ defer sub_diff.deinit(allocator);
+ const new_diff = diffs.addManyAtAssumeCapacity(pointer, sub_diff.items.len);
+ @memcpy(new_diff, sub_diff.items);
pointer = pointer + sub_diff.items.len;
}
count_insert = 0;
@@ -620,17 +939,24 @@ fn diffLineMode(
}
pointer += 1;
}
- // diffs.RemoveAt(diffs.Count - 1); // Remove the dummy entry at the end.
- diffs.items.len -= 1;
+ diffs.items.len -= 1; // Remove the dummy entry at the end.
return diffs;
}
-const LinesToCharsResult = struct {
- chars_1: []const u8,
- chars_2: []const u8,
- line_array: ArrayListUnmanaged([]const u8),
-};
+// These numbers have a 32 point buffer, to avoid annoyance with
+// c0 control characters. The algorithm drops the bottom points,
+// not the top, that is, it will use 0x10ffff given enough unique
+// lines.
+const UNICODE_MAX = 0x10ffdf;
+const UNICODE_TWO_THIRDS = 742724;
+const UNICODE_ONE_THIRD = 371355;
+const CHAR_OFFSET = 32;
+
+comptime {
+ assert(UNICODE_TWO_THIRDS + UNICODE_ONE_THIRD == UNICODE_MAX);
+ assert(UNICODE_TWO_THIRDS + UNICODE_ONE_THIRD + CHAR_OFFSET == 0x10ffff);
+}
/// Split two texts into a list of strings. Reduce the texts to a string of
/// hashes where each Unicode character represents one line.
@@ -643,22 +969,34 @@ fn diffLinesToChars(
allocator: std.mem.Allocator,
text1: []const u8,
text2: []const u8,
-) DiffError!LinesToCharsResult {
+) error{OutOfMemory}!LinesToCharsResult {
var line_array = ArrayListUnmanaged([]const u8){};
- var line_hash = std.StringHashMapUnmanaged(usize){};
+ errdefer line_array.deinit(allocator);
+ line_array.items.len = 0;
+ var line_hash = std.StringHashMapUnmanaged(u21){};
+ defer line_hash.deinit(allocator);
// e.g. line_array[4] == "Hello\n"
// e.g. line_hash.get("Hello\n") == 4
- // "\x00" is a valid character, but various debuggers don't like it.
- // So we'll insert a junk entry to avoid generating a null character.
- try line_array.append(allocator, "");
-
// Allocate 2/3rds of the space for text1, the rest for text2.
- const chars1 = try diffLinesToCharsMunge(allocator, text1, &line_array, &line_hash, 170);
- const chars2 = try diffLinesToCharsMunge(allocator, text2, &line_array, &line_hash, 255);
+    const chars1 = try diffLinesToCharsMunge(allocator, text1, &line_array, &line_hash, UNICODE_TWO_THIRDS);
+    errdefer allocator.free(chars1); // the limit below caps the shared list's total length, so text2 gets the cumulative maximum
+    const chars2 = try diffLinesToCharsMunge(allocator, text2, &line_array, &line_hash, UNICODE_MAX);
return .{ .chars_1 = chars1, .chars_2 = chars2, .line_array = line_array };
}
+/// What `diffLinesToChars` returns. `deinit` releases all three members.
+const LinesToCharsResult = struct {
+    chars_1: []const u8, // first text, one encoded code point per line
+    chars_2: []const u8, // second text, encoded the same way
+    line_array: ArrayListUnmanaged([]const u8), // the unique lines
+    /// Free both encoded strings and the list (not the lines themselves).
+    pub fn deinit(self: *LinesToCharsResult, allocator: Allocator) void {
+        self.line_array.deinit(allocator);
+        for ([_][]const u8{ self.chars_1, self.chars_2 }) |encoded| allocator.free(encoded);
+    }
+};
+
/// Split a text into a list of strings. Reduce the texts to a string of
/// hashes where each Unicode character represents one line.
/// @param text String to encode.
@@ -670,36 +1008,72 @@ fn diffLinesToCharsMunge(
allocator: std.mem.Allocator,
text: []const u8,
line_array: *ArrayListUnmanaged([]const u8),
- line_hash: *std.StringHashMapUnmanaged(usize),
+ line_hash: *std.StringHashMapUnmanaged(u21),
max_lines: usize,
-) DiffError![]const u8 {
- var line_start: isize = 0;
- var line_end: isize = -1;
- var line: []const u8 = "";
- var chars = ArrayListUnmanaged(u8){};
- // Walk the text, pulling out a Substring for each line.
- // text.split('\n') would would temporarily double our memory footprint.
- // Modifying text would create many large strings to garbage collect.
- while (line_end < @as(isize, @intCast(text.len)) - 1) {
- line_end = b: {
- break :b @as(isize, @intCast(std.mem.indexOf(u8, text[@intCast(line_start)..], "\n") orelse
- break :b @intCast(text.len - 1))) + line_start;
- };
- line = text[@intCast(line_start) .. @as(usize, @intCast(line_start)) + @as(usize, @intCast(line_end + 1 - line_start))];
+) error{OutOfMemory}![]const u8 {
+ var iter = LineIterator{ .text = text };
+ return try diffIteratorToCharsMunge(
+ allocator,
+ line_array,
+ line_hash,
+ &iter,
+ max_lines,
+ );
+}
- if (line_hash.get(line)) |value| {
- try chars.append(allocator, @intCast(value));
+/// Split a text into segments, yielded from an iterator.
+/// Reduce the texts to a string of hashes where each Unicode character
+/// represents one segment.
+///
+/// Iterators must provide: `next()`, which gives the next segment of
+/// the text, and `short_circuit(usize)`, which is called when the
+/// segment limit is reached, and returns the rest of the text. The
+/// parameter provided will be the length of the last segment provided
+/// by `next()`, since the function will not process that segment, and
+/// its text must be included in the remainder.
+///
+/// @param segment_array List of unique string segments.
+/// @param segment_hash Map of segments to the code points encoding them.
+/// @param iterator Returns the next segment. Must have functions
+/// next(), returning the next segment, and short_circuit(),
+/// called when max_segments is reached.
+/// @param max_segments Maximum length of segment_array. Limited to
+/// 0x10ffdf.
+/// @return Encoded string.
+fn diffIteratorToCharsMunge(
+ allocator: std.mem.Allocator,
+ segment_array: *ArrayListUnmanaged([]const u8),
+ segment_hash: *std.StringHashMapUnmanaged(u21),
+ iterator: anytype,
+ max_segments: usize,
+) error{OutOfMemory}![]const u8 {
+ // Because we rebase the codepoint off the already counted segments,
+ // this makes the unreachables in the function legitimate:
+ assert(max_segments <= UNICODE_MAX);
+ var chars = ArrayListUnmanaged(u8){};
+ defer chars.deinit(allocator);
+ var codepoint: u21 = CHAR_OFFSET + cast(u21, segment_array.items.len);
+ var char_buf: [4]u8 = undefined;
+ while (iterator.next()) |line| {
+ if (segment_hash.get(line)) |value| {
+ const nbytes = std.unicode.wtf8Encode(value, &char_buf) catch unreachable;
+ try chars.appendSlice(allocator, char_buf[0..nbytes]);
} else {
- if (line_array.items.len == max_lines) {
- // Bail out at 255 because char 256 == char 0.
- line = text[@intCast(line_start)..];
- line_end = @intCast(text.len);
+ if (codepoint - CHAR_OFFSET == max_segments) {
+ // Bail out
+ const final_line = iterator.short_circuit(line.len);
+ try segment_array.append(allocator, final_line);
+ try segment_hash.put(allocator, final_line, codepoint);
+ const nbytes = std.unicode.wtf8Encode(codepoint, &char_buf) catch unreachable;
+ try chars.appendSlice(allocator, char_buf[0..nbytes]);
+ break;
}
- try line_array.append(allocator, line);
- try line_hash.put(allocator, line, line_array.items.len - 1);
- try chars.append(allocator, @intCast(line_array.items.len - 1));
+ try segment_array.append(allocator, line);
+ try segment_hash.put(allocator, line, codepoint);
+ const nbytes = std.unicode.wtf8Encode(codepoint, &char_buf) catch unreachable;
+ try chars.appendSlice(allocator, char_buf[0..nbytes]);
+ codepoint += 1;
}
- line_start = line_end + 1;
}
return try chars.toOwnedSlice(allocator);
}
@@ -709,27 +1083,73 @@ fn diffLinesToCharsMunge(
/// @param diffs List of Diff objects.
/// @param lineArray List of unique strings.
fn diffCharsToLines(
- allocator: std.mem.Allocator,
- diffs: []Diff,
+ allocator: Allocator,
+ char_diffs: *DiffList,
line_array: []const []const u8,
-) DiffError!void {
+) error{OutOfMemory}!DiffList {
var text = ArrayListUnmanaged(u8){};
defer text.deinit(allocator);
-
- for (diffs) |*d| {
- text.items.len = 0;
- var j: usize = 0;
- while (j < d.text.len) : (j += 1) {
- try text.appendSlice(allocator, line_array[d.text[j]]);
+ var diffs = DiffList{};
+ errdefer deinitDiffList(allocator, &diffs);
+ try diffs.ensureUnusedCapacity(allocator, char_diffs.items.len);
+ for (char_diffs.items) |*d| {
+ var cursor: usize = 0;
+ while (cursor < d.text.len) {
+ const cp_len = std.unicode.utf8ByteSequenceLength(d.text[cursor]) catch {
+ @panic("Internal decode error in diffsCharsToLines");
+ };
+ const cp = std.unicode.wtf8Decode(d.text[cursor..][0..cp_len]) catch {
+ @panic("Internal decode error in diffCharsToLines");
+ };
+ try text.appendSlice(allocator, line_array[cp - CHAR_OFFSET]);
+ cursor += cp_len;
}
- d.text = try allocator.dupe(u8, text.items);
+ diffs.appendAssumeCapacity(Diff.init(
+ d.operation,
+ try text.toOwnedSlice(allocator),
+ ));
}
+ return diffs;
}
+/// Iterator over the lines of `text`; each line keeps its trailing newline.
+const LineIterator = struct {
+ /// Byte offset of the first unread byte of `text`.
+ cursor: usize = 0,
+ /// The text being split; lines are returned as slices of it.
+ text: []const u8,
+
+ /// Yield the next line together with its '\n' when it has one, or
+ /// `null` once the cursor has reached the end of the text.
+ pub fn next(iter: *LineIterator) ?[]const u8 {
+ const start = iter.cursor;
+ if (start == iter.text.len) return null;
+ // The line ends one past the next '\n', or at the end of the text
+ // when no newline remains.
+ const end = if (std.mem.indexOfScalarPos(
+ u8,
+ iter.text,
+ start,
+ '\n',
+ )) |nl| nl + 1 else iter.text.len;
+ iter.cursor = end;
+ return iter.text[start..end];
+ }
+
+ /// Consume everything that is left and return it as one final segment.
+ /// The segment starts `back_out` bytes before the cursor, so passing the
+ /// length of the line just returned by `next` includes that line too.
+ pub fn short_circuit(iter: *LineIterator, back_out: usize) []const u8 {
+ const tail = iter.text[iter.cursor - back_out ..];
+ iter.cursor = iter.text.len;
+ return tail;
+ }
+};
+
/// Reorder and merge like edit sections. Merge equalities.
/// Any edit section can move as long as it doesn't cross an equality.
/// @param diffs List of Diff objects.
-fn diffCleanupMerge(allocator: std.mem.Allocator, diffs: *DiffList) DiffError!void {
+fn diffCleanupMerge(allocator: std.mem.Allocator, diffs: *DiffList) error{OutOfMemory}!void {
// Add a dummy entry at the end.
try diffs.append(allocator, Diff.init(.equal, ""));
var pointer: usize = 0;
@@ -759,32 +1179,23 @@ fn diffCleanupMerge(allocator: std.mem.Allocator, diffs: *DiffList) DiffError!vo
// Upon reaching an equality, check for prior redundancies.
if (count_delete + count_insert > 1) {
if (count_delete != 0 and count_insert != 0) {
- // Factor out any common prefixies.
+ // Factor out any common prefixes.
common_length = diffCommonPrefix(text_insert.items, text_delete.items);
if (common_length != 0) {
if ((pointer - count_delete - count_insert) > 0 and
diffs.items[pointer - count_delete - count_insert - 1].operation == .equal)
- {
- // diffs.items[pointer - count_delete - count_insert - 1].text
- // += text_insert.Substring(0, common_length);
-
+ { // The prefix is not at the start of the diffs
const ii = pointer - count_delete - count_insert - 1;
var nt = try allocator.alloc(u8, diffs.items[ii].text.len + common_length);
-
- // try diffs.items[pointer - count_delete - count_insert - 1].text.append(allocator, text_insert.items[0..common_length]);
const ot = diffs.items[ii].text;
@memcpy(nt[0..ot.len], ot);
@memcpy(nt[ot.len..], text_insert.items[0..common_length]);
-
- // allocator.free(diffs.items[ii].text);
diffs.items[ii].text = nt;
+ allocator.free(ot);
} else {
- // diffs.Insert(0, Diff.init(.equal,
- // text_insert.Substring(0, common_length)));
- const text = std.ArrayListUnmanaged(u8){
- .items = try allocator.dupe(u8, text_insert.items[0..common_length]),
- };
- try diffs.insert(allocator, 0, Diff.init(.equal, try allocator.dupe(u8, text.items)));
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ const text = try allocator.dupe(u8, text_insert.items[0..common_length]);
+ diffs.insertAssumeCapacity(0, Diff.init(.equal, text));
pointer += 1;
}
try text_insert.replaceRange(allocator, 0, common_length, &.{});
@@ -794,48 +1205,51 @@ fn diffCleanupMerge(allocator: std.mem.Allocator, diffs: *DiffList) DiffError!vo
// @ZigPort this seems very wrong
common_length = diffCommonSuffix(text_insert.items, text_delete.items);
if (common_length != 0) {
+ const old_text = diffs.items[pointer].text;
diffs.items[pointer].text = try std.mem.concat(allocator, u8, &.{
text_insert.items[text_insert.items.len - common_length ..],
- diffs.items[pointer].text,
+ old_text,
});
+ allocator.free(old_text);
text_insert.items.len -= common_length;
text_delete.items.len -= common_length;
}
}
// Delete the offending records and add the merged ones.
pointer -= count_delete + count_insert;
- try diffs.replaceRange(allocator, pointer, count_delete + count_insert, &.{});
+ if (count_delete + count_insert > 0) {
+ freeRangeDiffList(allocator, diffs, pointer, count_delete + count_insert);
+ try diffs.replaceRange(allocator, pointer, count_delete + count_insert, &.{});
+ }
if (text_delete.items.len != 0) {
- try diffs.replaceRange(allocator, pointer, 0, &.{
- Diff.init(.delete, try allocator.dupe(u8, text_delete.items)),
- });
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ diffs.insertAssumeCapacity(pointer, Diff.init(
+ .delete,
+ try allocator.dupe(u8, text_delete.items),
+ ));
pointer += 1;
}
if (text_insert.items.len != 0) {
- try diffs.replaceRange(allocator, pointer, 0, &.{
- Diff.init(.insert, try allocator.dupe(u8, text_insert.items)),
- });
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ diffs.insertAssumeCapacity(pointer, Diff.init(
+ .insert,
+ try allocator.dupe(u8, text_insert.items),
+ ));
pointer += 1;
}
pointer += 1;
} else if (pointer != 0 and diffs.items[pointer - 1].operation == .equal) {
// Merge this equality with the previous one.
- // TODO: Fix using realloc or smth
-
+ // Diff texts are []const u8 so a realloc isn't practical here
var nt = try allocator.alloc(u8, diffs.items[pointer - 1].text.len + diffs.items[pointer].text.len);
-
- // try diffs.items[pointer - count_delete - count_insert - 1].text.append(allocator, text_insert.items[0..common_length]);
const ot = diffs.items[pointer - 1].text;
+ defer (allocator.free(ot));
@memcpy(nt[0..ot.len], ot);
@memcpy(nt[ot.len..], diffs.items[pointer].text);
-
- // allocator.free(diffs.items[pointer - 1].text);
diffs.items[pointer - 1].text = nt;
- // allocator.free(diffs.items[pointer].text);
-
- // try diffs.items[pointer - 1].text.append(allocator, diffs.items[pointer].text.items);
- _ = diffs.orderedRemove(pointer);
+ const dead_diff = diffs.orderedRemove(pointer);
+ allocator.free(dead_diff.text);
} else {
pointer += 1;
}
@@ -849,7 +1263,6 @@ fn diffCleanupMerge(allocator: std.mem.Allocator, diffs: *DiffList) DiffError!vo
if (diffs.items[diffs.items.len - 1].text.len == 0) {
diffs.items.len -= 1;
}
-
// Second pass: look for single edits surrounded on both sides by
// equalities which can be shifted sideways to eliminate an equality.
// e.g: ABAC -> ABAC
@@ -862,51 +1275,40 @@ fn diffCleanupMerge(allocator: std.mem.Allocator, diffs: *DiffList) DiffError!vo
{
// This is a single edit surrounded by equalities.
if (std.mem.endsWith(u8, diffs.items[pointer].text, diffs.items[pointer - 1].text)) {
- // Shift the edit over the previous equality.
- // diffs.items[pointer].text = diffs.items[pointer - 1].text +
- // diffs.items[pointer].text[0 .. diffs.items[pointer].text.len -
- // diffs.items[pointer - 1].text.len];
- // diffs.items[pointer + 1].text = diffs.items[pointer - 1].text + diffs.items[pointer + 1].text;
-
+ const old_pt = diffs.items[pointer].text;
const pt = try std.mem.concat(allocator, u8, &.{
diffs.items[pointer - 1].text,
diffs.items[pointer].text[0 .. diffs.items[pointer].text.len -
diffs.items[pointer - 1].text.len],
});
+ allocator.free(old_pt);
+ diffs.items[pointer].text = pt;
+ const old_pt1t = diffs.items[pointer + 1].text;
const p1t = try std.mem.concat(allocator, u8, &.{
diffs.items[pointer - 1].text,
diffs.items[pointer + 1].text,
});
-
- // allocator.free(diffs.items[pointer].text);
- // allocator.free(diffs.items[pointer + 1].text);
-
- diffs.items[pointer].text = pt;
+ allocator.free(old_pt1t);
diffs.items[pointer + 1].text = p1t;
-
+ freeRangeDiffList(allocator, diffs, pointer - 1, 1);
try diffs.replaceRange(allocator, pointer - 1, 1, &.{});
changes = true;
} else if (std.mem.startsWith(u8, diffs.items[pointer].text, diffs.items[pointer + 1].text)) {
- // Shift the edit over the next equality.
- // diffs.items[pointer - 1].text += diffs.items[pointer + 1].text;
- // diffs.items[pointer].text =
- // diffs.items[pointer].text[diffs.items[pointer + 1].text.len..] + diffs.items[pointer + 1].text;
-
+ const old_ptm1 = diffs.items[pointer - 1].text;
const pm1t = try std.mem.concat(allocator, u8, &.{
diffs.items[pointer - 1].text,
diffs.items[pointer + 1].text,
});
+ allocator.free(old_ptm1);
+ diffs.items[pointer - 1].text = pm1t;
+ const old_pt = diffs.items[pointer].text;
const pt = try std.mem.concat(allocator, u8, &.{
diffs.items[pointer].text[diffs.items[pointer + 1].text.len..],
diffs.items[pointer + 1].text,
});
-
- // allocator.free(diffs.items[pointer - 1].text);
- // allocator.free(diffs.items[pointer].text);
-
- diffs.items[pointer - 1].text = pm1t;
+ allocator.free(old_pt);
diffs.items[pointer].text = pt;
-
+ freeRangeDiffList(allocator, diffs, pointer + 1, 1);
try diffs.replaceRange(allocator, pointer + 1, 1, &.{});
changes = true;
}
@@ -922,32 +1324,34 @@ fn diffCleanupMerge(allocator: std.mem.Allocator, diffs: *DiffList) DiffError!vo
/// Reduce the number of edits by eliminating semantically trivial
/// equalities.
/// @param diffs List of Diff objects.
-fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: *DiffList) DiffError!void {
+pub fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: *DiffList) error{OutOfMemory}!void {
var changes = false;
// Stack of indices where equalities are found.
- var equalities = ArrayListUnmanaged(isize){};
+ var equalities = ArrayListUnmanaged(usize){};
+ defer equalities.deinit(allocator);
// Always equal to equalities[equalitiesLength-1][1]
var last_equality: ?[]const u8 = null;
- var pointer: isize = 0; // Index of current position.
+ var pointer: usize = 0; // Index of current position.
// Number of characters that changed prior to the equality.
var length_insertions1: usize = 0;
var length_deletions1: usize = 0;
// Number of characters that changed after the equality.
var length_insertions2: usize = 0;
var length_deletions2: usize = 0;
+ var reset_pointer = false;
while (pointer < diffs.items.len) {
- if (diffs.items[@intCast(pointer)].operation == .equal) { // Equality found.
+ if (diffs.items[pointer].operation == .equal) { // Equality found.
try equalities.append(allocator, pointer);
length_insertions1 = length_insertions2;
length_deletions1 = length_deletions2;
length_insertions2 = 0;
length_deletions2 = 0;
- last_equality = diffs.items[@intCast(pointer)].text;
+ last_equality = diffs.items[pointer].text;
} else { // an insertion or deletion
- if (diffs.items[@intCast(pointer)].operation == .insert) {
- length_insertions2 += diffs.items[@intCast(pointer)].text.len;
+ if (diffs.items[pointer].operation == .insert) {
+ length_insertions2 += diffs.items[pointer].text.len;
} else {
- length_deletions2 += diffs.items[@intCast(pointer)].text.len;
+ length_deletions2 += diffs.items[pointer].text.len;
}
// Eliminate an equality that is smaller or equal to the edits on both
// sides of it.
@@ -956,19 +1360,26 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: *DiffList) DiffError
(last_equality.?.len <= @max(length_insertions2, length_deletions2)))
{
// Duplicate record.
- try diffs.insert(
- allocator,
- @intCast(equalities.items[equalities.items.len - 1]),
- Diff.init(.delete, try allocator.dupe(u8, last_equality.?)),
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ diffs.insertAssumeCapacity(
+ equalities.items[equalities.items.len - 1],
+ Diff.init(
+ .delete,
+ try allocator.dupe(u8, last_equality.?),
+ ),
);
// Change second copy to insert.
- diffs.items[@intCast(equalities.items[equalities.items.len - 1] + 1)].operation = .insert;
+ diffs.items[equalities.items[equalities.items.len - 1] + 1].operation = .insert;
// Throw away the equality we just deleted.
_ = equalities.pop();
if (equalities.items.len > 0) {
_ = equalities.pop();
}
- pointer = if (equalities.items.len > 0) equalities.items[equalities.items.len - 1] else -1;
+ if (equalities.items.len > 0) {
+ pointer = equalities.items[equalities.items.len - 1];
+ } else {
+ reset_pointer = true;
+ }
length_insertions1 = 0; // Reset the counters.
length_deletions1 = 0;
length_insertions2 = 0;
@@ -977,7 +1388,12 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: *DiffList) DiffError
changes = true;
}
}
- pointer += 1;
+ if (reset_pointer) {
+ pointer = 0;
+ reset_pointer = false;
+ } else {
+ pointer += 1;
+ }
}
// Normalize the diff.
@@ -994,11 +1410,11 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: *DiffList) DiffError
// Only extract an overlap if it is as big as the edit ahead or behind it.
pointer = 1;
while (pointer < diffs.items.len) {
- if (diffs.items[@intCast(pointer - 1)].operation == .delete and
- diffs.items[@intCast(pointer)].operation == .insert)
+ if (diffs.items[pointer - 1].operation == .delete and
+ diffs.items[pointer].operation == .insert)
{
- const deletion = diffs.items[@intCast(pointer - 1)].text;
- const insertion = diffs.items[@intCast(pointer)].text;
+ const deletion = diffs.items[pointer - 1].text;
+ const insertion = diffs.items[pointer].text;
const overlap_length1: usize = diffCommonOverlap(deletion, insertion);
const overlap_length2: usize = diffCommonOverlap(insertion, deletion);
if (overlap_length1 >= overlap_length2) {
@@ -1007,15 +1423,20 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: *DiffList) DiffError
{
// Overlap found.
// Insert an equality and trim the surrounding edits.
- try diffs.insert(
- allocator,
- @intCast(pointer),
- Diff.init(.equal, try allocator.dupe(u8, insertion[0..overlap_length1])),
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ diffs.insertAssumeCapacity(
+ pointer,
+ Diff.init(
+ .equal,
+ try allocator.dupe(u8, insertion[0..overlap_length1]),
+ ),
);
- diffs.items[@intCast(pointer - 1)].text =
+ diffs.items[pointer - 1].text =
try allocator.dupe(u8, deletion[0 .. deletion.len - overlap_length1]);
- diffs.items[@intCast(pointer + 1)].text =
+ allocator.free(deletion);
+ diffs.items[pointer + 1].text =
try allocator.dupe(u8, insertion[overlap_length1..]);
+ allocator.free(insertion);
pointer += 1;
}
} else {
@@ -1024,17 +1445,23 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: *DiffList) DiffError
{
// Reverse overlap found.
// Insert an equality and swap and trim the surrounding edits.
- try diffs.insert(
- allocator,
- @intCast(pointer),
- Diff.init(.equal, try allocator.dupe(u8, deletion[0..overlap_length2])),
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ diffs.insertAssumeCapacity(
+ pointer,
+ Diff.init(
+ .equal,
+ try allocator.dupe(u8, deletion[0..overlap_length2]),
+ ),
);
- diffs.items[@intCast(pointer - 1)].operation = .insert;
- diffs.items[@intCast(pointer - 1)].text =
- try allocator.dupe(u8, insertion[0 .. insertion.len - overlap_length2]);
- diffs.items[@intCast(pointer + 1)].operation = .delete;
- diffs.items[@intCast(pointer + 1)].text =
- try allocator.dupe(u8, deletion[overlap_length2..]);
+ const new_minus = try allocator.dupe(u8, insertion[0 .. insertion.len - overlap_length2]);
+ errdefer allocator.free(new_minus); // necessary due to swap
+ const new_plus = try allocator.dupe(u8, deletion[overlap_length2..]);
+ allocator.free(deletion);
+ allocator.free(insertion);
+ diffs.items[pointer - 1].operation = .insert;
+ diffs.items[pointer - 1].text = new_minus;
+ diffs.items[pointer + 1].operation = .delete;
+ diffs.items[pointer + 1].text = new_plus;
pointer += 1;
}
}
@@ -1050,7 +1477,7 @@ fn diffCleanupSemantic(allocator: std.mem.Allocator, diffs: *DiffList) DiffError
pub fn diffCleanupSemanticLossless(
allocator: std.mem.Allocator,
diffs: *DiffList,
-) DiffError!void {
+) error{OutOfMemory}!void {
var pointer: usize = 1;
// Intentionally ignore the first and last element (don't need checking).
while (pointer < @as(isize, @intCast(diffs.items.len)) - 1) {
@@ -1073,17 +1500,15 @@ pub fn diffCleanupSemanticLossless(
// First, shift the edit as far left as possible.
const common_offset = diffCommonSuffix(equality_1.items, edit.items);
if (common_offset > 0) {
- // TODO: Use buffer
const common_string = try allocator.dupe(u8, edit.items[edit.items.len - common_offset ..]);
defer allocator.free(common_string);
equality_1.items.len = equality_1.items.len - common_offset;
- // edit.items.len = edit.items.len - common_offset;
const not_common = try allocator.dupe(u8, edit.items[0 .. edit.items.len - common_offset]);
defer allocator.free(not_common);
- edit.items.len = 0;
+ edit.clearRetainingCapacity();
try edit.appendSlice(allocator, common_string);
try edit.appendSlice(allocator, not_common);
@@ -1136,16 +1561,24 @@ pub fn diffCleanupSemanticLossless(
if (!std.mem.eql(u8, diffs.items[pointer - 1].text, best_equality_1.items)) {
// We have an improvement, save it back to the diff.
if (best_equality_1.items.len != 0) {
+ const old_text = diffs.items[pointer - 1].text;
diffs.items[pointer - 1].text = try allocator.dupe(u8, best_equality_1.items);
+ allocator.free(old_text);
} else {
- _ = diffs.orderedRemove(pointer - 1);
+ const old_diff = diffs.orderedRemove(pointer - 1);
+ allocator.free(old_diff.text);
pointer -= 1;
}
+ const old_text1 = diffs.items[pointer].text;
diffs.items[pointer].text = try allocator.dupe(u8, best_edit.items);
+ defer allocator.free(old_text1);
if (best_equality_2.items.len != 0) {
+ const old_text2 = diffs.items[pointer + 1].text;
diffs.items[pointer + 1].text = try allocator.dupe(u8, best_equality_2.items);
+ allocator.free(old_text2);
} else {
- _ = diffs.orderedRemove(pointer + 1);
+ const old_diff = diffs.orderedRemove(pointer + 1);
+ allocator.free(old_diff.text);
pointer -= 1;
}
}
@@ -1180,10 +1613,8 @@ fn diffCleanupSemanticScore(one: []const u8, two: []const u8) usize {
const lineBreak1 = whitespace1 and std.ascii.isControl(char1);
const lineBreak2 = whitespace2 and std.ascii.isControl(char2);
const blankLine1 = lineBreak1 and
- // BLANKLINEEND.IsMatch(one);
(std.mem.endsWith(u8, one, "\n\n") or std.mem.endsWith(u8, one, "\n\r\n"));
const blankLine2 = lineBreak2 and
- // BLANKLINESTART.IsMatch(two);
(std.mem.startsWith(u8, two, "\n\n") or
std.mem.startsWith(u8, two, "\r\n\n") or
std.mem.startsWith(u8, two, "\n\r\n") or
@@ -1208,29 +1639,20 @@ fn diffCleanupSemanticScore(one: []const u8, two: []const u8) usize {
return 0;
}
-// Define some regex patterns for matching boundaries.
-// private Regex BLANKLINEEND = new Regex("\\n\\r?\\n\\Z");
-// \n\n
-// \n\r\n
-// private Regex BLANKLINESTART = new Regex("\\A\\r?\\n\\r?\\n");
-// \n\n
-// \r\n\n
-// \n\r\n
-// \r\n\r\n
-
/// Reduce the number of edits by eliminating operationally trivial
/// equalities.
pub fn diffCleanupEfficiency(
dmp: DiffMatchPatch,
allocator: std.mem.Allocator,
diffs: *DiffList,
-) DiffError!void {
+) error{OutOfMemory}!void {
var changes = false;
// Stack of indices where equalities are found.
- var equalities = DiffList{};
+ var equalities = std.ArrayList(usize).init(allocator);
+ defer equalities.deinit();
// Always equal to equalities[equalitiesLength-1][1]
- var last_equality = "";
- var pointer: isize = 0; // Index of current position.
+ var last_equality: []const u8 = "";
+ var ipointer: isize = 0; // Index of current position.
// Is there an insertion operation before the last equality.
var pre_ins = false;
// Is there a deletion operation before the last equality.
@@ -1239,11 +1661,12 @@ pub fn diffCleanupEfficiency(
var post_ins = false;
// Is there a deletion operation after the last equality.
var post_del = false;
- while (pointer < diffs.Count) {
+ while (ipointer < diffs.items.len) {
+ const pointer: usize = @intCast(ipointer);
if (diffs.items[pointer].operation == .equal) { // Equality found.
if (diffs.items[pointer].text.len < dmp.diff_edit_cost and (post_ins or post_del)) {
// Candidate found.
- equalities.Push(pointer);
+ try equalities.append(pointer);
pre_ins = post_ins;
pre_del = post_del;
last_equality = diffs.items[pointer].text;
@@ -1266,16 +1689,19 @@ pub fn diffCleanupEfficiency(
// ABXC
// AXCD
// ABXC
- if ((last_equality.Length != 0) and
+ if ((last_equality.len != 0) and
((pre_ins and pre_del and post_ins and post_del) or
- ((last_equality.Length < dmp.diff_edit_cost / 2) and
- ((if (pre_ins) 1 else 0) + (if (pre_del) 1 else 0) + (if (post_ins) 1 else 0) + (if (post_del) 1 else 0)) == 3)))
+ ((last_equality.len < dmp.diff_edit_cost / 2) and
+ (boolInt(pre_ins) + boolInt(pre_del) + boolInt(post_ins) + boolInt(post_del) == 3))))
{
// Duplicate record.
- try diffs.insert(
- allocator,
+ try diffs.ensureUnusedCapacity(allocator, 1);
+ diffs.insertAssumeCapacity(
equalities.items[equalities.items.len - 1],
- Diff.init(.delete, try allocator.dupe(u8, last_equality)),
+ Diff.init(
+ .delete,
+ try allocator.dupe(u8, last_equality),
+ ),
);
// Change second copy to insert.
diffs.items[equalities.items[equalities.items.len - 1] + 1].operation = .insert;
@@ -1291,14 +1717,14 @@ pub fn diffCleanupEfficiency(
_ = equalities.pop();
}
- pointer = if (equalities.items.len > 0) equalities.items[equalities.items.len - 1] else -1;
+ ipointer = if (equalities.items.len > 0) @intCast(equalities.items[equalities.items.len - 1]) else -1;
post_ins = false;
post_del = false;
}
changes = true;
}
}
- pointer += 1;
+ ipointer += 1;
}
if (changes) {
@@ -1339,10 +1765,10 @@ fn diffCommonOverlap(text1_in: []const u8, text2_in: []const u8) usize {
// Performance analysis: https://neil.fraser.name/news/2010/11/04/
var best: usize = 0;
var length: usize = 1;
- while (true) {
+ const best_idx = idx: while (true) {
const pattern = text1[text_length - length ..];
const found = std.mem.indexOf(u8, text2, pattern) orelse
- return best;
+ break :idx best;
length += found;
@@ -1350,855 +1776,4262 @@ fn diffCommonOverlap(text1_in: []const u8, text2_in: []const u8) usize {
best = length;
length += 1;
}
+ };
+ if (best_idx == 0) return best_idx;
+ // This would mean a truncation: lead or follow, followed by a follow
+ // which differs (or it would be included in our overlap).
+ // TODO this currently appears to be dead code, keep an eye on that.
+ // Reasoning: we're looking for a suffix which matches a prefix, and
+ // we've already assured that edits end with a follow byte, and begin
+ // with a lead byte, ASCII being both for our purposes. So a split
+ // should not be possible.
+ // I'm going to add a panic just so I know if test cases of any sort
+ // trigger this code path.
+ // XXX Remove this before merge if it can't be triggered.
+ if (is_follow(text2[best_idx])) {
+ // back out
+ return fixSplitBackward(text2, best_idx);
}
+ return best_idx;
}
-// pub fn main() void {
-// var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
-// defer arena.deinit();
-
-// var bruh = default.diff(arena.allocator(), "Hello World.", "Goodbye World.", true);
-// std.log.err("{any}", .{bruh});
-// }
-
-// test {
-// var arena = std.heap.ArenaAllocator.init(testing.allocator);
-// defer arena.deinit();
+/// `u_loc` is a byte offset in text1; compute and return the equivalent
+/// offset in text2. An offset inside a deletion maps to the deletion's start.
+/// e.g. with diffs equal "The ", insert "big ", equal "cat": 1->1, 4->8
+/// @param diffs List of Diff objects.
+/// @param u_loc Location within text1.
+/// @return Location within text2.
+///
+pub fn diffIndex(diffs: DiffList, u_loc: usize) usize {
+ var chars1: isize = 0;
+ var chars2: isize = 0;
+ var last_chars1: isize = 0;
+ var last_chars2: isize = 0;
+ const loc: isize = @intCast(u_loc);
+ // Placeholder; only replaced when the loop overshoots `loc`.
+ var last_diff: Diff = Diff{ .operation = .equal, .text = "" };
+ for (diffs.items) |a_diff| {
+ if (a_diff.operation != .insert) {
+ // Equality or deletion: advances the position in text1.
+ chars1 += @intCast(a_diff.text.len);
+ }
+ if (a_diff.operation != .delete) {
+ // Equality or insertion: advances the position in text2.
+ chars2 += @intCast(a_diff.text.len);
+ }
+ if (chars1 > loc) {
+ // Overshot the location.
+ last_diff = a_diff;
+ break;
+ }
+ }
+ last_chars1 = chars1;
+ last_chars2 = chars2;
-// var bruh = try default.diff(arena.allocator(), "Hello World.", "Goodbye World.", true);
-// try diffCleanupSemantic(arena.allocator(), &bruh);
-// for (bruh.items) |b| {
-// std.log.err("{any}", .{b});
-// }
+ if (last_diff.text.len != 0 and last_diff.operation == .delete) {
+ // The location was deleted.
+ return @intCast(last_chars2);
+ }
+ // Adjust by the signed distance from `last_chars1` to `loc`.
+ return @intCast(last_chars2 + (loc - last_chars1));
+}
-// // for (bruh.items) |b| {
-// // std.log.err("{s} {s}", .{ switch (b.operation) {
-// // .equal => "",
-// // .insert => "+",
-// // .delete => "-",
-// // }, b.text });
-// // }
-// }
+/// A struct holding bookends for `diffPrettyFormat(diffs)`.
+///
+/// May include a function taking an allocator and the Diff,
+/// which shall return the text of the Diff, appropriately munged.
+/// This allows for tasks like proper HTML escaping. Note that if
+/// the function is provided, all text it returns will be freed, so
+/// it must always return a copy whether or not edits are needed.
+pub const DiffDecorations = struct {
+ delete_start: []const u8 = "",
+ delete_end: []const u8 = "",
+ insert_start: []const u8 = "",
+ insert_end: []const u8 = "",
+ equals_start: []const u8 = "",
+ equals_end: []const u8 = "",
+ pre_process: ?fn (Allocator, Diff) error{OutOfMemory}![]const u8 = null,
+};
-// TODO: Allocate all text in diffs to
-// not cause segfault while freeing; not a problem
-// at the moment because we don't free anything :P
+/// Decorations for classic Xterm printing: bright red (SGR 91) for deletes
+/// and bright green (SGR 92) for inserts, each reset afterwards with `ESC[m`.
+pub const xterm_classic = DiffDecorations{
+ .delete_start = "\x1b[91m",
+ .delete_end = "\x1b[m",
+ .insert_start = "\x1b[92m",
+ .insert_end = "\x1b[m",
+};
-test diffCommonPrefix {
- // Detect any common suffix.
- try testing.expectEqual(@as(usize, 0), diffCommonPrefix("abc", "xyz")); // Null case
- try testing.expectEqual(@as(usize, 4), diffCommonPrefix("1234abcdef", "1234xyz")); // Non-null case
- try testing.expectEqual(@as(usize, 4), diffCommonPrefix("1234", "1234xyz")); // Whole case
+/// Render `diffs` as a single string, decorated according to `deco`.
+/// Caller owns the returned slice and frees it with `allocator`.
+pub fn diffPrettyFormat(
+ allocator: Allocator,
+ diffs: DiffList,
+ deco: DiffDecorations,
+) ![]const u8 {
+ var buffer = std.ArrayList(u8).init(allocator);
+ // Releases the partial output if formatting fails part-way through.
+ defer buffer.deinit();
+ _ = try writeDiffPrettyFormat(allocator, buffer.writer(), diffs, deco);
+ return buffer.toOwnedSlice();
}
-test diffCommonSuffix {
- // Detect any common suffix.
- try testing.expectEqual(@as(usize, 0), diffCommonSuffix("abc", "xyz")); // Null case
- try testing.expectEqual(@as(usize, 4), diffCommonSuffix("abcdef1234", "xyz1234")); // Non-null case
- try testing.expectEqual(@as(usize, 4), diffCommonSuffix("1234", "xyz1234")); // Whole case
+/// Format `diffs` for a terminal using the `xterm_classic` decorations.
+pub fn diffPrettyFormatXTerm(allocator: Allocator, diffs: DiffList) ![]const u8 {
+ return diffPrettyFormat(allocator, diffs, xterm_classic);
}
-test diffCommonOverlap {
- // Detect any suffix/prefix overlap.
- try testing.expectEqual(@as(usize, 0), diffCommonOverlap("", "abcd")); // Null case
- try testing.expectEqual(@as(usize, 3), diffCommonOverlap("abc", "abcd")); // Whole case
- try testing.expectEqual(@as(usize, 0), diffCommonOverlap("123456", "abcd")); // No overlap
- try testing.expectEqual(@as(usize, 3), diffCommonOverlap("123456xxx", "xxxabcd")); // Overlap
-
- // Some overly clever languages (C#) may treat ligatures as equal to their
- // component letters. E.g. U+FB01 == 'fi'
- try testing.expectEqual(@as(usize, 0), diffCommonOverlap("fi", "\u{fb01}")); // Unicode
+/// Write a pretty-formatted `DiffList` to `writer`: each diff's text is
+/// wrapped in the start/end strings `deco` defines for its operation.
+/// `allocator` is only used when `deco.pre_process` is set, to produce
+/// and then free the processed text.
+/// Returns the number of bytes written, which on success is the full
+/// length of the formatted output. Writer and allocation errors propagate.
+pub fn writeDiffPrettyFormat(
+ allocator: Allocator,
+ writer: anytype,
+ diffs: DiffList,
+ deco: DiffDecorations,
+) !usize {
+ var written: usize = 0;
+ for (diffs.items) |d| {
+ const text = if (deco.pre_process) |lambda|
+ try lambda(allocator, d)
+ else
+ d.text;
+ // Only text produced by `pre_process` is owned here; `d.text` is not.
+ defer {
+ if (deco.pre_process) |_|
+ allocator.free(text);
+ }
+ // Pick the bookends for this operation.
+ const bookends: [2][]const u8 = switch (d.operation) {
+ .delete => .{ deco.delete_start, deco.delete_end },
+ .insert => .{ deco.insert_start, deco.insert_end },
+ .equal => .{ deco.equals_start, deco.equals_end },
+ };
+ // `write` may return after a partial write on some writers, which
+ // would silently truncate the output; `writeAll` keeps going until
+ // every byte is accepted, so the running count stays exact.
+ const pieces = [_][]const u8{ bookends[0], text, bookends[1] };
+ for (pieces) |piece| {
+ try writer.writeAll(piece);
+ written += piece.len;
+ }
+ }
+ return written;
}
-test diffHalfMatch {
- var arena = std.heap.ArenaAllocator.init(testing.allocator);
- defer arena.deinit();
-
- var one_timeout = DiffMatchPatch{};
- one_timeout.diff_timeout = 1;
+/// Rebuild the source text by joining every equality and deletion in order.
+/// Caller owns the returned slice and frees it with `allocator`.
+/// @param diffs List of `Diff` objects.
+/// @return Source text.
+pub fn diffBeforeText(allocator: Allocator, diffs: DiffList) error{OutOfMemory}![]const u8 {
+ var source = ArrayListUnmanaged(u8){};
+ defer source.deinit(allocator);
+ for (diffs.items) |d| {
+ switch (d.operation) {
+ .equal, .delete => try source.appendSlice(allocator, d.text),
+ .insert => {},
+ }
+ }
+ return source.toOwnedSlice(allocator);
+}
- try testing.expectEqual(
- @as(?HalfMatchResult, null),
- try one_timeout.diffHalfMatch(arena.allocator(), "1234567890", "abcdef"),
- ); // No match #1
- try testing.expectEqual(
- @as(?HalfMatchResult, null),
- try one_timeout.diffHalfMatch(arena.allocator(), "12345", "23"),
- ); // No match #2
+/// Rebuild the destination text by joining every equality and insertion in order.
+/// Caller owns the returned slice and frees it with `allocator`.
+/// @param diffs List of `Diff` objects.
+/// @return Destination text.
+pub fn diffAfterText(allocator: Allocator, diffs: DiffList) error{OutOfMemory}![]const u8 {
+ var destination = ArrayListUnmanaged(u8){};
+ defer destination.deinit(allocator);
+ for (diffs.items) |d| {
+ switch (d.operation) {
+ .equal, .insert => try destination.appendSlice(allocator, d.text),
+ .delete => {},
+ }
+ }
+ return destination.toOwnedSlice(allocator);
+}
- // Single matches
- try testing.expectEqualDeep(@as(?HalfMatchResult, HalfMatchResult{
- .prefix_before = "12",
- .suffix_before = "90",
- .prefix_after = "a",
- .suffix_after = "z",
- .common_middle = "345678",
- }), try one_timeout.diffHalfMatch(arena.allocator(), "1234567890", "a345678z")); // Single Match #1
-
- try testing.expectEqualDeep(@as(?HalfMatchResult, HalfMatchResult{
- .prefix_before = "a",
- .suffix_before = "z",
- .prefix_after = "12",
- .suffix_after = "90",
- .common_middle = "345678",
- }), try one_timeout.diffHalfMatch(arena.allocator(), "a345678z", "1234567890")); // Single Match #2
-
- try testing.expectEqualDeep(@as(?HalfMatchResult, HalfMatchResult{
- .prefix_before = "abc",
- .suffix_before = "z",
- .prefix_after = "1234",
- .suffix_after = "0",
- .common_middle = "56789",
- }), try one_timeout.diffHalfMatch(arena.allocator(), "abc56789z", "1234567890")); // Single Match #3
-
- try testing.expectEqualDeep(@as(?HalfMatchResult, HalfMatchResult{
- .prefix_before = "a",
- .suffix_before = "xyz",
- .prefix_after = "1",
- .suffix_after = "7890",
- .common_middle = "23456",
- }), try one_timeout.diffHalfMatch(arena.allocator(), "a23456xyz", "1234567890")); // Single Match #4
-
- // Multiple matches
- try testing.expectEqualDeep(
- @as(?HalfMatchResult, HalfMatchResult{
- .prefix_before = "12123",
- .suffix_before = "123121",
- .prefix_after = "a",
- .suffix_after = "z",
- .common_middle = "1234123451234",
- }),
- try one_timeout.diffHalfMatch(arena.allocator(), "121231234123451234123121", "a1234123451234z"),
- ); // Multiple Matches #1
+/// Compute the Levenshtein distance of a diff: the number of inserted,
+/// deleted or substituted bytes.
+///
+/// Edits are grouped into runs separated by equalities. Within a run a
+/// deletion paired with an insertion counts as one substitution, so the run
+/// contributes `@max(inserted, deleted)`. The contributions of all runs are
+/// summed.
+///
+/// `diffs` is the list of `Diff` objects to measure. Returns the number of
+/// changes as an `f64`.
+pub fn diffLevenshtein(diffs: DiffList) f64 {
+    var inserts: usize = 0;
+    var deletes: usize = 0;
+    var levenshtein: usize = 0;
+    for (diffs.items) |a_diff| {
+        switch (a_diff.operation) {
+            .insert => inserts += a_diff.text.len,
+            .delete => deletes += a_diff.text.len,
+            .equal => {
+                // An equality closes the current run of edits. Accumulate
+                // (never overwrite) so earlier runs are not forgotten.
+                levenshtein += @max(inserts, deletes);
+                inserts = 0;
+                deletes = 0;
+            },
+        }
+    }
+    // Account for a trailing run that is not followed by an equality.
+    return @floatFromInt(levenshtein + @max(inserts, deletes));
+}
+
+test diffLevenshtein {
+    const allocator = testing.allocator;
+    // These diffs don't get text freed
+    {
+        // Levenshtein with trailing equality.
+        var diffs = DiffList{};
+        defer diffs.deinit(allocator);
+        try diffs.appendSlice(allocator, &.{
+            Diff.init(.delete, "abc"),
+            Diff.init(.insert, "1234"),
+            Diff.init(.equal, "xyz"),
+        });
+        try testing.expectEqual(4, diffLevenshtein(diffs));
+    }
+    {
+        // Levenshtein with leading equality.
+        var diffs = DiffList{};
+        defer diffs.deinit(allocator);
+        try diffs.appendSlice(allocator, &.{
+            Diff.init(.equal, "xyz"),
+            Diff.init(.delete, "abc"),
+            Diff.init(.insert, "1234"),
+        });
+        try testing.expectEqual(4, diffLevenshtein(diffs));
+    }
+    {
+        // Levenshtein with middle equality.
+        var diffs = DiffList{};
+        defer diffs.deinit(allocator);
+        try diffs.appendSlice(allocator, &.{
+            Diff.init(.delete, "abc"),
+            Diff.init(.equal, "xyz"),
+            Diff.init(.insert, "1234"),
+        });
+        try testing.expectEqual(7, diffLevenshtein(diffs));
+    }
+    {
+        // Several runs of edits: every run must contribute to the total
+        // (3 + 4 + 2), not just the one before the last equality.
+        var diffs = DiffList{};
+        defer diffs.deinit(allocator);
+        try diffs.appendSlice(allocator, &.{
+            Diff.init(.delete, "abc"),
+            Diff.init(.equal, "xyz"),
+            Diff.init(.insert, "1234"),
+            Diff.init(.equal, "uvw"),
+            Diff.init(.delete, "de"),
+        });
+        try testing.expectEqual(9, diffLevenshtein(diffs));
+    }
+}
+
+//| MATCH FUNCTIONS
+
+/// Find the best occurrence of `pattern` in `text` near `passed_loc`.
+///
+/// `passed_loc` is clamped to `text.len` before use. Exact matches are
+/// recognised cheaply; everything else is delegated to `matchBitap`.
+///
+/// @param text The text to search.
+/// @param pattern The pattern to search for.
+/// @param passed_loc The location to search around.
+/// @return Best match index, or `null` when no match was found.
+pub fn matchMain(
+    dmp: DiffMatchPatch,
+    allocator: Allocator,
+    text: []const u8,
+    pattern: []const u8,
+    passed_loc: usize,
+) error{OutOfMemory}!?usize {
+    // Shortcut: the whole text is the pattern, so the match starts at 0.
+    if (std.mem.eql(u8, text, pattern)) return 0;
+    // Nothing to match against.
+    if (text.len == 0) return null;
+
+    // Keep the expected location inside the text.
+    const loc = @min(passed_loc, text.len);
+    const window_end = loc + pattern.len;
+    if (window_end <= text.len and std.mem.eql(u8, text[loc..window_end], pattern)) {
+        // Perfect match at the perfect spot (this also covers an empty
+        // pattern, which matches anywhere).
+        return loc;
+    }
+
+    // Otherwise do a fuzzy compare.
+    return dmp.matchBitap(allocator, text, pattern, loc);
+}
+
+/// The constant 1 as a `u64`; used as the left operand when the Bitap bit
+/// masks are built by shifting.
+const sh_one: u64 = 1;
+
+/// Locate the best instance of `pattern` in `text` near `loc` using the
+/// Bitap algorithm. Returns `null` if no match is found.
+///
+/// Asserts that both `text` and `pattern` are non-empty. `pattern.len - 1`
+/// is cast to a `u6` shift amount, so the pattern must not be longer than
+/// 64 bytes.
+///
+/// @param text The text to search.
+/// @param pattern The pattern to search for.
+/// @param loc The location to search around.
+/// @return Best match index or `null`.
+fn matchBitap(
+ dmp: DiffMatchPatch,
+ allocator: Allocator,
+ text: []const u8,
+ pattern: []const u8,
+ loc: usize,
+) error{OutOfMemory}!?usize {
+ // TODO decide what to do here:
+ // assert (Match_MaxBits == 0 || pattern.Length <= Match_MaxBits)
+ // : "Pattern too long for this application.";
+ assert(text.len != 0 and pattern.len != 0);
+
+ // Initialise the alphabet.
+ var map = try matchAlphabet(allocator, pattern);
+ defer map.deinit();
+ // Highest score beyond which we give up.
+ var score_threshold = dmp.match_threshold;
+ // Is there a nearby exact match? (speedup)
+ // TODO obviously if we want a speedup here, we do this:
+ // if (threshold == 0.0) return best_loc; #proof in comments
+ // We don't have to unwrap best_loc because the retval is ?usize already
+ // #proof axiom: threshold is between 0.0 and 1.0 (doc comment)
+ var best_loc = std.mem.indexOfPos(u8, text, loc, pattern);
+ if (best_loc) |best| { // #proof this returns 0.0 for exact match (see comments in function)
+ score_threshold = @min(dmp.matchBitapScore(0, best, loc, pattern), score_threshold);
+ }
+ // What about in the other direction? (speedup)
+ // Only look at the text up to the end of a match starting at `loc`.
+ const trunc_text = text[0..@min(loc + pattern.len, text.len)];
+ best_loc = std.mem.lastIndexOf(u8, trunc_text, pattern);
+ if (best_loc) |best| { // #proof same here obviously
+ score_threshold = @min(dmp.matchBitapScore(0, best, loc, pattern), score_threshold);
+ }
+ // Initialise the bit arrays.
+ // `matchmask` has only the bit for the pattern's first byte set; it is
+ // tested against each `rd[j]` below to detect a match.
+ const shift: u6 = @intCast(pattern.len - 1);
+ const matchmask = sh_one << shift;
+ // The exact matches above were only used to tighten the threshold.
+ best_loc = null;
+ // Zig is very insistent about integer width and signedness.
+ const i_textlen: isize = @intCast(text.len);
+ const i_patlen: isize = @intCast(pattern.len);
+
+ const i_loc: isize = @intCast(loc);
+ var bin_min: isize = undefined;
+ var bin_mid: isize = undefined;
+ var bin_max: isize = i_patlen + i_textlen;
+ // Start with an empty last_rd so that it can always be freed the same way.
+ var last_rd: []usize = try allocator.alloc(usize, 0);
+ errdefer allocator.free(last_rd);
+ for (0..pattern.len) |d| {
+ // Scan for the best match; each iteration allows for one more error.
+ // Run a binary search to determine how far from 'loc' we can stray at
+ // this error level.
+ bin_min = 0;
+ bin_mid = bin_max;
+ while (bin_min < bin_mid) {
+ // #proof lemma: if threshold == 0.0, this never happens
+ if (dmp.matchBitapScore(d, @intCast(i_loc + bin_mid), loc, pattern) <= score_threshold) {
+ bin_min = bin_mid;
+ } else {
+ bin_max = bin_mid;
+ }
+ bin_mid = @divTrunc(bin_max - bin_min, 2) + bin_min;
+ }
+ // Use the result from this iteration as the maximum for the next.
+ bin_max = bin_mid;
+ // `start` is never below 1, so the `j -= 1` step of the scan below
+ // cannot underflow.
+ var start: usize = @intCast(@max(1, i_loc - bin_mid + 1));
+ const finish: usize = @intCast(@min(i_loc + bin_mid, i_textlen) + i_patlen);
+ // Bit array for this error level; indices up to `finish + 1` are used.
+ var rd: []usize = try allocator.alloc(usize, finish + 2);
+ errdefer allocator.free(rd);
+ const dshift: u6 = @intCast(d);
+ rd[finish + 1] = (sh_one << dshift) - 1;
+ var j = finish;
+ // Walk the window from `finish` down to `start`.
+ while (j >= start) : (j -= 1) {
+ const char_match: usize = if (text.len <= j - 1 or !map.contains(text[j - 1]))
+ // Out of range.
+ 0
+ else
+ map.get(text[j - 1]).?;
+ if (d == 0) {
+ // First pass: exact match.
+ rd[j] = ((rd[j + 1] << 1) | 1) & char_match;
+ } else {
+ // Subsequent passes: fuzzy match.
+ rd[j] = ((rd[j + 1] << 1) | 1) & char_match | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1];
+ }
+ if ((rd[j] & matchmask) != 0) {
+ const score = dmp.matchBitapScore(d, j - 1, loc, pattern);
+ // This match will almost certainly be better than any existing
+ // match. But check anyway.
+ // #proof: the smoking gun. This can only be equal not less.
+ if (score <= score_threshold) {
+ // Told you so.
+ score_threshold = score;
+ best_loc = j - 1;
+ if (best_loc.? > loc) {
+ // When passing loc, don't exceed our current distance from loc.
+ const i_best_loc: isize = @intCast(best_loc.?);
+ start = @max(1, 2 * i_loc - i_best_loc);
+ } else {
+ // Already passed loc, downhill from here on in.
+ break;
+ }
+ }
+ }
+ } // #proof Anything else will do this.
+ // #proof d + 1 starts at 1, so (see function) this will always break.
+ if (dmp.matchBitapScore(d + 1, loc, loc, pattern) > score_threshold) {
+ // No hope for a (better) match at greater error levels.
+ // `rd` is not handed over to `last_rd` on this path, so free it here.
+ allocator.free(rd);
+ break;
+ }
+ // Keep this level's bit array for the next level; drop the previous one.
+ allocator.free(last_rd);
+ last_rd = rd;
+ }
+ allocator.free(last_rd);
+ return best_loc;
+}
+
+/// Score a candidate match that has `e` errors and starts at `x`.
+///
+/// The score is the error ratio (`e / pattern.len`) plus a distance penalty
+/// (`|loc - x| / match_distance`). When `match_distance` is zero, a match
+/// away from `loc` scores 1.0 and a match exactly at `loc` scores just its
+/// error ratio.
+///
+/// @param e Number of errors in match.
+/// @param x Location of match.
+/// @param loc Expected location of match.
+/// @param pattern Pattern being sought.
+/// @return Overall score for match (0.0 = good, 1.0 = bad).
+fn matchBitapScore(
+    dmp: DiffMatchPatch,
+    e: usize,
+    x: usize,
+    loc: usize,
+    pattern: []const u8,
+) f64 {
+    // Zero errors give a ratio of exactly 0.
+    const error_ratio = @as(f64, @floatFromInt(e)) / @as(f64, @floatFromInt(pattern.len));
+    // Absolute distance between the expected and the actual location.
+    const offset: usize = @max(loc, x) - @min(loc, x);
+    if (dmp.match_distance == 0) {
+        // Dodge the divide by zero below.
+        return if (offset == 0) error_ratio else 1.0;
+    }
+    const offset_penalty = @as(f64, @floatFromInt(offset)) / @as(f64, @floatFromInt(dmp.match_distance));
+    return error_ratio + offset_penalty;
+}
+
+/// Build the Bitap alphabet for `pattern`: a map from each byte that occurs
+/// in the pattern to a bit mask of its positions. The byte at index `i`
+/// sets bit `pattern.len - i - 1`, so the first byte owns the highest bit.
+///
+/// The returned map is backed by `allocator`; the caller must `deinit` it.
+///
+/// @param pattern The text to encode.
+/// @return Hash of character locations.
+fn matchAlphabet(allocator: Allocator, pattern: []const u8) error{OutOfMemory}!std.AutoHashMap(u8, usize) {
+    var masks = std.AutoHashMap(u8, usize).init(allocator);
+    errdefer masks.deinit();
+    for (pattern, 0..) |byte, index| {
+        const bit_pos: u6 = @intCast(pattern.len - index - 1);
+        const slot = try masks.getOrPut(byte);
+        // A byte seen for the first time starts with an empty mask.
+        if (!slot.found_existing) slot.value_ptr.* = 0;
+        slot.value_ptr.* |= @as(usize, 1) << bit_pos;
+    }
+    return masks;
+}
+
+//| PATCH FUNCTIONS
+
+/// Grow the context around `patch` until the pattern it covers is unique in
+/// `text`, but stop growing once the pattern reaches
+/// `match_max_bits - 2 * patch_margin` bytes. The surrounding text is then
+/// attached to the patch as a leading and a trailing equality, and the
+/// patch's start offsets and lengths are adjusted to include them.
+///
+/// Does nothing when `text` is empty. The added equalities are duplicated
+/// with `allocator` and stored in `patch.diffs`.
+///
+/// @param patch The patch to grow.
+/// @param text Source text.
+fn patchAddContext(
+    dmp: DiffMatchPatch,
+    allocator: Allocator,
+    patch: *Patch,
+    text: []const u8,
+) error{OutOfMemory}!void {
+    if (text.len == 0) return;
+    // TODO the fixup logic here might make patterns too large?
+    // It should be ok, because big patches get broken up. Hmm.
+    // Also, the SimpleNote maintained branch does it this way.
+
+    // The span of `text` touched by the patch; fixed until the very end.
+    const edit_start = patch.start2;
+    const edit_end = patch.start2 + patch.length1;
+    const margin = dmp.patch_margin;
+
+    // Widen the pattern one margin at a time until it occurs only once.
+    var padding: usize = 0;
+    var pattern = text[edit_start..edit_end];
+    const max_width: usize = dmp.match_max_bits - (2 * margin);
+    while (pattern.len < max_width and
+        std.mem.indexOf(u8, text, pattern) != std.mem.lastIndexOf(u8, text, pattern))
+    {
+        padding += margin;
+        const widened_end = @min(text.len, edit_end + padding);
+        pattern = text[edit_start -| padding..widened_end];
+    }
+    // Add one chunk for good luck.
+    padding += margin;
+
+    // Leading context, nudged so that it does not begin inside a codepoint.
+    const prefix_start: usize = fixSplitBackward(text, edit_start -| padding);
+    const prefix = text[prefix_start..edit_start];
+    if (prefix.len != 0) {
+        try patch.diffs.ensureUnusedCapacity(allocator, 1);
+        const owned_prefix = try allocator.dupe(u8, prefix);
+        patch.diffs.insertAssumeCapacity(0, Diff.init(.equal, owned_prefix));
+    }
+
+    // Trailing context, nudged so that it does not end inside a codepoint.
+    const suffix_end: usize = fixSplitForward(text, @min(text.len, edit_end + padding));
+    const suffix = text[edit_end..suffix_end];
+    if (suffix.len != 0) {
+        try patch.diffs.ensureUnusedCapacity(allocator, 1);
+        const owned_suffix = try allocator.dupe(u8, suffix);
+        patch.diffs.appendAssumeCapacity(Diff.init(.equal, owned_suffix));
+    }
+
+    // Roll back the start points and extend the lengths to cover the context.
+    patch.start1 -= prefix.len;
+    patch.start2 -= prefix.len;
+    const added = prefix.len + suffix.len;
+    patch.length1 += added;
+    patch.length2 += added;
+}
+
+/// Determines how to handle Diffs in a patch. Functions which create
+/// the diffs internally can use `.own`: the Diffs will be copied to
+/// the patch list, new ones allocated, and old ones freed. Then call
+/// `deinit` on the DiffList, but not `deinitDiffList`. This *must not*
+/// be used if the DiffList is not immediately freed, because some of
+/// the diffs will contain spuriously empty text.
+///
+/// Functions which operate on an existing DiffList should use `.copy`:
+/// as the name indicates, copies of the Diffs will be made, and the
+/// original memory must be freed separately.
+///
+/// NOTE(review): `diffAndMakePatch` passes `.own` and still defers
+/// `deinitDiffList` on its list, which does not match the advice above.
+/// Confirm which of the two is intended.
+const DiffHandling = enum {
+ /// Clone each Diff into the patch; the caller's DiffList is not modified.
+ copy,
+ /// Move Diffs into the patch, leaving empty placeholders in the DiffList.
+ own,
+};
+
+/// Diff `text1` against `text2` and turn the result into a list of patches
+/// that transform `text1` into `text2`.
+///
+/// Diffs with more than two entries are first tidied with the semantic and
+/// efficiency cleanups. The intermediate diff list is released before
+/// returning; the caller owns the returned `PatchList`.
+pub fn diffAndMakePatch(
+    dmp: DiffMatchPatch,
+    allocator: Allocator,
+    text1: []const u8,
+    text2: []const u8,
+) error{OutOfMemory}!PatchList {
+    var diffs = try dmp.diff(allocator, text1, text2, true);
+    defer deinitDiffList(allocator, &diffs);
+
+    const worth_cleaning = diffs.items.len > 2;
+    if (worth_cleaning) {
+        try diffCleanupSemantic(allocator, &diffs);
+        try dmp.diffCleanupEfficiency(allocator, &diffs);
+    }
+
+    // The diffs were created here, so the patches may take them over.
+    const handling: DiffHandling = .own;
+    return dmp.makePatchInternal(allocator, text1, diffs, handling);
+}
+
+/// Produce the `Diff` that a patch will own for `diffs.items[index]`.
+///
+/// In `.copy` mode the diff is cloned and the list is left untouched. In
+/// `.own` mode the diff itself is handed over and its slot in the list is
+/// replaced by an empty equality, so the text has exactly one owner.
+fn takeDiffForPatch(
+    allocator: Allocator,
+    diffs: DiffList,
+    index: usize,
+    diff_act: DiffHandling,
+) error{OutOfMemory}!Diff {
+    const source = diffs.items[index];
+    switch (diff_act) {
+        .copy => return try source.clone(allocator),
+        .own => {
+            diffs.items[index] = Diff{ .operation = .equal, .text = "" };
+            return source;
+        },
+    }
+}
+
+/// Build a list of patches from `diffs`, which describe how to turn `text`
+/// into its successor.
+///
+/// Consecutive edits, together with the small equalities between them, are
+/// gathered into one patch. An equality of at least `2 * patch_margin`
+/// bytes closes the current patch, which is then given context by
+/// `patchAddContext`. Patch offsets use a rolling context: each patch is
+/// positioned against the text as it looks after the previous patches.
+///
+/// `diff_act` selects whether the patches clone the diffs (`.copy`) or take
+/// them over (`.own`). In `.own` mode entries of `diffs` are overwritten
+/// with empty equalities as they are consumed.
+///
+/// @return List of Patch objects. The caller owns it.
+fn makePatchInternal(
+    dmp: DiffMatchPatch,
+    allocator: Allocator,
+    text: []const u8,
+    diffs: DiffList,
+    diff_act: DiffHandling,
+) error{OutOfMemory}!PatchList {
+    var patches = PatchList{};
+    errdefer deinitPatchList(allocator, &patches);
+    if (diffs.items.len == 0) {
+        return patches; // Empty diff means empty patchlist
+    }
+
+    var char_count1: usize = 0;
+    var char_count2: usize = 0;
+    // `prepatch_text` starts out as the caller's `text`, which we must not
+    // free; once replaced by our own copy (`first_patch == false`) we do.
+    var first_patch = true;
+    var prepatch_text = text;
+    defer {
+        if (!first_patch)
+            allocator.free(prepatch_text);
+    }
+    // Calculate amount of extra bytes needed.
+    // This should let the allocator reuse freed space.
+    var extra: isize = 0;
+    for (diffs.items) |a_diff| {
+        switch (a_diff.operation) {
+            .insert => extra += @intCast(a_diff.text.len),
+            .delete => extra -= @intCast(a_diff.text.len),
+            .equal => {},
+        }
+    }
+    const extra_u: usize = if (extra > 0) @intCast(extra) else 0;
+    const dummy_diff = Diff{ .operation = .equal, .text = "" };
+    var postpatch = try std.ArrayList(u8).initCapacity(allocator, text.len + extra_u);
+    defer postpatch.deinit();
+    postpatch.appendSliceAssumeCapacity(text);
+
+    // Widened so that doubling a large `patch_margin` cannot overflow.
+    const context_span: usize = 2 * @as(usize, dmp.patch_margin);
+    const last_index = diffs.items.len - 1;
+
+    var patch = Patch{};
+    // Covers the patch under construction; finished patches are owned by
+    // `patches` and `patch` is reset to empty right after each hand-over.
+    errdefer patch.deinit(allocator);
+    for (diffs.items, 0..) |a_diff, i| {
+        if (patch.diffs.items.len == 0 and a_diff.operation != .equal) {
+            // A new patch starts here.
+            patch.start1 = char_count1;
+            patch.start2 = char_count2;
+        }
+        switch (a_diff.operation) {
+            .insert => {
+                try patch.diffs.ensureUnusedCapacity(allocator, 1);
+                patch.diffs.appendAssumeCapacity(
+                    try takeDiffForPatch(allocator, diffs, i, diff_act),
+                );
+                patch.length2 += a_diff.text.len;
+                try postpatch.insertSlice(char_count2, a_diff.text);
+            },
+            .delete => {
+                try patch.diffs.ensureUnusedCapacity(allocator, 1);
+                patch.diffs.appendAssumeCapacity(
+                    try takeDiffForPatch(allocator, diffs, i, diff_act),
+                );
+                patch.length1 += a_diff.text.len;
+                try postpatch.replaceRange(char_count2, a_diff.text.len, "");
+            },
+            .equal => {
+                // Set when, in `.own` mode, this diff's text now belongs to
+                // the patch and therefore must not be freed below.
+                var moved_into_patch = false;
+                // "Not the last diff" is decided by position. Comparing by
+                // value would wrongly skip an interior equality whose text
+                // happens to equal that of the final diff.
+                if (a_diff.text.len <= context_span and patch.diffs.items.len != 0 and i != last_index) {
+                    // Small equality inside a patch.
+                    try patch.diffs.ensureUnusedCapacity(allocator, 1);
+                    patch.diffs.appendAssumeCapacity(
+                        try takeDiffForPatch(allocator, diffs, i, diff_act),
+                    );
+                    moved_into_patch = diff_act == .own;
+                    patch.length1 += a_diff.text.len;
+                    patch.length2 += a_diff.text.len;
+                }
+                if (a_diff.text.len >= context_span) {
+                    // Time for a new patch.
+                    if (patch.diffs.items.len != 0) {
+                        // Free the Diff if we own it and did not just move
+                        // it into the patch (an equality of exactly
+                        // `context_span` bytes takes both branches).
+                        if (diff_act == .own and !moved_into_patch) {
+                            allocator.free(a_diff.text);
+                            diffs.items[i] = dummy_diff;
+                        }
+                        try dmp.patchAddContext(allocator, &patch, prepatch_text);
+                        try patches.ensureUnusedCapacity(allocator, 1);
+                        patches.appendAssumeCapacity(patch);
+                        patch = Patch{};
+                        // Unlike Unidiff, our patch lists have a rolling context.
+                        // https://github.com/google/diff-match-patch/wiki/Unidiff
+                        // Update prepatch text & pos to reflect the application of the
+                        // just completed patch.
+                        const free_patch_text = prepatch_text;
+                        prepatch_text = try allocator.dupe(u8, postpatch.items);
+                        if (first_patch) {
+                            // no free on first, we don't own the original text
+                            first_patch = false;
+                        } else {
+                            allocator.free(free_patch_text);
+                        }
+                        char_count1 = char_count2;
+                    }
+                }
+            },
+        }
+        // Update the current character count. Only the length of
+        // `a_diff.text` is read here, which stays valid even if the text
+        // itself was freed above.
+        if (a_diff.operation != .insert) {
+            char_count1 += a_diff.text.len;
+        }
+        if (a_diff.operation != .delete) {
+            char_count2 += a_diff.text.len;
+        }
+    } // end for loop
+    // Pick up the leftover patch if not empty.
+    if (patch.diffs.items.len != 0) {
+        try dmp.patchAddContext(allocator, &patch, prepatch_text);
+        try patches.ensureUnusedCapacity(allocator, 1);
+        patches.appendAssumeCapacity(patch);
+    }
+    return patches;
+}
+
+/// Compute the list of patches that turns `text` into the text described by
+/// `diffs`. The second text is not needed: `diffs` already holds the delta.
+///
+/// The diffs are copied, never consumed, so the caller keeps ownership of
+/// `diffs` and additionally owns the returned `PatchList`.
+///
+/// @param text Old text.
+/// @param diffs Array of Diff objects for the old text to the new text.
+pub fn makePatch(
+    dmp: DiffMatchPatch,
+    allocator: Allocator,
+    text: []const u8,
+    diffs: DiffList,
+) error{OutOfMemory}!PatchList {
+    const handling: DiffHandling = .copy;
+    return dmp.makePatchInternal(allocator, text, diffs, handling);
+}
+
+/// Compute a list of patches from `diffs` alone. The source text is first
+/// rebuilt from the diffs with `diffBeforeText`, then handed to `makePatch`.
+///
+/// The rebuilt text is temporary and released before returning; the caller
+/// owns the returned `PatchList`.
+pub fn makePatchFromDiffs(
+    dmp: DiffMatchPatch,
+    allocator: Allocator,
+    diffs: DiffList,
+) error{OutOfMemory}!PatchList {
+    const before_text = try diffBeforeText(allocator, diffs);
+    defer allocator.free(before_text);
+    return dmp.makePatch(allocator, before_text, diffs);
+}
+
+/// Merge a set of patches onto the text. Returns a tuple: the first element
+/// is the patched text, the second is a `bool` which is `false` if any
+/// patch could not be applied and `true` otherwise.
+///
+/// The patches are cloned before use, so `og_patches` is not modified. The
+/// returned text is allocated with `allocator` (even for an empty patch
+/// list, where it is a copy of `og_text`).
+///
+/// TODO I'm just going to return a boolean saying whether all patches
+/// were successful. Rethink this at some point. Possibility: build up a
+/// patch string with all unsuccessful patches, it's a legible plain-text
+/// format containing the failed edits, which could be converted into a patch
+/// again, or used directly in an error message, or the slop turned up on the
+/// dmp object and the patch reattempted. The delta allows us to adjust any
+/// failed patches so they "fit" the next text.
+///
+/// @param og_patches Array of Patch objects
+/// @param og_text Old text.
+/// @return Two element tuple, containing the new text and a single bool
+/// telling whether all patches were applied.
+pub fn patchApply(
+ dmp: DiffMatchPatch,
+ allocator: Allocator,
+ og_patches: *PatchList,
+ og_text: []const u8,
+) error{OutOfMemory}!struct { []const u8, bool } {
+ if (og_patches.items.len == 0) {
+ // As silly as this is, we dupe the text, because something
+ // passing an empty patchset isn't going to check, and will
+ // end up double-freeing if we don't. Going with 'true' as
+ // the null patchset was successfully 'applied' here.
+ return .{ try allocator.dupe(u8, og_text), true };
+ }
+ // So we can report if all patches were applied:
+ var all_applied = true;
+ // Deep copy the patches so that no changes are made to originals.
+ var patches = try patchListClone(allocator, og_patches);
+ defer deinitPatchList(allocator, &patches);
+ const null_padding = try dmp.patchAddPadding(allocator, &patches);
+ defer allocator.free(null_padding);
+ // Working copy of the text with the padding on both sides.
+ var text = try std.ArrayList(u8).initCapacity(allocator, og_text.len + 2 * null_padding.len);
+ defer text.deinit();
+ text.appendSliceAssumeCapacity(null_padding);
+ text.appendSliceAssumeCapacity(og_text);
+ text.appendSliceAssumeCapacity(null_padding);
+ try dmp.patchSplitMax(allocator, &patches);
+ // delta keeps track of the offset between the expected and actual
+ // location of the previous patch. If there are patches expected at
+ // positions 10 and 20, but the first patch was found at 12, delta is 2
+ // and the second patch has an effective expected position of 22.
+ var delta: isize = 0;
+ for (patches.items) |a_patch| {
+ const expected_loc = cast(usize, (cast(isize, a_patch.start2) + delta));
+ // The text this patch expects to find (equalities and deletions).
+ const text1 = try diffBeforeText(allocator, a_patch.diffs);
+ defer allocator.free(text1);
+ var maybe_start: ?usize = null;
+ var maybe_end: ?usize = null;
+ const m_max_b = dmp.match_max_bits;
+ if (text1.len > m_max_b) {
+ // patchSplitMax will only provide an oversized pattern
+ // in the case of a monster delete.
+ // Match the first and the last `m_max_b` bytes separately.
+ maybe_start = try dmp.matchMain(
+ allocator,
+ text.items,
+ text1[0..m_max_b],
+ expected_loc,
+ );
+ if (maybe_start) |start| {
+ // Ok because we tested and text1.len is larger.
+ const e_start = text1.len - m_max_b;
+ maybe_end = try dmp.matchMain(
+ allocator,
+ text.items,
+ text1[e_start..],
+ e_start + expected_loc,
+ );
+ // No match if a) no end_loc or b) the matches cross each other.
+ if (maybe_end) |end| {
+ if (start >= end) {
+ maybe_start = null;
+ }
+ } else {
+ maybe_start = null;
+ }
+ }
+ } else {
+ maybe_start = try dmp.matchMain(allocator, text.items, text1, expected_loc);
+ }
+ if (maybe_start) |start| {
+ // Found a match. :)
+ delta = cast(isize, start) - cast(isize, expected_loc);
+ // results[x] = true;
+ // The span of the working text that the match covers.
+ const text2 = t2: {
+ if (maybe_end) |end| {
+ break :t2 text.items[start..@min(end + m_max_b, text.items.len)];
+ } else {
+ break :t2 text.items[start..@min(start + text1.len, text.items.len)];
+ }
+ };
+ if (std.mem.eql(u8, text1, text2)) {
+ // Perfect match, just shove the replacement text in.
+ const diff_text = try diffAfterText(allocator, a_patch.diffs);
+ defer allocator.free(diff_text);
+ try text.replaceRange(start, text1.len, diff_text);
+ } else {
+ // Imperfect match. Run a diff to get a framework of equivalent
+ // indices.
+ var diffs = try dmp.diff(
+ allocator,
+ text1,
+ text2,
+ false,
+ );
+ defer deinitDiffList(allocator, &diffs);
+ const t1_l_float: f64 = @floatFromInt(text1.len);
+ const levenshtein: f64 = diffLevenshtein(diffs);
+ const bad_match = levenshtein / t1_l_float > dmp.patch_delete_threshold;
+ if (text1.len > m_max_b and bad_match) {
+ // The end points match, but the content is unacceptably bad.
+ // results[x] = false;
+ all_applied = false;
+ } else {
+ try diffCleanupSemanticLossless(allocator, &diffs);
+ // index1 is a position in text1; diffIndex translates it
+ // to the corresponding position in text2.
+ var index1: usize = 0;
+ for (a_patch.diffs.items) |a_diff| {
+ if (a_diff.operation != .equal) {
+ const index2 = diffIndex(diffs, index1);
+ if (a_diff.operation == .insert) {
+ // Insertion
+ try text.insertSlice(start + index2, a_diff.text);
+ } else if (a_diff.operation == .delete) {
+ // Deletion
+ // Despite its name, `delete_at` is the number of
+ // bytes to remove starting at `start + index2`.
+ const delete_at = diffIndex(diffs, index1 + a_diff.text.len) - index2;
+ text.replaceRangeAssumeCapacity(
+ start + index2,
+ delete_at,
+ &.{},
+ );
+ }
+ }
+ if (a_diff.operation != .delete) {
+ index1 += a_diff.text.len;
+ }
+ }
+ }
+ }
+ } else {
+ // No match found. :(
+ all_applied = false;
+ // Subtract the delta for this failed patch from subsequent patches.
+ delta -= cast(isize, a_patch.length2) - cast(isize, a_patch.length1);
+ }
+ }
+ // strip padding
+ // The first statement removes the leading padding, the second drops the
+ // trailing padding by shortening the list.
+ text.replaceRangeAssumeCapacity(0, null_padding.len, &.{});
+ text.items.len -= null_padding.len;
+ return .{ try text.toOwnedSlice(), all_applied };
+}
+
+/// Look through the patches and break up any which are longer than the
+/// maximum limit of the match algorithm (`match_max_bits`).
+/// Intended to be called only from within patchApply.
+///
+/// Each oversized patch is removed from `patches` and replaced, in place,
+/// by a sequence of smaller patches which carry `patch_margin` bytes of
+/// context from their neighbours.
+///
+/// @param patches List of Patch objects.
+fn patchSplitMax(
+ dmp: DiffMatchPatch,
+ allocator: Allocator,
+ patches: *PatchList,
+) error{OutOfMemory}!void {
+ const patch_size = dmp.match_max_bits;
+ const patch_margin = dmp.patch_margin;
+ const max_patch_len = patch_size - patch_margin;
+ // Mutating an array while iterating it? Sure, lets!
+ // The index is signed because it is decremented after a removal, which
+ // can take it to -1 before the loop's increment runs.
+ var x_i: isize = 0;
+ while (x_i < patches.items.len) : (x_i += 1) {
+ const x: usize = @intCast(x_i);
+ if (patches.items[x].length1 <= patch_size) continue;
+
+ // We have a big ol' patch.
+ var bigpatch = patches.orderedRemove(x);
+ defer bigpatch.deinit(allocator);
+ // Prevent incrementing past the next patch:
+ x_i -= 1;
+ var start1 = bigpatch.start1;
+ var start2 = bigpatch.start2;
+ // start with an empty precontext so that we can deinit consistently
+ var precontext: []const u8 = try allocator.alloc(u8, 0);
+ while (bigpatch.diffs.items.len != 0) {
+ // While the guard is true, `precontext` is still ours to free on
+ // error; it is cleared once the patch takes the text over.
+ var guard_precontext = true;
+ errdefer {
+ if (guard_precontext) {
+ allocator.free(precontext);
+ }
+ }
+ // Create one of several smaller patches.
+ var patch = Patch{};
+ errdefer patch.deinit(allocator);
+ // Stays true until the patch receives an insertion or a deletion.
+ var empty = true;
+ patch.start1 = start1 - precontext.len;
+ patch.start2 = start2 - precontext.len;
+ if (precontext.len != 0) {
+ patch.length2 = precontext.len;
+ patch.length1 = precontext.len;
+ try patch.diffs.ensureUnusedCapacity(allocator, 1);
+ // From here on the patch owns `precontext`.
+ guard_precontext = false;
+ patch.diffs.appendAssumeCapacity(
+ Diff{
+ .operation = .equal,
+ .text = precontext,
+ },
+ );
+ }
+ while (bigpatch.diffs.items.len != 0 and patch.length1 < max_patch_len) {
+ const diff_type = bigpatch.diffs.items[0].operation;
+ const diff_text = bigpatch.diffs.items[0].text;
+ if (diff_type == .insert) {
+ // Insertions are harmless.
+ patch.length2 += diff_text.len;
+ start2 += diff_text.len;
+ // Move the patch (transfers ownership)
+ try patch.diffs.ensureUnusedCapacity(allocator, 1);
+ patch.diffs.appendAssumeCapacity(bigpatch.diffs.orderedRemove(0));
+ empty = false;
+ } else if (patch.diffs.items.len == 1 and cond: {
+ // zig fmt simply will not line break if clauses :/
+ const a = diff_type == .delete;
+ const b = patch.diffs.items[0].operation == .equal;
+ const c = diff_text.len > 2 * patch_size;
+ break :cond a and b and c;
+ }) {
+ // This is a large deletion. Let it pass in one chunk.
+ patch.length1 += diff_text.len;
+ start1 += diff_text.len;
+ empty = false;
+ // Transfer to patch:
+ try patch.diffs.ensureUnusedCapacity(allocator, 1);
+ patch.diffs.appendAssumeCapacity(bigpatch.diffs.orderedRemove(0));
+ } else {
+ // Deletion or equality. Only take as much as we can stomach.
+ // Note: because this is an internal function, we don't care
+ // about codepoint splitting, which won't affect the final
+ // result.
+ const text_end = @min(diff_text.len, patch_size - patch.length1 - patch_margin);
+ const new_diff_text = diff_text[0..text_end];
+ patch.length1 += new_diff_text.len;
+ start1 += new_diff_text.len;
+ if (diff_type == .equal) {
+ patch.length2 += new_diff_text.len;
+ start2 += new_diff_text.len;
+ } else {
+ empty = false;
+ }
+ // Now check if we did anything.
+ try patch.diffs.ensureUnusedCapacity(allocator, 1);
+ if (new_diff_text.len == diff_text.len) {
+ // We can reuse the diff.
+ patch.diffs.appendAssumeCapacity(bigpatch.diffs.orderedRemove(0));
+ } else {
+ // Free and dupe
+ // The patch gets a copy of the part taken; bigpatch keeps a
+ // copy of the remainder, and the original text is freed.
+ patch.diffs.appendAssumeCapacity(Diff{
+ .operation = diff_type,
+ .text = try allocator.dupe(u8, new_diff_text),
+ });
+ const old_diff = bigpatch.diffs.items[0];
+ bigpatch.diffs.items[0] = Diff{
+ .operation = diff_type,
+ .text = try allocator.dupe(u8, diff_text[new_diff_text.len..]),
+ };
+ allocator.free(old_diff.text);
+ }
+ }
+ }
+ // Append the end context for this patch.
+ // At most `patch_margin` bytes of what remains in bigpatch.
+ const post_text = try diffBeforeText(allocator, bigpatch.diffs);
+ const postcontext = post: {
+ if (post_text.len > patch_margin) {
+ defer allocator.free(post_text);
+ const truncated = try allocator.dupe(u8, post_text[0..patch_margin]);
+ break :post truncated;
+ } else {
+ break :post post_text;
+ }
+ };
+ // Same ownership guard as for `precontext`, for the error path.
+ var guard_postcontext = true;
+ errdefer {
+ if (guard_postcontext) {
+ allocator.free(postcontext);
+ }
+ }
+ // Compute the head context for the next patch, if we're going to
+ // need it.
+ if (bigpatch.diffs.items.len != 0) {
+ const after_text = try diffAfterText(allocator, patch.diffs);
+ if (patch_margin > after_text.len) {
+ precontext = after_text;
+ } else {
+ defer allocator.free(after_text);
+ precontext = try allocator.dupe(u8, after_text[after_text.len - patch_margin ..]);
+ }
+ guard_precontext = true;
+ }
+ if (postcontext.len != 0) {
+ try patch.diffs.ensureUnusedCapacity(allocator, 1);
+ patch.length1 += postcontext.len;
+ patch.length2 += postcontext.len;
+ const last_diff = patch.diffs.getLastOrNull();
+ if (last_diff != null and last_diff.?.operation == .equal) {
+ // Free this diff and swap in a new one.
+ // The defer releases both source strings whether or not
+ // the concatenation below succeeds.
+ defer {
+ allocator.free(last_diff.?.text);
+ allocator.free(postcontext);
+ guard_postcontext = false;
+ }
+ // Pop the old equality so the patch no longer refers to it.
+ patch.diffs.items.len -= 1;
+ const new_diff_text = try std.mem.concat(
+ allocator,
+ u8,
+ &.{
+ last_diff.?.text,
+ postcontext,
+ },
+ );
+ patch.diffs.appendAssumeCapacity(
+ Diff{ .operation = .equal, .text = new_diff_text },
+ );
+ } else {
+ // New diff from postcontext.
+ patch.diffs.appendAssumeCapacity(
+ Diff{ .operation = .equal, .text = postcontext },
+ );
+ }
+ guard_postcontext = false;
+ }
+ if (!empty) {
+ // Insert the next patch
+ // Goes after x, and we need increment to skip:
+ x_i += 1;
+ try patches.insert(allocator, @intCast(x_i), patch);
+ } else {
+ // Context only, no actual edit: discard it.
+ patch.deinit(allocator);
+ }
+ } // We don't use the last precontext
+ // allocator.free(precontext);
+ }
+}
+
+/// Pad the edges of a patch list so that patches touching the very start or
+/// end of the text still have context to match against.
+/// Intended to be called only from within patchApply.
+///
+/// The padding is the byte sequence 1, 2, ..., `patch_margin`. Every patch
+/// is shifted forward by that many bytes; the first patch gains (or has its
+/// leading equality grown to) a full margin of padding, and the last patch
+/// likewise at its end.
+///
+/// @param patches Array of Patch objects.
+/// @return The padding string added to each side. For a non-empty patch
+/// list it is allocated with `allocator`; for an empty list it is `""`.
+fn patchAddPadding(
+    dmp: DiffMatchPatch,
+    allocator: Allocator,
+    patches: *PatchList,
+) error{OutOfMemory}![]const u8 {
+    if (patches.items.len == 0) return "";
+    const pad_len = dmp.patch_margin;
+    var paddingcodes = try std.ArrayList(u8).initCapacity(allocator, pad_len);
+    defer paddingcodes.deinit();
+
+    // Fill the padding with the control codes 1..pad_len.
+    var code: u8 = 1;
+    while (code <= pad_len) : (code += 1) {
+        paddingcodes.appendAssumeCapacity(code);
+    }
+
+    // Bump all the patches forward.
+    for (patches.items) |*each| {
+        each.start1 += pad_len;
+        each.start2 += pad_len;
+    }
+
+    // Add some padding on start of first diff.
+    const first_patch = &patches.items[0];
+    const first_diffs = &first_patch.diffs;
+    const head_missing = first_diffs.items.len == 0 or first_diffs.items[0].operation != .equal;
+    if (head_missing) {
+        // No leading equality: add one made entirely of padding.
+        try first_diffs.ensureUnusedCapacity(allocator, 1);
+        const head_text = try allocator.dupe(u8, paddingcodes.items);
+        first_diffs.insertAssumeCapacity(0, Diff{ .operation = .equal, .text = head_text });
+        // Should be 0 due to prior patch bump
+        first_patch.start1 -= pad_len;
+        assert(first_patch.start1 == 0);
+        first_patch.start2 -= pad_len;
+        assert(first_patch.start2 == 0);
+        first_patch.length1 += pad_len;
+        first_patch.length2 += pad_len;
+    } else if (pad_len > first_diffs.items[0].text.len) {
+        // Leading equality is shorter than the margin: grow it by
+        // prepending the tail of the padding.
+        const head = &first_diffs.items[0];
+        const stale_text = head.text;
+        const missing = pad_len - stale_text.len;
+        head.text = try std.mem.concat(
+            allocator,
+            u8,
+            &.{ paddingcodes.items[stale_text.len..], stale_text },
+        );
+        allocator.free(stale_text);
+        first_patch.start1 -= missing;
+        first_patch.start2 -= missing;
+        first_patch.length1 += missing;
+        first_patch.length2 += missing;
+    }
+
+    // Add some padding on end of last diff.
+    const last_patch = &patches.items[patches.items.len - 1];
+    const last_diffs = &last_patch.diffs;
+    const tail_missing = (last_diffs.items.len == 0) or (last_diffs.getLast().operation != .equal);
+    if (tail_missing) {
+        // No trailing equality: add one made entirely of padding.
+        try last_diffs.ensureUnusedCapacity(allocator, 1);
+        const tail_text = try allocator.dupe(u8, paddingcodes.items);
+        last_diffs.appendAssumeCapacity(Diff{ .operation = .equal, .text = tail_text });
+        last_patch.length1 += pad_len;
+        last_patch.length2 += pad_len;
+    } else if (pad_len > last_diffs.getLast().text.len) {
+        // Trailing equality is shorter than the margin: grow it by
+        // appending the head of the padding.
+        const tail = &last_diffs.items[last_diffs.items.len - 1];
+        const stale_text = tail.text;
+        const missing = pad_len - stale_text.len;
+        tail.text = try std.mem.concat(
+            allocator,
+            u8,
+            &.{ stale_text, paddingcodes.items[0..missing] },
+        );
+        allocator.free(stale_text);
+        last_patch.length2 += missing;
+        last_patch.length1 += missing;
+    }
+    return paddingcodes.toOwnedSlice();
+}
+
+/// Build a new `PatchList` holding a clone of every patch in `patches`.
+///
+/// Each element is copied with its own `clone` method. If any clone fails,
+/// the partially built list is released with `deinitPatchList` before the
+/// error is returned. On success the caller owns the returned list.
+fn patchListClone(allocator: Allocator, patches: *PatchList) error{OutOfMemory}!PatchList {
+    var copies: PatchList = .{};
+    errdefer deinitPatchList(allocator, &copies);
+    // Reserve all slots up front so the appends below cannot fail.
+    try copies.ensureTotalCapacity(allocator, patches.items.len);
+    var index: usize = 0;
+    while (index < patches.items.len) : (index += 1) {
+        const duplicate = try patches.items[index].clone(allocator);
+        copies.appendAssumeCapacity(duplicate);
+    }
+    return copies;
+}
+
+/// Render `patches` to their textual form in freshly allocated memory.
+///
+/// The text is produced by `writePatch` into a growable buffer; the caller
+/// owns the returned slice and must free it with `allocator`.
+pub fn patchToText(allocator: Allocator, patches: PatchList) error{OutOfMemory}![]const u8 {
+    var buffer = std.ArrayList(u8).init(allocator);
+    // After a successful `toOwnedSlice` the buffer is empty, so this is
+    // only meaningful on the error path.
+    defer buffer.deinit();
+    try writePatch(buffer.writer(), patches);
+    return buffer.toOwnedSlice();
+}
+
+/// Write the text of every patch in `patches` to `writer`, in list order.
+///
+/// Stops at, and returns, the first error reported by a patch's `writeText`.
+pub fn writePatch(writer: anytype, patches: PatchList) !void {
+    var index: usize = 0;
+    while (index < patches.items.len) : (index += 1) {
+        try patches.items[index].writeText(writer);
+    }
+}
+
+/// Parse a textual representation of patches into a `PatchList`.
+///
+/// `text` is consumed one patch at a time by `patchFromHeader`; an empty
+/// `text` yields an empty list. On any error the patches parsed so far are
+/// released before the error is returned.
+/// @param text Text representation of patches.
+/// @return List of Patch objects, owned by the caller.
+pub fn patchFromText(allocator: Allocator, text: []const u8) DiffError!PatchList {
+    var parsed: PatchList = .{};
+    errdefer deinitPatchList(allocator, &parsed);
+    var offset: usize = 0;
+    while (offset < text.len) {
+        // TODO catch BadPatchString here and print diagnostic
+        // Reserve the slot first so that storing the parsed patch cannot fail.
+        try parsed.ensureUnusedCapacity(allocator, 1);
+        const header_result = try patchFromHeader(allocator, text[offset..]);
+        parsed.appendAssumeCapacity(header_result[1]);
+        offset += header_result[0];
+    }
+    return parsed;
+}
+
+/// Parse the decimal number that starts at `text[start]`.
+///
+/// Returns the value and the index one past its last digit. Fails with
+/// `error.BadPatchString` when there is no digit at `start` (including when
+/// `start` is at or past the end of `text`) or the digits overflow `usize`.
+fn parsePatchNumber(text: []const u8, start: usize) error{BadPatchString}!struct { usize, usize } {
+    var end = start;
+    while (end < text.len and std.ascii.isDigit(text[end])) : (end += 1) {}
+    if (end == start) return error.BadPatchString;
+    const value = std.fmt.parseInt(usize, text[start..end], 10) catch
+        return error.BadPatchString;
+    return .{ value, end };
+}
+
+/// Parse one `start[,length]` coordinate pair of a patch header.
+///
+/// Returns `.{ start, length, end }` where `end` is the index one past the
+/// pair. A missing `,length` means a length of 1. The start is written
+/// one-based in the header whenever the length is non-zero, so it is
+/// converted to zero-based here; a start of `0` with a non-zero length
+/// cannot be converted and is rejected as `error.BadPatchString`.
+fn parsePatchCoordinates(text: []const u8, begin: usize) error{BadPatchString}!struct { usize, usize, usize } {
+    const first_value, const after_first = try parsePatchNumber(text, begin);
+    var start = first_value;
+    var cursor = after_first;
+    var length: usize = 1;
+    if (cursor < text.len and text[cursor] == ',') {
+        const second_value, const after_second = try parsePatchNumber(text, cursor + 1);
+        length = second_value;
+        cursor = after_second;
+    }
+    if (length != 0) {
+        if (start == 0) return error.BadPatchString;
+        start -= 1;
+    }
+    return .{ start, length, cursor };
+}
+
+/// Parse a single patch (header line plus its diff lines) from the start of
+/// `text`.
+///
+/// The header must be `PATCH_HEAD`, a `start[,length]` pair, `" +"`, a second
+/// `start[,length]` pair, and `PATCH_TAIL`. The lines that follow are read as
+/// diffs: `+` insertion, `-` deletion, space equality, each with its text
+/// passed through `decodeUri`. Blank lines are skipped. A line starting with
+/// `@` ends this patch and is left unconsumed for the next call.
+///
+/// Returns the number of bytes of `text` consumed (never more than
+/// `text.len`) and the parsed patch, which the caller owns. Malformed or
+/// truncated input yields `error.BadPatchString` instead of indexing out of
+/// bounds.
+fn patchFromHeader(allocator: Allocator, text: []const u8) DiffError!struct { usize, Patch } {
+    var patch = Patch{ .diffs = DiffList{} };
+    errdefer patch.deinit(allocator);
+
+    if (text.len < 4 or !std.mem.eql(u8, text[0..4], PATCH_HEAD)) {
+        return error.BadPatchString;
+    }
+    // Parse location and length in before text.
+    const start1, const length1, const before_end = try parsePatchCoordinates(text, 4);
+    patch.start1 = start1;
+    patch.length1 = length1;
+
+    // Parse location and length in after text.
+    if (before_end + 2 > text.len or text[before_end] != ' ' or text[before_end + 1] != '+') {
+        return error.BadPatchString;
+    }
+    const start2, const length2, const after_end = try parsePatchCoordinates(text, before_end + 2);
+    patch.start2 = start2;
+    patch.length2 = length2;
+
+    var cursor = after_end;
+    if (cursor + 4 > text.len or !std.mem.eql(u8, text[cursor .. cursor + 4], PATCH_TAIL)) {
+        return error.BadPatchString;
+    }
+    cursor += 4;
+
+    // Eat the diffs.
+    var patch_lines = std.mem.splitScalar(u8, text[cursor..], '\n');
+    // `splitScalar` also yields blank lines; they are skipped, but still
+    // counted so that the cursor stays in step with the input.
+    patch_loop: while (patch_lines.next()) |line| {
+        cursor += line.len + 1;
+        if (line.len == 0) continue;
+        switch (line[0]) {
+            '+', '-', ' ' => {},
+            '@' => {
+                // Start of next patch: back out the cursor so the caller
+                // re-reads this line as a header.
+                cursor -= line.len + 1;
+                break :patch_loop;
+            },
+            else => return error.BadPatchString,
+        }
+        // Microsoft encodes spaces as +, we don't, so no '+' replacement is
+        // needed before decoding.
+        const diff_text = decodeUri(allocator, line[1..]) catch |e| switch (e) {
+            error.OutOfMemory => return e,
+            else => return error.BadPatchString,
+        };
+        errdefer allocator.free(diff_text);
+        try patch.diffs.append(allocator, .{
+            .operation = switch (line[0]) {
+                '+' => .insert,
+                '-' => .delete,
+                ' ' => .equal,
+                // Every other byte was handled by the switch above.
+                else => unreachable,
+            },
+            .text = diff_text,
+        });
+    }
+    // The last line may have no trailing newline, in which case the `+ 1`
+    // above counted a byte that is not there.
+    return .{ @min(cursor, text.len), patch };
+}
+
+/// Decode our URI-esque escaping: every `%XX` in `line` becomes the byte
+/// with hexadecimal value `XX`; all other bytes are copied unchanged.
+///
+/// Returns a newly allocated slice owned by the caller. A `%` that is not
+/// followed by two hexadecimal digits (including one too close to the end of
+/// `line`) yields `error.BadPatchString`.
+fn decodeUri(allocator: Allocator, line: []const u8) DiffError![]const u8 {
+    const first = std.mem.indexOfScalar(u8, line, '%') orelse
+        return allocator.dupe(u8, line);
+    // The result is always shorter than `line`, so one reservation covers
+    // every append below.
+    var decoded = try std.ArrayList(u8).initCapacity(allocator, line.len);
+    defer decoded.deinit();
+    var cursor: usize = 0;
+    var next_escape: ?usize = first;
+    while (next_escape) |escape| {
+        if (escape + 3 > line.len) return error.BadPatchString;
+        // Copy the literal text that sits before this escape.
+        decoded.appendSliceAssumeCapacity(line[cursor..escape]);
+        var byte: [1]u8 = undefined;
+        _ = std.fmt.hexToBytes(&byte, line[escape + 1 .. escape + 3]) catch
+            return error.BadPatchString;
+        decoded.appendAssumeCapacity(byte[0]);
+        cursor = escape + 3;
+        next_escape = std.mem.indexOfScalarPos(u8, line, cursor, '%');
+    }
+    decoded.appendSliceAssumeCapacity(line[cursor..]);
+    return decoded.toOwnedSlice();
+}
+
+/// Write `text` to `writer` with Unidiff `%XX` escaping and return the
+/// number of bytes written.
+///
+/// Borrowed from https://github.com/elerch/aws-sdk-for-zig/blob/master/src/aws_http.zig
+/// under the MIT license. Thanks!
+///
+/// Modified to implement Unidiff escaping, documented here:
+/// https://github.com/google/diff-match-patch/wiki/Unidiff
+///
+/// The documentation reads:
+///
+/// > Special characters are encoded using %xx notation. The set of
+/// > characters which are encoded matches JavaScript's `encodeURI()`
+/// > function, with the exception of spaces which are not encoded.
+///
+/// So we encode everything but the characters defined by Moz:
+/// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
+///
+/// These: !#$&'()*+,-./:;=?@_~ (and alphanumeric ASCII)
+///
+/// Each URI encoded byte is formed by a '%' and the two-digit
+/// hexadecimal value of the byte.
+///
+/// Letters in the hexadecimal value must be uppercase, for example "%1A".
+///
+/// Escapes are emitted with `writeAll`, so a writer that accepts fewer bytes
+/// than offered cannot truncate an escape sequence.
+fn writeUriEncoded(writer: anytype, text: []const u8) !usize {
+    var written: usize = 0;
+    for (text) |c| {
+        const passthrough = switch (c) {
+            // Spaces are left alone, unlike `encodeURI()`.
+            ' ', '0'...'9', 'A'...'Z', 'a'...'z' => true,
+            // The contiguous run &'()*+,-./
+            '&'...'/' => true,
+            '!', '#', '$', ':', ';', '=', '?', '@', '_', '~' => true,
+            else => false,
+        };
+        if (passthrough) {
+            try writer.writeByte(c);
+            written += 1;
+            continue;
+        }
+        // Whatever remains, encode it.
+        const hex = std.fmt.bytesToHex(&[_]u8{c}, .upper);
+        const escaped = [3]u8{ '%', hex[0], hex[1] };
+        try writer.writeAll(&escaped);
+        written += escaped.len;
+    }
+    return written;
+}
+
+/// Return `text` escaped by `writeUriEncoded` as a newly allocated slice.
+/// The caller owns the result and frees it with `allocator`.
+fn encodeUri(allocator: std.mem.Allocator, text: []const u8) ![]u8 {
+    // Start at `text.len`; the buffer grows if any byte needs escaping.
+    var encoded = try std.ArrayList(u8).initCapacity(allocator, text.len);
+    defer encoded.deinit();
+    _ = try writeUriEncoded(encoded.writer(), text);
+    return encoded.toOwnedSlice();
+}
+
+//|
+//| UTILITIES
+//|
+
+/// Convert `b` to a `u8`: 1 for `true`, 0 for `false`.
+inline fn boolInt(b: bool) u8 {
+    return if (b) 1 else 0;
+}
+
+/// Starting at index `i`, advance past every byte for which `is_follow`
+/// holds and return the first index where it does not (or `text.len`).
+inline fn fixSplitForward(text: []const u8, i: usize) usize {
+    var position = i;
+    while (position < text.len) {
+        if (!is_follow(text[position])) break;
+        position += 1;
+    }
+    return position;
+}
+
+/// Starting at index `i`, step backward while the byte at the index
+/// satisfies `is_follow`, stopping at index 0. An `i` at or past the end of
+/// `text` is returned unchanged.
+inline fn fixSplitBackward(text: []const u8, i: usize) usize {
+    if (i >= text.len) return i;
+    var position = i;
+    while (position != 0 and is_follow(text[position])) {
+        position -= 1;
+    }
+    return position;
+}
+
+/// Shorthand for `@intCast`: convert the integer `val` to the integer type
+/// `as`. Like `@intCast`, the value must be representable in `as`.
+inline fn cast(as: type, val: anytype) as {
+    const converted: as = @intCast(val);
+    return converted;
+}
+
+/// Count the ASCII decimal digits at the start of `text`.
+///
+/// Returns 0 when `text` is empty or does not begin with a digit, and
+/// `text.len` when every byte is a digit.
+fn countDigits(text: []const u8) usize {
+    var idx: usize = 0;
+    // The bounds check keeps an all-digit or empty input from reading past
+    // the end of the slice.
+    while (idx < text.len and std.ascii.isDigit(text[idx])) : (idx += 1) {}
+    return idx;
+}
+
+//|
+//| TESTS
+//|
+
+test "encodeUri" {
+    const gpa = std.testing.allocator;
+
+    // Punctuation that must pass through unescaped.
+    const punctuation = "!#$&'()*+,-./:;=?@_~";
+    const punctuation_out = try encodeUri(gpa, punctuation);
+    defer gpa.free(punctuation_out);
+    try testing.expectEqualStrings(punctuation, punctuation_out);
+
+    // Space and ASCII letters must pass through unescaped as well.
+    const letters = " ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+    const letters_out = try encodeUri(gpa, letters);
+    defer gpa.free(letters_out);
+    try testing.expectEqualStrings(letters, letters_out);
+
+    // Everything else is escaped byte by byte, including multi-byte UTF-8.
+    const needs_escaping = "\"%<>[\\]^`{|}δ";
+    const escaped = try encodeUri(gpa, needs_escaping);
+    defer gpa.free(escaped);
+    try testing.expectEqualStrings("%22%25%3C%3E%5B%5C%5D%5E%60%7B%7C%7D%CE%B4", escaped);
+
+    // Decoding must restore the original bytes.
+    const round_trip = try decodeUri(gpa, escaped);
+    defer gpa.free(round_trip);
+    try testing.expectEqualStrings(needs_escaping, round_trip);
+}
+
+test diffCommonPrefix {
+    // Detect any common prefix.
+    const Case = struct { a: []const u8, b: []const u8, want: usize };
+    const cases = [_]Case{
+        .{ .a = "abc", .b = "xyz", .want = 0 }, // Null case
+        .{ .a = "1234abcdef", .b = "1234xyz", .want = 4 }, // Non-null case
+        .{ .a = "1234", .b = "1234xyz", .want = 4 }, // Whole case
+    };
+    for (cases) |case| {
+        try testing.expectEqual(case.want, diffCommonPrefix(case.a, case.b));
+    }
+}
+
+test diffCommonSuffix {
+    // Detect any common suffix.
+    const Case = struct { a: []const u8, b: []const u8, want: usize };
+    const cases = [_]Case{
+        .{ .a = "abc", .b = "xyz", .want = 0 }, // Null case
+        .{ .a = "abcdef1234", .b = "xyz1234", .want = 4 }, // Non-null case
+        .{ .a = "1234", .b = "xyz1234", .want = 4 }, // Whole case
+    };
+    for (cases) |case| {
+        try testing.expectEqual(case.want, diffCommonSuffix(case.a, case.b));
+    }
+}
+
+test diffCommonOverlap {
+    // Detect any suffix/prefix overlap.
+    const Case = struct { a: []const u8, b: []const u8, want: usize };
+    const cases = [_]Case{
+        .{ .a = "", .b = "abcd", .want = 0 }, // Null case
+        .{ .a = "abc", .b = "abcd", .want = 3 }, // Whole case
+        .{ .a = "123456", .b = "abcd", .want = 0 }, // No overlap
+        .{ .a = "123456xxx", .b = "xxxabcd", .want = 3 }, // Overlap
+        // Some overly clever languages (C#) may treat ligatures as equal to
+        // their component letters. E.g. U+FB01 == 'fi'
+        .{ .a = "fi", .b = "\u{fb01}", .want = 0 }, // Unicode
+    };
+    for (cases) |case| {
+        try testing.expectEqual(case.want, diffCommonOverlap(case.a, case.b));
+    }
+}
+
+/// Run `diffHalfMatch` on `params.before` / `params.after` and compare the
+/// result with `params.expected`. Any returned result is released with its
+/// `deinit`, so the helper can be driven by `checkAllAllocationFailures`.
+fn testDiffHalfMatch(
+    allocator: std.mem.Allocator,
+    params: struct {
+        dmp: DiffMatchPatch,
+        before: []const u8,
+        after: []const u8,
+        expected: ?HalfMatchResult,
+    },
+) !void {
+    const actual = try params.dmp.diffHalfMatch(allocator, params.before, params.after);
+    defer {
+        if (actual) |half_match| half_match.deinit(allocator);
+    }
+    try testing.expectEqualDeep(params.expected, actual);
+}
+
+/// Diff two similar sentences with a default-configured `DiffMatchPatch`
+/// and free the result. Used below to probe every allocation-failure point.
+fn testdiffHalfMatchLeak(allocator: Allocator) !void {
+    const dmp: DiffMatchPatch = .{};
+    const original = "The quick brown fox jumps over the lazy dog.";
+    const revised = "That quick brown fox jumped over a lazy dog.";
+    var diffs = try dmp.diff(allocator, revised, original, true);
+    defer deinitDiffList(allocator, &diffs);
+}
+
+test "diffHalfMatch leak regression test" {
+    // Every injected allocation failure must be survived without leaking.
+    try testing.checkAllAllocationFailures(
+        testing.allocator,
+        testdiffHalfMatchLeak,
+        .{},
+    );
+}
+
+test diffHalfMatch {
+ const one_timeout: DiffMatchPatch = .{ .diff_timeout = 1 };
+
+ // No match #1
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{
+ .dmp = one_timeout,
+ .before = "1234567890",
+ .after = "abcdef",
+ .expected = null,
+ }});
+
+ // No match #2
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{
+ .dmp = one_timeout,
+ .before = "12345",
+ .after = "23",
+ .expected = null,
+ }});
- try testing.expectEqualDeep(
- @as(?HalfMatchResult, HalfMatchResult{
+ // Single matches
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{
+ .dmp = one_timeout,
+ .before = "1234567890",
+ .after = "a345678z",
+ .expected = .{
+ .prefix_before = "12",
+ .suffix_before = "90",
+ .prefix_after = "a",
+ .suffix_after = "z",
+ .common_middle = "345678",
+ },
+ }});
+
+ // Single Match #2
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{
+ .dmp = one_timeout,
+ .before = "a345678z",
+ .after = "1234567890",
+ .expected = .{
+ .prefix_before = "a",
+ .suffix_before = "z",
+ .prefix_after = "12",
+ .suffix_after = "90",
+ .common_middle = "345678",
+ },
+ }});
+
+ // Single Match #3
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{
+ .dmp = one_timeout,
+ .before = "abc56789z",
+ .after = "1234567890",
+ .expected = .{
+ .prefix_before = "abc",
+ .suffix_before = "z",
+ .prefix_after = "1234",
+ .suffix_after = "0",
+ .common_middle = "56789",
+ },
+ }});
+
+ // Single Match #4
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{
+ .dmp = one_timeout,
+ .before = "a23456xyz",
+ .after = "1234567890",
+ .expected = .{
+ .prefix_before = "a",
+ .suffix_before = "xyz",
+ .prefix_after = "1",
+ .suffix_after = "7890",
+ .common_middle = "23456",
+ },
+ }});
+
+ // Multiple matches #1
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{
+ .dmp = one_timeout,
+ .before = "121231234123451234123121",
+ .after = "a1234123451234z",
+ .expected = .{
+ .prefix_before = "12123",
+ .suffix_before = "123121",
+ .prefix_after = "a",
+ .suffix_after = "z",
+ .common_middle = "1234123451234",
+ },
+ }});
+
+ // Multiple Matches #2
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{
+ .dmp = one_timeout,
+ .before = "x-=-=-=-=-=-=-=-=-=-=-=-=",
+ .after = "xx-=-=-=-=-=-=-=",
+ .expected = .{
.prefix_before = "",
.suffix_before = "-=-=-=-=-=",
.prefix_after = "x",
.suffix_after = "",
.common_middle = "x-=-=-=-=-=-=-=",
- }),
- try one_timeout.diffHalfMatch(arena.allocator(), "x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-="),
- ); // Multiple Matches #2
-
- try testing.expectEqualDeep(@as(?HalfMatchResult, HalfMatchResult{
- .prefix_before = "-=-=-=-=-=",
- .suffix_before = "",
- .prefix_after = "",
- .suffix_after = "y",
- .common_middle = "-=-=-=-=-=-=-=y",
- }), try one_timeout.diffHalfMatch(arena.allocator(), "-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy")); // Multiple Matches #3
+ },
+ }});
+
+ // Multiple Matches #3
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{
+ .dmp = one_timeout,
+ .before = "-=-=-=-=-=-=-=-=-=-=-=-=y",
+ .after = "-=-=-=-=-=-=-=yy",
+ .expected = .{
+ .prefix_before = "-=-=-=-=-=",
+ .suffix_before = "",
+ .prefix_after = "",
+ .suffix_after = "y",
+ .common_middle = "-=-=-=-=-=-=-=y",
+ },
+ }});
// Other cases
+
// Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy
- try testing.expectEqualDeep(@as(?HalfMatchResult, HalfMatchResult{
- .prefix_before = "qHillo",
- .suffix_before = "w",
- .prefix_after = "x",
- .suffix_after = "Hulloy",
- .common_middle = "HelloHe",
- }), try one_timeout.diffHalfMatch(arena.allocator(), "qHilloHelloHew", "xHelloHeHulloy")); // Non-optimal halfmatch
+ // Non-optimal halfmatch
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{
+ .dmp = one_timeout,
+ .before = "qHilloHelloHew",
+ .after = "xHelloHeHulloy",
+ .expected = .{
+ .prefix_before = "qHillo",
+ .suffix_before = "w",
+ .prefix_after = "x",
+ .suffix_after = "Hulloy",
+ .common_middle = "HelloHe",
+ },
+ }});
+
+ // Non-optimal halfmatch
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffHalfMatch, .{.{
+ .dmp = .{ .diff_timeout = 0 },
+ .before = "qHilloHelloHew",
+ .after = "xHelloHeHulloy",
+ .expected = null,
+ }});
+}
+
+// Checks `diffLinesToChars`: each distinct line of the two inputs is stood
+// in for by one character in `chars_1` / `chars_2`, and `line_array` lists
+// the distinct lines in the order they were first seen.
+test diffLinesToChars {
+ const allocator = testing.allocator;
+ // Convert lines down to characters.
+ // `tmp_array_list` holds the expected `line_array` contents for each case.
+ var tmp_array_list = std.ArrayList([]const u8).init(allocator);
+ defer tmp_array_list.deinit();
+ try tmp_array_list.append("alpha\n");
+ try tmp_array_list.append("beta\n");
+
+ var result = try diffLinesToChars(allocator, "alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n");
+ try testing.expectEqualStrings(" ! ", result.chars_1); // Shared lines #1
+ try testing.expectEqualStrings("! !", result.chars_2); // Shared lines #2
+ try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); // Shared lines #3
+ result.deinit(allocator);
+
+ // Reset the expectation list (the entries are string literals, nothing to free).
+ tmp_array_list.items.len = 0;
+ try tmp_array_list.append("alpha\r\n");
+ try tmp_array_list.append("beta\r\n");
+ try tmp_array_list.append("\r\n");
+
+ result = try diffLinesToChars(allocator, "", "alpha\r\nbeta\r\n\r\n\r\n");
+ try testing.expectEqualStrings("", result.chars_1); // Empty string and blank lines #1
+ try testing.expectEqualStrings(" !\"\"", result.chars_2); // Empty string and blank lines #2
+ try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); // Empty string and blank lines #3
+ result.deinit(allocator);
+ tmp_array_list.items.len = 0;
+ try tmp_array_list.append("a");
+ try tmp_array_list.append("b");
+
+ result = try diffLinesToChars(allocator, "a", "b");
+ try testing.expectEqualStrings(" ", result.chars_1); // No linebreaks #1.
+ try testing.expectEqualStrings("!", result.chars_2); // No linebreaks #2.
+ try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); // No linebreaks #3.
+ result.deinit(allocator);
+
+ {
+ // More distinct lines than fit in one byte: build one single-codepoint
+ // line for every codepoint in CHAR_OFFSET..n.
+ const n: u21 = 1024;
+
+ var line_list = std.ArrayList(u8).init(allocator);
+ defer line_list.deinit();
+ var char_list = std.ArrayList(u8).init(allocator);
+ defer char_list.deinit();
+
+ var i: u21 = CHAR_OFFSET;
+ var char_buf: [4]u8 = undefined;
+ while (i < n) : (i += 1) {
+ const nbytes = std.unicode.wtf8Encode(i, &char_buf) catch unreachable;
+ try line_list.appendSlice(char_buf[0..nbytes]);
+ try line_list.append('\n');
+ try char_list.appendSlice(char_buf[0..nbytes]);
+ }
+ const codepoint_len = std.unicode.utf8CountCodepoints(char_list.items) catch unreachable;
+ try testing.expectEqual(@as(usize, n - CHAR_OFFSET), codepoint_len);
+ // The expected `chars_1` is exactly the concatenation of those codepoints.
+ result = try diffLinesToChars(allocator, line_list.items, "");
+ try testing.expectEqual(char_list.items.len, result.chars_1.len);
+ try testing.expectEqualSlices(u8, char_list.items, result.chars_1);
+ try testing.expectEqualStrings("", result.chars_2);
+ result.deinit(allocator);
+
+ // Test iterator stop
+ // TODO this isn't a complete test, it verifies that iteration
+ // stops, but not that it does so correctly.
+ var line_array = ArrayListUnmanaged([]const u8){};
+ defer line_array.deinit(allocator);
+ line_array.items.len = 0;
+ var line_hash = std.StringHashMapUnmanaged(u21){};
+ defer line_hash.deinit(allocator);
+ // With a limit of 950, the last `line_array` entry is expected to hold
+ // all of the remaining lines as one string.
+ const char_out = try diffLinesToCharsMunge(allocator, line_list.items, &line_array, &line_hash, 950);
+ defer allocator.free(char_out);
+ try testing.expectEqualStrings(
+ "ϖ\nϗ\nϘ\nϙ\nϚ\nϛ\nϜ\nϝ\nϞ\nϟ\nϠ\nϡ\nϢ\nϣ\nϤ\nϥ\nϦ\nϧ\nϨ\nϩ\nϪ\nϫ\nϬ\nϭ\nϮ\nϯ\nϰ\nϱ\nϲ\nϳ\nϴ\nϵ\n϶\nϷ\nϸ\nϹ\nϺ\nϻ\nϼ\nϽ\nϾ\nϿ\n",
+ line_array.getLast(),
+ );
+ }
+}
+
+/// Copy `params.diffs` into an owned list, run `diffCharsToLines` on it with
+/// `params.line_array`, and compare the outcome with `params.expected`.
+/// Both lists are released on every exit path.
+fn testDiffCharsToLines(
+    allocator: std.mem.Allocator,
+    params: struct {
+        diffs: []const Diff,
+        line_array: []const []const u8,
+        expected: []const Diff,
+    },
+) !void {
+    var owned_diffs = try DiffList.initCapacity(allocator, params.diffs.len);
+    defer deinitDiffList(allocator, &owned_diffs);
+
+    // Duplicate each text so the list owns everything it will later free.
+    for (params.diffs) |source| {
+        const text_copy = try allocator.dupe(u8, source.text);
+        owned_diffs.appendAssumeCapacity(.{ .operation = source.operation, .text = text_copy });
+    }
+
+    var line_diffs = try diffCharsToLines(allocator, &owned_diffs, params.line_array);
+    defer deinitDiffList(allocator, &line_diffs);
+
+    try testing.expectEqualDeep(params.expected, line_diffs.items);
+}
+
+test diffCharsToLines {
+    // Convert chars up to lines.
+    const gpa = testing.allocator;
+    var char_diffs = DiffList{};
+    defer deinitDiffList(gpa, &char_diffs);
+    try char_diffs.ensureTotalCapacity(gpa, 2);
+    char_diffs.appendSliceAssumeCapacity(&.{
+        Diff.init(.equal, try gpa.dupe(u8, " ! ")),
+        Diff.init(.insert, try gpa.dupe(u8, "! !")),
+    });
+
+    const lines = [_][]const u8{
+        "alpha\n",
+        "beta\n",
+    };
+    try testing.checkAllAllocationFailures(gpa, testDiffCharsToLines, .{.{
+        .diffs = char_diffs.items,
+        .line_array = &lines,
+        .expected = &.{
+            .{ .operation = .equal, .text = "alpha\nbeta\nalpha\n" },
+            .{ .operation = .insert, .text = "beta\nalpha\nbeta\n" },
+        },
+    }});
+}
+
+/// Copy `params.input` into an owned list, run `diffCleanupMerge` on it in
+/// place, and compare the list with `params.expected`.
+fn testDiffCleanupMerge(allocator: std.mem.Allocator, params: struct {
+    input: []const Diff,
+    expected: []const Diff,
+}) !void {
+    var working = try DiffList.initCapacity(allocator, params.input.len);
+    defer deinitDiffList(allocator, &working);
+
+    // Duplicate each text so the list owns everything it will later free.
+    for (params.input) |source| {
+        const text_copy = try allocator.dupe(u8, source.text);
+        working.appendAssumeCapacity(.{ .operation = source.operation, .text = text_copy });
+    }
+
+    try diffCleanupMerge(allocator, &working);
+
+    try testing.expectEqualDeep(params.expected, working.items);
+}
+
+// Table of `diffCleanupMerge` cases. Each case runs through
+// `testDiffCleanupMerge` under `checkAllAllocationFailures`, so it is
+// checked both for its result and for clean behavior when an allocation
+// fails.
+test diffCleanupMerge {
+ // Cleanup a messy diff.
+
+ // No change case
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .delete, .text = "b" },
+ .{ .operation = .insert, .text = "c" },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .delete, .text = "b" },
+ .{ .operation = .insert, .text = "c" },
+ },
+ }});
+
+ // Merge equalities
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .equal, .text = "b" },
+ .{ .operation = .equal, .text = "c" },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "abc" },
+ },
+ }});
+
+ // Merge deletions
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .delete, .text = "a" },
+ .{ .operation = .delete, .text = "b" },
+ .{ .operation = .delete, .text = "c" },
+ },
+ .expected = &.{
+ .{ .operation = .delete, .text = "abc" },
+ },
+ }});
+
+ // Merge insertions
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .insert, .text = "a" },
+ .{ .operation = .insert, .text = "b" },
+ .{ .operation = .insert, .text = "c" },
+ },
+ .expected = &.{
+ .{ .operation = .insert, .text = "abc" },
+ },
+ }});
+
+ // Merge interweave
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .delete, .text = "a" },
+ .{ .operation = .insert, .text = "b" },
+ .{ .operation = .delete, .text = "c" },
+ .{ .operation = .insert, .text = "d" },
+ .{ .operation = .equal, .text = "e" },
+ .{ .operation = .equal, .text = "f" },
+ },
+ .expected = &.{
+ .{ .operation = .delete, .text = "ac" },
+ .{ .operation = .insert, .text = "bd" },
+ .{ .operation = .equal, .text = "ef" },
+ },
+ }});
+
+ // Prefix and suffix detection
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .delete, .text = "a" },
+ .{ .operation = .insert, .text = "abc" },
+ .{ .operation = .delete, .text = "dc" },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .delete, .text = "d" },
+ .{ .operation = .insert, .text = "b" },
+ .{ .operation = .equal, .text = "c" },
+ },
+ }});
+
+ // Prefix and suffix detection with equalities
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "x" },
+ .{ .operation = .delete, .text = "a" },
+ .{ .operation = .insert, .text = "abc" },
+ .{ .operation = .delete, .text = "dc" },
+ .{ .operation = .equal, .text = "y" },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "xa" },
+ .{ .operation = .delete, .text = "d" },
+ .{ .operation = .insert, .text = "b" },
+ .{ .operation = .equal, .text = "cy" },
+ },
+ }});
+
+ // Slide edit left
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .insert, .text = "ba" },
+ .{ .operation = .equal, .text = "c" },
+ },
+ .expected = &.{
+ .{ .operation = .insert, .text = "ab" },
+ .{ .operation = .equal, .text = "ac" },
+ },
+ }});
+
+ // Slide edit right
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "c" },
+ .{ .operation = .insert, .text = "ab" },
+ .{ .operation = .equal, .text = "a" },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "ca" },
+ .{ .operation = .insert, .text = "ba" },
+ },
+ }});
+
+ // Slide edit left recursive
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .delete, .text = "b" },
+ .{ .operation = .equal, .text = "c" },
+ .{ .operation = .delete, .text = "ac" },
+ .{ .operation = .equal, .text = "x" },
+ },
+ .expected = &.{
+ .{ .operation = .delete, .text = "abc" },
+ .{ .operation = .equal, .text = "acx" },
+ },
+ }});
+
+ // Slide edit right recursive
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "x" },
+ .{ .operation = .delete, .text = "ca" },
+ .{ .operation = .equal, .text = "c" },
+ .{ .operation = .delete, .text = "b" },
+ .{ .operation = .equal, .text = "a" },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "xca" },
+ .{ .operation = .delete, .text = "cba" },
+ },
+ }});
+
+ // Empty merge: the deletion "b" is fully absorbed, leaving no delete entry.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .delete, .text = "b" },
+ .{ .operation = .insert, .text = "ab" },
+ .{ .operation = .equal, .text = "c" },
+ },
+ .expected = &.{
+ .{ .operation = .insert, .text = "a" },
+ .{ .operation = .equal, .text = "bc" },
+ },
+ }});
+
+ // Empty equality: an equality with empty text is dropped from the output.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupMerge, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "" },
+ .{ .operation = .insert, .text = "a" },
+ .{ .operation = .equal, .text = "b" },
+ },
+ .expected = &.{
+ .{ .operation = .insert, .text = "a" },
+ .{ .operation = .equal, .text = "b" },
+ },
+ }});
+}
+
+/// Copy `params.input` into an owned list, run `diffCleanupSemanticLossless`
+/// on it in place, and compare the list with `params.expected`.
+fn testDiffCleanupSemanticLossless(
+    allocator: std.mem.Allocator,
+    params: struct {
+        input: []const Diff,
+        expected: []const Diff,
+    },
+) !void {
+    var working = try DiffList.initCapacity(allocator, params.input.len);
+    defer deinitDiffList(allocator, &working);
+
+    // Duplicate each text so the list owns everything it will later free.
+    for (params.input) |source| {
+        const text_copy = try allocator.dupe(u8, source.text);
+        working.appendAssumeCapacity(.{ .operation = source.operation, .text = text_copy });
+    }
+
+    try diffCleanupSemanticLossless(allocator, &working);
+
+    try testing.expectEqualDeep(params.expected, working.items);
+}
+
+/// Build an owned `DiffList` from `diff_slice`, duplicating every text.
+/// On failure the partially built list is released; on success the caller
+/// owns the list.
+fn sliceToDiffList(allocator: Allocator, diff_slice: []const Diff) !DiffList {
+    var owned: DiffList = .{};
+    errdefer deinitDiffList(allocator, &owned);
+    // Reserve every slot first so the appends below cannot fail.
+    try owned.ensureTotalCapacity(allocator, diff_slice.len);
+    var index: usize = 0;
+    while (index < diff_slice.len) : (index += 1) {
+        const source = diff_slice[index];
+        const text_copy = try allocator.dupe(u8, source.text);
+        owned.appendAssumeCapacity(Diff.init(source.operation, text_copy));
+    }
+    return owned;
+}
+
+// Table of `diffCleanupSemanticLossless` cases. In every case the combined
+// text is unchanged; only the position of the edit between its neighbouring
+// equalities moves.
+test diffCleanupSemanticLossless {
+ // Null case
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{
+ .input = &[_]Diff{},
+ .expected = &[_]Diff{},
+ }});
+
+ // Blank lines: the insertion is moved to begin right after the blank line.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "AAA\r\n\r\nBBB" },
+ .{ .operation = .insert, .text = "\r\nDDD\r\n\r\nBBB" },
+ .{ .operation = .equal, .text = "\r\nEEE" },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "AAA\r\n\r\n" },
+ .{ .operation = .insert, .text = "BBB\r\nDDD\r\n\r\n" },
+ .{ .operation = .equal, .text = "BBB\r\nEEE" },
+ },
+ }});
+
+ // Line boundaries: the insertion is moved to begin right after the line break.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "AAA\r\nBBB" },
+ .{ .operation = .insert, .text = " DDD\r\nBBB" },
+ .{ .operation = .equal, .text = " EEE" },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "AAA\r\n" },
+ .{ .operation = .insert, .text = "BBB DDD\r\n" },
+ .{ .operation = .equal, .text = "BBB EEE" },
+ },
+ }});
+
+ // Word boundaries: the insertion is moved to begin right after the space.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "The c" },
+ .{ .operation = .insert, .text = "ow and the c" },
+ .{ .operation = .equal, .text = "at." },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "The " },
+ .{ .operation = .insert, .text = "cow and the " },
+ .{ .operation = .equal, .text = "cat." },
+ },
+ }});
+
+ // Non-alphanumeric boundaries: same as above with '-' in place of spaces.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "The-c" },
+ .{ .operation = .insert, .text = "ow-and-the-c" },
+ .{ .operation = .equal, .text = "at." },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "The-" },
+ .{ .operation = .insert, .text = "cow-and-the-" },
+ .{ .operation = .equal, .text = "cat." },
+ },
+ }});
+
+ // Hitting the start: the deletion moves to the very beginning and the
+ // leading equality disappears.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .delete, .text = "a" },
+ .{ .operation = .equal, .text = "ax" },
+ },
+ .expected = &.{
+ .{ .operation = .delete, .text = "a" },
+ .{ .operation = .equal, .text = "aax" },
+ },
+ }});
+
+ // Hitting the end: the deletion moves to the very end and the trailing
+ // equality disappears.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "xa" },
+ .{ .operation = .delete, .text = "a" },
+ .{ .operation = .equal, .text = "a" },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "xaa" },
+ .{ .operation = .delete, .text = "a" },
+ },
+ }});
+
+ // Sentence boundaries: the insertion is moved to begin right after the
+ // full stop.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemanticLossless, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "The xxx. The " },
+ .{ .operation = .insert, .text = "zzz. The " },
+ .{ .operation = .equal, .text = "yyy." },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "The xxx." },
+ .{ .operation = .insert, .text = " The zzz." },
+ .{ .operation = .equal, .text = " The yyy." },
+ },
+ }});
+}
+
+/// Reconstruct the two texts that `diffs` describes.
+///
+/// Element 0 is the "before" text: every diff except insertions. Element 1
+/// is the "after" text: every diff except deletions. Both slices are newly
+/// allocated and owned by the caller. Nothing is leaked when an allocation
+/// fails part-way through.
+fn rebuildtexts(allocator: std.mem.Allocator, diffs: DiffList) ![2][]const u8 {
+    var text = [2]std.ArrayList(u8){
+        std.ArrayList(u8).init(allocator),
+        std.ArrayList(u8).init(allocator),
+    };
+    errdefer {
+        text[0].deinit();
+        text[1].deinit();
+    }
+
+    for (diffs.items) |myDiff| {
+        if (myDiff.operation != .insert) {
+            try text[0].appendSlice(myDiff.text);
+        }
+        if (myDiff.operation != .delete) {
+            try text[1].appendSlice(myDiff.text);
+        }
+    }
+    // Once `toOwnedSlice` succeeds, `text[0]` no longer owns its bytes, so
+    // the slice needs its own cleanup in case the second conversion fails.
+    const before = try text[0].toOwnedSlice();
+    errdefer allocator.free(before);
+    const after = try text[1].toOwnedSlice();
+    return .{ before, after };
+}
+
+/// Rebuild both texts from `diffs` with `rebuildtexts` and compare them with
+/// `params.before` and `params.after`. Both rebuilt slices are freed on exit.
+fn testRebuildTexts(allocator: Allocator, diffs: DiffList, params: struct {
+    before: []const u8,
+    after: []const u8,
+}) !void {
+    const rebuilt = try rebuildtexts(allocator, diffs);
+    defer for (rebuilt) |side| allocator.free(side);
+
+    const rebuilt_before = rebuilt[0];
+    const rebuilt_after = rebuilt[1];
+    try testing.expectEqualStrings(params.before, rebuilt_before);
+    try testing.expectEqualStrings(params.after, rebuilt_after);
+}
+
+test rebuildtexts { // Every case is also replayed under induced allocation failures.
+ { // Insert, equal and delete together: inserts stay out of `before`, deletes out of `after`.
+ var diffs = try sliceToDiffList(testing.allocator, &.{
+ .{ .operation = .insert, .text = "abcabc" },
+ .{ .operation = .equal, .text = "defdef" },
+ .{ .operation = .delete, .text = "ghighi" },
+ });
+ defer deinitDiffList(testing.allocator, &diffs);
+ try testing.checkAllAllocationFailures(testing.allocator, testRebuildTexts, .{
+ diffs,
+ .{
+ .before = "defdefghighi",
+ .after = "abcabcdefdef",
+ },
+ });
+ }
+ { // No equal text at all: the two rebuilt texts share nothing.
+ var diffs = try sliceToDiffList(testing.allocator, &.{
+ .{ .operation = .insert, .text = "xxx" },
+ .{ .operation = .delete, .text = "yyy" },
+ });
+ defer deinitDiffList(testing.allocator, &diffs);
+ try testing.checkAllAllocationFailures(testing.allocator, testRebuildTexts, .{
+ diffs,
+ .{
+ .before = "yyy",
+ .after = "xxx",
+ },
+ });
+ }
+ { // Only equal text: both rebuilt texts come out identical.
+ var diffs = try sliceToDiffList(testing.allocator, &.{
+ .{ .operation = .equal, .text = "xyz" },
+ .{ .operation = .equal, .text = "pdq" },
+ });
+ defer deinitDiffList(testing.allocator, &diffs);
+ try testing.checkAllAllocationFailures(testing.allocator, testRebuildTexts, .{
+ diffs,
+ .{
+ .before = "xyzpdq",
+ .after = "xyzpdq",
+ },
+ });
+ }
+}
+
+/// Runs `diffBisect` with the given deadline and expects exactly `expected`.
+/// Used with `checkAllAllocationFailures`: the result list is always released.
+fn testDiffBisect(allocator: std.mem.Allocator, params: struct {
+    dmp: DiffMatchPatch,
+    before: []const u8,
+    after: []const u8,
+    deadline: u64,
+    expected: []const Diff,
+}) !void {
+    const dmp = params.dmp;
+    var bisected = try dmp.diffBisect(allocator, params.before, params.after, params.deadline);
+    defer deinitDiffList(allocator, &bisected);
+    try testing.expectEqualDeep(params.expected, bisected.items);
+}
+
+test diffBisect {
+    const dmp: DiffMatchPatch = .{ .diff_timeout = 0 };
+
+    const before = "cat";
+    const after = "map";
+
+    // Normal: the deadline lies far in the future.
+    try testing.checkAllAllocationFailures(testing.allocator, testDiffBisect, .{.{
+        .dmp = dmp,
+        .before = before,
+        .after = after,
+        // Largest i64, because std.time reports timestamps as i64.
+        .deadline = std.math.maxInt(i64),
+        .expected = &.{
+            .{ .operation = .delete, .text = "c" },
+            .{ .operation = .insert, .text = "m" },
+            .{ .operation = .equal, .text = "a" },
+            .{ .operation = .delete, .text = "t" },
+            .{ .operation = .insert, .text = "p" },
+        },
+    }});
+
+    // Timeout: the result is a whole-text delete followed by a whole-text insert.
+    try testing.checkAllAllocationFailures(testing.allocator, testDiffBisect, .{.{
+        .dmp = dmp,
+        .before = before,
+        .after = after,
+        .deadline = 0, // Do not run prior to 1970
+        .expected = &.{
+            .{ .operation = .delete, .text = "cat" },
+            .{ .operation = .insert, .text = "map" },
+        },
+    }});
+}
+
+/// Runs `diff` on `before`/`after` with the requested line-mode flag and
+/// expects exactly `expected`; the produced list is always released.
+fn testDiff(allocator: std.mem.Allocator, params: struct {
+    dmp: DiffMatchPatch,
+    before: []const u8,
+    after: []const u8,
+    check_lines: bool,
+    expected: []const Diff,
+}) !void {
+    const dmp = params.dmp;
+    var produced = try dmp.diff(allocator, params.before, params.after, params.check_lines);
+    defer deinitDiffList(allocator, &produced);
+    try testing.expectEqualDeep(params.expected, produced.items);
+}
+
+test diff { // Table of diff cases (check_lines off), then a timed run against a 100 ms timeout.
+ const dmp: DiffMatchPatch = .{ .diff_timeout = 0 };
+
+ // Null case.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "",
+ .after = "",
+ .check_lines = false,
+ .expected = &[_]Diff{},
+ }});
+
+ // Equality.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "abc",
+ .after = "abc",
+ .check_lines = false,
+ .expected = &.{
+ .{ .operation = .equal, .text = "abc" },
+ },
+ }});
+
+ // Simple insertion.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "abc",
+ .after = "ab123c",
+ .check_lines = false,
+ .expected = &.{
+ .{ .operation = .equal, .text = "ab" },
+ .{ .operation = .insert, .text = "123" },
+ .{ .operation = .equal, .text = "c" },
+ },
+ }});
+
+ // Simple deletion.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "a123bc",
+ .after = "abc",
+ .check_lines = false,
+ .expected = &.{
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .delete, .text = "123" },
+ .{ .operation = .equal, .text = "bc" },
+ },
+ }});
+
+ // Two insertions.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "abc",
+ .after = "a123b456c",
+ .check_lines = false,
+ .expected = &.{
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .insert, .text = "123" },
+ .{ .operation = .equal, .text = "b" },
+ .{ .operation = .insert, .text = "456" },
+ .{ .operation = .equal, .text = "c" },
+ },
+ }});
+
+ // Two deletions.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "a123b456c",
+ .after = "abc",
+ .check_lines = false,
+ .expected = &.{
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .delete, .text = "123" },
+ .{ .operation = .equal, .text = "b" },
+ .{ .operation = .delete, .text = "456" },
+ .{ .operation = .equal, .text = "c" },
+ },
+ }});
+
+ // Simple case #1
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "a",
+ .after = "b",
+ .check_lines = false,
+ .expected = &.{
+ .{ .operation = .delete, .text = "a" },
+ .{ .operation = .insert, .text = "b" },
+ },
+ }});
+
+ // Simple case #2
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "Apples are a fruit.",
+ .after = "Bananas are also fruit.",
+ .check_lines = false,
+ .expected = &.{
+ .{ .operation = .delete, .text = "Apple" },
+ .{ .operation = .insert, .text = "Banana" },
+ .{ .operation = .equal, .text = "s are a" },
+ .{ .operation = .insert, .text = "lso" },
+ .{ .operation = .equal, .text = " fruit." },
+ },
+ }});
+
+ // Simple case #3
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "ax\t",
+ .after = "\u{0680}x\x00",
+ .check_lines = false,
+ .expected = &.{
+ .{ .operation = .delete, .text = "a" },
+ .{ .operation = .insert, .text = "\u{0680}" },
+ .{ .operation = .equal, .text = "x" },
+ .{ .operation = .delete, .text = "\t" },
+ .{ .operation = .insert, .text = "\x00" },
+ },
+ }});
+
+ // Overlap #1
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "1ayb2",
+ .after = "abxab",
+ .check_lines = false,
+ .expected = &.{
+ .{ .operation = .delete, .text = "1" },
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .delete, .text = "y" },
+ .{ .operation = .equal, .text = "b" },
+ .{ .operation = .delete, .text = "2" },
+ .{ .operation = .insert, .text = "xab" },
+ },
+ }});
+
+ // Overlap #2
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "abcy",
+ .after = "xaxcxabc",
+ .check_lines = false,
+ .expected = &.{
+ .{ .operation = .insert, .text = "xaxcx" },
+ .{ .operation = .equal, .text = "abc" },
+ .{ .operation = .delete, .text = "y" },
+ },
+ }});
+
+ // Overlap #3
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg",
+ .after = "a-bcd-efghijklmnopqrs",
+ .check_lines = false,
+ .expected = &.{
+ .{ .operation = .delete, .text = "ABCD" },
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .delete, .text = "=" },
+ .{ .operation = .insert, .text = "-" },
+ .{ .operation = .equal, .text = "bcd" },
+ .{ .operation = .delete, .text = "=" },
+ .{ .operation = .insert, .text = "-" },
+ .{ .operation = .equal, .text = "efghijklmnopqrs" },
+ .{ .operation = .delete, .text = "EFGHIJKLMNOefg" },
+ },
+ }});
+
+ // Large equality
+ try testing.checkAllAllocationFailures(testing.allocator, testDiff, .{.{
+ .dmp = dmp,
+ .before = "a [[Pennsylvania]] and [[New",
+ .after = " and [[Pennsylvania]]",
+ .check_lines = false,
+ .expected = &.{
+ .{ .operation = .insert, .text = " " },
+ .{ .operation = .equal, .text = "a" },
+ .{ .operation = .insert, .text = "nd" },
+ .{ .operation = .equal, .text = " [[Pennsylvania]]" },
+ .{ .operation = .delete, .text = " and [[New" },
+ },
+ }});
+
+ const allocator = testing.allocator;
+ // TODO: the timed test below is not yet run under checkAllAllocationFailures.
+
+ // Increase the text lengths by 1024 times to ensure a timeout.
+ {
+ const a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n" ** 1024;
+ const b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n" ** 1024;
+
+ const with_timout: DiffMatchPatch = .{
+ .diff_timeout = 100, // 100ms
+ };
+
+ const start_time = std.time.milliTimestamp();
+ {
+ var time_diff = try with_timout.diff(allocator, a, b, false);
+ defer deinitDiffList(allocator, &time_diff);
+ }
+ const end_time = std.time.milliTimestamp();
+
+ // Test that we took at least the timeout period.
+ try testing.expect(with_timout.diff_timeout <= end_time - start_time); // diff: Timeout min.
+ // Test that we didn't take forever (be forgiving): the bound is
+ // diff_timeout * 20000 ms, i.e. 2000 s here, so only a diff that
+ // overruns its deadline by a very wide margin can fail this check.
+ try testing.expect((with_timout.diff_timeout) * 10000 * 2 > end_time - start_time); // diff: Timeout max.
+ }
+}
+
+/// Diffs `before` against `after` once with line mode requested and once
+/// without, and expects both diff lists to be identical.
+fn testDiffLineMode(allocator: Allocator, dmp: *DiffMatchPatch, before: []const u8, after: []const u8) !void {
+    // Lower the line-mode threshold to 20 bytes for this check, and put the
+    // caller's value back on every exit path, including induced failures.
+    const saved_threshold = dmp.diff_check_lines_over;
+    dmp.diff_check_lines_over = 20;
+    defer dmp.diff_check_lines_over = saved_threshold;
+
+    var diff_checked = try dmp.diff(allocator, before, after, true);
+    defer deinitDiffList(allocator, &diff_checked);
+    var diff_unchecked = try dmp.diff(allocator, before, after, false);
+    defer deinitDiffList(allocator, &diff_unchecked);
+
+    try testing.expectEqualDeep(diff_checked.items, diff_unchecked.items); // diff: Simple line-mode.
+}
- one_timeout.diff_timeout = 0;
- try testing.expectEqualDeep(@as(?HalfMatchResult, null), try one_timeout.diffHalfMatch(arena.allocator(), "qHilloHelloHew", "xHelloHeHulloy")); // Non-optimal halfmatch
+test "diffLineMode" {
+    const allocator = testing.allocator;
+    var dmp: DiffMatchPatch = .{ .diff_timeout = 0 };
+    try testing.checkAllAllocationFailures(
+        allocator,
+        testDiffLineMode,
+        .{
+            &dmp,
+            "1234567890\n1234567890\n1234567890",
+            "abcdefghij\nabcdefghij\nabcdefghij",
+        },
+    );
+
+    {
+        const digits = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890";
+        const letters = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij";
+
+        var with_line_mode = try dmp.diff(allocator, digits, letters, true);
+        defer deinitDiffList(allocator, &with_line_mode);
+
+        var without_line_mode = try dmp.diff(allocator, digits, letters, false);
+        defer deinitDiffList(allocator, &without_line_mode);
+
+        try testing.expectEqualDeep(with_line_mode.items, without_line_mode.items); // diff: Single line-mode.
+    }
+
+    {
+        // diff: Overlap line-mode.
+        const old_text = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
+        const new_text = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n";
+
+        var line_diffs = try dmp.diff(allocator, old_text, new_text, true);
+        defer deinitDiffList(allocator, &line_diffs);
+
+        const line_texts = try rebuildtexts(allocator, line_diffs);
+        defer {
+            allocator.free(line_texts[0]);
+            allocator.free(line_texts[1]);
+        }
+
+        var char_diffs = try dmp.diff(allocator, old_text, new_text, false);
+        defer deinitDiffList(allocator, &char_diffs);
+
+        const char_texts = try rebuildtexts(allocator, char_diffs);
+        defer {
+            allocator.free(char_texts[0]);
+            allocator.free(char_texts[1]);
+        }
+
+        try testing.expectEqualStrings(char_texts[0], line_texts[0]);
+        try testing.expectEqualStrings(char_texts[1], line_texts[1]);
+    }
+}
+
+/// Round-trips a diff: rebuild both texts, diff them again, expect the same edits.
+fn diffRoundTrip(allocator: Allocator, dmp: DiffMatchPatch, diff_slice: []const Diff) !void {
+    var original = try DiffList.initCapacity(allocator, diff_slice.len);
+    defer deinitDiffList(allocator, &original);
+    for (diff_slice) |item| {
+        const owned_text = try allocator.dupe(u8, item.text);
+        original.appendAssumeCapacity(.{ .operation = item.operation, .text = owned_text });
+    }
+    const before = try diffBeforeText(allocator, original);
+    defer allocator.free(before);
+    const after = try diffAfterText(allocator, original);
+    defer allocator.free(after);
+    var rediffed = try dmp.diff(allocator, before, after, false);
+    defer deinitDiffList(allocator, &rediffed);
+    try diffCleanupSemantic(allocator, &rediffed); // expected to change nothing
+    try testing.expectEqualDeep(original.items, rediffed.items);
+}
+
+test "Unicode diffs" {
+ const allocator = std.testing.allocator;
+ var dmp = DiffMatchPatch{};
+ dmp.diff_timeout = 0;
+ {
+ var greek_diff = try dmp.diff(
+ allocator,
+ "αβγ",
+ "αβδ",
+ false,
+ );
+ defer deinitDiffList(allocator, &greek_diff);
+ try testing.expectEqualDeep(@as([]const Diff, &.{
+ Diff.init(.equal, "αβ"),
+ Diff.init(.delete, "γ"),
+ Diff.init(.insert, "δ"),
+ }), greek_diff.items);
+ }
+ {
+ // ө is 0xd3, 0xa9, թ is 0xd5, 0xa9
+ var prefix_diff = try dmp.diff(
+ allocator,
+ "abө",
+ "abթ",
+ false,
+ );
+ defer deinitDiffList(allocator, &prefix_diff);
+ try testing.expectEqualDeep(@as([]const Diff, &.{
+ Diff.init(.equal, "ab"),
+ Diff.init(.delete, "ө"),
+ Diff.init(.insert, "թ"),
+ }), prefix_diff.items);
+ }
+ {
+ var mid_diff = try dmp.diff(
+ allocator,
+ "αөβ",
+ "αթβ",
+ false,
+ );
+ defer deinitDiffList(allocator, &mid_diff);
+ try testing.expectEqualDeep(@as([]const Diff, &.{
+ Diff.init(.equal, "α"),
+ Diff.init(.delete, "ө"),
+ Diff.init(.insert, "թ"),
+ Diff.init(.equal, "β"),
+ }), mid_diff.items);
+ }
+ {
+ var mid_prefix = try dmp.diff(
+ allocator,
+ "αβλ",
+ "αδλ",
+ false,
+ );
+ defer deinitDiffList(allocator, &mid_prefix);
+ try testing.expectEqualDeep(@as([]const Diff, &.{
+ Diff.init(.equal, "α"),
+ Diff.init(.delete, "β"),
+ Diff.init(.insert, "δ"),
+ Diff.init(.equal, "λ"),
+ }), mid_prefix.items);
+ }
+ { // "三亥临" Three-byte, one different suffix
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .equal, .text = "三亥" },
+ .{ .operation = .delete, .text = "两" },
+ .{ .operation = .insert, .text = "临" },
+ },
+ },
+ );
+ }
+ { // "三亥乤" Three-byte, one middle difference in suffix
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .equal, .text = "三亥" },
+ .{ .operation = .delete, .text = "两" },
+ .{ .operation = .insert, .text = "乤" },
+ },
+ },
+ );
+ }
+ { // "三亥帤" Three-byte, one prefix difference in suffix
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .equal, .text = "三亥" },
+ .{ .operation = .delete, .text = "两" },
+ .{ .operation = .insert, .text = "帤" },
+ },
+ },
+ );
+ }
+ { // "三帤亥" Three-byte, one prefix difference in middle
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .equal, .text = "三" },
+ .{ .operation = .delete, .text = "两" },
+ .{ .operation = .insert, .text = "帤" },
+ .{ .operation = .equal, .text = "亥" },
+ },
+ },
+ );
+ }
+ { // "三乤亥" Three-byte, one middle difference in middle
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .equal, .text = "三" },
+ .{ .operation = .delete, .text = "两" },
+ .{ .operation = .insert, .text = "乤" },
+ .{ .operation = .equal, .text = "亥" },
+ },
+ },
+ );
+ }
+ { // "三临亥" Three-byte, one suffix difference in middle
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .equal, .text = "三" },
+ .{ .operation = .delete, .text = "两" },
+ .{ .operation = .insert, .text = "临" },
+ .{ .operation = .equal, .text = "亥" },
+ },
+ },
+ );
+ }
+ { // "临三亥" Three-byte, one suffix difference in prefix
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .delete, .text = "两" },
+ .{ .operation = .insert, .text = "临" },
+ .{ .operation = .equal, .text = "三亥" },
+ },
+ },
+ );
+ }
+ { // "乤三亥" Three-byte, one middle difference in prefix
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .delete, .text = "两" },
+ .{ .operation = .insert, .text = "乤" },
+ .{ .operation = .equal, .text = "三亥" },
+ },
+ },
+ );
+ }
+ { // "帤三亥" Three-byte, one prefix difference in prefix
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .delete, .text = "两" },
+ .{ .operation = .insert, .text = "帤" },
+ .{ .operation = .equal, .text = "三亥" },
+ },
+ },
+ );
+ }
+ { // "三临亥" → "三丿亥" Three-byte, one suffix difference
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .equal, .text = "三" },
+ .{ .operation = .delete, .text = "临" },
+ .{ .operation = .insert, .text = "丿" },
+ .{ .operation = .equal, .text = "亥" },
+ },
+ },
+ );
+ }
+ { // Four-byte character replaced by a three-byte one
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .equal, .text = "😹💋" },
+ .{ .operation = .delete, .text = "\xf0\x9f\xa5\xb9" },
+ .{ .operation = .insert, .text = "丿" },
+ .{ .operation = .equal, .text = "👀🫵" },
+ },
+ },
+ );
+ }
+ { // Four-byte permutation #1
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .equal, .text = "😹💋" },
+ .{ .operation = .delete, .text = "\xf0\x9f\xa5\xb9" },
+ .{ .operation = .insert, .text = "\xf1\x9f\xa5\xb9" },
+ .{ .operation = .equal, .text = "👀🫵" },
+ },
+ },
+ );
+ }
+ { // Four-byte permutation #2
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .equal, .text = "😹💋" },
+ .{ .operation = .delete, .text = "\xf0\x9f\xa5\xb9" },
+ .{ .operation = .insert, .text = "\xf0\xa0\xa5\xb9" },
+ .{ .operation = .equal, .text = "👀🫵" },
+ },
+ },
+ );
+ }
+ { // Four-byte permutation #3
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .equal, .text = "😹💋" },
+ .{ .operation = .delete, .text = "\xf0\x9f\xa5\xb9" },
+ .{ .operation = .insert, .text = "\xf0\x9f\xa4\xb9" },
+ .{ .operation = .equal, .text = "👀🫵" },
+ },
+ },
+ );
+ }
+ { // Four-byte permutation #4
+ try testing.checkAllAllocationFailures(
+ allocator,
+ diffRoundTrip,
+ .{
+ dmp, &.{
+ .{ .operation = .equal, .text = "😹💋" },
+ .{ .operation = .delete, .text = "\xf0\x9f\xa5\xb9" },
+ .{ .operation = .insert, .text = "\xf0\x9f\xa5\xb4" },
+ .{ .operation = .equal, .text = "👀🫵" },
+ },
+ },
+ );
+ }
+ {
+ const before = "red blue green yellow";
+ const after = "red♦︎ blue♦︎∅ green♦︎∅♦︎ yellow";
+ var diffs = try dmp.diff(allocator, before, after, false);
+ defer deinitDiffList(allocator, &diffs);
+ const before_2 = try diffBeforeText(allocator, diffs);
+ defer allocator.free(before_2);
+ try testing.expectEqualStrings(before, before_2);
+ const after_2 = try diffAfterText(allocator, diffs);
+ defer allocator.free(after_2);
+ try testing.expectEqualStrings(after, after_2);
+ }
}
-test diffLinesToChars {
- var arena = std.heap.ArenaAllocator.init(testing.allocator);
- defer arena.deinit();
-
- // Convert lines down to characters.
- var tmp_array_list = std.ArrayList([]const u8).init(arena.allocator());
- try tmp_array_list.append("");
- try tmp_array_list.append("alpha\n");
- try tmp_array_list.append("beta\n");
-
- var result = try diffLinesToChars(arena.allocator(), "alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n");
- try testing.expectEqualStrings("\u{0001}\u{0002}\u{0001}", result.chars_1); // Shared lines #1
- try testing.expectEqualStrings("\u{0002}\u{0001}\u{0002}", result.chars_2); // Shared lines #2
- try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); // Shared lines #3
-
- tmp_array_list.items.len = 0;
- try tmp_array_list.append("");
- try tmp_array_list.append("alpha\r\n");
- try tmp_array_list.append("beta\r\n");
- try tmp_array_list.append("\r\n");
-
- result = try diffLinesToChars(arena.allocator(), "", "alpha\r\nbeta\r\n\r\n\r\n");
- try testing.expectEqualStrings("", result.chars_1); // Empty string and blank lines #1
- try testing.expectEqualStrings("\u{0001}\u{0002}\u{0003}\u{0003}", result.chars_2); // Empty string and blank lines #2
- try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); // Empty string and blank lines #3
-
- tmp_array_list.items.len = 0;
- try tmp_array_list.append("");
- try tmp_array_list.append("a");
- try tmp_array_list.append("b");
-
- result = try diffLinesToChars(arena.allocator(), "a", "b");
- try testing.expectEqualStrings("\u{0001}", result.chars_1); // No linebreaks #1.
- try testing.expectEqualStrings("\u{0002}", result.chars_2); // No linebreaks #2.
- try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items); // No linebreaks #3.
-
- // TODO: More than 256 to reveal any 8-bit limitations but this requires
- // some unicode logic that I don't want to deal with
-
- // TODO: Fix this
+test "Diff format" {
+    const wanted = "(+, \"add me\")";
+    const insertion: Diff = .{ .operation = .insert, .text = "add me" };
+    var scratch: [wanted.len]u8 = undefined; // exactly the 13 bytes of `wanted`
+    const rendered = try std.fmt.bufPrint(&scratch, "{}", .{insertion});
+    try testing.expectEqualStrings(wanted, rendered);
+}
- // const n: u8 = 255;
- // tmp_array_list.items.len = 0;
+/// Copies `input` into an owned diff list, runs `diffCleanupSemantic` on it,
+/// and expects the cleaned list to equal `expected`.
+fn testDiffCleanupSemantic(allocator: std.mem.Allocator, params: struct {
+    input: []const Diff,
+    expected: []const Diff,
+}) !void {
+    var working = try DiffList.initCapacity(allocator, params.input.len);
+    defer deinitDiffList(allocator, &working);
+
+    for (params.input) |item| {
+        const owned_text = try allocator.dupe(u8, item.text);
+        working.appendAssumeCapacity(.{ .operation = item.operation, .text = owned_text });
+    }
- // var line_list = std.ArrayList(u8).init(arena.allocator());
- // var char_list = std.ArrayList(u8).init(arena.allocator());
+    try diffCleanupSemantic(allocator, &working);
- // var i: u8 = 0;
- // while (i < n) : (i += 1) {
- // try tmp_array_list.append(&.{ i, '\n' });
- // try line_list.appendSlice(&.{ i, '\n' });
- // try char_list.append(i);
- // }
- // try testing.expectEqual(@as(usize, n), tmp_array_list.items.len); // Test initialization fail #1
- // try testing.expectEqual(@as(usize, n), char_list.items.len); // Test initialization fail #2
- // try tmp_array_list.insert(0, "");
- // result = try diffLinesToChars(arena.allocator(), line_list.items, "");
- // try testing.expectEqualStrings(char_list.items, result.chars_1);
- // try testing.expectEqualStrings("", result.chars_2);
- // try testing.expectEqualDeep(tmp_array_list.items, result.line_array.items);
+    try testing.expectEqualDeep(params.expected, working.items);
 }
-test diffCharsToLines {
- var arena = std.heap.ArenaAllocator.init(testing.allocator);
- defer arena.deinit();
-
- try testing.expect((Diff.init(.equal, "a")).eql(Diff.init(.equal, "a")));
- try testing.expect(!(Diff.init(.insert, "a")).eql(Diff.init(.equal, "a")));
- try testing.expect(!(Diff.init(.equal, "a")).eql(Diff.init(.equal, "b")));
- try testing.expect(!(Diff.init(.equal, "a")).eql(Diff.init(.delete, "b")));
+test diffCleanupSemantic { // Each case is replayed under induced allocation failures.
+ // Null case.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{
+ .input = &[_]Diff{},
+ .expected = &[_]Diff{},
+ }});
+
+ // No elimination #1: the list is expected back unchanged.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{
+ .input = &.{
+ .{ .operation = .delete, .text = "ab" },
+ .{ .operation = .insert, .text = "cd" },
+ .{ .operation = .equal, .text = "12" },
+ .{ .operation = .delete, .text = "e" },
+ },
+ .expected = &.{
+ .{ .operation = .delete, .text = "ab" },
+ .{ .operation = .insert, .text = "cd" },
+ .{ .operation = .equal, .text = "12" },
+ .{ .operation = .delete, .text = "e" },
+ },
+ }});
+
+ // No elimination #2: the list is expected back unchanged.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{
+ .input = &.{
+ .{ .operation = .delete, .text = "abc" },
+ .{ .operation = .insert, .text = "ABC" },
+ .{ .operation = .equal, .text = "1234" },
+ .{ .operation = .delete, .text = "wxyz" },
+ },
+ .expected = &.{
+ .{ .operation = .delete, .text = "abc" },
+ .{ .operation = .insert, .text = "ABC" },
+ .{ .operation = .equal, .text = "1234" },
+ .{ .operation = .delete, .text = "wxyz" },
+ },
+ }});
+
+ // Simple elimination: the equality "b" is folded into a delete/insert pair.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{
+ .input = &.{
+ .{ .operation = .delete, .text = "a" },
+ .{ .operation = .equal, .text = "b" },
+ .{ .operation = .delete, .text = "c" },
+ },
+ .expected = &.{
+ .{ .operation = .delete, .text = "abc" },
+ .{ .operation = .insert, .text = "b" },
+ },
+ }});
+
+ // Backpass elimination: both equalities end up folded into the edits.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{
+ .input = &.{
+ .{ .operation = .delete, .text = "ab" },
+ .{ .operation = .equal, .text = "cd" },
+ .{ .operation = .delete, .text = "e" },
+ .{ .operation = .equal, .text = "f" },
+ .{ .operation = .insert, .text = "g" },
+ },
+ .expected = &.{
+ .{ .operation = .delete, .text = "abcdef" },
+ .{ .operation = .insert, .text = "cdfg" },
+ },
+ }});
+
+ // Multiple elimination: every equality is folded into one delete/insert pair.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{
+ .input = &.{
+ .{ .operation = .insert, .text = "1" },
+ .{ .operation = .equal, .text = "A" },
+ .{ .operation = .delete, .text = "B" },
+ .{ .operation = .insert, .text = "2" },
+ .{ .operation = .equal, .text = "_" },
+ .{ .operation = .insert, .text = "1" },
+ .{ .operation = .equal, .text = "A" },
+ .{ .operation = .delete, .text = "B" },
+ .{ .operation = .insert, .text = "2" },
+ },
+ .expected = &.{
+ .{ .operation = .delete, .text = "AB_AB" },
+ .{ .operation = .insert, .text = "1A2_1A2" },
+ },
+ }});
+
+ // Word boundaries: the deletion is shifted so it starts at a word start.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{
+ .input = &.{
+ .{ .operation = .equal, .text = "The c" },
+ .{ .operation = .delete, .text = "ow and the c" },
+ .{ .operation = .equal, .text = "at." },
+ },
+ .expected = &.{
+ .{ .operation = .equal, .text = "The " },
+ .{ .operation = .delete, .text = "cow and the " },
+ .{ .operation = .equal, .text = "cat." },
+ },
+ }});
+
+ // No overlap elimination: a two-character overlap ("xx") is left alone.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{
+ .input = &.{
+ .{ .operation = .delete, .text = "abcxx" },
+ .{ .operation = .insert, .text = "xxdef" },
+ },
+ .expected = &.{
+ .{ .operation = .delete, .text = "abcxx" },
+ .{ .operation = .insert, .text = "xxdef" },
+ },
+ }});
+
+ // Overlap elimination: the shared "xxx" is pulled out as an equality.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{
+ .input = &.{
+ .{ .operation = .delete, .text = "abcxxx" },
+ .{ .operation = .insert, .text = "xxxdef" },
+ },
+ .expected = &.{
+ .{ .operation = .delete, .text = "abc" },
+ .{ .operation = .equal, .text = "xxx" },
+ .{ .operation = .insert, .text = "def" },
+ },
+ }});
+
+ // Reverse overlap elimination: same idea, with the insert placed first.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{
+ .input = &.{
+ .{ .operation = .delete, .text = "xxxabc" },
+ .{ .operation = .insert, .text = "defxxx" },
+ },
+ .expected = &.{
+ .{ .operation = .insert, .text = "def" },
+ .{ .operation = .equal, .text = "xxx" },
+ .{ .operation = .delete, .text = "abc" },
+ },
+ }});
+
+ // Two overlap eliminations: "1212" and "3" each become an equality.
+ try testing.checkAllAllocationFailures(testing.allocator, testDiffCleanupSemantic, .{.{
+ .input = &.{
+ .{ .operation = .delete, .text = "abcd1212" },
+ .{ .operation = .insert, .text = "1212efghi" },
+ .{ .operation = .equal, .text = "----" },
+ .{ .operation = .delete, .text = "A3" },
+ .{ .operation = .insert, .text = "3BC" },
+ },
+ .expected = &.{
+ .{ .operation = .delete, .text = "abcd" },
+ .{ .operation = .equal, .text = "1212" },
+ .{ .operation = .insert, .text = "efghi" },
+ .{ .operation = .equal, .text = "----" },
+ .{ .operation = .delete, .text = "A" },
+ .{ .operation = .equal, .text = "3" },
+ .{ .operation = .insert, .text = "BC" },
+ },
+ }});
+}
- // Convert chars up to lines.
- var diffs = std.ArrayList(Diff).init(arena.allocator());
- try diffs.appendSlice(&.{
- Diff{ .operation = .equal, .text = try arena.allocator().dupe(u8, "\u{0001}\u{0002}\u{0001}") },
- Diff{ .operation = .insert, .text = try arena.allocator().dupe(u8, "\u{0002}\u{0001}\u{0002}") },
- });
- var tmp_vector = std.ArrayList([]const u8).init(arena.allocator());
- try tmp_vector.append("");
- try tmp_vector.append("alpha\n");
- try tmp_vector.append("beta\n");
- try diffCharsToLines(arena.allocator(), diffs.items, tmp_vector.items);
+/// Copies `input` into an owned diff list, applies `dmp.diffCleanupEfficiency`,
+/// and expects the result to equal `expected`.
+fn testDiffCleanupEfficiency(allocator: Allocator, dmp: DiffMatchPatch, params: struct {
+    input: []const Diff,
+    expected: []const Diff,
+}) !void {
+    var working = try DiffList.initCapacity(allocator, params.input.len);
+    defer deinitDiffList(allocator, &working);
+    for (params.input) |item| {
+        const owned_text = try allocator.dupe(u8, item.text);
+        working.appendAssumeCapacity(.{ .operation = item.operation, .text = owned_text });
+    }
+
+    try dmp.diffCleanupEfficiency(allocator, &working);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- Diff.init(.equal, "alpha\nbeta\nalpha\n"),
- Diff.init(.insert, "beta\nalpha\nbeta\n"),
- }), diffs.items);
+    try testing.expectEqualDeep(params.expected, working.items);
+}
- // TODO: Implement exhaustive tests
+test diffCleanupEfficiency { // All but the null case run under induced allocation failures.
+ const allocator = testing.allocator;
+ var dmp = DiffMatchPatch{};
+ dmp.diff_edit_cost = 4; // same value as the field default
+ { // Null case.
+ var diffs = DiffList{};
+ try dmp.diffCleanupEfficiency(allocator, &diffs);
+ try testing.expectEqualDeep(DiffList{}, diffs);
+ }
+ { // No elimination.
+ const dslice: []const Diff = &.{
+ .{ .operation = .delete, .text = "ab" },
+ .{ .operation = .insert, .text = "12" },
+ .{ .operation = .equal, .text = "wxyz" },
+ .{ .operation = .delete, .text = "cd" },
+ .{ .operation = .insert, .text = "34" },
+ };
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testDiffCleanupEfficiency,
+ .{
+ dmp,
+ .{ .input = dslice, .expected = dslice },
+ },
+ );
+ }
+ { // Four-edit elimination.
+ const dslice: []const Diff = &.{
+ .{ .operation = .delete, .text = "ab" },
+ .{ .operation = .insert, .text = "12" },
+ .{ .operation = .equal, .text = "xyz" },
+ .{ .operation = .delete, .text = "cd" },
+ .{ .operation = .insert, .text = "34" },
+ };
+ const d_after: []const Diff = &.{
+ .{ .operation = .delete, .text = "abxyzcd" },
+ .{ .operation = .insert, .text = "12xyz34" },
+ };
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testDiffCleanupEfficiency,
+ .{
+ dmp,
+ .{ .input = dslice, .expected = d_after },
+ },
+ );
+ }
+ { // Three-edit elimination.
+ const dslice: []const Diff = &.{
+ .{ .operation = .insert, .text = "12" },
+ .{ .operation = .equal, .text = "x" },
+ .{ .operation = .delete, .text = "cd" },
+ .{ .operation = .insert, .text = "34" },
+ };
+ const d_after: []const Diff = &.{
+ .{ .operation = .delete, .text = "xcd" },
+ .{ .operation = .insert, .text = "12x34" },
+ };
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testDiffCleanupEfficiency,
+ .{
+ dmp,
+ .{ .input = dslice, .expected = d_after },
+ },
+ );
+ }
+ { // Backpass elimination.
+ const dslice: []const Diff = &.{
+ .{ .operation = .delete, .text = "ab" },
+ .{ .operation = .insert, .text = "12" },
+ .{ .operation = .equal, .text = "xy" },
+ .{ .operation = .insert, .text = "34" },
+ .{ .operation = .equal, .text = "z" },
+ .{ .operation = .delete, .text = "cd" },
+ .{ .operation = .insert, .text = "56" },
+ };
+ const d_after: []const Diff = &.{
+ .{ .operation = .delete, .text = "abxyzcd" },
+ .{ .operation = .insert, .text = "12xy34z56" },
+ };
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testDiffCleanupEfficiency,
+ .{
+ dmp,
+ .{ .input = dslice, .expected = d_after },
+ },
+ );
+ }
+ { // High cost elimination.
+ dmp.diff_edit_cost = 5; // raised for this case only; reset below
+ const dslice: []const Diff = &.{
+ .{ .operation = .delete, .text = "ab" },
+ .{ .operation = .insert, .text = "12" },
+ .{ .operation = .equal, .text = "wxyz" },
+ .{ .operation = .delete, .text = "cd" },
+ .{ .operation = .insert, .text = "34" },
+ };
+ const d_after: []const Diff = &.{
+ .{ .operation = .delete, .text = "abwxyzcd" },
+ .{ .operation = .insert, .text = "12wxyz34" },
+ };
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testDiffCleanupEfficiency,
+ .{
+ dmp,
+ .{ .input = dslice, .expected = d_after },
+ },
+ );
+ dmp.diff_edit_cost = 4; // back to the cost the other cases use
+ }
 }
-test diffCleanupMerge {
- var arena = std.heap.ArenaAllocator.init(testing.allocator);
- defer arena.deinit();
+test "diff before and after text" {
+    // Rebuilding each side from the diff list must reproduce the inputs.
+    const gpa = testing.allocator;
+    const source_text = "The cat in the hat.";
+    const target_text = "The bat in the belfry.";
+    const dmp = DiffMatchPatch{};
+    var diff_list = try dmp.diff(gpa, source_text, target_text, false);
+    defer deinitDiffList(gpa, &diff_list);
+    const rebuilt_source = try diffBeforeText(gpa, diff_list);
+    defer gpa.free(rebuilt_source);
+    const rebuilt_target = try diffAfterText(gpa, diff_list);
+    defer gpa.free(rebuilt_target);
+    try testing.expectEqualStrings(source_text, rebuilt_source);
+    try testing.expectEqualStrings(target_text, rebuilt_target);
+}
- // Cleanup a messy diff.
- var diffs = DiffList{};
- try testing.expectEqualDeep(@as([]const Diff, &[0]Diff{}), diffs.items); // Null case
+test diffIndex {
+    // For each pair of texts, diffIndex is checked at two positions.
+    const gpa = testing.allocator;
+    const dmp = DiffMatchPatch{};
+    {
+        const old_text = "The midnight train";
+        const new_text = "The blue midnight train";
+        var diff_list = try dmp.diff(gpa, old_text, new_text, false);
+        defer deinitDiffList(gpa, &diff_list);
+        try testing.expectEqual(0, diffIndex(diff_list, 0));
+        try testing.expectEqual(9, diffIndex(diff_list, 4));
+    }
+    {
+        const old_text = "Better still to live and learn";
+        const new_text = "Better yet to learn and live";
+        var diff_list = try dmp.diff(gpa, old_text, new_text, false);
+        defer deinitDiffList(gpa, &diff_list);
+        try testing.expectEqual(11, diffIndex(diff_list, 13));
+        try testing.expectEqual(20, diffIndex(diff_list, 21));
+    }
+}
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- .{ .operation = .equal, .text = "a" },
- .{ .operation = .delete, .text = "b" },
- .{ .operation = .insert, .text = "c" },
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- .{ .operation = .equal, .text = "a" },
- .{ .operation = .delete, .text = "b" },
- .{ .operation = .insert, .text = "c" },
- }), diffs.items); // No change case
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- .{ .operation = .equal, .text = "a" },
- .{ .operation = .equal, .text = "b" },
- .{ .operation = .equal, .text = "c" },
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- .{ .operation = .equal, .text = "abc" },
- }), diffs.items); // Merge equalities
+test diffPrettyFormat {
+    const gpa = std.testing.allocator;
+    const dmp = DiffMatchPatch{};
+    const old_text = "A thing of beauty is a joy forever";
+    const new_text = "Singular beauty is enjoyed forever";
+    var diff_list = try dmp.diff(gpa, old_text, new_text, false);
+    defer deinitDiffList(gpa, &diff_list);
+    try diffCleanupSemantic(gpa, &diff_list);
+    // Distinct markers per operation make each span visible in the output.
+    const markers = DiffDecorations{
+        .delete_start = "<+>",
+        .delete_end = "+>",
+        .insert_start = "<->",
+        .insert_end = "->",
+        .equals_start = "<=>",
+        .equals_end = "=>",
+    };
+    const formatted = try diffPrettyFormat(gpa, diff_list, markers);
+    defer gpa.free(formatted);
+    const expected =
+        "<+>A thing of+><->Singular-><=> beauty is =><+>a +><->en-><=>joy=><->ed-><=> forever=>";
+    try testing.expectEqualStrings(expected, formatted);
+}
- diffs.items.len = 0;
+/// Checks that every entry of `left` has an equal value under the same key
+/// in `right`. Keys that exist only in `right` are not examined.
+fn testMapSubsetEquality(left: anytype, right: anytype) !void {
+    var entries = left.iterator();
+    while (entries.next()) |kv| {
+        try testing.expectEqual(kv.value_ptr.*, right.get(kv.key_ptr.*));
+    }
+}
+test "matchAlphabet" {
+    const gpa = testing.allocator;
+    var expected = std.AutoHashMap(u8, usize).init(gpa);
+    defer expected.deinit();
+    // Pattern with three distinct characters.
+    try expected.put('a', 4);
+    try expected.put('b', 2);
+    try expected.put('c', 1);
+    var unique_chars = try matchAlphabet(gpa, "abc");
+    defer unique_chars.deinit();
+    try testMapSubsetEquality(expected, unique_chars);
+    // Pattern with repeated characters; reuse the expectation map.
+    expected.clearRetainingCapacity();
+    try expected.put('a', 37);
+    try expected.put('b', 18);
+    try expected.put('c', 8);
+    var repeated_chars = try matchAlphabet(gpa, "abcaba");
+    defer repeated_chars.deinit();
+    try testMapSubsetEquality(expected, repeated_chars);
+}
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- .{ .operation = .delete, .text = "a" },
- .{ .operation = .delete, .text = "b" },
- .{ .operation = .delete, .text = "c" },
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- .{ .operation = .delete, .text = "abc" },
- }), diffs.items); // Merge deletions
+/// Runs `dmp.matchBitap` over `params.text` / `params.pattern` from
+/// `params.loc` and compares the outcome with `params.expect`.
+fn testMatchBitap(
+    allocator: Allocator,
+    dmp: DiffMatchPatch,
+    params: struct {
+        text: []const u8,
+        pattern: []const u8,
+        loc: usize,
+        expect: ?usize,
+    },
+) !void {
+    try testing.expectEqual(
+        params.expect,
+        try dmp.matchBitap(allocator, params.text, params.pattern, params.loc),
+    );
+}
- diffs.items.len = 0;
+test matchBitap {
+    // Each row carries the matcher settings in force for that call plus the
+    // arguments handed to testMatchBitap.
+    const Case = struct {
+        threshold: f64 = 0.5,
+        distance: u32 = 500,
+        text: []const u8,
+        pattern: []const u8,
+        loc: usize,
+        expect: ?usize,
+    };
+    const alphabet = "abcdefghijklmnopqrstuvwxyz";
+    const cases = [_]Case{
+        // Exact match #1.
+        .{ .text = "abcdefghijk", .pattern = "fgh", .loc = 5, .expect = 5 },
+        // Exact match #2.
+        .{ .text = "abcdefghijk", .pattern = "fgh", .loc = 0, .expect = 5 },
+        // Fuzzy match #1.
+        .{ .text = "abcdefghijk", .pattern = "efxhi", .loc = 0, .expect = 4 },
+        // Fuzzy match #2.
+        .{ .text = "abcdefghijk", .pattern = "cdefxyhijk", .loc = 5, .expect = 2 },
+        // Fuzzy match #3.
+        .{ .text = "abcdefghijk", .pattern = "bxy", .loc = 1, .expect = null },
+        // Overflow.
+        .{ .text = "123456789xx0", .pattern = "3456789x0", .loc = 2, .expect = 2 },
+        // Before start match.
+        .{ .text = "abcdef", .pattern = "xxabc", .loc = 4, .expect = 0 },
+        // Beyond end match.
+        .{ .text = "abcdef", .pattern = "defyy", .loc = 4, .expect = 3 },
+        // Oversized pattern.
+        .{ .text = "abcdef", .pattern = "xabcdefy", .loc = 0, .expect = 0 },
+        // Threshold #1.
+        .{ .threshold = 0.4, .text = "abcdefghijk", .pattern = "efxyhi", .loc = 1, .expect = 4 },
+        // Threshold #2.
+        .{ .threshold = 0.3, .text = "abcdefghijk", .pattern = "efxyhi", .loc = 1, .expect = null },
+        // Threshold #3.
+        .{ .threshold = 0.0, .text = "abcdefghijk", .pattern = "bcdef", .loc = 1, .expect = 1 },
+        // Multiple select #1.
+        .{ .text = "abcdexyzabcde", .pattern = "abccde", .loc = 5, .expect = 8 },
+        // Distance test #1 (strict location).
+        .{ .distance = 10, .text = alphabet, .pattern = "abcdefg", .loc = 1, .expect = 0 },
+        // Distance test #2 (strict location).
+        .{ .distance = 10, .text = alphabet, .pattern = "abcdxxefg", .loc = 1, .expect = 0 },
+        // Distance test #3 (loose location).
+        .{ .distance = 1000, .text = alphabet, .pattern = "abcdefg", .loc = 24, .expect = 0 },
+    };
+    var dmp = DiffMatchPatch{};
+    // Unrolled at comptime so every call receives comptime-known params.
+    inline for (cases) |case| {
+        dmp.match_threshold = case.threshold;
+        dmp.match_distance = case.distance;
+        try testing.checkAllAllocationFailures(
+            testing.allocator,
+            testMatchBitap,
+            .{ dmp, .{
+                .text = case.text,
+                .pattern = case.pattern,
+                .loc = case.loc,
+                .expect = case.expect,
+            } },
+        );
+    }
+}
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- .{ .operation = .insert, .text = "a" },
- .{ .operation = .insert, .text = "b" },
- .{ .operation = .insert, .text = "c" },
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- .{ .operation = .insert, .text = "abc" },
- }), diffs.items); // Merge insertions
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- .{ .operation = .delete, .text = "a" },
- .{ .operation = .insert, .text = "b" },
- .{ .operation = .delete, .text = "c" },
- .{ .operation = .insert, .text = "d" },
- .{ .operation = .equal, .text = "e" },
- .{ .operation = .equal, .text = "f" },
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- .{ .operation = .delete, .text = "ac" },
- .{ .operation = .insert, .text = "bd" },
- .{ .operation = .equal, .text = "ef" },
- }), diffs.items); // Merge interweave
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- .{ .operation = .delete, .text = "a" },
- .{ .operation = .insert, .text = "abc" },
- .{ .operation = .delete, .text = "dc" },
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- .{ .operation = .equal, .text = "a" },
- .{ .operation = .delete, .text = "d" },
- .{ .operation = .insert, .text = "b" },
- .{ .operation = .equal, .text = "c" },
- }), diffs.items); // Prefix and suffix detection
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- .{ .operation = .equal, .text = "x" },
- .{ .operation = .delete, .text = "a" },
- .{ .operation = .insert, .text = "abc" },
- .{ .operation = .delete, .text = "dc" },
- .{ .operation = .equal, .text = "y" },
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- .{ .operation = .equal, .text = "xa" },
- .{ .operation = .delete, .text = "d" },
- .{ .operation = .insert, .text = "b" },
- .{ .operation = .equal, .text = "cy" },
- }), diffs.items); // Prefix and suffix detection with equalities
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- .{ .operation = .equal, .text = "a" },
- .{ .operation = .insert, .text = "ba" },
- .{ .operation = .equal, .text = "c" },
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- .{ .operation = .insert, .text = "ab" },
- .{ .operation = .equal, .text = "ac" },
- }), diffs.items); // Slide edit left
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- .{ .operation = .equal, .text = "c" },
- .{ .operation = .insert, .text = "ab" },
- .{ .operation = .equal, .text = "a" },
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- .{ .operation = .equal, .text = "ca" },
- .{ .operation = .insert, .text = "ba" },
- }), diffs.items); // Slide edit right
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- Diff.init(.equal, "a"),
- Diff.init(.delete, "b"),
- Diff.init(.equal, "c"),
- Diff.init(.delete, "ac"),
- Diff.init(.equal, "x"),
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- Diff.init(.delete, "abc"),
- Diff.init(.equal, "acx"),
- }), diffs.items); // Slide edit left recursive
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- Diff.init(.equal, "x"),
- Diff.init(.delete, "ca"),
- Diff.init(.equal, "c"),
- Diff.init(.delete, "b"),
- Diff.init(.equal, "a"),
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- Diff.init(.equal, "xca"),
- Diff.init(.delete, "cba"),
- }), diffs.items); // Slide edit right recursive
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- Diff.init(.delete, "b"),
- Diff.init(.insert, "ab"),
- Diff.init(.equal, "c"),
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- Diff.init(.insert, "a"),
- Diff.init(.equal, "bc"),
- }), diffs.items); // Empty merge
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &[_]Diff{
- Diff.init(.equal, ""),
- Diff.init(.insert, "a"),
- Diff.init(.equal, "b"),
- });
- try diffCleanupMerge(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{
- Diff.init(.insert, "a"),
- Diff.init(.equal, "b"),
- }), diffs.items); // Empty equality
+test matchMain {
+    const gpa = testing.allocator;
+    var dmp = DiffMatchPatch{};
+    dmp.match_threshold = 0.5;
+    dmp.match_distance = 100;
+    // Equality.
+    try testing.expectEqual(0, dmp.matchMain(gpa, "abcdefg", "abcdefg", 1000));
+    // Null text.
+    try testing.expectEqual(null, dmp.matchMain(gpa, "", "abcdefg", 1));
+    // Null pattern.
+    try testing.expectEqual(3, dmp.matchMain(gpa, "abcdefg", "", 3));
+    // Exact match.
+    try testing.expectEqual(3, dmp.matchMain(gpa, "abcdefg", "de", 3));
+    // Beyond end match.
+    try testing.expectEqual(3, dmp.matchMain(gpa, "abcdef", "defy", 4));
+
+    // Oversized pattern.
+    try testing.expectEqual(0, dmp.matchMain(gpa, "abcdef", "abcdefy", 0));
+    // Complex match, run with a looser threshold.
+    dmp.match_threshold = 0.7;
+    const haystack = "I am the very model of a modern major general.";
+    try testing.expectEqual(4, dmp.matchMain(gpa, haystack, " that berry ", 5));
+    dmp.match_threshold = 0.5;
}
-test diffCleanupSemanticLossless {
- var arena = std.heap.ArenaAllocator.init(testing.allocator);
- defer arena.deinit();
+/// Builds a `Patch` by hand and checks the text produced by `asText`.
+fn testPatchToText(allocator: Allocator) !void {
+    const expected_text = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n";
+    var patch = Patch{
+        .start1 = 20,
+        .start2 = 21,
+        .length1 = 18,
+        .length2 = 17,
+        .diffs = try sliceToDiffList(allocator, &.{
+            .{ .operation = .equal, .text = "jump" },
+            .{ .operation = .delete, .text = "s" },
+            .{ .operation = .insert, .text = "ed" },
+            .{ .operation = .equal, .text = " over " },
+            .{ .operation = .delete, .text = "the" },
+            .{ .operation = .insert, .text = "a" },
+            .{ .operation = .equal, .text = "\nlaz" },
+        }),
+    };
+    defer patch.deinit(allocator);
+    const rendered = try patch.asText(allocator);
+    defer allocator.free(rendered);
+    try testing.expectEqualStrings(expected_text, rendered);
+}
- var diffs = DiffList{};
- try diffCleanupSemanticLossless(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[0]Diff{}), diffs.items); // Null case
+test "patch to text" {
+    // Re-run testPatchToText with each allocation made to fail in turn.
+    const gpa = testing.allocator;
+    try testing.checkAllAllocationFailures(gpa, testPatchToText, .{});
+}
- diffs.items.len = 0;
+/// Parses `patch_in` with `patchFromText`, renders the result with
+/// `patchToText`, and requires the rendered text to equal the input.
+fn testPatchRoundTrip(allocator: Allocator, patch_in: []const u8) !void {
+    var parsed = try patchFromText(allocator, patch_in);
+    defer deinitPatchList(allocator, &parsed);
+    const rendered = try patchToText(allocator, parsed);
+    defer allocator.free(rendered);
+    try testing.expectEqualStrings(patch_in, rendered);
+}
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.equal, "AAA\r\n\r\nBBB"),
- Diff.init(.insert, "\r\nDDD\r\n\r\nBBB"),
- Diff.init(.equal, "\r\nEEE"),
- });
- try diffCleanupSemanticLossless(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &.{
- Diff.init(.equal, "AAA\r\n\r\n"),
- Diff.init(.insert, "BBB\r\nDDD\r\n\r\n"),
- Diff.init(.equal, "BBB\r\nEEE"),
- }), diffs.items);
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.equal, "AAA\r\nBBB"),
- Diff.init(.insert, " DDD\r\nBBB"),
- Diff.init(.equal, " EEE"),
- });
- try diffCleanupSemanticLossless(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &.{
- Diff.init(.equal, "AAA\r\n"),
- Diff.init(.insert, "BBB DDD\r\n"),
- Diff.init(.equal, "BBB EEE"),
- }), diffs.items);
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.equal, "The c"),
- Diff.init(.insert, "ow and the c"),
- Diff.init(.equal, "at."),
- });
- try diffCleanupSemanticLossless(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &.{
- Diff.init(.equal, "The "),
- Diff.init(.insert, "cow and the "),
- Diff.init(.equal, "cat."),
- }), diffs.items);
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.equal, "The-c"),
- Diff.init(.insert, "ow-and-the-c"),
- Diff.init(.equal, "at."),
- });
- try diffCleanupSemanticLossless(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &.{
- Diff.init(.equal, "The-"),
- Diff.init(.insert, "cow-and-the-"),
- Diff.init(.equal, "cat."),
- }), diffs.items);
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.equal, "a"),
- Diff.init(.delete, "a"),
- Diff.init(.equal, "ax"),
- });
- try diffCleanupSemanticLossless(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &.{
- Diff.init(.delete, "a"),
- Diff.init(.equal, "aax"),
- }), diffs.items);
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.equal, "xa"),
- Diff.init(.delete, "a"),
- Diff.init(.equal, "a"),
- });
- try diffCleanupSemanticLossless(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &.{
- Diff.init(.equal, "xaa"),
- Diff.init(.delete, "a"),
- }), diffs.items);
-
- diffs.items.len = 0;
-
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.equal, "The xxx. The "),
- Diff.init(.insert, "zzz. The "),
- Diff.init(.equal, "yyy."),
- });
- try diffCleanupSemanticLossless(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &.{
- Diff.init(.equal, "The xxx."),
- Diff.init(.insert, " The zzz."),
- Diff.init(.equal, " The yyy."),
- }), diffs.items);
+test "workshop" {
+    // Two consecutive insert-only patches, round-tripped without
+    // allocation-failure injection.
+    const two_patches = "@@ -0,0 +1,3 @@\n+abc\n@@ -0,0 +1,3 @@\n+abc\n";
+    try testPatchRoundTrip(testing.allocator, two_patches);
}
-fn rebuildtexts(allocator: std.mem.Allocator, diffs: DiffList) ![2][]const u8 {
- var text = [2]std.ArrayList(u8){
- std.ArrayList(u8).init(allocator),
- std.ArrayList(u8).init(allocator),
- };
+test "patch from text" {
+    const gpa = testing.allocator;
+    // Empty input yields an empty patch list.
+    var no_patches = try patchFromText(gpa, "");
+    defer deinitPatchList(gpa, &no_patches);
+    try testing.expectEqual(0, no_patches.items.len);
+    // Each text is round-tripped by testPatchRoundTrip under
+    // allocation-failure injection.
+    const round_trips = [_][]const u8{
+        "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n",
+        "@@ -1 +1 @@\n-a\n+b\n",
+        "@@ -1,3 +0,0 @@\n-abc\n",
+        "@@ -0,0 +1,3 @@\n+abc\n",
+        "@@ -0,0 +1,3 @@\n+abc\n@@ -0,0 +1,3 @@\n+abc\n",
+    };
+    for (round_trips) |patch_text| {
+        try testing.checkAllAllocationFailures(gpa, testPatchRoundTrip, .{patch_text});
+    }
+}
- for (diffs.items) |myDiff| {
- if (myDiff.operation != .insert) {
- try text[0].appendSlice(myDiff.text);
- }
- if (myDiff.operation != .delete) {
- try text[1].appendSlice(myDiff.text);
+/// Asserts that `patchFromText` rejects `patch` with `error.BadPatchString`.
+/// `error.OutOfMemory` is passed through unchanged so that
+/// `checkAllAllocationFailures` can drive this helper. A parse that succeeds
+/// is reported as `error.TestExpectedError` instead of passing silently.
+fn testBadPatchString(allocator: Allocator, patch: []const u8) !void {
+    var patches = patchFromText(allocator, patch) catch |e| {
+        switch (e) {
+            error.OutOfMemory => return error.OutOfMemory,
+            else => {
+                return testing.expectEqual(error.BadPatchString, e);
+            },
}
- }
- return .{
- try text[0].toOwnedSlice(),
- try text[1].toOwnedSlice(),
};
+    // The malformed input was accepted: free the parsed patches so the
+    // failure is reported as a missing error rather than as a leak.
+    deinitPatchList(allocator, &patches);
+    return error.TestExpectedError;
}
-test diffBisect {
- var arena = std.heap.ArenaAllocator.init(talloc);
- defer arena.deinit();
+test "error.BadPatchString" {
+    // Malformed patch texts, each handed to testBadPatchString under
+    // allocation-failure injection, in this order.
+    const malformed = [_][]const u8{
+        "Bad\nPatch\nString\n",
+        "@@ foo",
+        "@@ +no",
+        "@@ -no",
+        "@@ -1,no",
+        "@@ !1,no",
+        "@@ -1,3 +???",
+        "@@ -1,no",
+        "@@ -1,3 +4,5 ##",
+        "@@ -1,10??",
+        "@@ -1,10 ?",
+        "@@@ -1,3 +4,5 @!",
+        "@@@ -1,3 +4,5 +add\n@!",
+        "@@ -0,0 +1,3 @@\n+abc\n@@ -0,0 +1,3 @@\n+abc\n!!!",
+    };
+    for (malformed) |patch_text| {
+        try testing.checkAllAllocationFailures(
+            testing.allocator,
+            testBadPatchString,
+            .{patch_text},
+        );
+    }
+}
- // Normal.
- const a = "cat";
- const b = "map";
- // Since the resulting diff hasn't been normalized, it would be ok if
- // the insertion and deletion pairs are swapped.
- // If the order changes, tweak this test as required.
- var diffs = DiffList{};
- defer diffs.deinit(arena.allocator());
- var this = default;
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.delete, "c"),
- Diff.init(.insert, "m"),
- Diff.init(.equal, "a"),
- Diff.init(.delete, "t"),
- Diff.init(.insert, "p"),
- });
- // Travis TODO not sure if maxInt(u64) is correct for DateTime.MaxValue
- try testing.expectEqualDeep(diffs, try this.diffBisect(arena.allocator(), a, b, std.math.maxInt(u64))); // Normal.
-
- // Timeout.
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.delete, "cat"),
- Diff.init(.insert, "map"),
- });
- // Travis TODO not sure if 0 is correct for DateTime.MinValue
- try testing.expectEqualDeep(diffs, try this.diffBisect(arena.allocator(), a, b, 0)); // Timeout.
+/// Parses `patch_text` with `patchFromHeader`, checks that it renders back
+/// to the same text, applies `dmp.patchAddContext` against `text`, and
+/// checks that the patch then renders as `expect`.
+fn testPatchAddContext(
+    allocator: Allocator,
+    dmp: DiffMatchPatch,
+    patch_text: []const u8,
+    text: []const u8,
+    expect: []const u8,
+) !void {
+    _, var parsed = try patchFromHeader(allocator, patch_text);
+    defer parsed.deinit(allocator);
+    const rendered_before = try parsed.asText(allocator);
+    defer allocator.free(rendered_before);
+    try testing.expectEqualStrings(patch_text, rendered_before);
+    try dmp.patchAddContext(allocator, &parsed, text);
+    const rendered_after = try parsed.asText(allocator);
+    defer allocator.free(rendered_after);
+    try testing.expectEqualStrings(expect, rendered_after);
}
-const talloc = testing.allocator;
-test diff {
- var arena = std.heap.ArenaAllocator.init(talloc);
- defer arena.deinit();
+test "testPatchAddContext" {
+    // Each row: the patch as parsed, the text it applies to, and the patch
+    // text expected after context has been added.
+    const Case = struct {
+        patch_text: []const u8,
+        text: []const u8,
+        expect: []const u8,
+    };
+    const cases = [_]Case{
+        // Simple case.
+        .{
+            .patch_text = "@@ -21,4 +21,10 @@\n-jump\n+somersault\n",
+            .text = "The quick brown fox jumps over the lazy dog.",
+            .expect = "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n",
+        },
+        // Not enough trailing context.
+        .{
+            .patch_text = "@@ -21,4 +21,10 @@\n-jump\n+somersault\n",
+            .text = "The quick brown fox jumps.",
+            .expect = "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n",
+        },
+        // Not enough leading context.
+        .{
+            .patch_text = "@@ -3 +3,2 @@\n-e\n+at\n",
+            .text = "The quick brown fox jumps.",
+            .expect = "@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n",
+        },
+        // Ambiguity.
+        .{
+            .patch_text = "@@ -3 +3,2 @@\n-e\n+at\n",
+            .text = "The quick brown fox jumps. The quick brown fox crashes.",
+            .expect = "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n",
+        },
+        // Unicode
+        .{
+            .patch_text = "@@ -9,6 +10,3 @@\n-remove\n+add\n",
+            .text = "⊗⊘⊙remove⊙⊘⊗",
+            .expect =
+            \\@@ -3,18 +4,15 @@
+            \\ %E2%8A%98%E2%8A%99
+            \\-remove
+            \\+add
+            \\ %E2%8A%99%E2%8A%98
+            \\
+            ,
+        },
+    };
+    const allocator = testing.allocator;
+    var dmp = DiffMatchPatch{};
+    dmp.patch_margin = 4;
+    for (cases) |case| {
+        try testing.checkAllAllocationFailures(
+            allocator,
+            testPatchAddContext,
+            .{ dmp, case.patch_text, case.text, case.expect },
+        );
+    }
+}
- // Perform a trivial diff.
- var diffs = DiffList{};
- defer diffs.deinit(arena.allocator());
- var this = DiffMatchPatch{};
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "", "", false)).items); // diff: Null case.
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{Diff.init(.equal, "abc")});
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "abc", "abc", false)).items); // diff: Equality.
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{ Diff.init(.equal, "ab"), Diff.init(.insert, "123"), Diff.init(.equal, "c") });
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "abc", "ab123c", false)).items); // diff: Simple insertion.
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{ Diff.init(.equal, "a"), Diff.init(.delete, "123"), Diff.init(.equal, "bc") });
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "a123bc", "abc", false)).items); // diff: Simple deletion.
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{ Diff.init(.equal, "a"), Diff.init(.insert, "123"), Diff.init(.equal, "b"), Diff.init(.insert, "456"), Diff.init(.equal, "c") });
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "abc", "a123b456c", false)).items); // diff: Two insertions.
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{ Diff.init(.equal, "a"), Diff.init(.delete, "123"), Diff.init(.equal, "b"), Diff.init(.delete, "456"), Diff.init(.equal, "c") });
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "a123b456c", "abc", false)).items); // diff: Two deletions.
-
- // Perform a real diff.
- // Switch off the timeout.
- this.diff_timeout = 0;
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{ Diff.init(.delete, "a"), Diff.init(.insert, "b") });
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "a", "b", false)).items); // diff: Simple case #1.
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{ Diff.init(.delete, "Apple"), Diff.init(.insert, "Banana"), Diff.init(.equal, "s are a"), Diff.init(.insert, "lso"), Diff.init(.equal, " fruit.") });
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "Apples are a fruit.", "Bananas are also fruit.", false)).items); // diff: Simple case #2.
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{ Diff.init(.delete, "a"), Diff.init(.insert, "\u{0680}"), Diff.init(.equal, "x"), Diff.init(.delete, "\t"), Diff.init(.insert, "\x00") });
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "ax\t", "\u{0680}x\x00", false)).items); // diff: Simple case #3.
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{ Diff.init(.delete, "1"), Diff.init(.equal, "a"), Diff.init(.delete, "y"), Diff.init(.equal, "b"), Diff.init(.delete, "2"), Diff.init(.insert, "xab") });
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "1ayb2", "abxab", false)).items); // diff: Overlap #1.
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{ Diff.init(.insert, "xaxcx"), Diff.init(.equal, "abc"), Diff.init(.delete, "y") });
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "abcy", "xaxcxabc", false)).items); // diff: Overlap #2.
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{ Diff.init(.delete, "ABCD"), Diff.init(.equal, "a"), Diff.init(.delete, "="), Diff.init(.insert, "-"), Diff.init(.equal, "bcd"), Diff.init(.delete, "="), Diff.init(.insert, "-"), Diff.init(.equal, "efghijklmnopqrs"), Diff.init(.delete, "EFGHIJKLMNOefg") });
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false)).items); // diff: Overlap #3.
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{ Diff.init(.insert, " "), Diff.init(.equal, "a"), Diff.init(.insert, "nd"), Diff.init(.equal, " [[Pennsylvania]]"), Diff.init(.delete, " and [[New") });
- try testing.expectEqualDeep(diffs.items, (try this.diff(arena.allocator(), "a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false)).items); // diff: Large equality.
-
- this.diff_timeout = 100; // 100ms
- // Increase the text lengths by 1024 times to ensure a timeout.
+fn testMakePatch(allocator: Allocator) !void {
+ var dmp = DiffMatchPatch{};
+ dmp.match_max_bits = 32; // Need this for compat with translated tests
+ var null_patch = try dmp.diffAndMakePatch(allocator, "", "");
+ defer deinitPatchList(allocator, &null_patch);
+ const null_patch_text = try patchToText(allocator, null_patch);
+ defer allocator.free(null_patch_text);
+ try testing.expectEqualStrings("", null_patch_text);
+ const text1 = "The quick brown fox jumps over the lazy dog.";
+ const text2 = "That quick brown fox jumped over a lazy dog.";
+ { // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context.
+ const expectedPatch = "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n";
+ var patches = try dmp.diffAndMakePatch(allocator, text2, text1);
+ defer deinitPatchList(allocator, &patches);
+ const patch_text = try patchToText(allocator, patches);
+ defer allocator.free(patch_text);
+ try testing.expectEqualStrings(expectedPatch, patch_text);
+ }
{
- const a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n" ** 1024;
- const b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n" ** 1024;
- const start_time = std.time.milliTimestamp();
- _ = try this.diff(arena.allocator(), a, b, false); // Travis - TODO not sure what the third arg should be
- const end_time = std.time.milliTimestamp();
- // Test that we took at least the timeout period.
- try testing.expect(this.diff_timeout <= end_time - start_time); // diff: Timeout min.
- // Test that we didn't take forever (be forgiving).
- // Theoretically this test could fail very occasionally if the
- // OS task swaps or locks up for a second at the wrong moment.
- try testing.expect((this.diff_timeout) * 10000 * 2 > end_time - start_time); // diff: Timeout max.
- this.diff_timeout = 0;
+ const expectedPatch = "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n";
+ var patches = try dmp.diffAndMakePatch(allocator, text1, text2);
+ defer deinitPatchList(allocator, &patches);
+ const patch_text = try patchToText(allocator, patches);
+ defer allocator.free(patch_text);
+ try testing.expectEqualStrings(expectedPatch, patch_text);
+ var diffs = try dmp.diff(allocator, text1, text2, false); // Same inputs as above, but built in two steps: diff, then makePatch.
+ defer deinitDiffList(allocator, &diffs);
+ var patches2 = try dmp.makePatch(allocator, text1, diffs);
+ defer deinitPatchList(allocator, &patches2);
+ const patch_text_2 = try patchToText(allocator, patches2); // Render the makePatch result (patches2), not the earlier diffAndMakePatch list.
+ defer allocator.free(patch_text_2);
+ try testing.expectEqualStrings(expectedPatch, patch_text_2);
}
+ const expectedPatch2 = "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n";
{
- // Test the linemode speedup.
- // Must be long to pass the 100 char cutoff.
- const a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
- const b = "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n";
- try testing.expectEqualDeep(try this.diff(arena.allocator(), a, b, true), try this.diff(arena.allocator(), a, b, false)); // diff: Simple line-mode.
+ var patches = try dmp.diffAndMakePatch(
+ allocator,
+ "`1234567890-=[]\\;',./",
+ "~!@#$%^&*()_+{}|:\"<>?",
+ );
+ defer deinitPatchList(allocator, &patches);
+ const patch_text = try patchToText(allocator, patches);
+ defer allocator.free(patch_text);
+ try testing.expectEqualStrings(expectedPatch2, patch_text);
}
{
- const a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890";
- const b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij";
- try testing.expectEqualDeep(try this.diff(arena.allocator(), a, b, true), try this.diff(arena.allocator(), a, b, false)); // diff: Single line-mode.
+ var diffs = try sliceToDiffList(allocator, &.{
+ .{ .operation = .delete, .text = "`1234567890-=[]\\;',./" },
+ .{ .operation = .insert, .text = "~!@#$%^&*()_+{}|:\"<>?" },
+ });
+ defer deinitDiffList(allocator, &diffs);
+ var patches = try dmp.makePatchFromDiffs(allocator, diffs);
+ defer deinitPatchList(allocator, &patches);
+ for (patches.items[0].diffs.items, 0..) |a_diff, idx| {
+ try testing.expect(a_diff.eql(diffs.items[idx]));
+ }
+ }
+ {
+ const text1a = "abcdef" ** 100;
+ const text2a = text1a ++ "123";
+ const expected_patch = "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n";
+ var patches = try dmp.diffAndMakePatch(allocator, text1a, text2a);
+ defer deinitPatchList(allocator, &patches);
+ const patch_text = try patchToText(allocator, patches);
+ defer allocator.free(patch_text);
+ try testing.expectEqualStrings(expected_patch, patch_text);
}
+}
- const a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n";
- const b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n";
- const texts_linemode = try rebuildtexts(arena.allocator(), try this.diff(arena.allocator(), a, b, true));
- defer {
- arena.allocator().free(texts_linemode[0]);
- arena.allocator().free(texts_linemode[1]);
+test makePatch {
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testMakePatch,
+ .{},
+ );
+}
+
+fn testPatchSplitMax(allocator: Allocator) !void {
+ var dmp = DiffMatchPatch{};
+ // TODO get some tests which cover the max split we actually use: bitsize(usize)
+ dmp.match_max_bits = 32;
+ {
+ var patches = try dmp.diffAndMakePatch(
+ allocator,
+ "abcdefghijklmnopqrstuvwxyz01234567890",
+ "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0",
+ );
+ defer deinitPatchList(allocator, &patches);
+ const expected_patch = "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n";
+ try dmp.patchSplitMax(allocator, &patches);
+ const patch_text = try patchToText(allocator, patches);
+ defer allocator.free(patch_text);
+ try testing.expectEqualStrings(expected_patch, patch_text);
}
- const texts_textmode = try rebuildtexts(arena.allocator(), try this.diff(arena.allocator(), a, b, false));
- defer {
- arena.allocator().free(texts_textmode[0]);
- arena.allocator().free(texts_textmode[1]);
+ {
+ var patches = try dmp.diffAndMakePatch(
+ allocator,
+ "abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz",
+ "abcdefuvwxyz",
+ );
+ defer deinitPatchList(allocator, &patches);
+ const text_before = try patchToText(allocator, patches);
+ defer allocator.free(text_before);
+ try dmp.patchSplitMax(allocator, &patches);
+ const text_after = try patchToText(allocator, patches);
+ defer allocator.free(text_after);
+ try testing.expectEqualStrings(text_before, text_after);
+ }
+ {
+ var patches = try dmp.diffAndMakePatch(
+ allocator,
+ "1234567890123456789012345678901234567890123456789012345678901234567890",
+ "abc",
+ );
+ defer deinitPatchList(allocator, &patches);
+ const pre_patch_text = try patchToText(allocator, patches);
+ defer allocator.free(pre_patch_text);
+ try dmp.patchSplitMax(allocator, &patches);
+ const patch_text = try patchToText(allocator, patches);
+ defer allocator.free(patch_text);
+ try testing.expectEqualStrings(
+ "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n",
+ patch_text,
+ );
+ }
+ {
+ var patches = try dmp.diffAndMakePatch(
+ allocator,
+ "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1",
+ "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1",
+ );
+ defer deinitPatchList(allocator, &patches);
+ try dmp.patchSplitMax(allocator, &patches);
+ const patch_text = try patchToText(allocator, patches);
+ defer allocator.free(patch_text);
+ try testing.expectEqualStrings(
+ "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n",
+ patch_text,
+ );
}
- try testing.expectEqualDeep(texts_textmode, texts_linemode); // diff: Overlap line-mode.
+}
- // Test null inputs -- not needed because nulls can't be passed in C#.
+test patchSplitMax {
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchSplitMax,
+ .{},
+ );
+ try testPatchSplitMax(testing.allocator);
}
-test diffCleanupSemantic {
- var arena = std.heap.ArenaAllocator.init(talloc);
- defer arena.deinit();
+fn testPatchAddPadding(
+ allocator: Allocator,
+ before: []const u8,
+ after: []const u8,
+ expect_before: []const u8,
+ expect_after: []const u8,
+) !void {
+ const dmp = DiffMatchPatch{};
+ var patches = try dmp.diffAndMakePatch(allocator, before, after);
+ defer deinitPatchList(allocator, &patches);
+ const patch_text_before = try patchToText(allocator, patches);
+ defer allocator.free(patch_text_before);
+ try testing.expectEqualStrings(expect_before, patch_text_before);
+ const codes = try dmp.patchAddPadding(allocator, &patches);
+ allocator.free(codes);
+ const patch_text_after = try patchToText(allocator, patches);
+ defer allocator.free(patch_text_after);
+ try testing.expectEqualStrings(expect_after, patch_text_after);
+}
+test patchAddPadding {
+ // Both edges full.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchAddPadding,
+ .{
+ "",
+ "test",
+ "@@ -0,0 +1,4 @@\n+test\n",
+ "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n",
+ },
+ );
+ // Both edges partial.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchAddPadding,
+ .{
+ "XY",
+ "XtestY",
+ "@@ -1,2 +1,6 @@\n X\n+test\n Y\n",
+ "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n",
+ },
+ );
+ // Both edges none.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchAddPadding,
+ .{
+ "XXXXYYYY",
+ "XXXXtestYYYY",
+ "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n",
+ "@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n",
+ },
+ );
+}
+
+fn testPatchApply(
+ allocator: Allocator,
+ dmp: DiffMatchPatch,
+ before: []const u8,
+ after: []const u8,
+ apply_to: []const u8,
+ expect: []const u8,
+ all_applied: bool,
+) !void {
+ var patches = try dmp.diffAndMakePatch(allocator, before, after);
+ defer deinitPatchList(allocator, &patches);
+ const result, const success = try dmp.patchApply(allocator, &patches, apply_to);
+ defer allocator.free(result);
+ try testing.expectEqual(all_applied, success);
+ try testing.expectEqualStrings(expect, result);
+}
- // Cleanup semantically trivial equalities.
+test "testPatchApply" {
+ // These tests differ from the source, because we just return one
+ // bool for if all patches were successfully applied or not.
+ var dmp = DiffMatchPatch{};
+ dmp.match_distance = 1000;
+ dmp.match_threshold = 0.5;
+ dmp.patch_delete_threshold = 0.5;
+ dmp.match_max_bits = 32; // Necessary to get the correct legacy behavior
// Null case.
- var diffs = DiffList{};
- defer diffs.deinit(arena.allocator());
- // var this = default;
- try diffCleanupSemantic(arena.allocator(), &diffs);
- try testing.expectEqual(@as(usize, 0), diffs.items.len); // Null case
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.delete, "ab"),
- Diff.init(.insert, "cd"),
- Diff.init(.equal, "12"),
- Diff.init(.delete, "e"),
- });
- try diffCleanupSemantic(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{ // No elimination #1
- Diff.init(.delete, "ab"),
- Diff.init(.insert, "cd"),
- Diff.init(.equal, "12"),
- Diff.init(.delete, "e"),
- }), diffs.items);
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.delete, "abc"),
- Diff.init(.insert, "ABC"),
- Diff.init(.equal, "1234"),
- Diff.init(.delete, "wxyz"),
- });
- try diffCleanupSemantic(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{ // No elimination #2
- Diff.init(.delete, "abc"),
- Diff.init(.insert, "ABC"),
- Diff.init(.equal, "1234"),
- Diff.init(.delete, "wxyz"),
- }), diffs.items);
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.delete, "a"),
- Diff.init(.equal, "b"),
- Diff.init(.delete, "c"),
- });
- try diffCleanupSemantic(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{ // Simple elimination
- Diff.init(.delete, "abc"),
- Diff.init(.insert, "b"),
- }), diffs.items);
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.delete, "ab"),
- Diff.init(.equal, "cd"),
- Diff.init(.delete, "e"),
- Diff.init(.equal, "f"),
- Diff.init(.insert, "g"),
- });
- try diffCleanupSemantic(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{ // Backpass elimination
- Diff.init(.delete, "abcdef"),
- Diff.init(.insert, "cdfg"),
- }), diffs.items);
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.insert, "1"),
- Diff.init(.equal, "A"),
- Diff.init(.delete, "B"),
- Diff.init(.insert, "2"),
- Diff.init(.equal, "_"),
- Diff.init(.insert, "1"),
- Diff.init(.equal, "A"),
- Diff.init(.delete, "B"),
- Diff.init(.insert, "2"),
- });
- try diffCleanupSemantic(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{ // Multiple elimination
- Diff.init(.delete, "AB_AB"),
- Diff.init(.insert, "1A2_1A2"),
- }), diffs.items);
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.equal, "The c"),
- Diff.init(.delete, "ow and the c"),
- Diff.init(.equal, "at."),
- });
- try diffCleanupSemantic(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{ // Word boundaries
- Diff.init(.equal, "The "),
- Diff.init(.delete, "cow and the "),
- Diff.init(.equal, "cat."),
- }), diffs.items);
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.delete, "abcxx"),
- Diff.init(.insert, "xxdef"),
- });
- try diffCleanupSemantic(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{ // No overlap elimination
- Diff.init(.delete, "abcxx"),
- Diff.init(.insert, "xxdef"),
- }), diffs.items);
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.delete, "abcxxx"),
- Diff.init(.insert, "xxxdef"),
- });
- try diffCleanupSemantic(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{ // Overlap elimination
- Diff.init(.delete, "abc"),
- Diff.init(.equal, "xxx"),
- Diff.init(.insert, "def"),
- }), diffs.items);
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.delete, "xxxabc"),
- Diff.init(.insert, "defxxx"),
- });
- try diffCleanupSemantic(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{ // Reverse overlap elimination
- Diff.init(.insert, "def"),
- Diff.init(.equal, "xxx"),
- Diff.init(.delete, "abc"),
- }), diffs.items);
-
- diffs.items.len = 0;
- try diffs.appendSlice(arena.allocator(), &.{
- Diff.init(.delete, "abcd1212"),
- Diff.init(.insert, "1212efghi"),
- Diff.init(.equal, "----"),
- Diff.init(.delete, "A3"),
- Diff.init(.insert, "3BC"),
- });
- try diffCleanupSemantic(arena.allocator(), &diffs);
- try testing.expectEqualDeep(@as([]const Diff, &[_]Diff{ // Two overlap eliminations
- Diff.init(.delete, "abcd"),
- Diff.init(.equal, "1212"),
- Diff.init(.insert, "efghi"),
- Diff.init(.equal, "----"),
- Diff.init(.delete, "A"),
- Diff.init(.equal, "3"),
- Diff.init(.insert, "BC"),
- }), diffs.items);
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchApply,
+ .{
+ dmp,
+ "",
+ "",
+ "Hello World",
+ "Hello World",
+ true,
+ },
+ );
+ // Exact match.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchApply,
+ .{
+ dmp,
+ "The quick brown fox jumps over the lazy dog.",
+ "That quick brown fox jumped over a lazy dog.",
+ "The quick brown fox jumps over the lazy dog.",
+ "That quick brown fox jumped over a lazy dog.",
+ true,
+ },
+ );
+ // Partial match.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchApply,
+ .{
+ dmp,
+ "The quick brown fox jumps over the lazy dog.",
+ "That quick brown fox jumped over a lazy dog.",
+ "The quick red rabbit jumps over the tired tiger.",
+ "That quick red rabbit jumped over a tired tiger.",
+ true,
+ },
+ );
+ // Failed match.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchApply,
+ .{
+ dmp,
+ "The quick brown fox jumps over the lazy dog.",
+ "That quick brown fox jumped over a lazy dog.",
+ "I am the very model of a modern major general.",
+ "I am the very model of a modern major general.",
+ false,
+ },
+ );
+ // Big delete, small change.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchApply,
+ .{
+ dmp,
+ "x1234567890123456789012345678901234567890123456789012345678901234567890y",
+ "xabcy",
+ "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y",
+ "xabcy",
+ true,
+ },
+ );
+ // Big delete, big change 1.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchApply,
+ .{
+ dmp,
+ "x1234567890123456789012345678901234567890123456789012345678901234567890y",
+ "xabcy",
+ "x12345678901234567890---------------++++++++++---------------12345678901234567890y",
+ "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y",
+ false,
+ },
+ );
+ dmp.patch_delete_threshold = 0.6;
+ // Big delete, big change 2.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchApply,
+ .{
+ dmp,
+ "x1234567890123456789012345678901234567890123456789012345678901234567890y",
+ "xabcy",
+ "x12345678901234567890---------------++++++++++---------------12345678901234567890y",
+ "xabcy",
+ true,
+ },
+ );
+ dmp.patch_delete_threshold = 0.6;
+ dmp.match_threshold = 0.0;
+ dmp.match_distance = 0;
+ // Compensate for failed patch.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchApply,
+ .{
+ dmp,
+ "abcdefghijklmnopqrstuvwxyz--------------------1234567890",
+ "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890",
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890",
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890",
+ false,
+ },
+ );
+ dmp.match_threshold = 0.5;
+ dmp.match_distance = 1000;
+ // Edge exact match.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchApply,
+ .{
+ dmp,
+ "",
+ "test",
+ "",
+ "test",
+ true,
+ },
+ );
+ // Near edge exact match.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchApply,
+ .{
+ dmp,
+ "XY",
+ "XtestY",
+ "XY",
+ "XtestY",
+ true,
+ },
+ );
+ // Edge partial match.
+ try testing.checkAllAllocationFailures(
+ testing.allocator,
+ testPatchApply,
+ .{
+ dmp,
+ "y",
+ "y123",
+ "x",
+ "x123",
+ true,
+ },
+ );
+}
+
+test "patching does not affect patches" {
+ const allocator = std.testing.allocator;
+ var dmp = DiffMatchPatch{};
+ dmp.match_distance = 1000;
+ dmp.match_threshold = 0.5;
+ dmp.patch_delete_threshold = 0.5;
+ dmp.match_max_bits = 32; // Need this so test #2 splits
+ var patches1 = try dmp.diffAndMakePatch(allocator, "", "test");
+ defer deinitPatchList(allocator, &patches1);
+ const patch1_str = try patchToText(allocator, patches1);
+ defer allocator.free(patch1_str);
+ const result1, _ = try dmp.patchApply(allocator, &patches1, "");
+ allocator.free(result1);
+ const patch1_str_after = try patchToText(allocator, patches1);
+ defer allocator.free(patch1_str_after);
+ try testing.expectEqualStrings(patch1_str, patch1_str_after);
+ var patches2 = try dmp.diffAndMakePatch(
+ allocator,
+ "The quick brown fox jumps over the lazy dog.",
+ "Woof",
+ );
+ defer deinitPatchList(allocator, &patches2);
+ const patch2_str = try patchToText(allocator, patches2);
+ defer allocator.free(patch2_str);
+ const result2, _ = try dmp.patchApply(
+ allocator,
+ &patches2,
+ "The quick brown fox jumps over the lazy dog.",
+ );
+ allocator.free(result2);
+ const patch2_str_after = try patchToText(allocator, patches2);
+ defer allocator.free(patch2_str_after);
+ try testing.expectEqualStrings(patch2_str, patch2_str_after);
}
diff --git a/build.zig b/build.zig
index 30802da..39db8fb 100644
--- a/build.zig
+++ b/build.zig
@@ -1,7 +1,47 @@
const std = @import("std");
pub fn build(b: *std.Build) void {
+ const target = b.standardTargetOptions(.{});
+ const optimize = b.standardOptimizeOption(.{});
+
_ = b.addModule("diffz", .{
.root_source_file = b.path("DiffMatchPatch.zig"),
+ .target = target,
+ .optimize = optimize,
+ });
+
+ // Run tests
+ const tests = b.addTest(.{
+ .name = "tests",
+ .root_source_file = b.path("DiffMatchPatch.zig"),
+ .target = target,
+ .optimize = optimize,
});
+ const step_tests = b.addRunArtifact(tests);
+
+ b.step("test", "Run diffz tests").dependOn(&step_tests.step);
+
+ const addOutputDirectoryArg = comptime if (@import("builtin").zig_version.order(.{ .major = 0, .minor = 13, .patch = 0 }) == .lt)
+ std.Build.Step.Run.addOutputFileArg
+ else
+ std.Build.Step.Run.addOutputDirectoryArg;
+
+ const run_kcov = b.addSystemCommand(&.{
+ "kcov",
+ "--clean",
+ "--exclude-line=unreachable,expect(false)",
+ });
+ run_kcov.addPrefixedDirectoryArg("--include-pattern=", b.path("."));
+ const coverage_output = addOutputDirectoryArg(run_kcov, ".");
+ run_kcov.addArtifactArg(tests);
+ run_kcov.enableTestRunnerMode();
+
+ const install_coverage = b.addInstallDirectory(.{
+ .source_dir = coverage_output,
+ .install_dir = .{ .custom = "coverage" },
+ .install_subdir = "",
+ });
+
+ const coverage_step = b.step("coverage", "Generate coverage (kcov must be installed)");
+ coverage_step.dependOn(&install_coverage.step);
}
diff --git a/build.zig.zon b/build.zig.zon
new file mode 100644
index 0000000..b4e0b4d
--- /dev/null
+++ b/build.zig.zon
@@ -0,0 +1,13 @@
+.{
+ .name = "diffz",
+ .version = "0.0.1",
+ .paths = .{
+ "DiffMatchPatch.zig",
+ ".gitattributes",
+ ".gitignore",
+ "LICENSE",
+ "README.md",
+ "build.zig.zon",
+ "build.zig",
+ },
+}
diff --git a/roadmap.md b/roadmap.md
new file mode 100644
index 0000000..d361f70
--- /dev/null
+++ b/roadmap.md
@@ -0,0 +1,68 @@
+# Roadmap
+
+- [✅] Port patch
+ - [✅] Add DiffMatchPatch object instead of @This() (which won't work)
+- [✅] Port match.
+- [✅] Port test coverage.
+- [✅] Unicode-aware `diffLineMode`.
+ - [✅] Coverage for all corner cases of preventing diff splits which aren't
+ on valid UTF-8 boundaries.
+ - [✅] Convert `line_array` to encode UTF-8 byte sequences and store `u21` keys
+ - [✅] Make the inner function accept a stream iterator, one which delivers the
+ entire string with boundaries (where applicable) at the end.
+- [ ] Refactor: the port currently treats Diffs and Patches as raw ArrayLists,
+ these should be proper Zig objects, with member functions, and probably
+ come in an Unmanaged and normal form.
+ - [?] Diff struct becomes Edit. Patch also needs a name, because Diff and
+ Patch should be the names of the user-facing structs. The name for
+ what a patch is in classic diff/patch programs is Hunk, so that's a
+ justifiable choice.
+ - [ ] DiffList and PatchList remain same, used internally.
+ - [ ] New Diff struct, and DiffUnmanaged.
+ - [ ] Namespaces subsequent operations on diffs.
+ - [ ] Same for Patch and PatchUnmanaged. These are little more than the
+ relevant DiffList, a DiffMatchPatch instance, and some decl functions,
+ plus the Allocator for managed versions.
+- [ ] Enhancements
+ - [ ] Extend Bitap algorithm to handle larger patches. The algorithm takes
+ `m * n` space, where `m` is unique bytes in the pattern and `n` is the
+ pattern length, so I think the idea of doing it up to 2048 bytes/bits
+ was optimistic on my part. But comptime-gated function specializations
+ for 64 (status quo), 128, and 256 bytes, would mean a lot less frobbing
+ and munging the patches internally. Performance implications expected
+ to be positive, if not hugely so. The algorithm is also amenable to
+ SIMD acceleration, although I'm not going to do that.
+ - [ ] `diffsForRegion`: provides every diff pertaining to a specific
+ region of `before`. Needs to also include how much overlap, if
+ any, the diff includes. Should have "borrow" and "copy"
+ versions. Signature being `diffsForRegion(diffs: DiffList, start: usize,`
+ `end: usize) ?DiffList`.
+ - [ ] Implement a delta format which doesn't suck so badly. I have copious
+ notes on this.
+ - [ ] I'd also like to break compatibility with the 'Unidiff' format, in a
+ less dramatic way. It's mostly the compulsive percent-encoding of
+ everything which doesn't fit in a URI: it's Googley [derogatory], and
+ a UTF-8 native patch format has no need for this. This would be a
+ separate, sucks-less text format, differentiable by its header,
+ decodes into a Patch in the same basic way. The legacy form is
+ already ported and should be kept.
+ - [ ] Add `Diff.differs() bool`, which checks if there are any differences
+ between the before and after text.
+ - [✅] Diff stream
+ - [✅] Use Unicode characters and codepoint indices - 32.
+ - [✅] Implement line diff as a stream.
+ - [✅] Also gives word diff, token diff, etc.
+- [ ] Histogram?
+ - [ ] Imara diff has an optimized histogram:
+ https://github.com/pascalkuthe/imara-diff
+ - [ ] Calculating the histogram while hashing the lines would be
+ straightforward, this could be comptime-gated, but probably
+ just a second copy of the munge function is fine.
+ - [ ] This one is getting into overkill territory perhaps.
+- [ ] POSIX-diff compatible patch output?
+ - [ ] This one seems pretty worthwhile to me. It would need to call line
+ mode without refining further, but everything else is fairly simple.
+- [ ] Delta functions? They aren't used internally. I favor ignoring the
+ legacy version and implementing a better one.
+
+Covers the bases.