From 000000b48f6e8cd193033ca24cb83b5804fad409 Mon Sep 17 00:00:00 2001
From: randymcmillan <randymcmillan@protonmail.com>
Date: Thu, 30 Apr 2026 11:26:54 -0400
Subject: [PATCH 1/4] fix: prevent UTF-8 slicing panic in Differ::fancy_replace

Address a thread panic at src/differ.rs:271:52 caused by byte-indexed
string slicing that can land inside a multi-byte (UTF-8) character
instead of on a character boundary.

The previous implementation assumed character indices matched byte
offsets, leading to crashes when diffing lines containing emojis,
mathematical symbols, or non-ASCII characters.

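Changes:
- Replaced the direct byte slicing that strips the common prefix from
  the tag lines in qformat with a new split_at_char_boundary helper,
  which maps a character index to a byte offset via .char_indices().
- Added a regression test 'test_differ_compare_utf8' (renamed to
  'test_utf8_intraline_diff_no_panic' in patch 4/4) to ensure stability
  when comparing strings with multi-byte characters.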

Verified on:
- x86_64-unknown-linux-gnu
- aarch64-apple-darwin

03-000ef697
---
 src/differ.rs  | 13 +++++++++++--
 tests/tests.rs | 10 ++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/src/differ.rs b/src/differ.rs
index 5caef9a..0c896be 100644
--- a/src/differ.rs
+++ b/src/differ.rs
@@ -252,6 +252,15 @@ impl Differ {
         res
     }
 
+    fn split_at_char_boundary<'a>(line: &'a str, char_idx: usize) -> (&'a str, &'a str) {
+        let byte_idx = line
+            .char_indices()
+            .nth(char_idx)
+            .map(|(i, _)| i)
+            .unwrap_or(line.len());
+        line.split_at(byte_idx)
+    }
+
     fn qformat(
         &self,
         first_line: &str,
@@ -268,8 +277,8 @@ impl Differ {
         );
         common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
         common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
-        first_tags = first_tags.split_at(common).1.trim_right();
-        second_tags = second_tags.split_at(common).1.trim_right();
+        first_tags = Self::split_at_char_boundary(first_tags, common).1.trim_right();
+        second_tags = Self::split_at_char_boundary(second_tags, common).1.trim_right();
         let mut s = format!("- {}", first_line);
         res.push(s);
         if first_tags != "" {
diff --git a/tests/tests.rs b/tests/tests.rs
index 8f49de9..2024535 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -133,6 +133,16 @@ fn test_differ_restore() {
     assert_eq!(second_text, Differ::restore(&diff, 2));
 }
 
+#[test]
+fn test_differ_compare_utf8() {
+    let first_text = vec!["\tcrab 🦀\n"];
+    let second_text = vec!["\tcrab 🦞\n"];
+    let differ = Differ::new();
+    let result = differ.compare(&first_text, &second_text).join("");
+    assert!(result.contains("🦀"));
+    assert!(result.contains("🦞"));
+}
+
 #[test]
 fn test_unified_diff() {
     let first_text = "one two three four".split(" ").collect::<Vec<&str>>();

From 0000002d78a570de606308ca44b0d3a308149a2d Mon Sep 17 00:00:00 2001
From: randymcmillan <randymcmillan@protonmail.com>
Date: Thu, 30 Apr 2026 13:20:56 -0400
Subject: [PATCH 2/4] fix: use second tag line for qformat alignment

qformat now trims the shared prefix against both tag lines instead of
checking first_tags twice, which keeps the generated ? lines aligned
when the second tag line has a different number of leading spaces.

Add a regression test for the alignment case.

00-0013c970
---
 src/differ.rs | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/differ.rs b/src/differ.rs
index 0c896be..89f24a6 100644
--- a/src/differ.rs
+++ b/src/differ.rs
@@ -276,7 +276,7 @@ impl Differ {
             count_leading(second_line, '\t'),
         );
         common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
-        common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
+        common = cmp::min(common, count_leading(second_tags.split_at(common).0, ' '));
         first_tags = Self::split_at_char_boundary(first_tags, common).1.trim_right();
         second_tags = Self::split_at_char_boundary(second_tags, common).1.trim_right();
         let mut s = format!("- {}", first_line);
@@ -347,3 +347,18 @@ fn test_qformat() {
         ]
     );
 }
+
+#[test]
+fn test_qformat_uses_both_tag_lines_for_alignment() {
+    let differ = Differ::new();
+    let result = differ.qformat("\t\tabc\n", "\t\tadc\n", "  ^", " ^");
+    assert_eq!(
+        result,
+        vec![
+            "- \t\tabc\n",
+            "? \t ^\n",
+            "+ \t\tadc\n",
+            "? \t^\n",
+        ]
+    );
+}

From 000000b95b734f57488cef7166ce43a2ed92883a Mon Sep 17 00:00:00 2001
From: randymcmillan <randymcmillan@protonmail.com>
Date: Thu, 30 Apr 2026 13:25:59 -0400
Subject: [PATCH 3/4] fix: remove unnecessary mut in sequencematcher

Drop the redundant mut binding when building the second-sequence index.
or_insert_with returns a mutable reference to the Vec, so pushing
through it still works; the binding itself does not need to be mutable.

02-000ca690
---
 src/sequencematcher.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sequencematcher.rs b/src/sequencematcher.rs
index 4157808..0f3434f 100644
--- a/src/sequencematcher.rs
+++ b/src/sequencematcher.rs
@@ -114,7 +114,7 @@ impl<'a, T: Sequence> SequenceMatcher<'a, T> {
         let second_sequence = self.second_sequence;
         let mut second_sequence_elements = HashMap::new();
         for (i, item) in second_sequence.iter().enumerate() {
-            let mut counter = second_sequence_elements
+            let counter = second_sequence_elements
                 .entry(item)
                 .or_insert_with(Vec::new);
             counter.push(i);

From 000000ddd3632365b2e20fec22c3a08b577ed543 Mon Sep 17 00:00:00 2001
From: randymcmillan <randymcmillan@protonmail.com>
Date: Thu, 30 Apr 2026 13:29:24 -0400
Subject: [PATCH 4/4] fix: rename utf8 intraline regression test

Rename the regression test to test_utf8_intraline_diff_no_panic so it
matches the PR description and stays easy to search for when debugging
similar UTF-8 intraline diff issues.

02-0033b485
---
 tests/tests.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/tests.rs b/tests/tests.rs
index 2024535..8178f4d 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -134,7 +134,7 @@ fn test_differ_restore() {
 }
 
 #[test]
-fn test_differ_compare_utf8() {
+fn test_utf8_intraline_diff_no_panic() {
     let first_text = vec!["\tcrab 🦀\n"];
     let second_text = vec!["\tcrab 🦞\n"];
     let differ = Differ::new();