diff --git a/Cargo.toml b/Cargo.toml
index 2edf6df..af3a3e8 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "difflib"
-version = "0.4.0"
+version = "0.5.1"
 authors = ["Dima Kudosh "]
 description = "Port of Python's difflib library to Rust."
 documentation = "https://github.com/DimaKudosh/difflib/wiki"
diff --git a/README.md b/README.md
index b881fd2..b4bbc1e 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ Simply add difflib to your dependencies block in Cargo.toml
 
 ```rust
 [dependencies]
-difflib = "0.4.0"
+difflib = "0.5.1"
 ```
 
 ## Documentation
diff --git a/src/differ.rs b/src/differ.rs
index 5caef9a..f0ee9ff 100644
--- a/src/differ.rs
+++ b/src/differ.rs
@@ -1,6 +1,6 @@
-use sequencematcher::SequenceMatcher;
+use crate::sequencematcher::SequenceMatcher;
 use std::cmp;
-use utils::{count_leading, str_with_similar_chars};
+use crate::utils::{count_leading, str_with_similar_chars};
 
 #[derive(Default)]
 pub struct Differ {
@@ -252,6 +252,19 @@ impl Differ {
         res
     }
 
+    /// Splits `line` before its `char_idx`-th character instead of at a raw byte offset.
+    ///
+    /// When `line` has `char_idx` characters or fewer, the first half is the whole string
+    /// and the second half is empty, so the split can never land inside a code point.
+    fn split_at_char_boundary(line: &str, char_idx: usize) -> (&str, &str) {
+        let byte_idx = line
+            .char_indices()
+            .nth(char_idx)
+            .map(|(i, _)| i)
+            .unwrap_or(line.len());
+        line.split_at(byte_idx)
+    }
+
     fn qformat(
         &self,
         first_line: &str,
@@ -266,10 +279,17 @@ impl Differ {
             count_leading(first_line, '\t'),
             count_leading(second_line, '\t'),
         );
-        common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
-        common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' '));
-        first_tags = first_tags.split_at(common).1.trim_right();
-        second_tags = second_tags.split_at(common).1.trim_right();
+        // Slice by character count: a raw byte-offset `split_at` panics off a char boundary.
+        common = cmp::min(
+            common,
+            count_leading(Self::split_at_char_boundary(first_tags, common).0, ' '),
+        );
+        common = cmp::min(
+            common,
+            count_leading(Self::split_at_char_boundary(second_tags, common).0, ' '),
+        );
+        first_tags = Self::split_at_char_boundary(first_tags, common).1.trim_end();
+        second_tags = Self::split_at_char_boundary(second_tags, common).1.trim_end();
         let mut s = format!("- {}", first_line);
         res.push(s);
         if first_tags != "" {
diff --git a/src/lib.rs b/src/lib.rs
index ca6b1cc..a69a84a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,10 +2,10 @@ pub mod differ;
 pub mod sequencematcher;
 mod utils;
 
-use sequencematcher::{Sequence, SequenceMatcher};
+use crate::sequencematcher::{Sequence, SequenceMatcher};
 use std::collections::HashMap;
 use std::fmt::Display;
-use utils::{format_range_context, format_range_unified};
+use crate::utils::{format_range_context, format_range_unified};
 
 pub fn get_close_matches<'a>(
     word: &str,
diff --git a/src/sequencematcher.rs b/src/sequencematcher.rs
index 4157808..bb2a5c3 100644
--- a/src/sequencematcher.rs
+++ b/src/sequencematcher.rs
@@ -1,7 +1,7 @@
 use std::cmp::{max, min};
 use std::collections::HashMap;
 use std::hash::Hash;
-use utils::calculate_ratio;
+use crate::utils::calculate_ratio;
 
 #[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord)]
 pub struct Match {
@@ -114,7 +114,7 @@ impl<'a, T: Sequence> SequenceMatcher<'a, T> {
         let second_sequence = self.second_sequence;
         let mut second_sequence_elements = HashMap::new();
         for (i, item) in second_sequence.iter().enumerate() {
-            let mut counter = second_sequence_elements
+            let counter = second_sequence_elements
                 .entry(item)
                 .or_insert_with(Vec::new);
             counter.push(i);
diff --git a/tests/tests.rs b/tests/tests.rs
index 8f49de9..d191c9f 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -133,6 +133,18 @@ fn test_differ_restore() {
     assert_eq!(second_text, Differ::restore(&diff, 2));
 }
 
+#[test]
+fn test_differ_compare_utf8() {
+    let first_text = vec!["\tcrab 🦀\n"];
+    let second_text = vec!["\tcrab 🦞\n"];
+    let differ = Differ::new();
+    let result = differ.compare(&first_text, &second_text).join("");
+    assert!(result.contains("🦀"));
+    assert!(result.contains("🦞"));
+    assert!(result.contains("? "));
+    assert!(result.contains("^"));
+}
+
 #[test]
 fn test_unified_diff() {
     let first_text = "one two three four".split(" ").collect::<Vec<&str>>();