11//! This module contains the Smith-Waterman algorithm implementation for local sequence alignment.
2- //!
2+ //!
33//! The Smith-Waterman algorithm is a dynamic programming algorithm used for determining
44//! similar regions between two sequences (nucleotide or protein sequences). It is particularly
55//! useful in bioinformatics for identifying optimal local alignments.
66//!
77//! # Algorithm Overview
8- //!
8+ //!
99//! The algorithm works by:
1010//! 1. Creating a scoring matrix where each cell represents the maximum alignment score
1111//! ending at that position
1414//! 4. Tracing back from the highest scoring position to reconstruct the alignment
1515//!
1616//! # Time Complexity
17- //!
17+ //!
1818//! O(m * n) where m and n are the lengths of the two sequences
1919//!
2020//! # Space Complexity
21- //!
21+ //!
2222//! O(m * n) for the scoring matrix
2323//!
2424//! # References
25- //!
25+ //!
2626//! - [Smith, T.F., Waterman, M.S. (1981). "Identification of Common Molecular Subsequences"](https://doi.org/10.1016/0022-2836(81)90087-5)
2727//! - [Wikipedia: Smith-Waterman algorithm](https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm)
2828
@@ -46,7 +46,7 @@ use std::cmp::max;
4646///
4747/// ```
4848/// use the_algorithms_rust::dynamic_programming::score_function;
49- ///
49+ ///
5050/// let score = score_function('A', 'A', 1, -1, -2);
5151/// assert_eq!(score, 1); // Match
5252///
@@ -93,7 +93,7 @@ pub fn score_function(
9393///
9494/// ```
9595/// use the_algorithms_rust::dynamic_programming::smith_waterman;
96- ///
96+ ///
9797/// let score_matrix = smith_waterman("ACAC", "CA", 1, -1, -2);
9898/// assert_eq!(score_matrix.len(), 5); // query length + 1
9999/// assert_eq!(score_matrix[0].len(), 3); // subject length + 1
@@ -107,37 +107,36 @@ pub fn smith_waterman(
107107) -> Vec < Vec < i32 > > {
108108 let query_upper: Vec < char > = query. to_uppercase ( ) . chars ( ) . collect ( ) ;
109109 let subject_upper: Vec < char > = subject. to_uppercase ( ) . chars ( ) . collect ( ) ;
110-
110+
111111 let m = query_upper. len ( ) ;
112112 let n = subject_upper. len ( ) ;
113-
113+
114114 // Initialize scoring matrix with zeros
115115 let mut score = vec ! [ vec![ 0 ; n + 1 ] ; m + 1 ] ;
116-
116+
117117 // Fill the scoring matrix using dynamic programming
118118 for i in 1 ..=m {
119119 for j in 1 ..=n {
120120 // Calculate score for match/mismatch
121- let match_or_mismatch = score[ i - 1 ] [ j - 1 ]
122- + score_function (
123- query_upper[ i - 1 ] ,
124- subject_upper[ j - 1 ] ,
125- match_score,
126- mismatch_score,
127- gap_score,
128- ) ;
129-
121+ let match_or_mismatch = score[ i - 1 ] [ j - 1 ] + score_function (
122+ query_upper[ i - 1 ] ,
123+ subject_upper[ j - 1 ] ,
124+ match_score,
125+ mismatch_score,
126+ gap_score,
127+ ) ;
128+
130129 // Calculate score for deletion (gap in subject)
131130 let delete = score[ i - 1 ] [ j] + gap_score;
132-
131+
133132 // Calculate score for insertion (gap in query)
134133 let insert = score[ i] [ j - 1 ] + gap_score;
135-
134+
136135 // Take maximum of all options, but never go below 0 (local alignment)
137136 score[ i] [ j] = max ( 0 , max ( match_or_mismatch, max ( delete, insert) ) ) ;
138137 }
139138 }
140-
139+
141140 score
142141}
143142
@@ -152,6 +151,9 @@ pub fn smith_waterman(
152151/// * `score` - The score matrix from the Smith-Waterman algorithm
153152/// * `query` - Original query sequence used in alignment
154153/// * `subject` - Original subject sequence used in alignment
154+ /// * `match_score` - Score for matching characters (should match smith_waterman call)
155+ /// * `mismatch_score` - Score for mismatching characters (should match smith_waterman call)
156+ /// * `gap_score` - Penalty for gaps (should match smith_waterman call)
155157///
156158/// # Returns
157159///
@@ -162,19 +164,26 @@ pub fn smith_waterman(
162164///
163165/// ```
164166/// use the_algorithms_rust::dynamic_programming::{smith_waterman, traceback};
165- ///
167+ ///
166168/// let score_matrix = smith_waterman("ACAC", "CA", 1, -1, -2);
167- /// let alignment = traceback(&score_matrix, "ACAC", "CA");
169+ /// let alignment = traceback(&score_matrix, "ACAC", "CA", 1, -1, -2 );
168170/// assert_eq!(alignment, "CA\nCA");
169171/// ```
170- pub fn traceback ( score : & [ Vec < i32 > ] , query : & str , subject : & str ) -> String {
172+ pub fn traceback (
173+ score : & [ Vec < i32 > ] ,
174+ query : & str ,
175+ subject : & str ,
176+ match_score : i32 ,
177+ mismatch_score : i32 ,
178+ gap_score : i32 ,
179+ ) -> String {
171180 let query_upper: Vec < char > = query. to_uppercase ( ) . chars ( ) . collect ( ) ;
172181 let subject_upper: Vec < char > = subject. to_uppercase ( ) . chars ( ) . collect ( ) ;
173-
182+
174183 // Find the cell with maximum score
175- let mut max_value = i32 :: MIN ;
184+ let mut max_value = 0 ;
176185 let ( mut i_max, mut j_max) = ( 0 , 0 ) ;
177-
186+
178187 for ( i, row) in score. iter ( ) . enumerate ( ) {
179188 for ( j, & value) in row. iter ( ) . enumerate ( ) {
180189 if value > max_value {
@@ -184,46 +193,57 @@ pub fn traceback(score: &[Vec<i32>], query: &str, subject: &str) -> String {
184193 }
185194 }
186195 }
187-
196+
188197 // If no significant alignment found, return empty string
189- if i_max == 0 || j_max = = 0 {
198+ if max_value < = 0 {
190199 return String :: new ( ) ;
191200 }
192-
201+
193202 // Traceback from the maximum scoring cell
194203 let ( mut i, mut j) = ( i_max, j_max) ;
195- let mut align1 = String :: new ( ) ;
196- let mut align2 = String :: new ( ) ;
197-
204+ let mut align1 = Vec :: new ( ) ;
205+ let mut align2 = Vec :: new ( ) ;
206+
198207 // Continue tracing back until we hit a cell with score 0
199208 while i > 0 && j > 0 && score[ i] [ j] > 0 {
200209 let current_score = score[ i] [ j] ;
201-
210+
202211 // Check if we came from diagonal (match/mismatch)
203- if current_score
204- == score[ i - 1 ] [ j - 1 ]
205- + score_function ( query_upper[ i - 1 ] , subject_upper[ j - 1 ] , 1 , -1 , -2 )
206- {
207- align1. insert ( 0 , query_upper[ i - 1 ] ) ;
208- align2. insert ( 0 , subject_upper[ j - 1 ] ) ;
212+ if current_score == score[ i - 1 ] [ j - 1 ] + score_function (
213+ query_upper[ i - 1 ] ,
214+ subject_upper[ j - 1 ] ,
215+ match_score,
216+ mismatch_score,
217+ gap_score,
218+ ) {
219+ align1. push ( query_upper[ i - 1 ] ) ;
220+ align2. push ( subject_upper[ j - 1 ] ) ;
209221 i -= 1 ;
210222 j -= 1 ;
211- }
223+ }
212224 // Check if we came from above (deletion/gap in subject)
213- else if current_score == score[ i - 1 ] [ j] - 2 {
214- align1. insert ( 0 , query_upper[ i - 1 ] ) ;
215- align2. insert ( 0 , '-' ) ;
225+ else if current_score == score[ i - 1 ] [ j] + gap_score {
226+ align1. push ( query_upper[ i - 1 ] ) ;
227+ align2. push ( '-' ) ;
216228 i -= 1 ;
217- }
229+ }
218230 // Otherwise we came from left (insertion/gap in query)
219231 else {
220- align1. insert ( 0 , '-' ) ;
221- align2. insert ( 0 , subject_upper[ j - 1 ] ) ;
232+ align1. push ( '-' ) ;
233+ align2. push ( subject_upper[ j - 1 ] ) ;
222234 j -= 1 ;
223235 }
224236 }
225-
226- format ! ( "{align1}\n {align2}" )
237+
238+ // Reverse the sequences (we built them backwards)
239+ align1. reverse ( ) ;
240+ align2. reverse ( ) ;
241+
242+ format ! (
243+ "{}\n {}" ,
244+ align1. into_iter( ) . collect:: <String >( ) ,
245+ align2. into_iter( ) . collect:: <String >( )
246+ )
227247}
228248
229249#[ cfg( test) ]
@@ -247,8 +267,8 @@ mod tests {
247267 $(
248268 #[ test]
249269 fn $name( ) {
250- let ( score, query, subject, expected) = $test_cases;
251- assert_eq!( traceback( & score, query, subject) , expected) ;
270+ let ( score, query, subject, match_score , mismatch_score , gap_score , expected) = $test_cases;
271+ assert_eq!( traceback( & score, query, subject, match_score , mismatch_score , gap_score ) , expected) ;
252272 }
253273 ) *
254274 }
@@ -300,6 +320,7 @@ mod tests {
300320 ] ,
301321 "ACAC" ,
302322 "CA" ,
323+ 1 , -1 , -2 ,
303324 "CA\n CA" ,
304325 ) ,
305326 test_traceback_agt_agt: (
@@ -311,9 +332,23 @@ mod tests {
311332 ] ,
312333 "AGT" ,
313334 "AGT" ,
335+ 1 , -1 , -2 ,
314336 "AGT\n AGT" ,
315337 ) ,
316- test_traceback_empty: ( vec![ vec![ 0 , 0 , 0 ] ] , "ACAC" , "" , "" ) ,
338+ test_traceback_empty: ( vec![ vec![ 0 , 0 , 0 ] ] , "ACAC" , "" , 1 , -1 , -2 , "" ) ,
339+ test_traceback_custom_scoring: (
340+ vec![
341+ vec![ 0 , 0 , 0 ] ,
342+ vec![ 0 , 0 , 2 ] ,
343+ vec![ 0 , 2 , 0 ] ,
344+ vec![ 0 , 0 , 4 ] ,
345+ vec![ 0 , 2 , 0 ] ,
346+ ] ,
347+ "ACAC" ,
348+ "CA" ,
349+ 2 , -2 , -3 ,
350+ "CA\n CA" ,
351+ ) ,
317352 }
318353
319354 #[ test]
@@ -339,8 +374,43 @@ mod tests {
339374 let result1 = smith_waterman ( "acac" , "CA" , 1 , -1 , -2 ) ;
340375 let result2 = smith_waterman ( "ACAC" , "ca" , 1 , -1 , -2 ) ;
341376 let result3 = smith_waterman ( "AcAc" , "Ca" , 1 , -1 , -2 ) ;
342-
377+
343378 assert_eq ! ( result1, result2) ;
344379 assert_eq ! ( result2, result3) ;
345380 }
381+
382+ #[ test]
383+ fn test_custom_scoring_end_to_end ( ) {
384+ // Test with custom scoring parameters (match=2, mismatch=-2, gap=-3)
385+ let query = "ACGT" ;
386+ let subject = "ACGT" ;
387+ let match_score = 2 ;
388+ let mismatch_score = -2 ;
389+ let gap_score = -3 ;
390+
391+ // Generate score matrix with custom parameters
392+ let score_matrix = smith_waterman ( query, subject, match_score, mismatch_score, gap_score) ;
393+
394+ // Traceback using the same custom parameters
395+ let alignment = traceback ( & score_matrix, query, subject, match_score, mismatch_score, gap_score) ;
396+
397+ // With perfect match and match_score=2, we expect alignment "ACGT\nACGT"
398+ assert_eq ! ( alignment, "ACGT\n ACGT" ) ;
399+
400+ // Verify the score is correct (4 matches × 2 = 8)
401+ assert_eq ! ( score_matrix[ 4 ] [ 4 ] , 8 ) ;
402+ }
403+
404+ #[ test]
405+ fn test_alignment_at_boundary ( ) {
406+ // Test case where optimal alignment might be at row/column 0
407+ let query = "A" ;
408+ let subject = "A" ;
409+ let score_matrix = smith_waterman ( query, subject, 1 , -1 , -2 ) ;
410+ let alignment = traceback ( & score_matrix, query, subject, 1 , -1 , -2 ) ;
411+
412+ // Should find the alignment even though it's near the boundary
413+ assert_eq ! ( alignment, "A\n A" ) ;
414+ assert_eq ! ( score_matrix[ 1 ] [ 1 ] , 1 ) ;
415+ }
346416}
0 commit comments