From 4c2fa6ea095435fa0195cd62cf5ded11da1bff3e Mon Sep 17 00:00:00 2001 From: Angelo 'Flecart' Huang Date: Thu, 8 Dec 2022 10:44:09 +0100 Subject: [PATCH 01/13] feat(items): create first version of closure and goto --- src/grammar/consts.rs | 7 ++ src/grammar/grammar.rs | 72 ++++------------- src/grammar/item.rs | 172 +++++++++++++++++++++++++++++++++++++++++ src/grammar/mod.rs | 5 +- 4 files changed, 196 insertions(+), 60 deletions(-) create mode 100644 src/grammar/consts.rs create mode 100644 src/grammar/item.rs diff --git a/src/grammar/consts.rs b/src/grammar/consts.rs new file mode 100644 index 0000000..12f7f93 --- /dev/null +++ b/src/grammar/consts.rs @@ -0,0 +1,7 @@ + +pub const EPSILON: char = 'ε'; +pub const STRING_END: char = '$'; + +// NOTE: this could be in conflict with the Terminal symbols, so +// it is MANDATORY that the Terminal doesn´t have dots in it! +pub const ITEM_SEP: char = '.'; \ No newline at end of file diff --git a/src/grammar/grammar.rs b/src/grammar/grammar.rs index cca1948..aba3c02 100644 --- a/src/grammar/grammar.rs +++ b/src/grammar/grammar.rs @@ -1,6 +1,7 @@ use std::collections::{BTreeSet, BTreeMap}; use crate::automata::DFA; +use crate::grammar::consts::{EPSILON, STRING_END}; pub type NonTerminal = usize; pub type Terminal = char; @@ -11,10 +12,10 @@ pub enum Letter { Terminal(Terminal), } -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord)] pub struct Production { - lhs: NonTerminal, - rhs: Vec, + pub lhs: NonTerminal, + pub rhs: Vec, } #[derive(Debug, PartialEq)] @@ -28,14 +29,12 @@ pub struct Grammar { nullable: Option>, } -const EPSILON: char = 'ε'; -const STRING_END: char = '$'; - -// NOTE: this could be in conflict with the Terminal symbols, so -// it is MANDATORY that the Terminal doesn´t have dots in it! -const ITEM_SEP: char = '.'; impl Grammar { + pub fn first_k(&self, letter: &Vec, look_ahead: usize) -> BTreeSet { + unimplemented!(); + } + pub fn first(&mut self, letter: &Letter) -> BTreeSet { if let None = self.nullable { self.nullable = Some(self.get_nullable()); @@ -92,6 +91,10 @@ impl Grammar { first } + pub fn follow_k(&self, letter: &NonTerminal, look_ahead: usize) -> BTreeSet { + unimplemented!(); + } + pub fn follow(&mut self, non_terminal: &NonTerminal) -> BTreeSet { if let None = self.nullable { self.nullable = Some(self.get_nullable()); @@ -381,7 +384,7 @@ impl Grammar { self.nullable = None; } - fn transitions_to_adj_list(&self) -> BTreeMap>> { + pub fn transitions_to_adj_list(&self) -> BTreeMap>> { let mut adj_list: BTreeMap>> = BTreeMap::new(); for production in self.productions.iter() { adj_list.entry(production.lhs) @@ -391,31 +394,6 @@ impl Grammar { adj_list } - - pub fn get_itemization(&self) -> Vec { - let mut itemized_transitions = vec![]; - for production in self.productions.iter() { - if production.rhs.len() == 1 && production.rhs[0] == Letter::Terminal(EPSILON) { - itemized_transitions.push(Production { - lhs: production.lhs, - rhs: vec![Letter::Terminal(ITEM_SEP)] - }); - continue; - } - - for i in 0..=production.rhs.len() { - let mut rhs = production.rhs.clone(); - - rhs.insert(i, Letter::Terminal(ITEM_SEP)); - itemized_transitions.push(Production { - lhs: production.lhs, - rhs: rhs - }); - } - } - - itemized_transitions - } } impl From<&DFA> for Grammar { @@ -625,28 +603,4 @@ mod test { assert_eq!(grammar, result); } - - #[test] - fn test_itemization() { - let grammar = get_test_grammar(); - - let items = grammar.get_itemization(); - - let result_productions = vec![ - Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(1), Letter::Terminal('b')] }, - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal(ITEM_SEP), Letter::Terminal('b')] }, - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b'), Letter::Terminal(ITEM_SEP)] }, - - Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('c')] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('c'), Letter::Terminal(ITEM_SEP)] }, - - Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('a'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::Terminal(ITEM_SEP), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1), Letter::Terminal(ITEM_SEP)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP)] }, - ]; - - assert!(items.iter().all(|item| result_productions.contains(item))); - assert!(result_productions.iter().all(|item| items.contains(item))); - } } \ No newline at end of file diff --git a/src/grammar/item.rs b/src/grammar/item.rs new file mode 100644 index 0000000..b5d032e --- /dev/null +++ b/src/grammar/item.rs @@ -0,0 +1,172 @@ +use std::collections::{BTreeSet, BTreeMap}; + +use crate::grammar::grammar::{Letter, NonTerminal, Production}; +use crate::grammar::consts::{EPSILON, STRING_END, ITEM_SEP}; + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct Item { + production: Production, + look_ahead: String, + + /// number of the lookahead for the item + num: usize, +} + +impl Item { + pub fn get_itemization(productions: &Vec) -> Vec { + let mut itemized_transitions = vec![]; + for production in productions.iter() { + itemized_transitions.append(&mut Item::itemize(production)); + } + + itemized_transitions + } + + /// itemizes a single production + pub fn itemize(production: &Production) -> Vec { + let mut itemized_prod = vec![]; + + if production.rhs.len() == 1 && production.rhs[0] == Letter::Terminal(EPSILON) { + itemized_prod.push(Production { + lhs: production.lhs, + rhs: vec![Letter::Terminal(ITEM_SEP)] + }); + return itemized_prod; + } + + for i in 0..=production.rhs.len() { + let mut rhs = production.rhs.clone(); + + rhs.insert(i, Letter::Terminal(ITEM_SEP)); + itemized_prod.push(Production { + lhs: production.lhs, + rhs: rhs + }); + } + + itemized_prod + } + + fn add_initial_sep(productions: BTreeMap>>) + -> BTreeMap>> { + let mut result_prod = BTreeMap::new(); + for (non_terminal, set) in productions.into_iter() { + let mut letters = set.into_iter().collect::>>(); + letters.iter_mut().for_each(|letter| { + letter.insert(0, Letter::Terminal(ITEM_SEP)); + }); + + result_prod.insert(non_terminal, letters.into_iter().collect::>>()); + } + + result_prod + } + + /// return the closure of the set **productions** in the input + /// with the given look_ahead + /// production is the adjiacency list of all the productions + fn closure( + items: &BTreeSet, + productions: BTreeMap>>, + look_ahead: usize) -> BTreeSet + { + let mut closure_items = (*items).clone(); + let dot_production = Self::add_initial_sep(productions); + + items.iter().for_each(|item| { + let item_sep_pos = item.production.rhs.iter().position(|letter| *letter == Letter::Terminal(ITEM_SEP)); + if item_sep_pos.is_none() { + return; + } + + let item_sep_pos = item_sep_pos.unwrap(); + if item_sep_pos == item.production.rhs.len() - 1 { + return; + } + + let next_letter = &item.production.rhs[item_sep_pos + 1]; + if let Letter::NonTerminal(non_terminal) = next_letter { + let mut new_items = dot_production.get(non_terminal) + .unwrap() + .iter() + .map(|rhs| { + Item { + production: Production { + lhs: *non_terminal, + rhs: rhs.clone(), + }, + look_ahead: item.look_ahead.clone(), + num: look_ahead, + } + }).collect::>(); + + closure_items.append(&mut new_items); + } + }); + + closure_items + } + + fn goto(items: &BTreeSet) -> BTreeSet { + let mut goto_items: BTreeSet = BTreeSet::new(); + + items.iter().for_each(|item| { + let item_sep_pos = item.production.rhs.iter().position(|letter| *letter == Letter::Terminal(ITEM_SEP)); + if item_sep_pos.is_none() { + return; + } + + let item_sep_pos = item_sep_pos.unwrap(); + if item_sep_pos == item.production.rhs.len() - 1 { + return; + } + + let next_letter = &item.production.rhs[item_sep_pos + 1]; + if *next_letter == Letter::Terminal(STRING_END) { + return; + } + + goto_items.insert({ + let mut new_item = item.clone(); + new_item.production.rhs[item_sep_pos] = new_item.production.rhs[item_sep_pos + 1].clone(); + new_item.production.rhs[item_sep_pos + 1] = Letter::Terminal(ITEM_SEP); + + new_item + }); + }); + + goto_items + } +} + +#[cfg(tests)] +mod tests { + #[test] + fn test_itemization() { + let productions = vec![ + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('c')] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, + ]; + + let items = Item::get_itemization(&productions); + + let result_productions = vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(1), Letter::Terminal('b')] }, + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal(ITEM_SEP), Letter::Terminal('b')] }, + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b'), Letter::Terminal(ITEM_SEP)] }, + + Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('c')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('c'), Letter::Terminal(ITEM_SEP)] }, + + Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('a'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::Terminal(ITEM_SEP), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1), Letter::Terminal(ITEM_SEP)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP)] }, + ]; + + assert!(items.iter().all(|item| result_productions.contains(item))); + assert!(result_productions.iter().all(|item| items.contains(item))); + } +} \ No newline at end of file diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs index 4b8b37a..c603449 100644 --- a/src/grammar/mod.rs +++ b/src/grammar/mod.rs @@ -1,3 +1,6 @@ mod grammar; +mod item; +pub mod consts; -pub use grammar::*; \ No newline at end of file +pub use grammar::*; +pub use item::*; From 32a73496c14cc1a85c64620a9ea0840b66e05c58 Mon Sep 17 00:00:00 2001 From: Angelo 'Flecart' Huang Date: Thu, 8 Dec 2022 15:40:47 +0100 Subject: [PATCH 02/13] feat(item): create goto and closure functions --- src/grammar/item.rs | 155 +++++++++++++++++++++++++++++++------------- 1 file changed, 110 insertions(+), 45 deletions(-) diff --git a/src/grammar/item.rs b/src/grammar/item.rs index b5d032e..8bf8482 100644 --- a/src/grammar/item.rs +++ b/src/grammar/item.rs @@ -1,15 +1,14 @@ -use std::collections::{BTreeSet, BTreeMap}; +use std::collections::{BTreeSet, BTreeMap, VecDeque}; -use crate::grammar::grammar::{Letter, NonTerminal, Production}; -use crate::grammar::consts::{EPSILON, STRING_END, ITEM_SEP}; +use crate::grammar::grammar::{Grammar, Letter, Terminal, NonTerminal, Production}; +use crate::grammar::consts::{EPSILON, ITEM_SEP}; + +use super::consts::STRING_END; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct Item { production: Production, - look_ahead: String, - - /// number of the lookahead for the item - num: usize, + look_ahead: Option, } impl Item { @@ -47,6 +46,7 @@ impl Item { itemized_prod } + /// adds a ITEM_SEP to the beginning of each production fn add_initial_sep(productions: BTreeMap>>) -> BTreeMap>> { let mut result_prod = BTreeMap::new(); @@ -65,49 +65,108 @@ impl Item { /// return the closure of the set **productions** in the input /// with the given look_ahead /// production is the adjiacency list of all the productions - fn closure( + pub fn closure( items: &BTreeSet, - productions: BTreeMap>>, - look_ahead: usize) -> BTreeSet - { + grammar: &mut Grammar, + ) -> BTreeSet { let mut closure_items = (*items).clone(); - let dot_production = Self::add_initial_sep(productions); - - items.iter().for_each(|item| { - let item_sep_pos = item.production.rhs.iter().position(|letter| *letter == Letter::Terminal(ITEM_SEP)); - if item_sep_pos.is_none() { - return; - } - - let item_sep_pos = item_sep_pos.unwrap(); - if item_sep_pos == item.production.rhs.len() - 1 { - return; - } - - let next_letter = &item.production.rhs[item_sep_pos + 1]; - if let Letter::NonTerminal(non_terminal) = next_letter { - let mut new_items = dot_production.get(non_terminal) - .unwrap() - .iter() - .map(|rhs| { - Item { - production: Production { - lhs: *non_terminal, - rhs: rhs.clone(), - }, - look_ahead: item.look_ahead.clone(), - num: look_ahead, + let mut used_non_term = vec![false; grammar.get_non_terminal().len()]; + let mut non_terminals = Self::compute_closure_queue(items, &mut used_non_term); + + let dot_production = Self::add_initial_sep(grammar.transitions_to_adj_list()); + + // apply the closure to all the non terminals in non_terminals + while let Some((non_terminal, letter_first)) = non_terminals.pop_front() { + dot_production.get(&non_terminal) + .unwrap() + .iter() + .for_each(|rhs| { + // with dot_production, the dot is always at 0, so the first letter is 1 + if rhs.len() >= 1 { + let letter = &rhs[1]; + + if let Letter::NonTerminal(non_term) = letter { + if !used_non_term[*non_term as usize] { + used_non_term[*non_term as usize] = true; + + if rhs.len() >= 2 { + non_terminals.push_back((*non_term, Some(rhs[2].clone()))); + } else { + non_terminals.push_back((*non_term, None)); + } + } } - }).collect::>(); - closure_items.append(&mut new_items); - } - }); + } + + let production = Production { + lhs: non_terminal, + rhs: rhs.clone(), + }; + + // Closure with first only if the precedente look_ahead has been set! + // this should save some computation time. + if let None = letter_first { + closure_items.insert(Item { + production: production, + look_ahead: None, + }); + return; + } + + let first = letter_first.as_ref().unwrap(); + let first_letter_set = grammar.first(first); + + first_letter_set.iter().for_each(|look_ahead| { + closure_items.insert(Item { + production: production.clone(), + look_ahead: Some(*look_ahead), + }); + }); + }); + } closure_items } - fn goto(items: &BTreeSet) -> BTreeSet { + /// queue of non_terminal to explore and next letter for first + fn compute_closure_queue(items: &BTreeSet, used_non_term: &mut Vec) + -> VecDeque<(NonTerminal, Option)> { + let mut non_terminals: VecDeque<(NonTerminal, Option)> = VecDeque::new(); + items.iter().for_each(|item| { + let item_sep_pos = item.production.rhs.iter() + .position(|letter| *letter == Letter::Terminal(ITEM_SEP)); + if item_sep_pos.is_none() { + return; + } + + let item_sep_pos = item_sep_pos.unwrap(); + if item_sep_pos == item.production.rhs.len() - 1 { + return; + } + + let next_letter = &item.production.rhs[item_sep_pos + 1]; + + if let Letter::NonTerminal(non_terminal) = next_letter { + if used_non_term[*non_terminal] { + return; + } + used_non_term[*non_terminal] = true; + + if let None = item.look_ahead { + non_terminals.push_back((*non_terminal, None)); + } else if item_sep_pos < item.production.rhs.len() - 2 { + non_terminals.push_back((*non_terminal, Some(item.production.rhs[item_sep_pos + 2].clone()))); + } else { + non_terminals.push_back((*non_terminal, Some(Letter::Terminal(STRING_END)))); + } + } + }); + + non_terminals + } + + pub fn goto(items: &BTreeSet, letter: &Letter) -> BTreeSet { let mut goto_items: BTreeSet = BTreeSet::new(); items.iter().for_each(|item| { @@ -121,8 +180,7 @@ impl Item { return; } - let next_letter = &item.production.rhs[item_sep_pos + 1]; - if *next_letter == Letter::Terminal(STRING_END) { + if &item.production.rhs[item_sep_pos + 1] != letter { return; } @@ -139,8 +197,10 @@ impl Item { } } -#[cfg(tests)] +#[cfg(test)] mod tests { + use super::*; + use crate::set; #[test] fn test_itemization() { let productions = vec![ @@ -169,4 +229,9 @@ mod tests { assert!(items.iter().all(|item| result_productions.contains(item))); assert!(result_productions.iter().all(|item| items.contains(item))); } + + #[test] + fn closure_0 () { + + } } \ No newline at end of file From af713a39a164eec1523d193dd5e1180f42acf79e Mon Sep 17 00:00:00 2001 From: Angelo 'Flecart' Huang Date: Thu, 8 Dec 2022 16:29:22 +0100 Subject: [PATCH 03/13] test(item): add closure_0 and 1 and goto tests --- src/grammar/grammar.rs | 36 +++++++--- src/grammar/item.rs | 149 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 174 insertions(+), 11 deletions(-) diff --git a/src/grammar/grammar.rs b/src/grammar/grammar.rs index aba3c02..40126ae 100644 --- a/src/grammar/grammar.rs +++ b/src/grammar/grammar.rs @@ -3,6 +3,8 @@ use std::collections::{BTreeSet, BTreeMap}; use crate::automata::DFA; use crate::grammar::consts::{EPSILON, STRING_END}; +use super::item::Item; + pub type NonTerminal = usize; pub type Terminal = char; @@ -31,8 +33,20 @@ pub struct Grammar { impl Grammar { - pub fn first_k(&self, letter: &Vec, look_ahead: usize) -> BTreeSet { - unimplemented!(); + pub fn new(start_symbol: NonTerminal, productions: Vec) -> Self { + Grammar { + start_symbol, + productions, + nullable: None, + } + } + + pub fn get_start_symbol(&self) -> NonTerminal { + self.start_symbol + } + + pub fn get_productions(&self) -> &Vec { + &self.productions } pub fn first(&mut self, letter: &Letter) -> BTreeSet { @@ -91,10 +105,6 @@ impl Grammar { first } - pub fn follow_k(&self, letter: &NonTerminal, look_ahead: usize) -> BTreeSet { - unimplemented!(); - } - pub fn follow(&mut self, non_terminal: &NonTerminal) -> BTreeSet { if let None = self.nullable { self.nullable = Some(self.get_nullable()); @@ -355,7 +365,7 @@ impl Grammar { }); // add corresponding productions - let mut adj_list = self.transitions_to_adj_list(); + let mut adj_list = self.productions_to_adj_list(); for unitary_couple in unitary_couples.iter() { if unitary_couple.0 == unitary_couple.1 { continue; @@ -384,7 +394,7 @@ impl Grammar { self.nullable = None; } - pub fn transitions_to_adj_list(&self) -> BTreeMap>> { + pub fn productions_to_adj_list(&self) -> BTreeMap>> { let mut adj_list: BTreeMap>> = BTreeMap::new(); for production in self.productions.iter() { adj_list.entry(production.lhs) @@ -394,6 +404,16 @@ impl Grammar { adj_list } + + pub fn add_fake_initial_state(&mut self) -> () { + let new_state = self.get_non_terminal().iter().max().unwrap() + 1; + self.productions.push(Production { + lhs: new_state, + rhs: vec![Letter::NonTerminal(self.start_symbol)] + }); + + self.start_symbol = new_state; + } } impl From<&DFA> for Grammar { diff --git a/src/grammar/item.rs b/src/grammar/item.rs index 8bf8482..25d3174 100644 --- a/src/grammar/item.rs +++ b/src/grammar/item.rs @@ -73,7 +73,7 @@ impl Item { let mut used_non_term = vec![false; grammar.get_non_terminal().len()]; let mut non_terminals = Self::compute_closure_queue(items, &mut used_non_term); - let dot_production = Self::add_initial_sep(grammar.transitions_to_adj_list()); + let dot_production = Self::add_initial_sep(grammar.productions_to_adj_list()); // apply the closure to all the non terminals in non_terminals while let Some((non_terminal, letter_first)) = non_terminals.pop_front() { @@ -82,14 +82,14 @@ impl Item { .iter() .for_each(|rhs| { // with dot_production, the dot is always at 0, so the first letter is 1 - if rhs.len() >= 1 { + if rhs.len() >= 2 { let letter = &rhs[1]; if let Letter::NonTerminal(non_term) = letter { if !used_non_term[*non_term as usize] { used_non_term[*non_term as usize] = true; - if rhs.len() >= 2 { + if rhs.len() >= 3 { non_terminals.push_back((*non_term, Some(rhs[2].clone()))); } else { non_terminals.push_back((*non_term, None)); @@ -232,6 +232,149 @@ mod tests { #[test] fn closure_0 () { + // S -> (S) + // S -> A + // A -> a + let mut grammar = Grammar::new( + 0, + vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a')] }, + ], + ); + grammar.add_fake_initial_state(); + + let mut start_item = set![Item { + production: Production { lhs: 2, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0)] }, + look_ahead: None, + }]; + let closure = Item::closure(&mut start_item, &mut grammar).into_iter() + .map(|item| item.production) + .collect::>(); + + let result = vec![ + Production { lhs: 2, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0)] }, + Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('a')] }, + ]; + + assert!(closure.iter().all(|item| result.contains(item))); + assert!(result.iter().all(|item| closure.contains(item))); + } + + #[test] + fn closure_1() { + // S -> CC + // C -> cC + // C -> d + + let mut grammar = Grammar::new( + 0, + vec![ + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('c'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('d')] }, + ], + ); + + grammar.add_fake_initial_state(); + + let mut start_item = set![Item { + production: Production { lhs: 2, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0)] }, + look_ahead: Some(STRING_END), + }]; + + let closure: Vec = Item::closure(&mut start_item, &mut grammar).into_iter().collect(); + + let result = vec![ + Item { + production: Production { lhs: 2, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0)] }, + look_ahead: Some(STRING_END), + }, + Item { + production: Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(1), Letter::NonTerminal(1)] }, + look_ahead: Some(STRING_END), + }, + Item { + production: Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('c'), Letter::NonTerminal(1)] }, + look_ahead: Some('c'), + }, + Item { + production: Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('c'), Letter::NonTerminal(1)] }, + look_ahead: Some('d'), + }, + Item { + production: Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('d')] }, + look_ahead: Some('c'), + }, + Item { + production: Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('d')] }, + look_ahead: Some('d'), + }, + ]; + + assert!(closure.iter().all(|item| result.contains(item))); + assert!(result.iter().all(|item| closure.contains(item))); + } + + #[test] + fn goto() { + // S -> (S) + // S -> () + let mut grammar = Grammar::new( + 0, + vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::Terminal(')')] }, + ], + ); + grammar.add_fake_initial_state(); + + let mut start_item = set![Item { + production: Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0)] }, + look_ahead: None, + }]; + + let closure = Item::closure(&mut start_item, &mut grammar); + let goto = Item::goto(&closure, &Letter::Terminal('(')).into_iter() + .map(|item| item.production) + .collect::>(); + + // result should be + // S -> (.S) + // S -> (.) + let result = vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::Terminal(ITEM_SEP), Letter::Terminal(')')] }, + ]; + + assert!(goto.iter().all(|item| result.contains(item))); + assert!(result.iter().all(|item| goto.contains(item))); + + // SECOND PART OF TEST, APPLY CLOSURE TO GOTO'S OUTPUT + let mut goto_items = goto.into_iter() + .map(|item| Item { + production: item, + look_ahead: None, + }) + .collect::>(); + + let closure = Item::closure(&mut goto_items, &mut grammar) + .into_iter() + .map(|item| item.production) + .collect::>(); + + let result = vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::Terminal(ITEM_SEP), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('('), Letter::Terminal(')')] }, + ]; + + assert!(closure.iter().all(|item| result.contains(item))); + assert!(result.iter().all(|item| closure.contains(item))); } } \ No newline at end of file From 63c66953bf4c617a38be99a3fe63e5627f7af006 Mon Sep 17 00:00:00 2001 From: Angelo 'Flecart' Huang Date: Thu, 8 Dec 2022 20:52:42 +0100 Subject: [PATCH 04/13] fix: infinite loop in first --- src/grammar/grammar.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/grammar/grammar.rs b/src/grammar/grammar.rs index 40126ae..f06670f 100644 --- a/src/grammar/grammar.rs +++ b/src/grammar/grammar.rs @@ -56,7 +56,8 @@ impl Grammar { match letter { Letter::NonTerminal(non_terminal) => { - let mut first = self._first(non_terminal); + let mut used = vec![false; self.get_non_terminal().len()]; + let mut first = self._first(non_terminal, &mut used); if self.nullable.as_ref().unwrap().contains(&non_terminal) { first.insert(EPSILON); @@ -72,7 +73,12 @@ impl Grammar { } } - fn _first(&self, non_terminal: &NonTerminal) -> BTreeSet { + fn _first(&self, non_terminal: &NonTerminal, used: &mut Vec) -> BTreeSet { + if used[*non_terminal] == true { + return BTreeSet::new(); + } + used[*non_terminal] = true; + let nullable = self.nullable.as_ref().unwrap(); let mut first = BTreeSet::new(); @@ -85,7 +91,7 @@ impl Grammar { // we can continue to add more only if previous symbols are nullable match letter { Letter::NonTerminal(idx) => { - first.append(&mut self._first(idx)); + first.append(&mut self._first(idx, used)); if !nullable.contains(idx) { break; } @@ -111,16 +117,16 @@ impl Grammar { } let num_non_terminal = self.get_non_terminal().len(); - let mut used = vec![0; num_non_terminal]; + let mut used = vec![false; num_non_terminal]; self._follow(non_terminal, &mut used) } - fn _follow(&self, non_terminal: &NonTerminal, used: &mut Vec) -> BTreeSet { - if used[*non_terminal] == 1 { + fn _follow(&self, non_terminal: &NonTerminal, used: &mut Vec) -> BTreeSet { + if used[*non_terminal] == true { return BTreeSet::new(); } - used[*non_terminal] = 1; + used[*non_terminal] = true; let nullable = self.nullable.as_ref().unwrap(); let mut follow = BTreeSet::new(); @@ -145,7 +151,8 @@ impl Grammar { let next_letter = &production.rhs[i + 1]; match next_letter { Letter::NonTerminal(idx) => { - follow.append(&mut self._first(idx)); + let mut first_used_table = vec![false; used.len()]; + follow.append(&mut self._first(idx, &mut first_used_table)); }, Letter::Terminal(ch) => { follow.insert(*ch); From d3cff7832365c7b17152e5a9f792b42efb5defc0 Mon Sep 17 00:00:00 2001 From: Angelo 'Flecart' Huang Date: Thu, 8 Dec 2022 21:05:22 +0100 Subject: [PATCH 05/13] refactor: create semplification impl --- src/grammar/grammar.rs | 335 +----------------------- src/grammar/grammar/semplification.rs | 360 ++++++++++++++++++++++++++ 2 files changed, 362 insertions(+), 333 deletions(-) create mode 100644 src/grammar/grammar/semplification.rs diff --git a/src/grammar/grammar.rs b/src/grammar/grammar.rs index f06670f..9271d61 100644 --- a/src/grammar/grammar.rs +++ b/src/grammar/grammar.rs @@ -5,6 +5,8 @@ use crate::grammar::consts::{EPSILON, STRING_END}; use super::item::Item; +mod semplification; + pub type NonTerminal = usize; pub type Terminal = char; @@ -200,207 +202,6 @@ impl Grammar { non_terminals } - pub fn get_nullable(&self) -> BTreeSet { - let mut nullable = BTreeSet::new(); - let mut has_changed = true; - while has_changed { - has_changed = false; - for production in self.productions.iter() { - let mut is_nullable = true; - for letter in production.rhs.iter() { - match letter { - Letter::NonTerminal(idx) => { - if !nullable.contains(idx) { - is_nullable = false; - break; - } - } - Letter::Terminal(ch) => { - if *ch != EPSILON { - is_nullable = false; - break; - } - } - } - } - if is_nullable && !nullable.contains(&production.lhs) { - nullable.insert(production.lhs); - has_changed = true; - } - } - } - - nullable - } - - /// O(m^2) implementation of reachable function, could be optimized - /// but i need to store adjacency list of the graph in grammar, and the - /// use bfs. - pub fn get_reachable(&self) -> BTreeSet { - let mut reachable = BTreeSet::new(); - let mut has_changed = true; - reachable.insert(self.start_symbol); - while has_changed { - has_changed = false; - for production in self.productions.iter() { - if !reachable.contains(&production.lhs) { - continue; - } - for letter in production.rhs.iter() { - match letter { - Letter::NonTerminal(idx) => { - if !reachable.contains(idx) { - reachable.insert(*idx); - has_changed = true; - } - } - Letter::Terminal(_) => {} - } - } - } - } - - reachable - } - - /// returns set of generator non terminals - /// a non terminal is a generator when it produces some finite - /// string of terminals - pub fn get_generators(&self) -> BTreeSet { - let mut generators = BTreeSet::new(); - for production in self.productions.iter() { - let mut is_generator = true; - for letter in production.rhs.iter() { - match letter { - Letter::NonTerminal(non_terminal) => { - if !generators.contains(non_terminal) { - is_generator = false; - break; - } - } - Letter::Terminal(_) => {} - } - } - if is_generator { - generators.insert(production.lhs); - } - } - - generators - } - - pub fn remove_useless(&mut self) -> () { - // first remove non generators - let generators = self.get_generators(); - - self.productions.retain(|production| { - generators.contains(&production.lhs) && production.rhs.iter().all(|letter| { - match letter { - Letter::NonTerminal(idx) => generators.contains(idx), - Letter::Terminal(_) => true - } - }) - }); - - // then remove non reachable - let reachable = self.get_reachable(); - - self.productions.retain(|production| { - reachable.contains(&production.lhs) && production.rhs.iter().all(|letter| { - match letter { - Letter::NonTerminal(idx) => reachable.contains(idx), - Letter::Terminal(_) => true - } - }) - }); - - // invalidate nullable - self.nullable = None; - } - - pub fn get_unitary_couples(&self) -> BTreeSet<(NonTerminal, NonTerminal)> { - let non_terminals = self.get_non_terminal(); - let mut unitary_couples = BTreeSet::new(); - let mut has_changed = true; - - for non_terminal in non_terminals { - unitary_couples.insert((non_terminal, non_terminal)); - } - - while has_changed { - has_changed = false; - for production in self.productions.iter() { - if production.rhs.len() != 1 { - continue; - } - let mut to_insert = BTreeSet::new(); - for unitary_couple in unitary_couples.iter() { - if let Letter::NonTerminal(non_term) = production.rhs[0] { - if unitary_couple.1 == production.lhs && - !unitary_couples.contains(&(unitary_couple.0, non_term)) && - !to_insert.contains(&(unitary_couple.0, non_term)) { - to_insert.insert((unitary_couple.0, non_term)); - } - } - } - - if to_insert.len() > 0 { - unitary_couples.append(&mut to_insert); - has_changed = true; - } - } - } - - unitary_couples - } - - // TODO: this is a very complex function in this moment, it needs refactor - // it also has some points were it can be optimized - pub fn remove_unitary_cycles(&mut self) { - let unitary_couples = self.get_unitary_couples(); - - // remove all unitary couples - self.productions.retain(|production| { - if production.rhs.len() != 1 { - return true; - } - - match production.rhs[0] { - Letter::NonTerminal(non_term) => !unitary_couples.contains(&(production.lhs, non_term)), - Letter::Terminal(_) => true - } - }); - - // add corresponding productions - let mut adj_list = self.productions_to_adj_list(); - for unitary_couple in unitary_couples.iter() { - if unitary_couple.0 == unitary_couple.1 { - continue; - } - - let mut to_insert = adj_list.get(&unitary_couple.1).unwrap().clone(); - - adj_list.entry(unitary_couple.0) - .or_insert(BTreeSet::new()) - .append(&mut to_insert); - } - - // trasform adj list back to transitions - let mut new_transitions = vec![]; - for (non_terminal, transitions) in adj_list.iter() { - for transition in transitions.iter() { - new_transitions.push(Production { - lhs: *non_terminal, - rhs: transition.clone() - }); - } - } - self.productions = new_transitions; - - // invalidate nullable - self.nullable = None; - } - pub fn productions_to_adj_list(&self) -> BTreeMap>> { let mut adj_list: BTreeMap>> = BTreeMap::new(); for production in self.productions.iter() { @@ -456,7 +257,6 @@ impl From<&DFA> for Grammar { #[cfg(test)] mod test { use super::*; - use crate::map; fn get_test_grammar() -> Grammar { // S -> Ab | c @@ -485,7 +285,6 @@ mod test { assert!(first.contains(&'a')); assert!(first.contains(&'b')); assert!(first.contains(&'c')); - } #[test] @@ -500,134 +299,4 @@ mod test { assert_eq!(follow.len(), 1); assert!(follow.contains(&'b')); } - - #[test] - fn test_nullable() { - let grammar = get_test_grammar(); - - let nullable = grammar.get_nullable(); - assert_eq!(nullable.len(), 1); - assert!(nullable.contains(&1)); - } - - #[test] - fn test_dfa_conversion() { - // this dfa should recognize ba* - let dfa: DFA = DFA::from_state( - 3, - 0, - vec![1], - vec![ - map! { - 'a' => 2, - 'b' => 1 - }, - map! { - 'a' => 1, - 'b' => 2 - }, - map! { - 'a' => 2, - 'b' => 2 - }, - ], - None - ); - - let grammar = Grammar::from(&dfa); - - // FIXME: the order in the production matters, but it shouldn't be the case. - let result = Grammar { - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(2)] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(2)] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(2)] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(2)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, - ], - nullable: None, - }; - - assert_eq!(grammar, result); - } - - #[test] - fn test_remove_useless() { - let mut grammar = { - // S -> AB | a - // B -> b - - // S = 0 - // B = 1 - // A = 2 - - Grammar { - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::NonTerminal(2)] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('a')] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('b')] }, - ], - nullable: None, - } - }; - - grammar.remove_useless(); - - let result = Grammar { - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::Terminal('a')] }, - ], - nullable: None, - }; - - assert_eq!(grammar, result); - } - - #[test] - fn test_remove_unitary_cycles() { - // E -> E + T | T - // T -> T * F | F - // F -> (E) | a - - let mut grammar = Grammar { - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(0), Letter::Terminal('+'), Letter::NonTerminal(1)] }, - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('*'), Letter::NonTerminal(2)] }, - Production { lhs: 1, rhs: vec![Letter::NonTerminal(2)] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('a')] }, - ], - nullable: None, - }; - - let result = Grammar { - // E -> E + T | T * F | (E) | a - // T -> T * F | (E) | a - // F -> (E) | a - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(0), Letter::Terminal('+'), Letter::NonTerminal(1)] }, - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('*'), Letter::NonTerminal(2)] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('a')] }, - Production { lhs: 1, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('*'), Letter::NonTerminal(2)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a')] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('a')] }, - ], - nullable: None, - }; - - grammar.remove_unitary_cycles(); - - assert_eq!(grammar, result); - } } \ No newline at end of file diff --git a/src/grammar/grammar/semplification.rs b/src/grammar/grammar/semplification.rs new file mode 100644 index 0000000..68209c6 --- /dev/null +++ b/src/grammar/grammar/semplification.rs @@ -0,0 +1,360 @@ +use std::collections::{BTreeSet}; + +use super::{Grammar, NonTerminal, Letter, EPSILON, Production}; + +impl Grammar { + pub fn get_nullable(&self) -> BTreeSet { + let mut nullable = BTreeSet::new(); + let mut has_changed = true; + while has_changed { + has_changed = false; + for production in self.productions.iter() { + let mut is_nullable = true; + for letter in production.rhs.iter() { + match letter { + Letter::NonTerminal(idx) => { + if !nullable.contains(idx) { + is_nullable = false; + break; + } + } + Letter::Terminal(ch) => { + if *ch != EPSILON { + is_nullable = false; + break; + } + } + } + } + if is_nullable && !nullable.contains(&production.lhs) { + nullable.insert(production.lhs); + has_changed = true; + } + } + } + + nullable + } + + /// O(m^2) implementation of reachable function, could be optimized + /// but i need to store adjacency list of the graph in grammar, and the + /// use bfs. + pub fn get_reachable(&self) -> BTreeSet { + let mut reachable = BTreeSet::new(); + let mut has_changed = true; + reachable.insert(self.start_symbol); + while has_changed { + has_changed = false; + for production in self.productions.iter() { + if !reachable.contains(&production.lhs) { + continue; + } + for letter in production.rhs.iter() { + match letter { + Letter::NonTerminal(idx) => { + if !reachable.contains(idx) { + reachable.insert(*idx); + has_changed = true; + } + } + Letter::Terminal(_) => {} + } + } + } + } + + reachable + } + + /// returns set of generator non terminals + /// a non terminal is a generator when it produces some finite + /// string of terminals + pub fn get_generators(&self) -> BTreeSet { + let mut generators = BTreeSet::new(); + for production in self.productions.iter() { + let mut is_generator = true; + for letter in production.rhs.iter() { + match letter { + Letter::NonTerminal(non_terminal) => { + if !generators.contains(non_terminal) { + is_generator = false; + break; + } + } + Letter::Terminal(_) => {} + } + } + if is_generator { + generators.insert(production.lhs); + } + } + + generators + } + + pub fn remove_useless(&mut self) -> () { + // first remove non generators + let generators = self.get_generators(); + + self.productions.retain(|production| { + generators.contains(&production.lhs) && production.rhs.iter().all(|letter| { + match letter { + Letter::NonTerminal(idx) => generators.contains(idx), + Letter::Terminal(_) => true + } + }) + }); + + // then remove non reachable + let reachable = self.get_reachable(); + + self.productions.retain(|production| { + reachable.contains(&production.lhs) && production.rhs.iter().all(|letter| { + match letter { + Letter::NonTerminal(idx) => reachable.contains(idx), + Letter::Terminal(_) => true + } + }) + }); + + // invalidate nullable + self.nullable = None; + } + + pub fn get_unitary_couples(&self) -> BTreeSet<(NonTerminal, NonTerminal)> { + let non_terminals = self.get_non_terminal(); + let mut unitary_couples = BTreeSet::new(); + let mut has_changed = true; + + for non_terminal in non_terminals { + unitary_couples.insert((non_terminal, non_terminal)); + } + + while has_changed { + has_changed = false; + for production in self.productions.iter() { + if production.rhs.len() != 1 { + continue; + } + let mut to_insert = BTreeSet::new(); + for unitary_couple in unitary_couples.iter() { + if let Letter::NonTerminal(non_term) = production.rhs[0] { + if unitary_couple.1 == production.lhs && + !unitary_couples.contains(&(unitary_couple.0, non_term)) && + !to_insert.contains(&(unitary_couple.0, non_term)) { + to_insert.insert((unitary_couple.0, non_term)); + } + } + } + + if to_insert.len() > 0 { + unitary_couples.append(&mut to_insert); + has_changed = true; + } + } + } + + unitary_couples + } + + // TODO: this is a very complex function in this moment, it needs refactor + // it also has some points were it can be optimized + pub fn remove_unitary_cycles(&mut self) { + let unitary_couples = self.get_unitary_couples(); + + // remove all unitary couples + self.productions.retain(|production| { + if production.rhs.len() != 1 { + return true; + } + + match production.rhs[0] { + Letter::NonTerminal(non_term) => !unitary_couples.contains(&(production.lhs, non_term)), + Letter::Terminal(_) => true + } + }); + + // add corresponding productions + let mut adj_list = self.productions_to_adj_list(); + for unitary_couple in unitary_couples.iter() { + if unitary_couple.0 == unitary_couple.1 { + continue; + } + + let mut to_insert = adj_list.get(&unitary_couple.1).unwrap().clone(); + + adj_list.entry(unitary_couple.0) + .or_insert(BTreeSet::new()) + .append(&mut to_insert); + } + + // trasform adj list back to transitions + let mut new_transitions = vec![]; + for (non_terminal, transitions) in adj_list.iter() { + for transition in transitions.iter() { + new_transitions.push(Production { + lhs: *non_terminal, + rhs: transition.clone() + }); + } + } + self.productions = new_transitions; + + // invalidate nullable + self.nullable = None; + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::map; + use crate::automata::DFA; + + fn get_test_grammar() -> Grammar { + // S -> Ab | c + // A -> aA | ε + // S = 0 + // A = 1 + Grammar { + start_symbol: 0, + productions: vec![ + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('c')] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, + ], + nullable: None, + } + } + + #[test] + fn test_nullable() { + let grammar = get_test_grammar(); + + let nullable = grammar.get_nullable(); + assert_eq!(nullable.len(), 1); + assert!(nullable.contains(&1)); + } + + #[test] + fn test_dfa_conversion() { + // this dfa should recognize ba* + let dfa: DFA = DFA::from_state( + 3, + 0, + vec![1], + vec![ + map! { + 'a' => 2, + 'b' => 1 + }, + map! { + 'a' => 1, + 'b' => 2 + }, + map! { + 'a' => 2, + 'b' => 2 + }, + ], + None + ); + + let grammar = Grammar::from(&dfa); + + // FIXME: the order in the production matters, but it shouldn't be the case. + let result = Grammar { + start_symbol: 0, + productions: vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(2)] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(2)] }, + Production { lhs: 2, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(2)] }, + Production { lhs: 2, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(2)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, + ], + nullable: None, + }; + + assert_eq!(grammar, result); + } + + #[test] + fn test_remove_useless() { + let mut grammar = { + // S -> AB | a + // B -> b + + // S = 0 + // B = 1 + // A = 2 + + Grammar { + start_symbol: 0, + productions: vec![ + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::NonTerminal(2)] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('a')] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('b')] }, + ], + nullable: None, + } + }; + + grammar.remove_useless(); + + let result = Grammar { + start_symbol: 0, + productions: vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal('a')] }, + ], + nullable: None, + }; + + assert_eq!(grammar, result); + } + + #[test] + fn test_remove_unitary_cycles() { + // E -> E + T | T + // T -> T * F | F + // F -> (E) | a + + let mut grammar = Grammar { + start_symbol: 0, + productions: vec![ + Production { lhs: 0, rhs: vec![Letter::NonTerminal(0), Letter::Terminal('+'), Letter::NonTerminal(1)] }, + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('*'), Letter::NonTerminal(2)] }, + Production { lhs: 1, rhs: vec![Letter::NonTerminal(2)] }, + Production { lhs: 2, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 2, rhs: vec![Letter::Terminal('a')] }, + ], + nullable: None, + }; + + let result = Grammar { + // E -> E + T | T * F | (E) | a + // T -> T * F | (E) | a + // F -> (E) | a + start_symbol: 0, + productions: vec![ + Production { lhs: 0, rhs: vec![Letter::NonTerminal(0), Letter::Terminal('+'), Letter::NonTerminal(1)] }, + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('*'), Letter::NonTerminal(2)] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('a')] }, + Production { lhs: 1, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('*'), Letter::NonTerminal(2)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a')] }, + Production { lhs: 2, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 2, rhs: vec![Letter::Terminal('a')] }, + ], + nullable: None, + }; + + grammar.remove_unitary_cycles(); + + assert_eq!(grammar, result); + } +} \ No newline at end of file From 3dab406637890ff2e7e2657a8f8c38236da09612 Mon Sep 17 00:00:00 2001 From: Angelo 'Flecart' Huang Date: Thu, 8 Dec 2022 21:08:04 +0100 Subject: [PATCH 06/13] test(first): add first cycle check --- src/grammar/grammar.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/grammar/grammar.rs b/src/grammar/grammar.rs index 9271d61..79782f5 100644 --- a/src/grammar/grammar.rs +++ b/src/grammar/grammar.rs @@ -287,6 +287,22 @@ mod test { assert!(first.contains(&'c')); } + #[test] + fn test_first_cycle() { + let mut grammar = Grammar { + start_symbol: 0, + productions: vec![ + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::NonTerminal(0)] }, + ], + nullable: None, + }; + + let first = grammar.first(&Letter::NonTerminal(0)); + + assert_eq!(first.len(), 0); + } + #[test] fn test_follow() { let mut grammar = get_test_grammar(); From 86630e88f53b7c77c0f5bc1e9bc6c805d4117cf7 Mon Sep 17 00:00:00 2001 From: Angelo 'Flecart' Huang Date: Thu, 8 Dec 2022 21:23:34 +0100 Subject: [PATCH 07/13] refactor: further simplyfy grammar file --- src/grammar/grammar.rs | 56 +++++++-- src/grammar/grammar/helper.rs | 138 ++++++++++++++++++++++ src/grammar/grammar/semplification.rs | 163 +------------------------- 3 files changed, 186 insertions(+), 171 deletions(-) create mode 100644 src/grammar/grammar/helper.rs diff --git a/src/grammar/grammar.rs b/src/grammar/grammar.rs index 79782f5..65dc381 100644 --- a/src/grammar/grammar.rs +++ b/src/grammar/grammar.rs @@ -5,6 +5,7 @@ use crate::grammar::consts::{EPSILON, STRING_END}; use super::item::Item; +mod helper; mod semplification; pub type NonTerminal = usize; @@ -193,15 +194,6 @@ impl Grammar { follow } - pub fn get_non_terminal(&self) -> BTreeSet { - let mut non_terminals = BTreeSet::new(); - for production in self.productions.iter() { - non_terminals.insert(production.lhs); - } - - non_terminals - } - pub fn productions_to_adj_list(&self) -> BTreeMap>> { let mut adj_list: BTreeMap>> = BTreeMap::new(); for production in self.productions.iter() { @@ -257,6 +249,8 @@ impl From<&DFA> for Grammar { #[cfg(test)] mod test { use super::*; + use crate::map; + use crate::automata::DFA; fn get_test_grammar() -> Grammar { // S -> Ab | c @@ -315,4 +309,48 @@ mod test { assert_eq!(follow.len(), 1); assert!(follow.contains(&'b')); } + + #[test] + fn test_dfa_conversion() { + // this dfa should recognize ba* + let dfa: DFA = DFA::from_state( + 3, + 0, + vec![1], + vec![ + map! { + 'a' => 2, + 'b' => 1 + }, + map! { + 'a' => 1, + 'b' => 2 + }, + map! { + 'a' => 2, + 'b' => 2 + }, + ], + None + ); + + let grammar = Grammar::from(&dfa); + + // FIXME: the order in the production matters, but it shouldn't be the case. + let result = Grammar { + start_symbol: 0, + productions: vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(2)] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(2)] }, + Production { lhs: 2, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(2)] }, + Production { lhs: 2, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(2)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, + ], + nullable: None, + }; + + assert_eq!(grammar, result); + } } \ No newline at end of file diff --git a/src/grammar/grammar/helper.rs b/src/grammar/grammar/helper.rs new file mode 100644 index 0000000..0a1bc9f --- /dev/null +++ b/src/grammar/grammar/helper.rs @@ -0,0 +1,138 @@ +/// This file contains some general helper functions used +/// To implement grammar semplification and first and follows +use std::collections::{BTreeSet}; + +use super::{Grammar, NonTerminal, Letter, EPSILON}; + +impl Grammar { + pub fn get_non_terminal(&self) -> BTreeSet { + let mut non_terminals = BTreeSet::new(); + for production in self.productions.iter() { + non_terminals.insert(production.lhs); + } + + non_terminals + } + + pub fn get_nullable(&self) -> BTreeSet { + let mut nullable = BTreeSet::new(); + let mut has_changed = true; + while has_changed { + has_changed = false; + for production in self.productions.iter() { + let mut is_nullable = true; + for letter in production.rhs.iter() { + match letter { + Letter::NonTerminal(idx) => { + if !nullable.contains(idx) { + is_nullable = false; + break; + } + } + Letter::Terminal(ch) => { + if *ch != EPSILON { + is_nullable = false; + break; + } + } + } + } + if is_nullable && !nullable.contains(&production.lhs) { + nullable.insert(production.lhs); + has_changed = true; + } + } + } + + nullable + } + + /// O(m^2) implementation of reachable function, could be optimized + /// but i need to store adjacency list of the graph in grammar, and the + /// use bfs. + pub fn get_reachable(&self) -> BTreeSet { + let mut reachable = BTreeSet::new(); + let mut has_changed = true; + reachable.insert(self.start_symbol); + while has_changed { + has_changed = false; + for production in self.productions.iter() { + if !reachable.contains(&production.lhs) { + continue; + } + for letter in production.rhs.iter() { + match letter { + Letter::NonTerminal(idx) => { + if !reachable.contains(idx) { + reachable.insert(*idx); + has_changed = true; + } + } + Letter::Terminal(_) => {} + } + } + } + } + + reachable + } + + /// returns set of generator non terminals + /// a non terminal is a generator when it produces some finite + /// string of terminals + /// This is still O(m^2) implementation, could be optimized + pub fn get_generators(&self) -> BTreeSet { + let mut generators = BTreeSet::new(); + for production in self.productions.iter() { + let mut is_generator = true; + for letter in production.rhs.iter() { + match letter { + Letter::NonTerminal(non_terminal) => { + if !generators.contains(non_terminal) { + is_generator = false; + break; + } + } + Letter::Terminal(_) => {} + } + } + if is_generator { + generators.insert(production.lhs); + } + } + + generators + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::grammar::{Production}; + + fn get_test_grammar() -> Grammar { + // S -> Ab | c + // A -> aA | ε + // S = 0 + // A = 1 + Grammar { + start_symbol: 0, + productions: vec![ + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('c')] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, + ], + nullable: None, + } + } + + #[test] + fn test_nullable() { + let grammar = get_test_grammar(); + + let nullable = grammar.get_nullable(); + assert_eq!(nullable.len(), 1); + assert!(nullable.contains(&1)); + } +} \ No newline at end of file diff --git a/src/grammar/grammar/semplification.rs b/src/grammar/grammar/semplification.rs index 68209c6..1659bd6 100644 --- a/src/grammar/grammar/semplification.rs +++ b/src/grammar/grammar/semplification.rs @@ -1,97 +1,8 @@ use std::collections::{BTreeSet}; -use super::{Grammar, NonTerminal, Letter, EPSILON, Production}; +use super::{Grammar, NonTerminal, Letter, Production}; impl Grammar { - pub fn get_nullable(&self) -> BTreeSet { - let mut nullable = BTreeSet::new(); - let mut has_changed = true; - while has_changed { - has_changed = false; - for production in self.productions.iter() { - let mut is_nullable = true; - for letter in production.rhs.iter() { - match letter { - Letter::NonTerminal(idx) => { - if !nullable.contains(idx) { - is_nullable = false; - break; - } - } - Letter::Terminal(ch) => { - if *ch != EPSILON { - is_nullable = false; - break; - } - } - } - } - if is_nullable && !nullable.contains(&production.lhs) { - nullable.insert(production.lhs); - has_changed = true; - } - } - } - - nullable - } - - /// O(m^2) implementation of reachable function, could be optimized - /// but i need to store adjacency list of the graph in grammar, and the - /// use bfs. - pub fn get_reachable(&self) -> BTreeSet { - let mut reachable = BTreeSet::new(); - let mut has_changed = true; - reachable.insert(self.start_symbol); - while has_changed { - has_changed = false; - for production in self.productions.iter() { - if !reachable.contains(&production.lhs) { - continue; - } - for letter in production.rhs.iter() { - match letter { - Letter::NonTerminal(idx) => { - if !reachable.contains(idx) { - reachable.insert(*idx); - has_changed = true; - } - } - Letter::Terminal(_) => {} - } - } - } - } - - reachable - } - - /// returns set of generator non terminals - /// a non terminal is a generator when it produces some finite - /// string of terminals - pub fn get_generators(&self) -> BTreeSet { - let mut generators = BTreeSet::new(); - for production in self.productions.iter() { - let mut is_generator = true; - for letter in production.rhs.iter() { - match letter { - Letter::NonTerminal(non_terminal) => { - if !generators.contains(non_terminal) { - is_generator = false; - break; - } - } - Letter::Terminal(_) => {} - } - } - if is_generator { - generators.insert(production.lhs); - } - } - - generators - } - pub fn remove_useless(&mut self) -> () { // first remove non generators let generators = self.get_generators(); @@ -208,78 +119,6 @@ impl Grammar { #[cfg(test)] mod tests { use super::*; - use crate::map; - use crate::automata::DFA; - - fn get_test_grammar() -> Grammar { - // S -> Ab | c - // A -> aA | ε - // S = 0 - // A = 1 - Grammar { - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b')] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('c')] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, - ], - nullable: None, - } - } - - #[test] - fn test_nullable() { - let grammar = get_test_grammar(); - - let nullable = grammar.get_nullable(); - assert_eq!(nullable.len(), 1); - assert!(nullable.contains(&1)); - } - - #[test] - fn test_dfa_conversion() { - // this dfa should recognize ba* - let dfa: DFA = DFA::from_state( - 3, - 0, - vec![1], - vec![ - map! { - 'a' => 2, - 'b' => 1 - }, - map! { - 'a' => 1, - 'b' => 2 - }, - map! { - 'a' => 2, - 'b' => 2 - }, - ], - None - ); - - let grammar = Grammar::from(&dfa); - - // FIXME: the order in the production matters, but it shouldn't be the case. - let result = Grammar { - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(2)] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(2)] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(2)] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(2)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, - ], - nullable: None, - }; - - assert_eq!(grammar, result); - } #[test] fn test_remove_useless() { From 4848a4f76beea9323b6c4103b2dba53dc556f477 Mon Sep 17 00:00:00 2001 From: Angelo 'Flecart' Huang Date: Thu, 8 Dec 2022 21:28:46 +0100 Subject: [PATCH 08/13] fix(get_generators): generators didn't find all generators --- src/grammar/grammar/helper.rs | 40 +++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/grammar/grammar/helper.rs b/src/grammar/grammar/helper.rs index 0a1bc9f..68d7496 100644 --- a/src/grammar/grammar/helper.rs +++ b/src/grammar/grammar/helper.rs @@ -56,9 +56,9 @@ impl Grammar { reachable.insert(self.start_symbol); while has_changed { has_changed = false; - for production in self.productions.iter() { + self.productions.iter().for_each(|production| -> () { if !reachable.contains(&production.lhs) { - continue; + return; } for letter in production.rhs.iter() { match letter { @@ -71,7 +71,7 @@ impl Grammar { Letter::Terminal(_) => {} } } - } + }); } reachable @@ -83,22 +83,30 @@ impl Grammar { /// This is still O(m^2) implementation, could be optimized pub fn get_generators(&self) -> BTreeSet { let mut generators = BTreeSet::new(); - for production in self.productions.iter() { - let mut is_generator = true; - for letter in production.rhs.iter() { - match letter { - Letter::NonTerminal(non_terminal) => { - if !generators.contains(non_terminal) { - is_generator = false; - break; + let mut has_changed = true; + + while has_changed { + has_changed = false; + + self.productions.iter().for_each(|production| -> () { + let mut is_generator = true; + production.rhs.iter().for_each(|letter| -> () { + match letter { + Letter::NonTerminal(non_terminal) => { + if !generators.contains(non_terminal) { + is_generator = false; + return; + } } + Letter::Terminal(_) => {} } - Letter::Terminal(_) => {} + }); + + if is_generator { + generators.insert(production.lhs); + has_changed = true; } - } - if is_generator { - generators.insert(production.lhs); - } + }); } generators From b6708cd12555588d9d24d106197a1eb1b2e00cc8 Mon Sep 17 00:00:00 2001 From: Angelo 'Flecart' Huang Date: Thu, 8 Dec 2022 21:30:48 +0100 Subject: [PATCH 09/13] refactor(grammar): move unitary couples in helpers --- src/grammar/grammar/helper.rs | 42 ++++++++++++++++++++++++++- src/grammar/grammar/semplification.rs | 38 +----------------------- 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/src/grammar/grammar/helper.rs b/src/grammar/grammar/helper.rs index 68d7496..291ba8f 100644 --- a/src/grammar/grammar/helper.rs +++ b/src/grammar/grammar/helper.rs @@ -87,7 +87,7 @@ impl Grammar { while has_changed { has_changed = false; - + self.productions.iter().for_each(|production| -> () { let mut is_generator = true; production.rhs.iter().for_each(|letter| -> () { @@ -111,6 +111,46 @@ impl Grammar { generators } + + /// returns set of unitary couples of non terminals + /// a unitary couple is a couple of non terminals (A, B) such that + /// A -> B is a production in the grammar or A -> C, C -> B is a production + /// (aka it's transitive and reflexive) + pub fn get_unitary_couples(&self) -> BTreeSet<(NonTerminal, NonTerminal)> { + let non_terminals = self.get_non_terminal(); + let mut unitary_couples = BTreeSet::new(); + let mut has_changed = true; + + for non_terminal in non_terminals { + unitary_couples.insert((non_terminal, non_terminal)); + } + + while has_changed { + has_changed = false; + for production in self.productions.iter() { + if production.rhs.len() != 1 { + continue; + } + let mut to_insert = BTreeSet::new(); + for unitary_couple in unitary_couples.iter() { + if let Letter::NonTerminal(non_term) = production.rhs[0] { + if unitary_couple.1 == production.lhs && + !unitary_couples.contains(&(unitary_couple.0, non_term)) && + !to_insert.contains(&(unitary_couple.0, non_term)) { + to_insert.insert((unitary_couple.0, non_term)); + } + } + } + + if to_insert.len() > 0 { + unitary_couples.append(&mut to_insert); + has_changed = true; + } + } + } + + unitary_couples + } } #[cfg(test)] diff --git a/src/grammar/grammar/semplification.rs b/src/grammar/grammar/semplification.rs index 1659bd6..2b2bb01 100644 --- a/src/grammar/grammar/semplification.rs +++ b/src/grammar/grammar/semplification.rs @@ -1,6 +1,6 @@ use std::collections::{BTreeSet}; -use super::{Grammar, NonTerminal, Letter, Production}; +use super::{Grammar, Letter, Production}; impl Grammar { pub fn remove_useless(&mut self) -> () { @@ -32,42 +32,6 @@ impl Grammar { self.nullable = None; } - pub fn get_unitary_couples(&self) -> BTreeSet<(NonTerminal, NonTerminal)> { - let non_terminals = self.get_non_terminal(); - let mut unitary_couples = BTreeSet::new(); - let mut has_changed = true; - - for non_terminal in non_terminals { - unitary_couples.insert((non_terminal, non_terminal)); - } - - while has_changed { - has_changed = false; - for production in self.productions.iter() { - if production.rhs.len() != 1 { - continue; - } - let mut to_insert = BTreeSet::new(); - for unitary_couple in unitary_couples.iter() { - if let Letter::NonTerminal(non_term) = production.rhs[0] { - if unitary_couple.1 == production.lhs && - !unitary_couples.contains(&(unitary_couple.0, non_term)) && - !to_insert.contains(&(unitary_couple.0, non_term)) { - to_insert.insert((unitary_couple.0, non_term)); - } - } - } - - if to_insert.len() > 0 { - unitary_couples.append(&mut to_insert); - has_changed = true; - } - } - } - - unitary_couples - } - // TODO: this is a very complex function in this moment, it needs refactor // it also has some points were it can be optimized pub fn remove_unitary_cycles(&mut self) { From 631f5b52a799acd7365a4187b983bfed9204c9ec Mon Sep 17 00:00:00 2001 From: Angelo 'Flecart' Huang Date: Sun, 11 Dec 2022 11:39:09 +0100 Subject: [PATCH 10/13] refactor(item): create easier way to get production vec --- src/grammar/consts.rs | 5 +++- src/grammar/grammar.rs | 28 +++++++----------- src/grammar/item.rs | 61 +++++++++++++++++++++++++++------------ src/grammar/mod.rs | 2 ++ src/grammar/production.rs | 37 ++++++++++++++++++++++++ 5 files changed, 96 insertions(+), 37 deletions(-) create mode 100644 src/grammar/production.rs diff --git a/src/grammar/consts.rs b/src/grammar/consts.rs index 12f7f93..274377a 100644 --- a/src/grammar/consts.rs +++ b/src/grammar/consts.rs @@ -4,4 +4,7 @@ pub const STRING_END: char = '$'; // NOTE: this could be in conflict with the Terminal symbols, so // it is MANDATORY that the Terminal doesn´t have dots in it! -pub const ITEM_SEP: char = '.'; \ No newline at end of file +pub const ITEM_SEP: char = '.'; + +pub type NonTerminal = usize; +pub type Terminal = char; diff --git a/src/grammar/grammar.rs b/src/grammar/grammar.rs index 65dc381..d6dc96f 100644 --- a/src/grammar/grammar.rs +++ b/src/grammar/grammar.rs @@ -1,28 +1,20 @@ use std::collections::{BTreeSet, BTreeMap}; use crate::automata::DFA; -use crate::grammar::consts::{EPSILON, STRING_END}; - -use super::item::Item; +use crate::grammar::{ + Production, + Letter +}; +use crate::grammar::consts::{ + EPSILON, + STRING_END, + Terminal, + NonTerminal, +}; mod helper; mod semplification; -pub type NonTerminal = usize; -pub type Terminal = char; - -#[derive(Debug, PartialEq, Clone, PartialOrd, Eq, Ord)] -pub enum Letter { - NonTerminal(NonTerminal), - Terminal(Terminal), -} - -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord)] -pub struct Production { - pub lhs: NonTerminal, - pub rhs: Vec, -} - #[derive(Debug, PartialEq)] pub struct Grammar { start_symbol: NonTerminal, diff --git a/src/grammar/item.rs b/src/grammar/item.rs index 25d3174..4a68806 100644 --- a/src/grammar/item.rs +++ b/src/grammar/item.rs @@ -1,7 +1,7 @@ use std::collections::{BTreeSet, BTreeMap, VecDeque}; -use crate::grammar::grammar::{Grammar, Letter, Terminal, NonTerminal, Production}; -use crate::grammar::consts::{EPSILON, ITEM_SEP}; +use crate::grammar::{Grammar, Letter, Production}; +use crate::grammar::consts::{EPSILON, ITEM_SEP, Terminal, NonTerminal}; use super::consts::STRING_END; @@ -71,32 +71,39 @@ impl Item { ) -> BTreeSet { let mut closure_items = (*items).clone(); let mut used_non_term = vec![false; grammar.get_non_terminal().len()]; - let mut non_terminals = Self::compute_closure_queue(items, &mut used_non_term); - let dot_production = Self::add_initial_sep(grammar.productions_to_adj_list()); - // apply the closure to all the non terminals in non_terminals - while let Some((non_terminal, letter_first)) = non_terminals.pop_front() { + // the non terminals to explore are seen as a BFS that expands to other non terminals + // when it sees an arc (e.g. a non terminal after a SEP). + let mut closure_queue = Self::compute_closure_queue(items, &mut used_non_term); + + // apply the closure to all the non terminals in closure_queue + while let Some((non_terminal, letter_first)) = closure_queue.pop_front() { dot_production.get(&non_terminal) .unwrap() .iter() .for_each(|rhs| { // with dot_production, the dot is always at 0, so the first letter is 1 - if rhs.len() >= 2 { - let letter = &rhs[1]; - - if let Letter::NonTerminal(non_term) = letter { - if !used_non_term[*non_term as usize] { - used_non_term[*non_term as usize] = true; - - if rhs.len() >= 3 { - non_terminals.push_back((*non_term, Some(rhs[2].clone()))); - } else { - non_terminals.push_back((*non_term, None)); + let (non_term_opt, look_ahead) = Self::get_next_closure_non_term(rhs); + + match non_term_opt { + Some(non_term) => { + if !used_non_term[non_term] { + used_non_term[non_term] = true; + + match look_ahead { + Some(_) => { + if let None = letter_first { + closure_queue.push_back((non_term, None)); + } else { + closure_queue.push_back((non_term, look_ahead)) + } + }, + None => closure_queue.push_back((non_term, None)), } } } - + None => {} } let production = Production { @@ -164,7 +171,25 @@ impl Item { }); non_terminals + } + + /// this function assumes the dot is at the beginning of the production + /// and returns the non terminal after the dot, with the look ahead letter, + /// if there is one. + fn get_next_closure_non_term(rhs: &Vec) -> (Option, Option) { + let first_non_term = Production::get_nth_if_non_terminal(rhs, 1); + let second_letter = Production::get_nth(rhs, 2); + + match first_non_term { + Some(non_term) => { + match second_letter { + Some(letter) => (Some(*non_term), Some(letter.clone())), + None => (Some(*non_term), None), + } + } + None => (None, None), } + } pub fn goto(items: &BTreeSet, letter: &Letter) -> BTreeSet { let mut goto_items: BTreeSet = BTreeSet::new(); diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs index c603449..7ab5dc3 100644 --- a/src/grammar/mod.rs +++ b/src/grammar/mod.rs @@ -1,6 +1,8 @@ mod grammar; mod item; +mod production; pub mod consts; pub use grammar::*; pub use item::*; +pub use production::*; diff --git a/src/grammar/production.rs b/src/grammar/production.rs new file mode 100644 index 0000000..7937035 --- /dev/null +++ b/src/grammar/production.rs @@ -0,0 +1,37 @@ +use crate::grammar::consts::{ + Terminal, + NonTerminal, +}; + +#[derive(Debug, PartialEq, Clone, PartialOrd, Eq, Ord)] +pub enum Letter { + NonTerminal(NonTerminal), + Terminal(Terminal), +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord)] +pub struct Production { + pub lhs: NonTerminal, + pub rhs: Vec, +} + +impl Production { + pub fn get_nth_if_non_terminal(letters: &Vec, n: usize) -> Option<&NonTerminal> { + if n >= letters.len() { + return None; + } + + match &letters[n] { + Letter::NonTerminal(idx) => Some(idx), + _ => None, + } + } + + pub fn get_nth(letters: &Vec, n: usize) -> Option<&Letter> { + if n >= letters.len() { + return None; + } + + Some(&letters[n]) + } +} \ No newline at end of file From 134ff40f7f58b8b6a450b5b9d8df30cf5b7974f0 Mon Sep 17 00:00:00 2001 From: Angelo Huang <63465494+Flecart@users.noreply.github.com> Date: Wed, 14 Dec 2022 22:01:18 +0100 Subject: [PATCH 11/13] feat/grammar/first follow (#15) * feat(first_follow): first version * test(first_follow): move tests * test(first_follow): remove old implementation of first and follow * test(first_follow): fix tests and first bug --- src/grammar/first_follow.rs | 278 ++++++++++++++++++++++++++++++++++++ src/grammar/grammar.rs | 200 -------------------------- src/grammar/mod.rs | 6 +- 3 files changed, 282 insertions(+), 202 deletions(-) create mode 100644 src/grammar/first_follow.rs diff --git a/src/grammar/first_follow.rs b/src/grammar/first_follow.rs new file mode 100644 index 0000000..9352d30 --- /dev/null +++ b/src/grammar/first_follow.rs @@ -0,0 +1,278 @@ +use std::collections::BTreeSet; + +use crate::grammar::{ + Grammar, + Letter, + Production, + consts::{EPSILON, STRING_END, Terminal, NonTerminal}, +}; + +pub struct FirstFollow { + first_table: Option>>, + follow_table: Option>>, + nullable: Option>, + + num_non_terminal: usize, +} + +impl FirstFollow { + /// Create a new FirstFollow struct, it is important that + /// this is not public, as we want to only expose valid istantiations + /// of the first follow table. + fn new(non_non_terminals: usize) -> Self { + FirstFollow { + first_table: None, + follow_table: None, + nullable: None, + num_non_terminal: non_non_terminals, + } + } + + pub fn get_first(&self, letter: &Letter) -> BTreeSet { + match letter { + Letter::NonTerminal(idx) => self.first_table.as_ref().unwrap()[*idx].clone(), + Letter::Terminal(ch) => { + let mut set = BTreeSet::new(); + set.insert(*ch); + set + } + } + } + + pub fn get_follow(&self, non_terminal: NonTerminal) -> BTreeSet { + self.follow_table.as_ref().unwrap()[non_terminal].clone() + } + + fn compute_nullable(&mut self, grammar: &Grammar) { + let nullable = grammar.get_nullable(); + self.nullable = Some(vec![false; self.num_non_terminal]); + + nullable.iter().for_each(|non_terminal| -> () { + self.nullable.as_mut().unwrap()[*non_terminal] = true; + }); + } + + fn compute_first(&mut self, grammar: &Grammar) { + if let None = self.nullable { + self.compute_nullable(grammar); + } + + self.first_table = Some(vec![BTreeSet::new(); self.num_non_terminal]); + let productions = grammar.get_productions(); + let mut has_changed = true; + + while has_changed { + has_changed = false; + + productions.iter().for_each(|production| -> () { + has_changed |= self.update_first_table(production); + }); + } + + (0..self.num_non_terminal).for_each(|i| -> () { + if self.nullable.as_ref().unwrap()[i] { + self.first_table.as_mut().unwrap()[i].insert(EPSILON); + } + }); + } + + fn update_first_table(&mut self, production: &Production) -> bool { + let mut has_changed = false; + for letter in production.rhs.iter() { + match letter { + Letter::NonTerminal(idx) => { + let set_to_join = self.first_table.as_ref().unwrap()[*idx].clone(); + + has_changed |= Self::append_if_not_superset( + &mut self.first_table.as_mut().unwrap()[production.lhs], + set_to_join, + ); + if !self.nullable.as_ref().unwrap()[*idx] { + break; + } + } + Letter::Terminal(ch) => { + if *ch != EPSILON { + has_changed |= self.first_table.as_mut().unwrap()[production.lhs].insert(*ch); + break; + } + } + } + } + + has_changed + } + + fn compute_follow(&mut self, grammar: &Grammar) { + if let None = self.first_table { + self.compute_first(grammar); + } + self.follow_table = Some(vec![BTreeSet::new(); self.num_non_terminal]); + self.follow_table.as_mut().unwrap()[grammar.get_start_symbol()].insert(STRING_END); + + let productions = grammar.get_productions(); + let mut has_changed = true; + + while has_changed { + has_changed = false; + + productions.iter().for_each(|production| { + has_changed |= self.update_follow_table(production); + }); + } + } + + /// updates the follow table with the given production + /// returns true if the follow table has changed, false otherwise + fn update_follow_table(&mut self, production: &Production) -> bool { + let mut has_changed = false; + for (i, letter) in production.rhs.iter().enumerate() { + match letter { + Letter::NonTerminal(idx) => { + // if we are at the end of the production, then we need to add the follow of the lhs + if i == production.rhs.len() - 1 { + let to_join = self.follow_table.as_ref().unwrap()[production.lhs].clone(); + has_changed |= Self::append_if_not_superset( + &mut self.follow_table.as_mut().unwrap()[*idx], + to_join, + ); + } else { + // otherwise we need to add the first of the next symbol + let next_letter = &production.rhs[i + 1]; + match next_letter { + Letter::NonTerminal(next) => { + let to_join = self.first_table.as_ref().unwrap()[*next].clone(); + has_changed |= Self::append_if_not_superset( + &mut self.follow_table.as_mut().unwrap()[*idx], + to_join, + ); + }, + Letter::Terminal(ch) => { + self.follow_table.as_mut().unwrap()[*idx].insert(*ch); + } + } + + // if the whole next symbol is nullable, then we need to add the follow of the lhs + if self.is_nullable(&mut production.rhs[i+1..].iter()) { + let to_join = self.follow_table.as_ref().unwrap()[production.lhs].clone(); + has_changed |= Self::append_if_not_superset( + &mut self.follow_table.as_mut().unwrap()[*idx], + to_join, + ); + } + } + }, + Letter::Terminal(_) => {} + } + } + + has_changed + } + + fn append_if_not_superset(first_set: &mut BTreeSet, second_set: BTreeSet) -> bool { + if first_set.is_superset(&second_set) { + return false; + } + let mut mutable = second_set; + + first_set.append(&mut mutable); + + true + } + + /// checks if the rest of the iterator is all nullable. + /// assumes the nullable set has been initialized. + fn is_nullable<'a, T: Iterator>(&self, iter: &mut T) -> bool { + iter.all(|letter| -> bool { + match letter { + Letter::NonTerminal(idx) => self.nullable.as_ref().unwrap()[*idx], + Letter::Terminal(ch) => *ch == EPSILON, + } + }) + } +} + + +impl From<&Grammar> for FirstFollow { + fn from(grammar: &Grammar) -> Self { + let num_non_terminal = grammar.get_non_terminal().len(); + + let mut first_follow = FirstFollow::new(num_non_terminal); + + first_follow.compute_nullable(grammar); + first_follow.compute_first(grammar); + first_follow.compute_follow(grammar); + + first_follow + } +} + +#[cfg(test)] +mod test { + use super::*; + + fn get_test_grammar() -> Grammar { + // S -> Ab | c + // A -> aA | ε + // S = 0 + // A = 1 + Grammar::new( + 0, + vec![ + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('c')] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, + ] + ) + } + + #[test] + fn test_first() { + let grammar = get_test_grammar(); + + let first_follow = FirstFollow::from(&grammar); + let first = first_follow.get_first(&Letter::NonTerminal(0)); + + assert_eq!(first.len(), 3); + assert!(first.contains(&'a')); + assert!(first.contains(&'b')); + assert!(first.contains(&'c')); + + let first = first_follow.get_first(&Letter::NonTerminal(1)); + assert_eq!(first.len(), 2); + assert!(first.contains(&'a')); + assert!(first.contains(&EPSILON)); + } + + #[test] + fn test_first_cycle() { + let grammar = Grammar::new( + 0, + vec![ + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::NonTerminal(0)] }, + ], + ); + + + let first_follow = FirstFollow::from(&grammar); + let first = first_follow.get_first(&Letter::NonTerminal(0)); + + assert_eq!(first.len(), 0); + } + + #[test] + fn test_follow() { + let grammar = get_test_grammar(); + let first_follow = FirstFollow::from(&grammar); + + let follow = first_follow.get_follow(0); + assert_eq!(follow.len(), 1); + assert!(follow.contains(&STRING_END)); + + let follow = first_follow.get_follow(1); + assert_eq!(follow.len(), 1); + assert!(follow.contains(&'b')); + } +} \ No newline at end of file diff --git a/src/grammar/grammar.rs b/src/grammar/grammar.rs index d6dc96f..5ce37d5 100644 --- a/src/grammar/grammar.rs +++ b/src/grammar/grammar.rs @@ -44,148 +44,6 @@ impl Grammar { &self.productions } - pub fn first(&mut self, letter: &Letter) -> BTreeSet { - if let None = self.nullable { - self.nullable = Some(self.get_nullable()); - } - - match letter { - Letter::NonTerminal(non_terminal) => { - let mut used = vec![false; self.get_non_terminal().len()]; - let mut first = self._first(non_terminal, &mut used); - - if self.nullable.as_ref().unwrap().contains(&non_terminal) { - first.insert(EPSILON); - } - - first - } - Letter::Terminal(terminal) => { - let mut first = BTreeSet::new(); - first.insert(*terminal); - first - } - } - } - - fn _first(&self, non_terminal: &NonTerminal, used: &mut Vec) -> BTreeSet { - if used[*non_terminal] == true { - return BTreeSet::new(); - } - used[*non_terminal] = true; - - let nullable = self.nullable.as_ref().unwrap(); - let mut first = BTreeSet::new(); - - for production in self.productions.iter() { - if production.lhs != *non_terminal { - continue; - } - - for letter in production.rhs.iter() { - // we can continue to add more only if previous symbols are nullable - match letter { - Letter::NonTerminal(idx) => { - first.append(&mut self._first(idx, used)); - if !nullable.contains(idx) { - break; - } - }, - Letter::Terminal(ch) => { - // NOTE: i don't want to insert epsilons, because - // then i should remove them in upper level! - if *ch != EPSILON { - first.insert(*ch); - break; - } - } - } - } - } - - first - } - - pub fn follow(&mut self, non_terminal: &NonTerminal) -> BTreeSet { - if let None = self.nullable { - self.nullable = Some(self.get_nullable()); - } - - let num_non_terminal = self.get_non_terminal().len(); - let mut used = vec![false; num_non_terminal]; - - self._follow(non_terminal, &mut used) - } - - fn _follow(&self, non_terminal: &NonTerminal, used: &mut Vec) -> BTreeSet { - if used[*non_terminal] == true { - return BTreeSet::new(); - } - used[*non_terminal] = true; - - let nullable = self.nullable.as_ref().unwrap(); - let mut follow = BTreeSet::new(); - - if *non_terminal == self.start_symbol { - follow.insert(STRING_END); - } - - for production in self.productions.iter() { - for (i, letter) in production.rhs.iter().enumerate() { - match letter { - Letter::NonTerminal(idx) => { - if *idx != *non_terminal { - continue; - } - - // if we are at the end of the production, then we need to add the follow of the lhs - if i == production.rhs.len() - 1 { - follow.append(&mut self._follow(&production.lhs, used)); - } else { - // otherwise we need to add the first of the next symbol - let next_letter = &production.rhs[i + 1]; - match next_letter { - Letter::NonTerminal(idx) => { - let mut first_used_table = vec![false; used.len()]; - follow.append(&mut self._first(idx, &mut first_used_table)); - }, - Letter::Terminal(ch) => { - follow.insert(*ch); - } - } - - // if the whole next symbol is nullable, then we need to add the follow of the lhs - let mut is_nullable = true; - for letter in production.rhs[i + 1..].iter() { - match letter { - Letter::NonTerminal(idx) => { - if !nullable.contains(idx) { - is_nullable = false; - break; - } - }, - Letter::Terminal(ch) => { - if *ch != EPSILON { - is_nullable = false; - break; - } - } - } - } - - if is_nullable { - follow.append(&mut self._follow(&production.lhs, used)); - } - } - }, - Letter::Terminal(_) => {} - } - } - } - - follow - } - pub fn productions_to_adj_list(&self) -> BTreeMap>> { let mut adj_list: BTreeMap>> = BTreeMap::new(); for production in self.productions.iter() { @@ -244,64 +102,6 @@ mod test { use crate::map; use crate::automata::DFA; - fn get_test_grammar() -> Grammar { - // S -> Ab | c - // A -> aA | ε - // S = 0 - // A = 1 - Grammar { - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b')] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('c')] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, - ], - nullable: None, - } - } - - #[test] - fn test_first() { - let mut grammar = get_test_grammar(); - - let first = grammar.first(&Letter::NonTerminal(0)); - - assert_eq!(first.len(), 3); - assert!(first.contains(&'a')); - assert!(first.contains(&'b')); - assert!(first.contains(&'c')); - } - - #[test] - fn test_first_cycle() { - let mut grammar = Grammar { - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::NonTerminal(0)] }, - ], - nullable: None, - }; - - let first = grammar.first(&Letter::NonTerminal(0)); - - assert_eq!(first.len(), 0); - } - - #[test] - fn test_follow() { - let mut grammar = get_test_grammar(); - - let follow = grammar.follow(&0); - assert_eq!(follow.len(), 1); - assert!(follow.contains(&STRING_END)); - - let follow = grammar.follow(&1); - assert_eq!(follow.len(), 1); - assert!(follow.contains(&'b')); - } - #[test] fn test_dfa_conversion() { // this dfa should recognize ba* diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs index 7ab5dc3..e9cb1bb 100644 --- a/src/grammar/mod.rs +++ b/src/grammar/mod.rs @@ -1,8 +1,10 @@ mod grammar; -mod item; +// mod item; mod production; +mod first_follow; pub mod consts; pub use grammar::*; -pub use item::*; +// pub use item::*; pub use production::*; +pub use first_follow::*; From 404450b3ab4a5df100dd65986464aa305835b2a1 Mon Sep 17 00:00:00 2001 From: Giovanni Spadaccini Date: Wed, 28 Dec 2022 16:01:12 +0100 Subject: [PATCH 12/13] first part of refactor --- src/grammar/first_follow.rs | 267 +++++++------------------- src/grammar/first_follow/first.rs | 58 ++++++ src/grammar/first_follow/follow.rs | 26 +++ src/grammar/grammar.rs | 89 +++++---- src/grammar/grammar/helper.rs | 80 ++++---- src/grammar/grammar/semplification.rs | 181 ++++++++++++----- src/grammar/mod.rs | 6 +- src/grammar/production.rs | 14 +- src/grammar/production/helper.rs | 81 ++++++++ src/main.rs | 8 +- 10 files changed, 483 insertions(+), 327 deletions(-) create mode 100644 src/grammar/first_follow/first.rs create mode 100644 src/grammar/first_follow/follow.rs create mode 100644 src/grammar/production/helper.rs diff --git a/src/grammar/first_follow.rs b/src/grammar/first_follow.rs index 9352d30..2fd1a03 100644 --- a/src/grammar/first_follow.rs +++ b/src/grammar/first_follow.rs @@ -1,183 +1,34 @@ use std::collections::BTreeSet; use crate::grammar::{ - Grammar, - Letter, - Production, - consts::{EPSILON, STRING_END, Terminal, NonTerminal}, + consts::{NonTerminal, Terminal, EPSILON}, + Grammar, Letter, Production, }; -pub struct FirstFollow { - first_table: Option>>, - follow_table: Option>>, - nullable: Option>, +use self::first::compute_first; +use crate::grammar::first_follow::follow::compute_follow; - num_non_terminal: usize, -} - -impl FirstFollow { - /// Create a new FirstFollow struct, it is important that - /// this is not public, as we want to only expose valid istantiations - /// of the first follow table. - fn new(non_non_terminals: usize) -> Self { - FirstFollow { - first_table: None, - follow_table: None, - nullable: None, - num_non_terminal: non_non_terminals, - } - } - - pub fn get_first(&self, letter: &Letter) -> BTreeSet { - match letter { - Letter::NonTerminal(idx) => self.first_table.as_ref().unwrap()[*idx].clone(), - Letter::Terminal(ch) => { - let mut set = BTreeSet::new(); - set.insert(*ch); - set - } - } - } - - pub fn get_follow(&self, non_terminal: NonTerminal) -> BTreeSet { - self.follow_table.as_ref().unwrap()[non_terminal].clone() - } - - fn compute_nullable(&mut self, grammar: &Grammar) { - let nullable = grammar.get_nullable(); - self.nullable = Some(vec![false; self.num_non_terminal]); - - nullable.iter().for_each(|non_terminal| -> () { - self.nullable.as_mut().unwrap()[*non_terminal] = true; - }); - } - - fn compute_first(&mut self, grammar: &Grammar) { - if let None = self.nullable { - self.compute_nullable(grammar); - } - - self.first_table = Some(vec![BTreeSet::new(); self.num_non_terminal]); - let productions = grammar.get_productions(); - let mut has_changed = true; - - while has_changed { - has_changed = false; - - productions.iter().for_each(|production| -> () { - has_changed |= self.update_first_table(production); - }); - } - - (0..self.num_non_terminal).for_each(|i| -> () { - if self.nullable.as_ref().unwrap()[i] { - self.first_table.as_mut().unwrap()[i].insert(EPSILON); - } - }); - } - - fn update_first_table(&mut self, production: &Production) -> bool { - let mut has_changed = false; - for letter in production.rhs.iter() { - match letter { - Letter::NonTerminal(idx) => { - let set_to_join = self.first_table.as_ref().unwrap()[*idx].clone(); - - has_changed |= Self::append_if_not_superset( - &mut self.first_table.as_mut().unwrap()[production.lhs], - set_to_join, - ); - if !self.nullable.as_ref().unwrap()[*idx] { - break; - } - } - Letter::Terminal(ch) => { - if *ch != EPSILON { - has_changed |= self.first_table.as_mut().unwrap()[production.lhs].insert(*ch); - break; - } - } - } - } - - has_changed - } +mod first; +mod follow; - fn compute_follow(&mut self, grammar: &Grammar) { - if let None = self.first_table { - self.compute_first(grammar); - } - self.follow_table = Some(vec![BTreeSet::new(); self.num_non_terminal]); - self.follow_table.as_mut().unwrap()[grammar.get_start_symbol()].insert(STRING_END); - - let productions = grammar.get_productions(); - let mut has_changed = true; - - while has_changed { - has_changed = false; - - productions.iter().for_each(|production| { - has_changed |= self.update_follow_table(production); - }); - } - } +pub type FirstTable = Vec>; +pub type FollowTable = Vec>; - /// updates the follow table with the given production - /// returns true if the follow table has changed, false otherwise - fn update_follow_table(&mut self, production: &Production) -> bool { - let mut has_changed = false; - for (i, letter) in production.rhs.iter().enumerate() { - match letter { - Letter::NonTerminal(idx) => { - // if we are at the end of the production, then we need to add the follow of the lhs - if i == production.rhs.len() - 1 { - let to_join = self.follow_table.as_ref().unwrap()[production.lhs].clone(); - has_changed |= Self::append_if_not_superset( - &mut self.follow_table.as_mut().unwrap()[*idx], - to_join, - ); - } else { - // otherwise we need to add the first of the next symbol - let next_letter = &production.rhs[i + 1]; - match next_letter { - Letter::NonTerminal(next) => { - let to_join = self.first_table.as_ref().unwrap()[*next].clone(); - has_changed |= Self::append_if_not_superset( - &mut self.follow_table.as_mut().unwrap()[*idx], - to_join, - ); - }, - Letter::Terminal(ch) => { - self.follow_table.as_mut().unwrap()[*idx].insert(*ch); - } - } +pub use first::get_first; - // if the whole next symbol is nullable, then we need to add the follow of the lhs - if self.is_nullable(&mut production.rhs[i+1..].iter()) { - let to_join = self.follow_table.as_ref().unwrap()[production.lhs].clone(); - has_changed |= Self::append_if_not_superset( - &mut self.follow_table.as_mut().unwrap()[*idx], - to_join, - ); - } - } - }, - Letter::Terminal(_) => {} - } - } +pub struct FirstFollowTable { + first: FirstTable, + follow: FollowTable, + nullable: Vec, +} - has_changed +impl FirstFollowTable { + pub fn get_follow(&self, non_terminal: NonTerminal) -> BTreeSet { + self.follow[non_terminal].clone() } - fn append_if_not_superset(first_set: &mut BTreeSet, second_set: BTreeSet) -> bool { - if first_set.is_superset(&second_set) { - return false; - } - let mut mutable = second_set; - - first_set.append(&mut mutable); - - true + pub fn get_first(&self, letter: &Letter) -> BTreeSet { + first::get_first_letter(&self.first, letter) } /// checks if the rest of the iterator is all nullable. @@ -185,32 +36,45 @@ impl FirstFollow { fn is_nullable<'a, T: Iterator>(&self, iter: &mut T) -> bool { iter.all(|letter| -> bool { match letter { - Letter::NonTerminal(idx) => self.nullable.as_ref().unwrap()[*idx], + Letter::NonTerminal(idx) => self.nullable[*idx], Letter::Terminal(ch) => *ch == EPSILON, } }) } } - -impl From<&Grammar> for FirstFollow { +impl From<&Grammar> for FirstFollowTable { fn from(grammar: &Grammar) -> Self { let num_non_terminal = grammar.get_non_terminal().len(); - let mut first_follow = FirstFollow::new(num_non_terminal); - - first_follow.compute_nullable(grammar); - first_follow.compute_first(grammar); - first_follow.compute_follow(grammar); + let nullable = compute_nullable(grammar, num_non_terminal); + let first = compute_first(grammar, num_non_terminal, &nullable); + let follow = compute_follow(grammar, num_non_terminal, &first); - first_follow + FirstFollowTable { + first, + follow, + nullable, + } } } +fn compute_nullable(grammar: &Grammar, num_non_terminal: usize) -> Vec { + let nullable = grammar.get_nullable(); + let mut out = vec![false; num_non_terminal]; + + nullable.iter().for_each(|non_terminal| -> () { + out[*non_terminal] = true; + }); + out +} + #[cfg(test)] mod test { + use crate::grammar::consts::STRING_END; + use super::*; - + fn get_test_grammar() -> Grammar { // S -> Ab | c // A -> aA | ε @@ -219,19 +83,31 @@ mod test { Grammar::new( 0, vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b')] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('c')] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, - ] + Production { + start_symbol: 0, + expand_rule: vec![Letter::NonTerminal(1), Letter::Terminal('b')], + }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('c')], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal('a'), Letter::NonTerminal(1)], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal(EPSILON)], + }, + ], ) } #[test] - fn test_first() { + fn first_test() { let grammar = get_test_grammar(); - let first_follow = FirstFollow::from(&grammar); + let first_follow = FirstFollowTable::from(&grammar); let first = first_follow.get_first(&Letter::NonTerminal(0)); assert_eq!(first.len(), 3); @@ -246,26 +122,31 @@ mod test { } #[test] - fn test_first_cycle() { + fn first_cycle_test() { let grammar = Grammar::new( 0, vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::NonTerminal(0)] }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::NonTerminal(1)], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::NonTerminal(0)], + }, ], ); - - let first_follow = FirstFollow::from(&grammar); + let first_follow = FirstFollowTable::from(&grammar); let first = first_follow.get_first(&Letter::NonTerminal(0)); assert_eq!(first.len(), 0); } #[test] - fn test_follow() { + fn follow_test() { let grammar = get_test_grammar(); - let first_follow = FirstFollow::from(&grammar); + let first_follow = FirstFollowTable::from(&grammar); let follow = first_follow.get_follow(0); assert_eq!(follow.len(), 1); @@ -275,4 +156,4 @@ mod test { assert_eq!(follow.len(), 1); assert!(follow.contains(&'b')); } -} \ No newline at end of file +} diff --git a/src/grammar/first_follow/first.rs b/src/grammar/first_follow/first.rs new file mode 100644 index 0000000..23be5ad --- /dev/null +++ b/src/grammar/first_follow/first.rs @@ -0,0 +1,58 @@ +use std::collections::BTreeSet; + +use crate::grammar::{ + consts::{Terminal, EPSILON}, + Grammar, Letter, +}; + +use super::FirstTable; + +pub fn compute_first( + grammar: &Grammar, + num_non_terminal: usize, + nullable: &Vec, +) -> FirstTable { + let mut first_table = vec![BTreeSet::new(); num_non_terminal]; + let productions = grammar.get_productions(); + let mut has_changed = true; + + while has_changed { + has_changed = false; + + productions.iter().for_each(|production| -> () { + has_changed |= production.update_first_table(&mut first_table, &nullable); + }); + } + + (0..num_non_terminal).for_each(|i| -> () { + if nullable[i] { + first_table[i].insert(EPSILON); + } + }); + first_table +} +pub fn get_first_letter(first: &FirstTable, letter: &Letter) -> BTreeSet { + match letter { + Letter::NonTerminal(idx) => first[*idx].clone(), + Letter::Terminal(ch) => BTreeSet::from([*ch]), + } +} +//TODO: add tests +pub fn get_first<'a, T>(first: &FirstTable, iter: &mut T) -> BTreeSet +where + T: Iterator, +{ + let mut out = BTreeSet::new(); + let mut nullable = true; + for letter in iter { + let mut set = get_first_letter(first, letter); + if !set.remove(&EPSILON) { + nullable = false; + } + out.append(&mut set); + } + if nullable { + out.insert(EPSILON); + } + out +} diff --git a/src/grammar/first_follow/follow.rs b/src/grammar/first_follow/follow.rs new file mode 100644 index 0000000..1304cdd --- /dev/null +++ b/src/grammar/first_follow/follow.rs @@ -0,0 +1,26 @@ +use std::collections::BTreeSet; + +use crate::grammar::{consts::STRING_END, Grammar}; + +use super::{FirstTable, FollowTable}; + +pub fn compute_follow( + grammar: &Grammar, + num_non_terminal: usize, + first: &FirstTable, +) -> FollowTable { + let mut follow_table = vec![BTreeSet::new(); num_non_terminal]; + follow_table[grammar.get_start_symbol()].insert(STRING_END); + + let productions = grammar.get_productions(); + let mut has_changed = true; + + while has_changed { + has_changed = false; + + productions.iter().for_each(|production| { + has_changed |= production.update_follow_table(first, &mut follow_table); + }); + } + follow_table +} diff --git a/src/grammar/grammar.rs b/src/grammar/grammar.rs index 5ce37d5..728c2e0 100644 --- a/src/grammar/grammar.rs +++ b/src/grammar/grammar.rs @@ -1,16 +1,8 @@ -use std::collections::{BTreeSet, BTreeMap}; +use std::collections::{BTreeMap, BTreeSet}; use crate::automata::DFA; -use crate::grammar::{ - Production, - Letter -}; -use crate::grammar::consts::{ - EPSILON, - STRING_END, - Terminal, - NonTerminal, -}; +use crate::grammar::consts::{NonTerminal, Terminal, EPSILON, STRING_END}; +use crate::grammar::{Letter, Production}; mod helper; mod semplification; @@ -26,7 +18,6 @@ pub struct Grammar { nullable: Option>, } - impl Grammar { pub fn new(start_symbol: NonTerminal, productions: Vec) -> Self { Grammar { @@ -47,9 +38,10 @@ impl Grammar { pub fn productions_to_adj_list(&self) -> BTreeMap>> { let mut adj_list: BTreeMap>> = BTreeMap::new(); for production in self.productions.iter() { - adj_list.entry(production.lhs) + adj_list + .entry(production.start_symbol) .or_insert(BTreeSet::new()) - .insert(production.rhs.clone()); + .insert(production.expand_rule.clone()); } adj_list @@ -58,8 +50,8 @@ impl Grammar { pub fn add_fake_initial_state(&mut self) -> () { let new_state = self.get_non_terminal().iter().max().unwrap() + 1; self.productions.push(Production { - lhs: new_state, - rhs: vec![Letter::NonTerminal(self.start_symbol)] + start_symbol: new_state, + expand_rule: vec![Letter::NonTerminal(self.start_symbol)], }); self.start_symbol = new_state; @@ -77,17 +69,23 @@ impl From<&DFA> for Grammar { for (transition_ch, dest) in transitions.iter() { let lhs = idx; let rhs = vec![Letter::Terminal(*transition_ch), Letter::NonTerminal(*dest)]; - productions.push(Production { lhs: lhs, rhs }); + productions.push(Production { + start_symbol: lhs, + expand_rule: rhs, + }); } } for end_state in dfa.get_end_states() { let lhs = *end_state; let rhs = vec![Letter::Terminal(EPSILON)]; - productions.push(Production { lhs: lhs, rhs }); + productions.push(Production { + start_symbol: lhs, + expand_rule: rhs, + }); } - Self { + Self { start_symbol: dfa.get_start_state(), productions, @@ -99,31 +97,31 @@ impl From<&DFA> for Grammar { #[cfg(test)] mod test { use super::*; - use crate::map; use crate::automata::DFA; + use crate::map; #[test] fn test_dfa_conversion() { // this dfa should recognize ba* let dfa: DFA = DFA::from_state( 3, - 0, - vec![1], + 0, + vec![1], vec![ - map! { + map! { 'a' => 2, 'b' => 1 }, - map! { + map! { 'a' => 1, 'b' => 2 }, - map! { + map! { 'a' => 2, 'b' => 2 }, - ], - None + ], + None, ); let grammar = Grammar::from(&dfa); @@ -132,17 +130,38 @@ mod test { let result = Grammar { start_symbol: 0, productions: vec![ - Production { lhs: 0, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(2)] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(2)] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(2)] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(2)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('a'), Letter::NonTerminal(2)], + }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('b'), Letter::NonTerminal(1)], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal('a'), Letter::NonTerminal(1)], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal('b'), Letter::NonTerminal(2)], + }, + Production { + start_symbol: 2, + expand_rule: vec![Letter::Terminal('a'), Letter::NonTerminal(2)], + }, + Production { + start_symbol: 2, + expand_rule: vec![Letter::Terminal('b'), Letter::NonTerminal(2)], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal(EPSILON)], + }, ], nullable: None, }; assert_eq!(grammar, result); } -} \ No newline at end of file +} diff --git a/src/grammar/grammar/helper.rs b/src/grammar/grammar/helper.rs index 291ba8f..56381b3 100644 --- a/src/grammar/grammar/helper.rs +++ b/src/grammar/grammar/helper.rs @@ -1,14 +1,14 @@ /// This file contains some general helper functions used /// To implement grammar semplification and first and follows -use std::collections::{BTreeSet}; +use std::collections::BTreeSet; -use super::{Grammar, NonTerminal, Letter, EPSILON}; +use super::{Grammar, Letter, NonTerminal, Production, EPSILON}; impl Grammar { pub fn get_non_terminal(&self) -> BTreeSet { let mut non_terminals = BTreeSet::new(); for production in self.productions.iter() { - non_terminals.insert(production.lhs); + non_terminals.insert(production.start_symbol); } non_terminals @@ -20,30 +20,15 @@ impl Grammar { while has_changed { has_changed = false; for production in self.productions.iter() { - let mut is_nullable = true; - for letter in production.rhs.iter() { - match letter { - Letter::NonTerminal(idx) => { - if !nullable.contains(idx) { - is_nullable = false; - break; - } - } - Letter::Terminal(ch) => { - if *ch != EPSILON { - is_nullable = false; - break; - } - } - } - } - if is_nullable && !nullable.contains(&production.lhs) { - nullable.insert(production.lhs); + if production.check_is_nullable(&nullable) + && !nullable.contains(&production.start_symbol) + { + nullable.insert(production.start_symbol); has_changed = true; } } } - + nullable } @@ -57,10 +42,10 @@ impl Grammar { while has_changed { has_changed = false; self.productions.iter().for_each(|production| -> () { - if !reachable.contains(&production.lhs) { + if !reachable.contains(&production.start_symbol) { return; } - for letter in production.rhs.iter() { + for letter in production.expand_rule.iter() { match letter { Letter::NonTerminal(idx) => { if !reachable.contains(idx) { @@ -90,7 +75,7 @@ impl Grammar { self.productions.iter().for_each(|production| -> () { let mut is_generator = true; - production.rhs.iter().for_each(|letter| -> () { + production.expand_rule.iter().for_each(|letter| -> () { match letter { Letter::NonTerminal(non_terminal) => { if !generators.contains(non_terminal) { @@ -103,7 +88,7 @@ impl Grammar { }); if is_generator { - generators.insert(production.lhs); + generators.insert(production.start_symbol); has_changed = true; } }); @@ -116,7 +101,7 @@ impl Grammar { /// a unitary couple is a couple of non terminals (A, B) such that /// A -> B is a production in the grammar or A -> C, C -> B is a production /// (aka it's transitive and reflexive) - pub fn get_unitary_couples(&self) -> BTreeSet<(NonTerminal, NonTerminal)> { + pub fn get_unitary_couples(&self) -> BTreeSet<(NonTerminal, NonTerminal)> { let non_terminals = self.get_non_terminal(); let mut unitary_couples = BTreeSet::new(); let mut has_changed = true; @@ -124,19 +109,20 @@ impl Grammar { for non_terminal in non_terminals { unitary_couples.insert((non_terminal, non_terminal)); } - + while has_changed { has_changed = false; for production in self.productions.iter() { - if production.rhs.len() != 1 { + if production.expand_rule.len() != 1 { continue; } let mut to_insert = BTreeSet::new(); for unitary_couple in unitary_couples.iter() { - if let Letter::NonTerminal(non_term) = production.rhs[0] { - if unitary_couple.1 == production.lhs && - !unitary_couples.contains(&(unitary_couple.0, non_term)) && - !to_insert.contains(&(unitary_couple.0, non_term)) { + if let Letter::NonTerminal(non_term) = production.expand_rule[0] { + if unitary_couple.1 == production.start_symbol + && !unitary_couples.contains(&(unitary_couple.0, non_term)) + && !to_insert.contains(&(unitary_couple.0, non_term)) + { to_insert.insert((unitary_couple.0, non_term)); } } @@ -156,8 +142,8 @@ impl Grammar { #[cfg(test)] mod tests { use super::*; - use crate::grammar::{Production}; - + use crate::grammar::Production; + fn get_test_grammar() -> Grammar { // S -> Ab | c // A -> aA | ε @@ -166,10 +152,22 @@ mod tests { Grammar { start_symbol: 0, productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b')] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('c')] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::NonTerminal(1), Letter::Terminal('b')], + }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('c')], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal('a'), Letter::NonTerminal(1)], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal(EPSILON)], + }, ], nullable: None, } @@ -183,4 +181,4 @@ mod tests { assert_eq!(nullable.len(), 1); assert!(nullable.contains(&1)); } -} \ No newline at end of file +} diff --git a/src/grammar/grammar/semplification.rs b/src/grammar/grammar/semplification.rs index 2b2bb01..6b742d1 100644 --- a/src/grammar/grammar/semplification.rs +++ b/src/grammar/grammar/semplification.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeSet}; +use std::collections::BTreeSet; use super::{Grammar, Letter, Production}; @@ -8,24 +8,22 @@ impl Grammar { let generators = self.get_generators(); self.productions.retain(|production| { - generators.contains(&production.lhs) && production.rhs.iter().all(|letter| { - match letter { + generators.contains(&production.start_symbol) + && production.expand_rule.iter().all(|letter| match letter { Letter::NonTerminal(idx) => generators.contains(idx), - Letter::Terminal(_) => true - } - }) + Letter::Terminal(_) => true, + }) }); // then remove non reachable let reachable = self.get_reachable(); self.productions.retain(|production| { - reachable.contains(&production.lhs) && production.rhs.iter().all(|letter| { - match letter { + reachable.contains(&production.start_symbol) + && production.expand_rule.iter().all(|letter| match letter { Letter::NonTerminal(idx) => reachable.contains(idx), - Letter::Terminal(_) => true - } - }) + Letter::Terminal(_) => true, + }) }); // invalidate nullable @@ -33,23 +31,25 @@ impl Grammar { } // TODO: this is a very complex function in this moment, it needs refactor - // it also has some points were it can be optimized + // it also has some points were it can be optimized pub fn remove_unitary_cycles(&mut self) { let unitary_couples = self.get_unitary_couples(); // remove all unitary couples self.productions.retain(|production| { - if production.rhs.len() != 1 { + if production.expand_rule.len() != 1 { return true; } - match production.rhs[0] { - Letter::NonTerminal(non_term) => !unitary_couples.contains(&(production.lhs, non_term)), - Letter::Terminal(_) => true + match production.expand_rule[0] { + Letter::NonTerminal(non_term) => { + !unitary_couples.contains(&(production.start_symbol, non_term)) + } + Letter::Terminal(_) => true, } }); - // add corresponding productions + // add corresponding productions let mut adj_list = self.productions_to_adj_list(); for unitary_couple in unitary_couples.iter() { if unitary_couple.0 == unitary_couple.1 { @@ -58,7 +58,8 @@ impl Grammar { let mut to_insert = adj_list.get(&unitary_couple.1).unwrap().clone(); - adj_list.entry(unitary_couple.0) + adj_list + .entry(unitary_couple.0) .or_insert(BTreeSet::new()) .append(&mut to_insert); } @@ -68,8 +69,8 @@ impl Grammar { for (non_terminal, transitions) in adj_list.iter() { for transition in transitions.iter() { new_transitions.push(Production { - lhs: *non_terminal, - rhs: transition.clone() + start_symbol: *non_terminal, + expand_rule: transition.clone(), }); } } @@ -97,9 +98,18 @@ mod tests { Grammar { start_symbol: 0, productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::NonTerminal(2)] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('a')] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('b')] }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::NonTerminal(1), Letter::NonTerminal(2)], + }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('a')], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal('b')], + }, ], nullable: None, } @@ -109,9 +119,10 @@ mod tests { let result = Grammar { start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::Terminal('a')] }, - ], + productions: vec![Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('a')], + }], nullable: None, }; @@ -122,17 +133,47 @@ mod tests { fn test_remove_unitary_cycles() { // E -> E + T | T // T -> T * F | F - // F -> (E) | a - + // F -> (E) | a + let mut grammar = Grammar { start_symbol: 0, productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(0), Letter::Terminal('+'), Letter::NonTerminal(1)] }, - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('*'), Letter::NonTerminal(2)] }, - Production { lhs: 1, rhs: vec![Letter::NonTerminal(2)] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('a')] }, + Production { + start_symbol: 0, + expand_rule: vec![ + Letter::NonTerminal(0), + Letter::Terminal('+'), + Letter::NonTerminal(1), + ], + }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::NonTerminal(1)], + }, + Production { + start_symbol: 1, + expand_rule: vec![ + Letter::NonTerminal(1), + Letter::Terminal('*'), + Letter::NonTerminal(2), + ], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::NonTerminal(2)], + }, + Production { + start_symbol: 2, + expand_rule: vec![ + Letter::Terminal('('), + Letter::NonTerminal(0), + Letter::Terminal(')'), + ], + }, + Production { + start_symbol: 2, + expand_rule: vec![Letter::Terminal('a')], + }, ], nullable: None, }; @@ -143,15 +184,66 @@ mod tests { // F -> (E) | a start_symbol: 0, productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(0), Letter::Terminal('+'), Letter::NonTerminal(1)] }, - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('*'), Letter::NonTerminal(2)] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('a')] }, - Production { lhs: 1, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('*'), Letter::NonTerminal(2)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a')] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('a')] }, + Production { + start_symbol: 0, + expand_rule: vec![ + Letter::NonTerminal(0), + Letter::Terminal('+'), + Letter::NonTerminal(1), + ], + }, + Production { + start_symbol: 0, + expand_rule: vec![ + Letter::NonTerminal(1), + Letter::Terminal('*'), + Letter::NonTerminal(2), + ], + }, + Production { + start_symbol: 0, + expand_rule: vec![ + Letter::Terminal('('), + Letter::NonTerminal(0), + Letter::Terminal(')'), + ], + }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('a')], + }, + Production { + start_symbol: 1, + expand_rule: vec![ + Letter::NonTerminal(1), + Letter::Terminal('*'), + Letter::NonTerminal(2), + ], + }, + Production { + start_symbol: 1, + expand_rule: vec![ + Letter::Terminal('('), + Letter::NonTerminal(0), + Letter::Terminal(')'), + ], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal('a')], + }, + Production { + start_symbol: 2, + expand_rule: vec![ + Letter::Terminal('('), + Letter::NonTerminal(0), + Letter::Terminal(')'), + ], + }, + Production { + start_symbol: 2, + expand_rule: vec![Letter::Terminal('a')], + }, ], nullable: None, }; @@ -160,4 +252,5 @@ mod tests { assert_eq!(grammar, result); } -} \ No newline at end of file +} + diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs index e9cb1bb..12c12f3 100644 --- a/src/grammar/mod.rs +++ b/src/grammar/mod.rs @@ -1,10 +1,10 @@ mod grammar; // mod item; -mod production; -mod first_follow; pub mod consts; +mod first_follow; +mod production; pub use grammar::*; // pub use item::*; -pub use production::*; pub use first_follow::*; +pub use production::*; diff --git a/src/grammar/production.rs b/src/grammar/production.rs index 7937035..50ac997 100644 --- a/src/grammar/production.rs +++ b/src/grammar/production.rs @@ -1,7 +1,6 @@ -use crate::grammar::consts::{ - Terminal, - NonTerminal, -}; +use crate::grammar::consts::{NonTerminal, Terminal}; + +mod helper; #[derive(Debug, PartialEq, Clone, PartialOrd, Eq, Ord)] pub enum Letter { @@ -11,8 +10,8 @@ pub enum Letter { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord)] pub struct Production { - pub lhs: NonTerminal, - pub rhs: Vec, + pub start_symbol: NonTerminal, + pub expand_rule: Vec, } impl Production { @@ -34,4 +33,5 @@ impl Production { Some(&letters[n]) } -} \ No newline at end of file +} + diff --git a/src/grammar/production/helper.rs b/src/grammar/production/helper.rs new file mode 100644 index 0000000..19c87ef --- /dev/null +++ b/src/grammar/production/helper.rs @@ -0,0 +1,81 @@ +use std::collections::{BTreeSet, VecDeque}; + +use crate::grammar::{ + consts::{NonTerminal, EPSILON}, + get_first, FollowTable, +}; + +use super::{Letter, Production}; +use crate::grammar::first_follow::FirstTable; + +impl Production { + pub fn check_is_nullable(&self, nullable: &BTreeSet) -> bool { + for letter in self.expand_rule.iter() { + match letter { + Letter::NonTerminal(idx) => { + if !nullable.contains(idx) { + return false; + } + } + Letter::Terminal(ch) => { + if *ch != EPSILON { + return false; + } + } + } + } + true + } + + pub fn update_first_table(&self, first_table: &mut FirstTable, nullable: &Vec) -> bool { + let mut has_changed = false; + for letter in self.expand_rule.iter() { + match letter { + Letter::NonTerminal(idx) => { + let mut set_to_join = first_table[*idx].clone(); + + if !set_to_join.is_subset(&first_table[self.start_symbol]) { + first_table[self.start_symbol].append(&mut set_to_join); + has_changed = true; + } + if !nullable[*idx] { + break; + } + } + Letter::Terminal(ch) => { + if *ch != EPSILON { + has_changed |= first_table[self.start_symbol].insert(*ch); + break; + } + } + } + } + has_changed + } + + /// updates the follow table with the given production + /// returns true if the follow table has changed, false otherwise + pub fn update_follow_table(&self, first: &FirstTable, follow: &mut FollowTable) -> bool { + let mut has_changed = false; + let mut production = self + .expand_rule + .clone() + .into_iter() + .collect::>(); + + for letter in self.expand_rule.iter() { + production.pop_front(); + if let Letter::NonTerminal(idx) = letter { + let mut res = get_first(first, &mut production.iter()); + if res.remove(&EPSILON) { + res.append(&mut follow[self.start_symbol].clone()); + } + if !res.is_subset(&follow[*idx]) { + follow[*idx].append(&mut res); + has_changed = true; + } + } + } + has_changed + } +} diff --git a/src/main.rs b/src/main.rs index db54ac0..06280dd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,10 @@ #![allow(non_snake_case)] mod app; -mod utils; -mod display; mod automata; -mod grammar; +mod display; mod error; +mod grammar; +mod utils; #[macro_use] mod macros; @@ -31,7 +31,7 @@ fn main() { let mut web_options = eframe::WebOptions::default(); web_options.default_theme = eframe::Theme::Dark; web_options.follow_system_theme = false; - + eframe::start_web( "the_canvas_id", // hardcode it web_options, From 7aed14796eb9ae73ecbdd5140a0733c929c52050 Mon Sep 17 00:00:00 2001 From: Angelo 'Flecart' Huang Date: Sat, 21 Jan 2023 14:35:07 +0100 Subject: [PATCH 13/13] refactor(first_fol): remove useless imports --- src/grammar/first_follow.rs | 9 +++------ src/grammar/first_follow/first.rs | 2 ++ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/grammar/first_follow.rs b/src/grammar/first_follow.rs index 2fd1a03..44e636e 100644 --- a/src/grammar/first_follow.rs +++ b/src/grammar/first_follow.rs @@ -2,12 +2,9 @@ use std::collections::BTreeSet; use crate::grammar::{ consts::{NonTerminal, Terminal, EPSILON}, - Grammar, Letter, Production, + Grammar, Letter, Production }; -use self::first::compute_first; -use crate::grammar::first_follow::follow::compute_follow; - mod first; mod follow; @@ -48,8 +45,8 @@ impl From<&Grammar> for FirstFollowTable { let num_non_terminal = grammar.get_non_terminal().len(); let nullable = compute_nullable(grammar, num_non_terminal); - let first = compute_first(grammar, num_non_terminal, &nullable); - let follow = compute_follow(grammar, num_non_terminal, &first); + let first = first::compute_first(grammar, num_non_terminal, &nullable); + let follow = follow::compute_follow(grammar, num_non_terminal, &first); FirstFollowTable { first, diff --git a/src/grammar/first_follow/first.rs b/src/grammar/first_follow/first.rs index 23be5ad..797a025 100644 --- a/src/grammar/first_follow/first.rs +++ b/src/grammar/first_follow/first.rs @@ -31,12 +31,14 @@ pub fn compute_first( }); first_table } + pub fn get_first_letter(first: &FirstTable, letter: &Letter) -> BTreeSet { match letter { Letter::NonTerminal(idx) => first[*idx].clone(), Letter::Terminal(ch) => BTreeSet::from([*ch]), } } + //TODO: add tests pub fn get_first<'a, T>(first: &FirstTable, iter: &mut T) -> BTreeSet where