@@ -1077,7 +1077,163 @@ pub mod vec {
10771077 }
10781078 }
10791079
1080+ impl < G , K , V , R > Collection < G , ( K , V ) , R >
1081+ where
1082+ G : Scope < Timestamp : Lattice +Ord > ,
1083+ K : crate :: ExchangeData +Hashable ,
1084+ V : crate :: ExchangeData ,
1085+ R : crate :: ExchangeData +Semigroup ,
1086+ {
1087+ /// Matches pairs `(key,val1)` and `(key,val2)` based on `key` and yields pairs `(key, (val1, val2))`.
1088+ ///
1089+ /// The [`join_map`](Join::join_map) method may be more convenient for non-trivial processing pipelines.
1090+ ///
1091+ /// # Examples
1092+ ///
1093+ /// ```
1094+ /// use differential_dataflow::input::Input;
1095+ ///
1096+ /// ::timely::example(|scope| {
1097+ ///
1098+ /// let x = scope.new_collection_from(vec![(0, 1), (1, 3)]).1;
1099+ /// let y = scope.new_collection_from(vec![(0, 'a'), (1, 'b')]).1;
1100+ /// let z = scope.new_collection_from(vec![(0, (1, 'a')), (1, (3, 'b'))]).1;
1101+ ///
1102+ /// x.join(&y)
1103+ /// .assert_eq(&z);
1104+ /// });
1105+ /// ```
1106+ pub fn join < V2 , R2 > ( & self , other : & Collection < G , ( K , V2 ) , R2 > ) -> Collection < G , ( K , ( V , V2 ) ) , <R as Multiply < R2 > >:: Output >
1107+ where
1108+ K : crate :: ExchangeData ,
1109+ V2 : crate :: ExchangeData ,
1110+ R2 : crate :: ExchangeData +Semigroup ,
1111+ R : Multiply < R2 , Output : Semigroup +' static > ,
1112+ {
1113+ self . join_map ( other, |k, v, v2| ( k. clone ( ) , ( v. clone ( ) , v2. clone ( ) ) ) )
1114+ }
1115+
1116+ /// Matches pairs `(key,val1)` and `(key,val2)` based on `key` and then applies a function.
1117+ ///
1118+ /// # Examples
1119+ ///
1120+ /// ```
1121+ /// use differential_dataflow::input::Input;
1122+ ///
1123+ /// ::timely::example(|scope| {
1124+ ///
1125+ /// let x = scope.new_collection_from(vec![(0, 1), (1, 3)]).1;
1126+ /// let y = scope.new_collection_from(vec![(0, 'a'), (1, 'b')]).1;
1127+ /// let z = scope.new_collection_from(vec![(1, 'a'), (3, 'b')]).1;
1128+ ///
1129+ /// x.join_map(&y, |_key, &a, &b| (a,b))
1130+ /// .assert_eq(&z);
1131+ /// });
1132+ /// ```
1133+ pub fn join_map < V2 : crate :: ExchangeData , R2 : crate :: ExchangeData +Semigroup , D : crate :: Data , L > ( & self , other : & Collection < G , ( K , V2 ) , R2 > , mut logic : L ) -> Collection < G , D , <R as Multiply < R2 > >:: Output >
1134+ where R : Multiply < R2 , Output : Semigroup +' static > , L : FnMut ( & K , & V , & V2 ) ->D +' static {
1135+ let arranged1 = self . arrange_by_key ( ) ;
1136+ let arranged2 = other. arrange_by_key ( ) ;
1137+ arranged1. join_core ( & arranged2, move |k, v1, v2| Some ( logic ( k, v1, v2) ) )
1138+ }
1139+
1140+ /// Matches pairs `(key, val)` and `key` based on `key`, producing the former with frequencies multiplied.
1141+ ///
1142+ /// When the second collection contains frequencies that are either zero or one this is the more traditional
1143+ /// relational semijoin. When the second collection may contain multiplicities, this operation may scale up
1144+ /// the counts of the records in the first input.
1145+ ///
1146+ /// # Examples
1147+ ///
1148+ /// ```
1149+ /// use differential_dataflow::input::Input;
1150+ ///
1151+ /// ::timely::example(|scope| {
1152+ ///
1153+ /// let x = scope.new_collection_from(vec![(0, 1), (1, 3)]).1;
1154+ /// let y = scope.new_collection_from(vec![0, 2]).1;
1155+ /// let z = scope.new_collection_from(vec![(0, 1)]).1;
1156+ ///
1157+ /// x.semijoin(&y)
1158+ /// .assert_eq(&z);
1159+ /// });
1160+ /// ```
1161+ pub fn semijoin < R2 : crate :: ExchangeData +Semigroup > ( & self , other : & Collection < G , K , R2 > ) -> Collection < G , ( K , V ) , <R as Multiply < R2 > >:: Output >
1162+ where R : Multiply < R2 , Output : Semigroup +' static > {
1163+ let arranged1 = self . arrange_by_key ( ) ;
1164+ let arranged2 = other. arrange_by_self ( ) ;
1165+ arranged1. join_core ( & arranged2, |k, v, _| Some ( ( k. clone ( ) , v. clone ( ) ) ) )
1166+ }
10801167
1168+ /// Subtracts the semijoin with `other` from `self`.
1169+ ///
1170+ /// In the case that `other` has multiplicities zero or one this results
1171+ /// in a relational antijoin, in which we discard input records whose key
1172+ /// is present in `other`. If the multiplicities could be other than zero
1173+ /// or one, the semantic interpretation of this operator is less clear.
1174+ ///
1175+ /// In almost all cases, you should ensure that `other` has multiplicities
1176+ /// that are zero or one, perhaps by using the `distinct` operator.
1177+ ///
1178+ /// # Examples
1179+ ///
1180+ /// ```
1181+ /// use differential_dataflow::input::Input;
1182+ ///
1183+ /// ::timely::example(|scope| {
1184+ ///
1185+ /// let x = scope.new_collection_from(vec![(0, 1), (1, 3)]).1;
1186+ /// let y = scope.new_collection_from(vec![0, 2]).1;
1187+ /// let z = scope.new_collection_from(vec![(1, 3)]).1;
1188+ ///
1189+ /// x.antijoin(&y)
1190+ /// .assert_eq(&z);
1191+ /// });
1192+ /// ```
1193+ pub fn antijoin < R2 : crate :: ExchangeData +Semigroup > ( & self , other : & Collection < G , K , R2 > ) -> Collection < G , ( K , V ) , R >
1194+ where R : Multiply < R2 , Output =R > , R : Abelian +' static {
1195+ self . concat ( & self . semijoin ( other) . negate ( ) )
1196+ }
1197+
1198+ /// Joins two arranged collections with the same key type.
1199+ ///
1200+ /// Each matching pair of records `(key, val1)` and `(key, val2)` are subjected to the `result` function,
1201+ /// which produces something implementing `IntoIterator`, where the output collection will have an entry for
1202+ /// every value returned by the iterator.
1203+ ///
1204+ /// This trait is implemented for arrangements (`Arranged<G, T>`) rather than collections. The `Join` trait
1205+ /// contains the implementations for collections.
1206+ ///
1207+ /// # Examples
1208+ ///
1209+ /// ```
1210+ /// use differential_dataflow::input::Input;
1211+ /// use differential_dataflow::trace::Trace;
1212+ ///
1213+ /// ::timely::example(|scope| {
1214+ ///
1215+ /// let x = scope.new_collection_from(vec![(0u32, 1), (1, 3)]).1
1216+ /// .arrange_by_key();
1217+ /// let y = scope.new_collection_from(vec![(0, 'a'), (1, 'b')]).1
1218+ /// .arrange_by_key();
1219+ ///
1220+ /// let z = scope.new_collection_from(vec![(1, 'a'), (3, 'b')]).1;
1221+ ///
1222+ /// x.join_core(&y, |_key, &a, &b| Some((a, b)))
1223+ /// .assert_eq(&z);
1224+ /// });
1225+ /// ```
1226+ pub fn join_core < Tr2 , I , L > ( & self , stream2 : & Arranged < G , Tr2 > , result : L ) -> Collection < G , I :: Item , <R as Multiply < Tr2 :: Diff > >:: Output >
1227+ where
1228+ Tr2 : for < ' a > crate :: trace:: TraceReader < Key < ' a > =& ' a K , Time =G :: Timestamp > +Clone +' static ,
1229+ R : Multiply < Tr2 :: Diff , Output : Semigroup +' static > ,
1230+ I : IntoIterator < Item : crate :: Data > ,
1231+ L : FnMut ( & K , & V , Tr2 :: Val < ' _ > ) ->I +' static ,
1232+ {
1233+ self . arrange_by_key ( )
1234+ . join_core ( stream2, result)
1235+ }
1236+ }
10811237}
10821238
10831239/// Conversion to a differential dataflow Collection.
0 commit comments