@@ -20,7 +20,7 @@ use arrow::{
2020 compute:: { DatePart , date_part} ,
2121 datatypes:: { DataType , TimeUnit } ,
2222} ;
23- use chrono:: { TimeZone , Utc , prelude:: * } ;
23+ use chrono:: { Duration , TimeZone , Utc , prelude:: * } ;
2424use chrono_tz:: Tz ;
2525use datafusion:: {
2626 common:: { Result , ScalarValue } ,
@@ -72,6 +72,78 @@ pub fn spark_dayofweek(args: &[ColumnarValue]) -> Result<ColumnarValue> {
7272 Ok ( ColumnarValue :: Array ( Arc :: new ( dayofweek) ) )
7373}
7474
75+ /// `spark_weekofyear(date/timestamp/compatible-string[, timezone])`
76+ ///
77+ /// Matches Spark's `weekofyear()` semantics:
78+ /// ISO week numbering, with Monday as the first day of the week,
79+ /// and week 1 defined as the first week with more than 3 days.
80+ ///
81+ /// For `Timestamp` inputs, this function interprets epoch milliseconds in the
82+ /// provided timezone (if any) before deriving the calendar date and ISO week.
83+ /// If no timezone is provided, `UTC` is used by default. For `Date` and
84+ /// compatible string inputs, the behavior is unchanged: the value is cast to
85+ /// `Date32` and the ISO week is computed from the resulting date.
86+ pub fn spark_weekofyear ( args : & [ ColumnarValue ] ) -> Result < ColumnarValue > {
87+ // First argument as an Arrow array (date/timestamp/string, etc.)
88+ let array = args[ 0 ] . clone ( ) . into_array ( 1 ) ?;
89+
90+ // Determine timezone (for timestamp inputs). Default to UTC to match
91+ // existing behavior when no timezone is provided.
92+ let default_tz = chrono_tz:: UTC ;
93+ let tz: Tz = if args. len ( ) > 1 {
94+ match & args[ 1 ] {
95+ ColumnarValue :: Scalar ( ScalarValue :: Utf8 ( Some ( s) ) )
96+ | ColumnarValue :: Scalar ( ScalarValue :: LargeUtf8 ( Some ( s) ) ) => {
97+ s. parse :: < Tz > ( ) . unwrap_or ( default_tz)
98+ }
99+ _ => default_tz,
100+ }
101+ } else {
102+ default_tz
103+ } ;
104+
105+ match array. data_type ( ) {
106+ // Timestamp inputs: localize epoch milliseconds before computing ISO week
107+ DataType :: Timestamp ( TimeUnit :: Millisecond , _) => {
108+ let ts_arr = array
109+ . as_any ( )
110+ . downcast_ref :: < TimestampMillisecondArray > ( )
111+ . expect ( "internal cast to TimestampMillisecondArray must succeed" ) ;
112+
113+ let weekofyear = Int32Array :: from_iter ( ts_arr. iter ( ) . map ( |opt_ms| {
114+ opt_ms. map ( |ms| {
115+ // Localize epoch milliseconds to the chosen timezone, then
116+ // derive the ISO week number from the resulting date.
117+ let dt = tz. timestamp_millis ( ms) ;
118+ dt. date_naive ( ) . iso_week ( ) . week ( ) as i32
119+ } )
120+ } ) ) ;
121+
122+ Ok ( ColumnarValue :: Array ( Arc :: new ( weekofyear) ) )
123+ }
124+ // Non-timestamp inputs: preserve existing Date32-based behavior
125+ _ => {
126+ let input = cast ( & array, & DataType :: Date32 ) ?;
127+ let input = input
128+ . as_any ( )
129+ . downcast_ref :: < Date32Array > ( )
130+ . expect ( "internal cast to Date32 must succeed" ) ;
131+
132+ let epoch =
133+ NaiveDate :: from_ymd_opt ( 1970 , 1 , 1 ) . expect ( "1970-01-01 must be a valid date" ) ;
134+ let weekofyear = Int32Array :: from_iter ( input. iter ( ) . map ( |opt_days| {
135+ opt_days. and_then ( |days| {
136+ epoch
137+ . checked_add_signed ( Duration :: days ( days as i64 ) )
138+ . map ( |date| date. iso_week ( ) . week ( ) as i32 )
139+ } )
140+ } ) ) ;
141+
142+ Ok ( ColumnarValue :: Array ( Arc :: new ( weekofyear) ) )
143+ }
144+ }
145+ }
146+
75147/// `spark_quarter(date/timestamp/compatible-string)`
76148///
77149/// Simulates Spark's `quarter()` function.
@@ -307,6 +379,29 @@ mod tests {
307379 Ok ( ( ) )
308380 }
309381
#[test]
fn test_spark_weekofyear() -> Result<()> {
    // Table of (Date32 days since 1970-01-01, expected ISO week):
    //   0     -> 1970-01-01 (Thu, week 1)
    //   4017  -> 1980-12-31 (Wed, week 1 of 1981)
    //   16801 -> 2016-01-01 (Fri, week 53 of 2015)
    //   17167 -> 2017-01-01 (Sun, week 52 of 2016)
    //   14455 -> 2009-07-30 (Thu, week 31)
    //   null  -> null
    let cases: [(Option<i32>, Option<i32>); 6] = [
        (Some(0), Some(1)),
        (Some(4017), Some(1)),
        (Some(16801), Some(53)),
        (Some(17167), Some(52)),
        (Some(14455), Some(31)),
        (None, None),
    ];
    let days: Vec<Option<i32>> = cases.iter().map(|(d, _)| *d).collect();
    let weeks: Vec<Option<i32>> = cases.iter().map(|(_, w)| *w).collect();

    let args = vec![ColumnarValue::Array(Arc::new(Date32Array::from(days)))];
    let expected_ret: ArrayRef = Arc::new(Int32Array::from(weeks));
    assert_eq!(&spark_weekofyear(&args)?.into_array(1)?, &expected_ret);
    Ok(())
}
404+
310405 #[ test]
311406 fn test_spark_quarter_basic ( ) -> Result < ( ) > {
312407 // Date32 days relative to 1970-01-01:
0 commit comments