From 9ebd571083847e5b5af1bb0e5ca3d37c3d8e9bdd Mon Sep 17 00:00:00 2001 From: Satvik-Singh192 Date: Sun, 2 Nov 2025 19:10:16 +0530 Subject: [PATCH] feat: implemented date standardization in transform --- app/etl/transform.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/app/etl/transform.py b/app/etl/transform.py index 8c645d8..f2d2b33 100644 --- a/app/etl/transform.py +++ b/app/etl/transform.py @@ -45,8 +45,15 @@ def transform(df: pd.DataFrame) -> pd.DataFrame: if any(keyword in col.lower() for keyword in ['date', 'time', 'created', 'updated'])] for col in date_columns: - # TODO (Find & Fix): Date columns are not standardized - pass + try: + df_transformed[col] = pd.to_datetime(df_transformed[col], errors='coerce', infer_datetime_format=True) + # Standardize all dates to 'YYYY-MM-DD HH:MM:SS' + df_transformed[col] = df_transformed[col].dt.strftime('%Y-%m-%d %H:%M:%S') + + print(f"✅ Standardized date column '{col}' (e.g., {df_transformed[col].iloc[0]})") + except Exception as e: + print(f"⚠️ Could not standardize column '{col}': {e}") + # TODO (Find & Fix): Text columns are not cleaned (strip, lowercase) return df_transformed