diff --git a/docs/chapters/01_welcome/index.rst b/docs/chapters/01_welcome/index.rst index 1700b6d..b800c8e 100644 --- a/docs/chapters/01_welcome/index.rst +++ b/docs/chapters/01_welcome/index.rst @@ -24,7 +24,27 @@ If you're new to the SDK or installing it for the first time, be sure to check o You can find details on the latest releases, FAQs, known issues, and more in the :ref:`Overview` section. -If you already have the SDK installed and are looking to get started using it, please refer to the :ref:`Common Modules` section for authentication setup, and the :ref:`DQ Validator` section for data quality validation. +SDK Modules +----------- + +The SDK provides several powerful modules for data intelligence and governance: + +**Data Quality Validator** + Comprehensive data quality validation framework with support for multiple check types, integration with Pandas and PySpark DataFrames, and CEL (Common Expression Language) for complex validation rules. See :ref:`DQ Validator` for details. + +**Data Product Hub Services** + Python client library for IBM Data Product Hub API, enabling programmatic management of data products, containers, contract terms, and the complete data product lifecycle from creation to retirement. See :ref:`Data Product Hub Services` for details. + +**ODCS Generator** + Automated generation of Open Data Contract Standard (ODCS) v3.1.0 compliant YAML files from enterprise data catalogs including Collibra and Informatica CDGC. Streamlines data contract creation by extracting and transforming catalog metadata. See :ref:`ODCS Generator` for details. + +**Data Product Recommender** + Intelligent analysis of database query logs to identify high-value tables and logical groupings for data product prioritization. Supports multiple platforms (Snowflake, Databricks, BigQuery, watsonx.data) and provides actionable recommendations based on usage patterns. See :ref:`Data Product Recommender` for details. + +Getting Started +--------------- + +If you already have the SDK installed and are looking to get started using it, please refer to the :ref:`Common Modules` section for authentication setup, and explore the individual module documentation for specific use cases. Looking for documentation on the SDK's interfaces and abstractions? Please check out our :ref:`API Reference Documentation` for an in-depth breakdown of all the SDK's classes, properties, and methods - including detailed descriptions of any required or optional parameters. diff --git a/src/wxdi/data_product_recommender/cli.py b/src/wxdi/data_product_recommender/cli.py index d884a90..4fb136b 100644 --- a/src/wxdi/data_product_recommender/cli.py +++ b/src/wxdi/data_product_recommender/cli.py @@ -33,6 +33,7 @@ def main(): + """Main entry point for the data product recommender CLI.""" parser = argparse.ArgumentParser( description='Analyze query logs to recommend data products' ) diff --git a/src/wxdi/data_product_recommender/recommender.py b/src/wxdi/data_product_recommender/recommender.py index 65f9732..f9a7e95 100644 --- a/src/wxdi/data_product_recommender/recommender.py +++ b/src/wxdi/data_product_recommender/recommender.py @@ -944,14 +944,13 @@ def _get_star_rating(self, score: float) -> tuple: """Get star rating and label for a score""" if score >= 80: return FIVE_STARS, EXCELLENT_CANDIDATE - elif score >= 60: + if score >= 60: return FOUR_STARS, GOOD_CANDIDATE - elif score >= 40: + if score >= 40: return THREE_STARS, FAIR_CANDIDATE - elif score >= 20: + if score >= 20: return TWO_STARS, WEAK_CANDIDATE - else: - return ONE_STAR, POOR_CANDIDATE + return ONE_STAR, POOR_CANDIDATE def _merge_and_sort_products(self, recommendations: dict) -> list: """Merge groups and standalone tables, sorted by score""" @@ -1126,7 +1125,6 @@ def export_recommendations_markdown(self, recommendations: dict, output_file: st def _build_json_metadata(self, recommendations: dict) -> dict: """Build metadata section for JSON export""" - from datetime import datetime return { "generated_at": datetime.now().isoformat(), "total_queries_analyzed": len(self.query_logs) if self.query_logs is not None else 0, @@ -1259,8 +1257,6 @@ def _process_non_clustered_recommendations(self, recommendations: dict) -> list: def export_recommendations_json(self, recommendations: dict, output_file: str): """Export recommendations to JSON file for agent consumption""" - import json - output = { "recommendations": [], "metadata": self._build_json_metadata(recommendations) @@ -1280,13 +1276,12 @@ def _get_rating_label(self, score: float) -> str: """Convert numeric score to rating label""" if score >= 80: return "excellent" - elif score >= 60: + if score >= 60: return "good" - elif score >= 40: + if score >= 40: return "fair" - elif score >= 20: + if score >= 20: return "weak" - else: - return "poor" + return "poor" # Made with Bob diff --git a/src/wxdi/odcs_generator/generate_odcs_from_collibra.py b/src/wxdi/odcs_generator/generate_odcs_from_collibra.py index 493b21e..8f0a0d7 100644 --- a/src/wxdi/odcs_generator/generate_odcs_from_collibra.py +++ b/src/wxdi/odcs_generator/generate_odcs_from_collibra.py @@ -563,12 +563,11 @@ def _build_physical_type(self, attr_map: Dict[str, Any]) -> Optional[str]: # Build type string with parameters if scale is not None and precision is not None: return f"{base_type}({precision},{scale})" - elif precision is not None: + if precision is not None: return f"{base_type}({precision})" - elif size is not None: + if size is not None: return f"{base_type}({size})" - else: - return base_type + return base_type @staticmethod def _to_int(value: Any) -> Optional[int]: @@ -687,9 +686,9 @@ def _add_inline_comment_if_needed(line: str) -> str: """Add inline comment to server configuration fields if needed""" if ' server:' in line and 'CONFIGURE_SERVER_HOSTNAME' in line: return line + ' # ⚠️ UPDATE: e.g., prod.snowflake.acme.com' - elif ' type:' in line and 'DEFINE_SERVER_TYPE' in line: + if ' type:' in line and 'DEFINE_SERVER_TYPE' in line: return line + ' # ⚠️ UPDATE: e.g., snowflake, postgres, bigquery, redshift' - elif ' schema:' in line and 'CONFIGURE_SCHEMA_NAME' in line: + if ' schema:' in line and 'CONFIGURE_SCHEMA_NAME' in line: return line + ' # ⚠️ UPDATE: e.g., public, dbo, my_schema' return line diff --git a/src/wxdi/odcs_generator/generate_odcs_from_informatica.py b/src/wxdi/odcs_generator/generate_odcs_from_informatica.py index 31d4c6b..4cc309a 100644 --- a/src/wxdi/odcs_generator/generate_odcs_from_informatica.py +++ b/src/wxdi/odcs_generator/generate_odcs_from_informatica.py @@ -72,6 +72,7 @@ } class InformaticaClient: + """Client for interacting with Informatica CDGC API.""" CONTENT_TYPE_JSON = "application/json" HEADERS_JSON = {"Accept": CONTENT_TYPE_JSON} @@ -169,6 +170,7 @@ def get_column_details(self, column_id: str) -> Dict[str, Any]: return self._fetch_asset(column_id) def parse_arguments(): + """Parse command line arguments.""" parser = argparse.ArgumentParser( description='Generate ODCS YAML file from Informatica asset', formatter_class=argparse.RawDescriptionHelpFormatter, @@ -183,6 +185,7 @@ def parse_arguments(): return parser.parse_args() def validate_arguments(args): + """Validate required command line arguments.""" if not args.cdgc_url: print("Error: Informatica CDGC URL is required. Set INFORMATICA_CDGC_URL environment variable or use --cdgc-url") print("Example: --cdgc-url https://cdgc.dm-us.informaticacloud.com") @@ -361,9 +364,9 @@ def _add_inline_comment_if_needed(line: str) -> str: """Add inline comment to server configuration fields if needed""" if ' server:' in line and 'CONFIGURE_SERVER_HOSTNAME' in line: return line + ' # ⚠️ UPDATE: e.g., prod.snowflake.acme.com' - elif ' type:' in line and 'CONFIGURE_SERVER_TYPE' in line: + if ' type:' in line and 'CONFIGURE_SERVER_TYPE' in line: return line + ' # ⚠️ UPDATE: e.g., snowflake, postgres, bigquery, redshift' - elif ' schema:' in line and 'CONFIGURE_SCHEMA_NAME' in line: + if ' schema:' in line and 'CONFIGURE_SCHEMA_NAME' in line: return line + ' # ⚠️ UPDATE: e.g., public, dbo, my_schema' return line @@ -418,6 +421,7 @@ def write_yaml_file(output_file: str, odcs_data: Dict[str, Any]) -> None: def main(): + """Main entry point for the ODCS generator from Informatica.""" args = parse_arguments() validate_arguments(args)