Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion docs/chapters/01_welcome/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,27 @@ If you're new to the SDK or installing it for the first time, be sure to check o

You can find details on the latest releases, FAQs, known issues, and more in the :ref:`Overview<overview>` section.

If you already have the SDK installed and are looking to get started using it, please refer to the :ref:`Common Modules<common_modules>` section for authentication setup, and the :ref:`DQ Validator<dq_validator>` section for data quality validation.
SDK Modules
-----------

The SDK provides several powerful modules for data intelligence and governance:

**Data Quality Validator**
Comprehensive data quality validation framework with support for multiple check types, integration with Pandas and PySpark DataFrames, and CEL (Common Expression Language) for complex validation rules. See :ref:`DQ Validator<dq_validator>` for details.

**Data Product Hub Services**
Python client library for the IBM Data Product Hub API, enabling programmatic management of data products, containers, contract terms, and the complete data product lifecycle from creation to retirement. See :ref:`Data Product Hub Services<dph_services>` for details.

**ODCS Generator**
Automated generation of Open Data Contract Standard (ODCS) v3.1.0-compliant YAML files from enterprise data catalogs including Collibra and Informatica CDGC. Streamlines data contract creation by extracting and transforming catalog metadata. See :ref:`ODCS Generator<odcs_generator>` for details.

**Data Product Recommender**
Intelligent analysis of database query logs to identify high-value tables and logical groupings for data product prioritization. Supports multiple platforms (Snowflake, Databricks, BigQuery, watsonx.data) and provides actionable recommendations based on usage patterns. See :ref:`Data Product Recommender<data_product_recommender>` for details.

Getting Started
---------------

If you already have the SDK installed and want to start using it, refer to the :ref:`Common Modules<common_modules>` section for authentication setup, then explore the individual module documentation for specific use cases.

Looking for documentation on the SDK's interfaces and abstractions? Please check out our :ref:`API Reference Documentation<api_ref>` for an in-depth breakdown of all the SDK's classes, properties, and methods, including detailed descriptions of any required or optional parameters.

Expand Down
1 change: 1 addition & 0 deletions src/wxdi/data_product_recommender/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@


def main():
"""Main entry point for the data product recommender CLI."""
parser = argparse.ArgumentParser(
description='Analyze query logs to recommend data products'
)
Expand Down
21 changes: 8 additions & 13 deletions src/wxdi/data_product_recommender/recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -944,14 +944,13 @@ def _get_star_rating(self, score: float) -> tuple:
"""Get star rating and label for a score"""
if score >= 80:
return FIVE_STARS, EXCELLENT_CANDIDATE
elif score >= 60:
if score >= 60:
return FOUR_STARS, GOOD_CANDIDATE
elif score >= 40:
if score >= 40:
return THREE_STARS, FAIR_CANDIDATE
elif score >= 20:
if score >= 20:
return TWO_STARS, WEAK_CANDIDATE
else:
return ONE_STAR, POOR_CANDIDATE
return ONE_STAR, POOR_CANDIDATE

def _merge_and_sort_products(self, recommendations: dict) -> list:
"""Merge groups and standalone tables, sorted by score"""
Expand Down Expand Up @@ -1126,7 +1125,6 @@ def export_recommendations_markdown(self, recommendations: dict, output_file: st

def _build_json_metadata(self, recommendations: dict) -> dict:
"""Build metadata section for JSON export"""
from datetime import datetime
return {
"generated_at": datetime.now().isoformat(),
"total_queries_analyzed": len(self.query_logs) if self.query_logs is not None else 0,
Expand Down Expand Up @@ -1259,8 +1257,6 @@ def _process_non_clustered_recommendations(self, recommendations: dict) -> list:

def export_recommendations_json(self, recommendations: dict, output_file: str):
"""Export recommendations to JSON file for agent consumption"""
import json

output = {
"recommendations": [],
"metadata": self._build_json_metadata(recommendations)
Expand All @@ -1280,13 +1276,12 @@ def _get_rating_label(self, score: float) -> str:
"""Convert numeric score to rating label"""
if score >= 80:
return "excellent"
elif score >= 60:
if score >= 60:
return "good"
elif score >= 40:
if score >= 40:
return "fair"
elif score >= 20:
if score >= 20:
return "weak"
else:
return "poor"
return "poor"

# Made with Bob
11 changes: 5 additions & 6 deletions src/wxdi/odcs_generator/generate_odcs_from_collibra.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,12 +563,11 @@ def _build_physical_type(self, attr_map: Dict[str, Any]) -> Optional[str]:
# Build type string with parameters
if scale is not None and precision is not None:
return f"{base_type}({precision},{scale})"
elif precision is not None:
if precision is not None:
return f"{base_type}({precision})"
elif size is not None:
if size is not None:
return f"{base_type}({size})"
else:
return base_type
return base_type

@staticmethod
def _to_int(value: Any) -> Optional[int]:
Expand Down Expand Up @@ -687,9 +686,9 @@ def _add_inline_comment_if_needed(line: str) -> str:
"""Add inline comment to server configuration fields if needed"""
if ' server:' in line and 'CONFIGURE_SERVER_HOSTNAME' in line:
return line + ' # ⚠️ UPDATE: e.g., prod.snowflake.acme.com'
elif ' type:' in line and 'DEFINE_SERVER_TYPE' in line:
if ' type:' in line and 'DEFINE_SERVER_TYPE' in line:
return line + ' # ⚠️ UPDATE: e.g., snowflake, postgres, bigquery, redshift'
elif ' schema:' in line and 'CONFIGURE_SCHEMA_NAME' in line:
if ' schema:' in line and 'CONFIGURE_SCHEMA_NAME' in line:
return line + ' # ⚠️ UPDATE: e.g., public, dbo, my_schema'
return line

Expand Down
8 changes: 6 additions & 2 deletions src/wxdi/odcs_generator/generate_odcs_from_informatica.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
}

class InformaticaClient:
"""Client for interacting with Informatica CDGC API."""

CONTENT_TYPE_JSON = "application/json"
HEADERS_JSON = {"Accept": CONTENT_TYPE_JSON}
Expand Down Expand Up @@ -169,6 +170,7 @@ def get_column_details(self, column_id: str) -> Dict[str, Any]:
return self._fetch_asset(column_id)

def parse_arguments():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(
description='Generate ODCS YAML file from Informatica asset',
formatter_class=argparse.RawDescriptionHelpFormatter,
Expand All @@ -183,6 +185,7 @@ def parse_arguments():
return parser.parse_args()

def validate_arguments(args):
"""Validate required command line arguments."""
if not args.cdgc_url:
print("Error: Informatica CDGC URL is required. Set INFORMATICA_CDGC_URL environment variable or use --cdgc-url")
print("Example: --cdgc-url https://cdgc.dm-us.informaticacloud.com")
Expand Down Expand Up @@ -361,9 +364,9 @@ def _add_inline_comment_if_needed(line: str) -> str:
"""Add inline comment to server configuration fields if needed"""
if ' server:' in line and 'CONFIGURE_SERVER_HOSTNAME' in line:
return line + ' # ⚠️ UPDATE: e.g., prod.snowflake.acme.com'
elif ' type:' in line and 'CONFIGURE_SERVER_TYPE' in line:
if ' type:' in line and 'CONFIGURE_SERVER_TYPE' in line:
return line + ' # ⚠️ UPDATE: e.g., snowflake, postgres, bigquery, redshift'
elif ' schema:' in line and 'CONFIGURE_SCHEMA_NAME' in line:
if ' schema:' in line and 'CONFIGURE_SCHEMA_NAME' in line:
return line + ' # ⚠️ UPDATE: e.g., public, dbo, my_schema'
return line

Expand Down Expand Up @@ -418,6 +421,7 @@ def write_yaml_file(output_file: str, odcs_data: Dict[str, Any]) -> None:


def main():
"""Main entry point for the ODCS generator from Informatica."""
args = parse_arguments()
validate_arguments(args)

Expand Down