-
Notifications
You must be signed in to change notification settings - Fork 732
feat: tnc connectors implementation [CM-1010] #3894
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| INSERT INTO "activityTypes" ("activityType", platform, "isCodeContribution", "isCollaboration", description, "label") VALUES | ||
| ('enrolled-certification', 'tnc', false, false, 'Successful payment purchase of certification enrollment', 'Enrolled in certification'), | ||
| ('enrolled-training', 'tnc', false, false, 'Successful payment purchase of training enrollment', 'Enrolled in training'), | ||
| ('issued-certification', 'tnc', false, false, 'User is granted a certification', 'Issued certification'), | ||
| ('attempted-course', 'tnc', false, false, 'Certification course is completed', 'Attempted course'), | ||
| ('attempted-exam', 'tnc', false, false, 'Certification exam is completed', 'Attempted exam'); | ||
mbani01 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,103 @@ | ||
| import { IS_PROD_ENV } from '@crowd/common' | ||
|
|
||
| // Main: analytics.silver_fact.certificates (certificate data) | ||
| // Joins: | ||
| // - analytics.silver_dim._crowd_dev_segments_union (segment resolution) | ||
| // - analytics.bronze_fivetran_salesforce.bronze_salesforce_merged_user (LFID) | ||
| // - analytics.silver_dim.users (LFID fallback) | ||
| // - analytics.bronze_fivetran_salesforce.accounts + analytics.bronze_fivetran_salesforce_b2b.accounts (org data) | ||
|
|
||
| const CDP_MATCHED_SEGMENTS = ` | ||
| cdp_matched_segments AS ( | ||
| SELECT DISTINCT | ||
| s.SOURCE_ID AS sourceId, | ||
| s.slug | ||
| FROM ANALYTICS.SILVER_DIM._CROWD_DEV_SEGMENTS_UNION s | ||
| WHERE s.PARENT_SLUG IS NOT NULL | ||
| AND s.GRANDPARENTS_SLUG IS NOT NULL | ||
| AND s.SOURCE_ID IS NOT NULL | ||
| )` | ||
mbani01 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| const ORG_ACCOUNTS = ` | ||
| org_accounts AS ( | ||
| SELECT account_id, account_name, website, domain_aliases, LOGO_URL, INDUSTRY, N_EMPLOYEES | ||
| FROM analytics.bronze_fivetran_salesforce.accounts | ||
| WHERE website IS NOT NULL | ||
| UNION ALL | ||
| SELECT account_id, account_name, website, domain_aliases, NULL AS LOGO_URL, NULL AS INDUSTRY, NULL AS N_EMPLOYEES | ||
| FROM analytics.bronze_fivetran_salesforce_b2b.accounts | ||
| WHERE website IS NOT NULL | ||
| )` | ||
mbani01 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| const LFID_COALESCE = `COALESCE(mu.user_name, u.lf_username)` | ||
|
|
||
| export const buildSourceQuery = (sinceTimestamp?: string): string => { | ||
| let select = ` | ||
| SELECT | ||
| c.*, | ||
| cms.slug AS PROJECT_SLUG, | ||
mbani01 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| org.account_name AS ORGANIZATION_NAME, | ||
| org.website AS ORG_WEBSITE, | ||
| org.domain_aliases AS ORG_DOMAIN_ALIASES, | ||
| org.logo_url AS LOGO_URL, | ||
| org.industry AS ORGANIZATION_INDUSTRY, | ||
| CAST(org.n_employees AS VARCHAR) AS ORGANIZATION_SIZE, | ||
| ${LFID_COALESCE} AS LFID | ||
| FROM analytics.silver_fact.certificates c | ||
| INNER JOIN cdp_matched_segments cms | ||
| ON cms.sourceId = c.project_id | ||
| LEFT JOIN analytics.bronze_fivetran_salesforce.bronze_salesforce_merged_user mu | ||
| ON c.user_id = mu.user_id | ||
| AND mu.user_name IS NOT NULL | ||
| LEFT JOIN analytics.silver_dim.users u | ||
| ON LOWER(c.user_email) = LOWER(u.email) | ||
| AND u.lf_username IS NOT NULL | ||
| LEFT JOIN org_accounts org | ||
| ON c.account_id = org.account_id | ||
| WHERE c.user_email IS NOT NULL` | ||
|
|
||
| // Limit to a single project in non-prod to avoid exporting all projects data | ||
| if (!IS_PROD_ENV) { | ||
| select += ` AND cms.slug = 'cncf'` | ||
| } | ||
|
Comment on lines
+60
to
+62
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nitpick: Can we add a comment here just to explain that this is being added for staging/testing purposes? |
||
|
|
||
| const dedup = ` | ||
| QUALIFY ROW_NUMBER() OVER (PARTITION BY c.certificate_id ORDER BY org.website DESC) = 1` | ||
|
|
||
| if (!sinceTimestamp) { | ||
| return ` | ||
| WITH ${ORG_ACCOUNTS}, | ||
| ${CDP_MATCHED_SEGMENTS} | ||
| ${select} | ||
| ${dedup}`.trim() | ||
| } | ||
|
|
||
| return ` | ||
| WITH ${ORG_ACCOUNTS}, | ||
| ${CDP_MATCHED_SEGMENTS}, | ||
| new_cdp_segments AS ( | ||
| SELECT DISTINCT | ||
| s.SOURCE_ID AS sourceId, | ||
| s.slug | ||
| FROM ANALYTICS.SILVER_DIM._CROWD_DEV_SEGMENTS_UNION s | ||
| WHERE s.CREATED_TS >= '${sinceTimestamp}' | ||
| AND s.PARENT_SLUG IS NOT NULL | ||
| AND s.GRANDPARENTS_SLUG IS NOT NULL | ||
| AND s.SOURCE_ID IS NOT NULL | ||
| ) | ||
|
|
||
| -- New certificates since last export | ||
| ${select} | ||
| AND c.issued_ts >= '${sinceTimestamp}' | ||
| ${dedup} | ||
|
|
||
| UNION | ||
|
|
||
| -- All certificates in newly created segments | ||
| ${select} | ||
| AND EXISTS ( | ||
| SELECT 1 FROM new_cdp_segments ncs | ||
| WHERE ncs.slug = cms.slug AND ncs.sourceId = cms.sourceId | ||
| ) | ||
| ${dedup}`.trim() | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,105 @@ | ||||||||||||||||
| import { TNC_GRID, TncActivityType } from '@crowd/integrations' | ||||||||||||||||
| import { getServiceChildLogger } from '@crowd/logging' | ||||||||||||||||
| import { | ||||||||||||||||
| IActivityData, | ||||||||||||||||
| IMemberData, | ||||||||||||||||
| MemberAttributeName, | ||||||||||||||||
| MemberIdentityType, | ||||||||||||||||
| PlatformType, | ||||||||||||||||
| } from '@crowd/types' | ||||||||||||||||
|
|
||||||||||||||||
| import { TransformedActivity } from '../../../core/transformerBase' | ||||||||||||||||
| import { TncTransformerBase } from '../tncTransformerBase' | ||||||||||||||||
|
|
||||||||||||||||
| const log = getServiceChildLogger('tncCertificatesTransformer') | ||||||||||||||||
|
|
||||||||||||||||
| export class TncCertificatesTransformer extends TncTransformerBase { | ||||||||||||||||
| transformRow(row: Record<string, unknown>): TransformedActivity | null { | ||||||||||||||||
| const email = (row.USER_EMAIL as string | null)?.trim() || null | ||||||||||||||||
| if (!email) { | ||||||||||||||||
| log.debug({ certificateId: row.CERTIFICATE_ID }, 'Skipping row: missing email') | ||||||||||||||||
| return null | ||||||||||||||||
| } | ||||||||||||||||
|
|
||||||||||||||||
| const certificateId = (row.CERTIFICATE_ID as string)?.trim() | ||||||||||||||||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing sourceId validation in certificates and enrollments transformersMedium Severity The certificates transformer extracts Additional Locations (1) |
||||||||||||||||
| const learnerName = (row.LEARNER_NAME as string | null)?.trim() || null | ||||||||||||||||
| const lfUsername = (row.LFID as string | null)?.trim() || null | ||||||||||||||||
|
|
||||||||||||||||
| const identities: IMemberData['identities'] = [] | ||||||||||||||||
| const sourceId = (row.USER_ID as string | null) || undefined | ||||||||||||||||
|
|
||||||||||||||||
| if (lfUsername) { | ||||||||||||||||
| identities.push( | ||||||||||||||||
| { | ||||||||||||||||
| platform: PlatformType.TNC, | ||||||||||||||||
| value: email, | ||||||||||||||||
| type: MemberIdentityType.EMAIL, | ||||||||||||||||
| verified: true, | ||||||||||||||||
| verifiedBy: PlatformType.TNC, | ||||||||||||||||
| sourceId, | ||||||||||||||||
|
Comment on lines
+34
to
+39
|
||||||||||||||||
| }, | ||||||||||||||||
| { | ||||||||||||||||
| platform: PlatformType.TNC, | ||||||||||||||||
| value: lfUsername, | ||||||||||||||||
| type: MemberIdentityType.USERNAME, | ||||||||||||||||
| verified: true, | ||||||||||||||||
| verifiedBy: PlatformType.TNC, | ||||||||||||||||
| sourceId, | ||||||||||||||||
| }, | ||||||||||||||||
| { | ||||||||||||||||
|
Comment on lines
+42
to
+49
|
||||||||||||||||
| platform: PlatformType.TNC, | |
| value: lfUsername, | |
| type: MemberIdentityType.USERNAME, | |
| verified: true, | |
| sourceId, | |
| }, | |
| { |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,114 @@ | ||
| import { IS_PROD_ENV } from '@crowd/common' | ||
|
|
||
| // Main: analytics.bronze_census_ti.course_actions (course action data) | ||
| // Joins: | ||
| // - analytics.bronze_census_ti.users (user resolution via internal_ti_user_id) | ||
| // - analytics.bronze_census_ti.courses (course metadata) | ||
| // - analytics.silver_fact.enrollments (segment + org resolution via email + course_id) | ||
| // - analytics.silver_dim._crowd_dev_segments_union (segment resolution) | ||
| // - analytics.bronze_fivetran_salesforce.accounts + analytics.bronze_fivetran_salesforce_b2b.accounts (org data) | ||
|
|
||
| const CDP_MATCHED_SEGMENTS = ` | ||
| cdp_matched_segments AS ( | ||
| SELECT DISTINCT | ||
| s.SOURCE_ID AS sourceId, | ||
| s.slug | ||
| FROM ANALYTICS.SILVER_DIM._CROWD_DEV_SEGMENTS_UNION s | ||
| WHERE s.PARENT_SLUG IS NOT NULL | ||
| AND s.GRANDPARENTS_SLUG IS NOT NULL | ||
| AND s.SOURCE_ID IS NOT NULL | ||
| )` | ||
|
|
||
| const ORG_ACCOUNTS = ` | ||
| org_accounts AS ( | ||
| SELECT account_id, account_name, website, domain_aliases, LOGO_URL, INDUSTRY, N_EMPLOYEES | ||
| FROM analytics.bronze_fivetran_salesforce.accounts | ||
| WHERE website IS NOT NULL | ||
| UNION ALL | ||
| SELECT account_id, account_name, website, domain_aliases, NULL AS LOGO_URL, NULL AS INDUSTRY, NULL AS N_EMPLOYEES | ||
| FROM analytics.bronze_fivetran_salesforce_b2b.accounts | ||
| WHERE website IS NOT NULL | ||
| )` | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SQL CTE constants duplicated across three filesLow Severity The Additional Locations (2) |
||
|
|
||
| export const buildSourceQuery = (sinceTimestamp?: string): string => { | ||
| let select = ` | ||
| SELECT | ||
| ca.*, | ||
| co.*, | ||
mbani01 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| tu.user_email, | ||
| tu.lfid, | ||
| tu.learner_name, | ||
| tu.user_country, | ||
| tu.job_title, | ||
| e.project_slug AS PROJECT_SLUG, | ||
| e.project_id AS PROJECT_ID, | ||
| e.account_id, | ||
| org.account_name AS ORGANIZATION_NAME, | ||
| org.website AS ORG_WEBSITE, | ||
| org.domain_aliases AS ORG_DOMAIN_ALIASES, | ||
| org.logo_url AS LOGO_URL, | ||
| org.industry AS ORGANIZATION_INDUSTRY, | ||
| CAST(org.n_employees AS VARCHAR) AS ORGANIZATION_SIZE | ||
| FROM analytics.bronze_census_ti.course_actions ca | ||
| INNER JOIN analytics.bronze_census_ti.users tu | ||
| ON ca.internal_ti_user_id = tu.internal_ti_user_id | ||
| INNER JOIN analytics.bronze_census_ti.courses co | ||
| ON ca.course_id = co.course_id | ||
| INNER JOIN analytics.silver_fact.enrollments e | ||
| ON e.course_id = ca.course_id | ||
| AND LOWER(e.user_email) = LOWER(tu.user_email) | ||
| INNER JOIN cdp_matched_segments cms | ||
| ON cms.slug = e.project_slug | ||
| AND cms.sourceId = e.project_id | ||
| LEFT JOIN org_accounts org | ||
| ON e.account_id = org.account_id | ||
| WHERE ca.type = 'status_change' | ||
| AND ca.source = 'course_started' | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SQL filter captures "started" events but descriptions say "completed"Medium Severity The courses SQL query filters Additional Locations (1) |
||
| AND co.is_test_or_archived = false | ||
| AND tu.user_email IS NOT NULL` | ||
|
|
||
| // Limit to a single project in non-prod to avoid exporting all projects data | ||
| if (!IS_PROD_ENV) { | ||
| select += ` AND e.project_slug = 'cncf'` | ||
| } | ||
|
|
||
| const dedup = ` | ||
| QUALIFY ROW_NUMBER() OVER (PARTITION BY ca.course_action_id ORDER BY org.website DESC) = 1` | ||
|
|
||
| if (!sinceTimestamp) { | ||
| return ` | ||
| WITH ${ORG_ACCOUNTS}, | ||
| ${CDP_MATCHED_SEGMENTS} | ||
| ${select} | ||
| ${dedup}`.trim() | ||
| } | ||
|
|
||
| return ` | ||
| WITH ${ORG_ACCOUNTS}, | ||
| ${CDP_MATCHED_SEGMENTS}, | ||
| new_cdp_segments AS ( | ||
| SELECT DISTINCT | ||
| s.SOURCE_ID AS sourceId, | ||
| s.slug | ||
| FROM ANALYTICS.SILVER_DIM._CROWD_DEV_SEGMENTS_UNION s | ||
| WHERE s.CREATED_TS >= '${sinceTimestamp}' | ||
| AND s.PARENT_SLUG IS NOT NULL | ||
| AND s.GRANDPARENTS_SLUG IS NOT NULL | ||
| AND s.SOURCE_ID IS NOT NULL | ||
| ) | ||
|
|
||
| -- New course actions since last export | ||
| ${select} | ||
| AND ca.timestamp >= '${sinceTimestamp}' | ||
| ${dedup} | ||
|
|
||
| UNION | ||
|
|
||
| -- All course actions in newly created segments | ||
| ${select} | ||
| AND EXISTS ( | ||
| SELECT 1 FROM new_cdp_segments ncs | ||
| WHERE ncs.slug = cms.slug AND ncs.sourceId = cms.sourceId | ||
| ) | ||
| ${dedup}`.trim() | ||
| } | ||


Uh oh!
There was an error while loading. Please reload this page.