From 7287891289dbe6c71f80e63b0814bd6d934a6183 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Thu, 19 Mar 2026 13:17:29 +0100 Subject: [PATCH 01/26] feat: add static api key middleware for dev stats Signed-off-by: Umberto Sgueglia --- backend/.env.dist.local | 5 ++++- backend/config/custom-environment-variables.json | 3 +++ backend/config/default.json | 3 ++- backend/src/api/public/index.ts | 2 +- backend/src/conf/configTypes.ts | 4 ++++ backend/src/conf/index.ts | 4 ++++ 6 files changed, 18 insertions(+), 3 deletions(-) diff --git a/backend/.env.dist.local b/backend/.env.dist.local index aa81f0e379..f2de5ba325 100755 --- a/backend/.env.dist.local +++ b/backend/.env.dist.local @@ -166,4 +166,7 @@ CROWD_TINYBIRD_BASE_URL=http://localhost:7181/ # Auth0 CROWD_AUTH0_ISSUER_BASE_URL= -CROWD_AUTH0_AUDIENCE= \ No newline at end of file +CROWD_AUTH0_AUDIENCE= + +# DevStats +CROWD_DEV_STATS_API_KEY=lfx_F3QnbGd3L@YjLIDrbs&nJmodiZ6LJDr3 \ No newline at end of file diff --git a/backend/config/custom-environment-variables.json b/backend/config/custom-environment-variables.json index c13a0e82a2..9e255b891b 100644 --- a/backend/config/custom-environment-variables.json +++ b/backend/config/custom-environment-variables.json @@ -189,6 +189,9 @@ "callbackUrl": "CROWD_GITLAB_CALLBACK_URL", "webhookToken": "CROWD_GITLAB_WEBHOOK_TOKEN" }, + "devStats": { + "apiKey": "CROWD_DEV_STATS_API_KEY" + }, "snowflake": { "privateKey": "CROWD_SNOWFLAKE_PRIVATE_KEY", "account": "CROWD_SNOWFLAKE_ACCOUNT", diff --git a/backend/config/default.json b/backend/config/default.json index e60af044e2..1154de5f41 100644 --- a/backend/config/default.json +++ b/backend/config/default.json @@ -50,5 +50,6 @@ "jiraIssueReporter": {}, "snowflake": {}, "nango": {}, - "linuxFoundation": {} + "linuxFoundation": {}, + "devStats": {} } diff --git a/backend/src/api/public/index.ts b/backend/src/api/public/index.ts index 10b4b94abf..8894815d14 100644 --- a/backend/src/api/public/index.ts +++ b/backend/src/api/public/index.ts @@ -1,6 +1,6 @@ import { Router } from 'express' -import { AUTH0_CONFIG } from '../../conf' +import { AUTH0_CONFIG, DEV_STATS_CONFIG } from '../../conf' import { errorHandler } from './middlewares/errorHandler' import { oauth2Middleware } from './middlewares/oauth2Middleware' diff --git a/backend/src/conf/configTypes.ts b/backend/src/conf/configTypes.ts index ad764fa29e..290d9c0d38 100644 --- a/backend/src/conf/configTypes.ts +++ b/backend/src/conf/configTypes.ts @@ -206,3 +206,7 @@ export interface SnowflakeConfiguration { export interface LinuxFoundationConfiguration { collectionId: string } + +export interface DevStatsConfiguration { + apiKey: string +} diff --git a/backend/src/conf/index.ts b/backend/src/conf/index.ts index dfebb3c4ea..3441223e18 100644 --- a/backend/src/conf/index.ts +++ b/backend/src/conf/index.ts @@ -26,6 +26,7 @@ import { IOpenStatusApiConfig, IRedditConfig, IntegrationProcessingConfiguration, + DevStatsConfiguration, LinuxFoundationConfiguration, NangoConfiguration, OrganizationEnrichmentConfiguration, @@ -157,3 +158,6 @@ export const LINUX_FOUNDATION_CONFIG: LinuxFoundationConfiguration = export const ENABLE_LF_COLLECTION_MANAGEMENT: boolean = process.env.ENABLE_LF_COLLECTION_MANAGEMENT === 'true' + +export const DEV_STATS_CONFIG: DevStatsConfiguration = + config.get('devStats') From 98f9dc69b0262637400d3953dc3944d5f206f6a9 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Thu, 19 Mar 2026 13:22:47 +0100 Subject: [PATCH 02/26] fix: lint Signed-off-by: Umberto Sgueglia --- backend/src/conf/index.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/backend/src/conf/index.ts b/backend/src/conf/index.ts index 3441223e18..da89af3d08 100644 --- a/backend/src/conf/index.ts +++ b/backend/src/conf/index.ts @@ -14,6 +14,7 @@ import { ComprehendConfiguration, CrowdAnalyticsConfiguration, DbConfiguration, + DevStatsConfiguration, DiscordConfiguration, EagleEyeConfiguration, EncryptionConfiguration, @@ -26,7 +27,6 @@ import { IOpenStatusApiConfig, IRedditConfig, IntegrationProcessingConfiguration, - DevStatsConfiguration, LinuxFoundationConfiguration, NangoConfiguration, OrganizationEnrichmentConfiguration, @@ -159,5 +159,4 @@ export const LINUX_FOUNDATION_CONFIG: LinuxFoundationConfiguration = export const ENABLE_LF_COLLECTION_MANAGEMENT: boolean = process.env.ENABLE_LF_COLLECTION_MANAGEMENT === 'true' -export const DEV_STATS_CONFIG: DevStatsConfiguration = - config.get('devStats') +export const DEV_STATS_CONFIG: DevStatsConfiguration = config.get('devStats') From 35b907de46d3faf3646bb1164a95850a19b3b55a Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Thu, 19 Mar 2026 13:25:28 +0100 Subject: [PATCH 03/26] fix: remove local secret Signed-off-by: Umberto Sgueglia --- backend/.env.dist.local | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/.env.dist.local b/backend/.env.dist.local index f2de5ba325..eafd9acc5a 100755 --- a/backend/.env.dist.local +++ b/backend/.env.dist.local @@ -169,4 +169,4 @@ CROWD_AUTH0_ISSUER_BASE_URL= CROWD_AUTH0_AUDIENCE= # DevStats -CROWD_DEV_STATS_API_KEY=lfx_F3QnbGd3L@YjLIDrbs&nJmodiZ6LJDr3 \ No newline at end of file +CROWD_DEV_STATS_API_KEY= \ No newline at end of file From 817d643d20046e90d0278c55f3bd24d91e210cb1 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Thu, 19 Mar 2026 18:25:55 +0100 Subject: [PATCH 04/26] fix: use db oriented api keys Signed-off-by: Umberto Sgueglia --- backend/src/api/public/index.ts | 2 +- backend/src/api/public/middlewares/staticApiKeyMiddleware.ts | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/src/api/public/index.ts b/backend/src/api/public/index.ts index 8894815d14..10b4b94abf 100644 --- a/backend/src/api/public/index.ts +++ b/backend/src/api/public/index.ts @@ -1,6 +1,6 @@ import { Router } from 'express' -import { AUTH0_CONFIG, DEV_STATS_CONFIG } from '../../conf' +import { AUTH0_CONFIG } from '../../conf' import { errorHandler } from './middlewares/errorHandler' import { oauth2Middleware } from './middlewares/oauth2Middleware' diff --git a/backend/src/api/public/middlewares/staticApiKeyMiddleware.ts b/backend/src/api/public/middlewares/staticApiKeyMiddleware.ts index 76d928f8a8..76945608c1 100644 --- a/backend/src/api/public/middlewares/staticApiKeyMiddleware.ts +++ b/backend/src/api/public/middlewares/staticApiKeyMiddleware.ts @@ -1,6 +1,5 @@ import crypto from 'crypto' import type { NextFunction, Request, RequestHandler, Response } from 'express' - import { UnauthorizedError } from '@crowd/common' import { findApiKeyByHash, optionsQx, touchApiKeyLastUsed } from '@crowd/data-access-layer' From ffbdfee71c1de621048955a1694b914bb8f437b9 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Thu, 19 Mar 2026 18:36:42 +0100 Subject: [PATCH 05/26] fix: remove useless env var Signed-off-by: Umberto Sgueglia --- backend/.env.dist.local | 3 --- backend/config/custom-environment-variables.json | 3 --- backend/config/default.json | 3 +-- backend/src/conf/configTypes.ts | 3 --- backend/src/conf/index.ts | 3 --- 5 files changed, 1 insertion(+), 14 deletions(-) diff --git a/backend/.env.dist.local b/backend/.env.dist.local index eafd9acc5a..76ce6452c8 100755 --- a/backend/.env.dist.local +++ b/backend/.env.dist.local @@ -167,6 +167,3 @@ CROWD_TINYBIRD_BASE_URL=http://localhost:7181/ # Auth0 CROWD_AUTH0_ISSUER_BASE_URL= CROWD_AUTH0_AUDIENCE= - -# DevStats -CROWD_DEV_STATS_API_KEY= \ No newline at end of file diff --git a/backend/config/custom-environment-variables.json b/backend/config/custom-environment-variables.json index 9e255b891b..c13a0e82a2 100644 --- a/backend/config/custom-environment-variables.json +++ b/backend/config/custom-environment-variables.json @@ -189,9 +189,6 @@ "callbackUrl": "CROWD_GITLAB_CALLBACK_URL", "webhookToken": "CROWD_GITLAB_WEBHOOK_TOKEN" }, - "devStats": { - "apiKey": "CROWD_DEV_STATS_API_KEY" - }, "snowflake": { "privateKey": "CROWD_SNOWFLAKE_PRIVATE_KEY", "account": "CROWD_SNOWFLAKE_ACCOUNT", diff --git a/backend/config/default.json b/backend/config/default.json index 1154de5f41..e60af044e2 100644 --- a/backend/config/default.json +++ b/backend/config/default.json @@ -50,6 +50,5 @@ "jiraIssueReporter": {}, "snowflake": {}, "nango": {}, - "linuxFoundation": {}, - "devStats": {} + "linuxFoundation": {} } diff --git a/backend/src/conf/configTypes.ts b/backend/src/conf/configTypes.ts index 290d9c0d38..57698ed63e 100644 --- a/backend/src/conf/configTypes.ts +++ b/backend/src/conf/configTypes.ts @@ -207,6 +207,3 @@ export interface LinuxFoundationConfiguration { collectionId: string } -export interface DevStatsConfiguration { - apiKey: string -} diff --git a/backend/src/conf/index.ts b/backend/src/conf/index.ts index da89af3d08..dfebb3c4ea 100644 --- a/backend/src/conf/index.ts +++ b/backend/src/conf/index.ts @@ -14,7 +14,6 @@ import { ComprehendConfiguration, CrowdAnalyticsConfiguration, DbConfiguration, - DevStatsConfiguration, DiscordConfiguration, EagleEyeConfiguration, EncryptionConfiguration, @@ -158,5 +157,3 @@ export const LINUX_FOUNDATION_CONFIG: LinuxFoundationConfiguration = export const ENABLE_LF_COLLECTION_MANAGEMENT: boolean = process.env.ENABLE_LF_COLLECTION_MANAGEMENT === 'true' - -export const DEV_STATS_CONFIG: DevStatsConfiguration = config.get('devStats') From 03a1c3b9db70439bf2a8bcab094b097eced573fb Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Thu, 19 Mar 2026 18:39:17 +0100 Subject: [PATCH 06/26] fix: lint Signed-off-by: Umberto Sgueglia --- backend/src/conf/configTypes.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/src/conf/configTypes.ts b/backend/src/conf/configTypes.ts index 57698ed63e..ad764fa29e 100644 --- a/backend/src/conf/configTypes.ts +++ b/backend/src/conf/configTypes.ts @@ -206,4 +206,3 @@ export interface SnowflakeConfiguration { export interface LinuxFoundationConfiguration { collectionId: string } - From 33758b67e96afd88f15cb7d5b03526ac6c101c9d Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Fri, 20 Mar 2026 09:22:46 +0100 Subject: [PATCH 07/26] fix: review Signed-off-by: Umberto Sgueglia --- backend/.env.dist.local | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/.env.dist.local b/backend/.env.dist.local index 76ce6452c8..aa81f0e379 100755 --- a/backend/.env.dist.local +++ b/backend/.env.dist.local @@ -166,4 +166,4 @@ CROWD_TINYBIRD_BASE_URL=http://localhost:7181/ # Auth0 CROWD_AUTH0_ISSUER_BASE_URL= -CROWD_AUTH0_AUDIENCE= +CROWD_AUTH0_AUDIENCE= \ No newline at end of file From e999e054726a02565e16a4b6072fdcbbb0470aad Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Thu, 19 Mar 2026 16:31:54 +0100 Subject: [PATCH 08/26] feat: add query layer Signed-off-by: Umberto Sgueglia --- .../public/v1/dev-stats/getAffiliations.ts | 31 ++++ .../src/api/public/v1/dev-stats/queries.ts | 174 ++++++++++++++++++ 2 files changed, 205 insertions(+) create mode 100644 backend/src/api/public/v1/dev-stats/getAffiliations.ts create mode 100644 backend/src/api/public/v1/dev-stats/queries.ts diff --git a/backend/src/api/public/v1/dev-stats/getAffiliations.ts b/backend/src/api/public/v1/dev-stats/getAffiliations.ts new file mode 100644 index 0000000000..6d1988fea9 --- /dev/null +++ b/backend/src/api/public/v1/dev-stats/getAffiliations.ts @@ -0,0 +1,31 @@ +import type { Request, Response } from 'express' +import { z } from 'zod' + +import { optionsQx } from '@crowd/data-access-layer' + +import { ok } from '@/utils/api' +import { validateOrThrow } from '@/utils/validation' + +import { findAffiliationsByGithubHandles } from './queries' + +const MAX_HANDLES = 1000 + +const bodySchema = z.object({ + githubHandles: z + .array(z.string().min(1)) + .min(1) + .max(MAX_HANDLES, `Maximum ${MAX_HANDLES} handles per request`), +}) + +export async function getAffiliations(req: Request, res: Response): Promise { + const { githubHandles } = validateOrThrow(bodySchema, req.body) + const qx = optionsQx(req) + + const { contributors, notFound } = await findAffiliationsByGithubHandles(qx, githubHandles) + + ok(res, { + total_found: contributors.length, + contributors, + notFound, + }) +} diff --git a/backend/src/api/public/v1/dev-stats/queries.ts b/backend/src/api/public/v1/dev-stats/queries.ts new file mode 100644 index 0000000000..8e8b45ab21 --- /dev/null +++ b/backend/src/api/public/v1/dev-stats/queries.ts @@ -0,0 +1,174 @@ +import { fetchManyMemberOrgsWithOrgData } from '@crowd/data-access-layer' +import { getServiceChildLogger } from '@crowd/logging' +import { MemberIdentityType, PlatformType } from '@crowd/types' +import type { QueryExecutor } from '@crowd/data-access-layer/src/queryExecutor' + +const log = getServiceChildLogger('dev-stats-queries') + +export interface MemberRow { + githubHandle: string + memberId: string + displayName: string | null +} + +export interface AffiliationResult { + githubHandle: string + name: string | null + emails: string[] + affiliations: { + organization: string + startDate: string | null + endDate: string | null + }[] +} + +export interface DevStatsQueryResult { + contributors: AffiliationResult[] + notFound: string[] +} + +export async function findAffiliationsByGithubHandles( + qx: QueryExecutor, + githubHandles: string[], +): Promise { + const t0 = performance.now() + + // Step 1: find members by github handles + const lowercasedHandles = githubHandles.map((h) => h.toLowerCase()) + + log.info( + { + query: ` + SELECT mi.value AS "githubHandle", mi."memberId", m."displayName" + FROM "memberIdentities" mi + JOIN members m ON m.id = mi."memberId" + WHERE mi.platform = '${PlatformType.GITHUB}' + AND mi.type = '${MemberIdentityType.USERNAME}' + AND lower(mi.value) IN (${lowercasedHandles.map((h) => `'${h}'`).join(', ')}) + AND mi."deletedAt" IS NULL + AND m."deletedAt" IS NULL + `, + }, + 'Step 1 query', + ) + + const memberRows: MemberRow[] = await qx.select( + ` + SELECT + mi.value AS "githubHandle", + mi."memberId", + m."displayName" + FROM "memberIdentities" mi + JOIN members m ON m.id = mi."memberId" + WHERE mi.platform = $(platform) + AND mi.type = $(type) + AND lower(mi.value) IN ($(lowercasedHandles:csv)) + AND mi."deletedAt" IS NULL + AND m."deletedAt" IS NULL + `, + { + platform: PlatformType.GITHUB, + type: MemberIdentityType.USERNAME, + lowercasedHandles, + }, + ) + + const t1 = performance.now() + log.info({ handles: githubHandles.length, found: memberRows.length, ms: Math.round(t1 - t0) }, 'Step 1: members lookup') + + const foundHandles = new Set(memberRows.map((r) => r.githubHandle)) + const notFound = githubHandles.filter((h) => !foundHandles.has(h)) + + if (memberRows.length === 0) { + return { contributors: [], notFound } + } + + const memberIds = memberRows.map((r) => r.memberId) + + // Step 2: fetch verified emails for found members + log.info( + { + query: ` + SELECT "memberId", value AS email + FROM "memberIdentities" + WHERE "memberId" IN (${memberIds.map((id) => `'${id}'`).join(', ')}) + AND type = '${MemberIdentityType.EMAIL}' + AND verified = true + AND "deletedAt" IS NULL + `, + }, + 'Step 2 query', + ) + + const emailRows: { memberId: string; email: string }[] = await qx.select( + ` + SELECT "memberId", value AS email + FROM "memberIdentities" + WHERE "memberId" IN ($(memberIds:csv)) + AND type = $(type) + AND verified = true + AND "deletedAt" IS NULL + `, + { + memberIds, + type: MemberIdentityType.EMAIL, + }, + ) + + const emailsByMember = new Map() + for (const row of emailRows) { + const list = emailsByMember.get(row.memberId) ?? [] + list.push(row.email) + emailsByMember.set(row.memberId, list) + } + + const t2 = performance.now() + log.info({ members: memberIds.length, emails: emailRows.length, ms: Math.round(t2 - t1) }, 'Step 2: emails lookup') + + // Step 3: fetch work experiences for found members + log.info( + { + query: ` + SELECT mo.*, o."displayName" as "organizationName", o.logo as "organizationLogo" + FROM "memberOrganizations" mo + JOIN organizations o ON mo."organizationId" = o.id + WHERE mo."memberId" IN (${memberIds.map((id) => `'${id}'`).join(', ')}) + AND mo."deletedAt" IS NULL + `, + }, + 'Step 3 query', + ) + + const orgsMap = await fetchManyMemberOrgsWithOrgData(qx, memberIds) + + const t3 = performance.now() + log.info({ members: memberIds.length, ms: Math.round(t3 - t2) }, 'Step 3: work experiences lookup') + + // Step 4: build response + const contributors: AffiliationResult[] = memberRows.map((member) => { + const workExperiences = orgsMap.get(member.memberId) ?? [] + + const affiliations = workExperiences + .sort((a, b) => { + if (!a.dateStart) return 1 + if (!b.dateStart) return -1 + return new Date(b.dateStart).getTime() - new Date(a.dateStart).getTime() + }) + .map((we) => ({ + organization: we.organizationName, + startDate: we.dateStart ? new Date(we.dateStart).toISOString() : null, + endDate: we.dateEnd ? new Date(we.dateEnd).toISOString() : null, + })) + + return { + githubHandle: member.githubHandle, + name: member.displayName, + emails: emailsByMember.get(member.memberId) ?? [], + affiliations, + } + }) + + log.info({ handles: githubHandles.length, found: contributors.length, notFound: notFound.length, totalMs: Math.round(t3 - t0) }, 'dev-stats affiliations query complete') + + return { contributors, notFound } +} From fb7991a33e042173d04c0b80e288cd389ca5b124 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Thu, 19 Mar 2026 17:24:21 +0100 Subject: [PATCH 09/26] feat: add filtering on query layer Signed-off-by: Umberto Sgueglia --- .../src/api/public/v1/dev-stats/queries.ts | 95 +++++++++++++++---- 1 file changed, 76 insertions(+), 19 deletions(-) diff --git a/backend/src/api/public/v1/dev-stats/queries.ts b/backend/src/api/public/v1/dev-stats/queries.ts index 8e8b45ab21..1f36d02523 100644 --- a/backend/src/api/public/v1/dev-stats/queries.ts +++ b/backend/src/api/public/v1/dev-stats/queries.ts @@ -1,15 +1,10 @@ -import { fetchManyMemberOrgsWithOrgData } from '@crowd/data-access-layer' import { getServiceChildLogger } from '@crowd/logging' import { MemberIdentityType, PlatformType } from '@crowd/types' import type { QueryExecutor } from '@crowd/data-access-layer/src/queryExecutor' const log = getServiceChildLogger('dev-stats-queries') -export interface MemberRow { - githubHandle: string - memberId: string - displayName: string | null -} +const BLACKLISTED_TITLES = ['investor', 'mentor', 'board member'] export interface AffiliationResult { githubHandle: string @@ -27,15 +22,29 @@ export interface DevStatsQueryResult { notFound: string[] } +interface MemberRow { + githubHandle: string + memberId: string + displayName: string | null +} + +interface WorkExperienceRow { + memberId: string + organizationName: string + title: string | null + dateStart: string | null + dateEnd: string | null +} + export async function findAffiliationsByGithubHandles( qx: QueryExecutor, githubHandles: string[], ): Promise { const t0 = performance.now() - // Step 1: find members by github handles const lowercasedHandles = githubHandles.map((h) => h.toLowerCase()) + // Step 1: find verified members by github handles log.info( { query: ` @@ -44,6 +53,7 @@ export async function findAffiliationsByGithubHandles( JOIN members m ON m.id = mi."memberId" WHERE mi.platform = '${PlatformType.GITHUB}' AND mi.type = '${MemberIdentityType.USERNAME}' + AND mi.verified = true AND lower(mi.value) IN (${lowercasedHandles.map((h) => `'${h}'`).join(', ')}) AND mi."deletedAt" IS NULL AND m."deletedAt" IS NULL @@ -62,6 +72,7 @@ export async function findAffiliationsByGithubHandles( JOIN members m ON m.id = mi."memberId" WHERE mi.platform = $(platform) AND mi.type = $(type) + AND mi.verified = true AND lower(mi.value) IN ($(lowercasedHandles:csv)) AND mi."deletedAt" IS NULL AND m."deletedAt" IS NULL @@ -74,10 +85,13 @@ export async function findAffiliationsByGithubHandles( ) const t1 = performance.now() - log.info({ handles: githubHandles.length, found: memberRows.length, ms: Math.round(t1 - t0) }, 'Step 1: members lookup') + log.info( + { handles: githubHandles.length, found: memberRows.length, ms: Math.round(t1 - t0) }, + 'Step 1: members lookup', + ) - const foundHandles = new Set(memberRows.map((r) => r.githubHandle)) - const notFound = githubHandles.filter((h) => !foundHandles.has(h)) + const foundHandles = new Set(memberRows.map((r) => r.githubHandle.toLowerCase())) + const notFound = githubHandles.filter((h) => !foundHandles.has(h.toLowerCase())) if (memberRows.length === 0) { return { contributors: [], notFound } @@ -105,13 +119,13 @@ export async function findAffiliationsByGithubHandles( SELECT "memberId", value AS email FROM "memberIdentities" WHERE "memberId" IN ($(memberIds:csv)) - AND type = $(type) + AND type = $(emailType) AND verified = true AND "deletedAt" IS NULL `, { memberIds, - type: MemberIdentityType.EMAIL, + emailType: MemberIdentityType.EMAIL, }, ) @@ -123,30 +137,65 @@ export async function findAffiliationsByGithubHandles( } const t2 = performance.now() - log.info({ members: memberIds.length, emails: emailRows.length, ms: Math.round(t2 - t1) }, 'Step 2: emails lookup') + log.info( + { members: memberIds.length, emails: emailRows.length, ms: Math.round(t2 - t1) }, + 'Step 2: emails lookup', + ) - // Step 3: fetch work experiences for found members + // Step 3: fetch work experiences — excluding blocked affiliations log.info( { query: ` - SELECT mo.*, o."displayName" as "organizationName", o.logo as "organizationLogo" + SELECT mo."memberId", o."displayName" AS "organizationName", mo.title, mo."dateStart", mo."dateEnd" FROM "memberOrganizations" mo JOIN organizations o ON mo."organizationId" = o.id + LEFT JOIN "memberOrganizationAffiliationOverrides" ovr ON ovr."memberOrganizationId" = mo.id WHERE mo."memberId" IN (${memberIds.map((id) => `'${id}'`).join(', ')}) AND mo."deletedAt" IS NULL + AND COALESCE(ovr."allowAffiliation", true) = true `, }, 'Step 3 query', ) - const orgsMap = await fetchManyMemberOrgsWithOrgData(qx, memberIds) + const workExperienceRows: WorkExperienceRow[] = await qx.select( + ` + SELECT + mo."memberId", + o."displayName" AS "organizationName", + mo.title, + mo."dateStart", + mo."dateEnd" + FROM "memberOrganizations" mo + JOIN organizations o ON mo."organizationId" = o.id + LEFT JOIN "memberOrganizationAffiliationOverrides" ovr ON ovr."memberOrganizationId" = mo.id + WHERE mo."memberId" IN ($(memberIds:csv)) + AND mo."deletedAt" IS NULL + AND COALESCE(ovr."allowAffiliation", true) = true + `, + { memberIds }, + ) const t3 = performance.now() - log.info({ members: memberIds.length, ms: Math.round(t3 - t2) }, 'Step 3: work experiences lookup') + log.info( + { members: memberIds.length, ms: Math.round(t3 - t2) }, + 'Step 3: work experiences lookup', + ) + + // group work experiences by memberId, filtering blacklisted titles + const workExpByMember = new Map() + for (const row of workExperienceRows) { + if (row.title && BLACKLISTED_TITLES.some((t) => row.title.toLowerCase().includes(t))) { + continue + } + const list = workExpByMember.get(row.memberId) ?? [] + list.push(row) + workExpByMember.set(row.memberId, list) + } // Step 4: build response const contributors: AffiliationResult[] = memberRows.map((member) => { - const workExperiences = orgsMap.get(member.memberId) ?? [] + const workExperiences = workExpByMember.get(member.memberId) ?? [] const affiliations = workExperiences .sort((a, b) => { @@ -168,7 +217,15 @@ export async function findAffiliationsByGithubHandles( } }) - log.info({ handles: githubHandles.length, found: contributors.length, notFound: notFound.length, totalMs: Math.round(t3 - t0) }, 'dev-stats affiliations query complete') + log.info( + { + handles: githubHandles.length, + found: contributors.length, + notFound: notFound.length, + totalMs: Math.round(t3 - t0), + }, + 'dev-stats affiliations query complete', + ) return { contributors, notFound } } From f8a424268abb2553912ee20a56552ab7c24d6fca Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Thu, 19 Mar 2026 17:35:16 +0100 Subject: [PATCH 10/26] feat: refactor in dal Signed-off-by: Umberto Sgueglia --- .../public/v1/dev-stats/getAffiliations.ts | 103 +++++++- .../src/api/public/v1/dev-stats/queries.ts | 231 ------------------ .../data-access-layer/src/devStats/index.ts | 94 +++++++ services/libs/data-access-layer/src/index.ts | 1 + 4 files changed, 191 insertions(+), 238 deletions(-) delete mode 100644 backend/src/api/public/v1/dev-stats/queries.ts create mode 100644 services/libs/data-access-layer/src/devStats/index.ts diff --git a/backend/src/api/public/v1/dev-stats/getAffiliations.ts b/backend/src/api/public/v1/dev-stats/getAffiliations.ts index 6d1988fea9..2420e90f84 100644 --- a/backend/src/api/public/v1/dev-stats/getAffiliations.ts +++ b/backend/src/api/public/v1/dev-stats/getAffiliations.ts @@ -1,12 +1,18 @@ import type { Request, Response } from 'express' import { z } from 'zod' -import { optionsQx } from '@crowd/data-access-layer' +import { + findMembersByGithubHandles, + findVerifiedEmailsByMemberIds, + findWorkExperiencesByMemberIds, + optionsQx, +} from '@crowd/data-access-layer' +import { getServiceChildLogger } from '@crowd/logging' import { ok } from '@/utils/api' import { validateOrThrow } from '@/utils/validation' -import { findAffiliationsByGithubHandles } from './queries' +const log = getServiceChildLogger('dev-stats') const MAX_HANDLES = 1000 @@ -21,11 +27,94 @@ export async function getAffiliations(req: Request, res: Response): Promise h.toLowerCase()) + + // Step 1: find verified members by github handles + const memberRows = await findMembersByGithubHandles(qx, lowercasedHandles) + + const t1 = performance.now() + log.info( + { handles: githubHandles.length, found: memberRows.length, ms: Math.round(t1 - t0) }, + 'Step 1: members lookup', + ) + + const foundHandles = new Set(memberRows.map((r) => r.githubHandle.toLowerCase())) + const notFound = githubHandles.filter((h) => !foundHandles.has(h.toLowerCase())) + + if (memberRows.length === 0) { + ok(res, { total_found: 0, contributors: [], notFound }) + return + } + + const memberIds = memberRows.map((r) => r.memberId) + + // Step 2: fetch verified emails + const emailRows = await findVerifiedEmailsByMemberIds(qx, memberIds) + + const t2 = performance.now() + log.info( + { members: memberIds.length, emails: emailRows.length, ms: Math.round(t2 - t1) }, + 'Step 2: emails lookup', + ) + + const emailsByMember = new Map() + for (const row of emailRows) { + const list = emailsByMember.get(row.memberId) ?? [] + list.push(row.email) + emailsByMember.set(row.memberId, list) + } + + // Step 3: fetch work experiences + const workExperienceRows = await findWorkExperiencesByMemberIds(qx, memberIds) + + const t3 = performance.now() + log.info( + { members: memberIds.length, rows: workExperienceRows.length, ms: Math.round(t3 - t2) }, + 'Step 3: work experiences lookup', + ) + + const workExpByMember = new Map() + for (const row of workExperienceRows) { + const list = workExpByMember.get(row.memberId) ?? [] + list.push(row) + workExpByMember.set(row.memberId, list) + } + + // Step 4: build response + const contributors = memberRows.map((member) => { + const workExperiences = workExpByMember.get(member.memberId) ?? [] + + const affiliations = workExperiences + .sort((a, b) => { + if (!a.dateStart) return 1 + if (!b.dateStart) return -1 + return new Date(b.dateStart).getTime() - new Date(a.dateStart).getTime() + }) + .map((we) => ({ + organization: we.organizationName, + startDate: we.dateStart ? new Date(we.dateStart).toISOString() : null, + endDate: we.dateEnd ? new Date(we.dateEnd).toISOString() : null, + })) + + return { + githubHandle: member.githubHandle, + name: member.displayName, + emails: emailsByMember.get(member.memberId) ?? [], + affiliations, + } }) + + log.info( + { + handles: githubHandles.length, + found: contributors.length, + notFound: notFound.length, + totalMs: Math.round(t3 - t0), + }, + 'dev-stats affiliations complete', + ) + + ok(res, { total_found: contributors.length, contributors, notFound }) } diff --git a/backend/src/api/public/v1/dev-stats/queries.ts b/backend/src/api/public/v1/dev-stats/queries.ts deleted file mode 100644 index 1f36d02523..0000000000 --- a/backend/src/api/public/v1/dev-stats/queries.ts +++ /dev/null @@ -1,231 +0,0 @@ -import { getServiceChildLogger } from '@crowd/logging' -import { MemberIdentityType, PlatformType } from '@crowd/types' -import type { QueryExecutor } from '@crowd/data-access-layer/src/queryExecutor' - -const log = getServiceChildLogger('dev-stats-queries') - -const BLACKLISTED_TITLES = ['investor', 'mentor', 'board member'] - -export interface AffiliationResult { - githubHandle: string - name: string | null - emails: string[] - affiliations: { - organization: string - startDate: string | null - endDate: string | null - }[] -} - -export interface DevStatsQueryResult { - contributors: AffiliationResult[] - notFound: string[] -} - -interface MemberRow { - githubHandle: string - memberId: string - displayName: string | null -} - -interface WorkExperienceRow { - memberId: string - organizationName: string - title: string | null - dateStart: string | null - dateEnd: string | null -} - -export async function findAffiliationsByGithubHandles( - qx: QueryExecutor, - githubHandles: string[], -): Promise { - const t0 = performance.now() - - const lowercasedHandles = githubHandles.map((h) => h.toLowerCase()) - - // Step 1: find verified members by github handles - log.info( - { - query: ` - SELECT mi.value AS "githubHandle", mi."memberId", m."displayName" - FROM "memberIdentities" mi - JOIN members m ON m.id = mi."memberId" - WHERE mi.platform = '${PlatformType.GITHUB}' - AND mi.type = '${MemberIdentityType.USERNAME}' - AND mi.verified = true - AND lower(mi.value) IN (${lowercasedHandles.map((h) => `'${h}'`).join(', ')}) - AND mi."deletedAt" IS NULL - AND m."deletedAt" IS NULL - `, - }, - 'Step 1 query', - ) - - const memberRows: MemberRow[] = await qx.select( - ` - SELECT - mi.value AS "githubHandle", - mi."memberId", - m."displayName" - FROM "memberIdentities" mi - JOIN members m ON m.id = mi."memberId" - WHERE mi.platform = $(platform) - AND mi.type = $(type) - AND mi.verified = true - AND lower(mi.value) IN ($(lowercasedHandles:csv)) - AND mi."deletedAt" IS NULL - AND m."deletedAt" IS NULL - `, - { - platform: PlatformType.GITHUB, - type: MemberIdentityType.USERNAME, - lowercasedHandles, - }, - ) - - const t1 = performance.now() - log.info( - { handles: githubHandles.length, found: memberRows.length, ms: Math.round(t1 - t0) }, - 'Step 1: members lookup', - ) - - const foundHandles = new Set(memberRows.map((r) => r.githubHandle.toLowerCase())) - const notFound = githubHandles.filter((h) => !foundHandles.has(h.toLowerCase())) - - if (memberRows.length === 0) { - return { contributors: [], notFound } - } - - const memberIds = memberRows.map((r) => r.memberId) - - // Step 2: fetch verified emails for found members - log.info( - { - query: ` - SELECT "memberId", value AS email - FROM "memberIdentities" - WHERE "memberId" IN (${memberIds.map((id) => `'${id}'`).join(', ')}) - AND type = '${MemberIdentityType.EMAIL}' - AND verified = true - AND "deletedAt" IS NULL - `, - }, - 'Step 2 query', - ) - - const emailRows: { memberId: string; email: string }[] = await qx.select( - ` - SELECT "memberId", value AS email - FROM "memberIdentities" - WHERE "memberId" IN ($(memberIds:csv)) - AND type = $(emailType) - AND verified = true - AND "deletedAt" IS NULL - `, - { - memberIds, - emailType: MemberIdentityType.EMAIL, - }, - ) - - const emailsByMember = new Map() - for (const row of emailRows) { - const list = emailsByMember.get(row.memberId) ?? [] - list.push(row.email) - emailsByMember.set(row.memberId, list) - } - - const t2 = performance.now() - log.info( - { members: memberIds.length, emails: emailRows.length, ms: Math.round(t2 - t1) }, - 'Step 2: emails lookup', - ) - - // Step 3: fetch work experiences — excluding blocked affiliations - log.info( - { - query: ` - SELECT mo."memberId", o."displayName" AS "organizationName", mo.title, mo."dateStart", mo."dateEnd" - FROM "memberOrganizations" mo - JOIN organizations o ON mo."organizationId" = o.id - LEFT JOIN "memberOrganizationAffiliationOverrides" ovr ON ovr."memberOrganizationId" = mo.id - WHERE mo."memberId" IN (${memberIds.map((id) => `'${id}'`).join(', ')}) - AND mo."deletedAt" IS NULL - AND COALESCE(ovr."allowAffiliation", true) = true - `, - }, - 'Step 3 query', - ) - - const workExperienceRows: WorkExperienceRow[] = await qx.select( - ` - SELECT - mo."memberId", - o."displayName" AS "organizationName", - mo.title, - mo."dateStart", - mo."dateEnd" - FROM "memberOrganizations" mo - JOIN organizations o ON mo."organizationId" = o.id - LEFT JOIN "memberOrganizationAffiliationOverrides" ovr ON ovr."memberOrganizationId" = mo.id - WHERE mo."memberId" IN ($(memberIds:csv)) - AND mo."deletedAt" IS NULL - AND COALESCE(ovr."allowAffiliation", true) = true - `, - { memberIds }, - ) - - const t3 = performance.now() - log.info( - { members: memberIds.length, ms: Math.round(t3 - t2) }, - 'Step 3: work experiences lookup', - ) - - // group work experiences by memberId, filtering blacklisted titles - const workExpByMember = new Map() - for (const row of workExperienceRows) { - if (row.title && BLACKLISTED_TITLES.some((t) => row.title.toLowerCase().includes(t))) { - continue - } - const list = workExpByMember.get(row.memberId) ?? [] - list.push(row) - workExpByMember.set(row.memberId, list) - } - - // Step 4: build response - const contributors: AffiliationResult[] = memberRows.map((member) => { - const workExperiences = workExpByMember.get(member.memberId) ?? [] - - const affiliations = workExperiences - .sort((a, b) => { - if (!a.dateStart) return 1 - if (!b.dateStart) return -1 - return new Date(b.dateStart).getTime() - new Date(a.dateStart).getTime() - }) - .map((we) => ({ - organization: we.organizationName, - startDate: we.dateStart ? new Date(we.dateStart).toISOString() : null, - endDate: we.dateEnd ? new Date(we.dateEnd).toISOString() : null, - })) - - return { - githubHandle: member.githubHandle, - name: member.displayName, - emails: emailsByMember.get(member.memberId) ?? [], - affiliations, - } - }) - - log.info( - { - handles: githubHandles.length, - found: contributors.length, - notFound: notFound.length, - totalMs: Math.round(t3 - t0), - }, - 'dev-stats affiliations query complete', - ) - - return { contributors, notFound } -} diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts new file mode 100644 index 0000000000..d573d311c8 --- /dev/null +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -0,0 +1,94 @@ +import { MemberIdentityType, PlatformType } from '@crowd/types' + +import { QueryExecutor } from '../queryExecutor' + +const BLACKLISTED_TITLES = ['investor', 'mentor', 'board member'] + +export interface IDevStatsMemberRow { + githubHandle: string + memberId: string + displayName: string | null +} + +export interface IDevStatsWorkExperience { + memberId: string + organizationName: string + title: string | null + dateStart: string | null + dateEnd: string | null +} + +export async function findMembersByGithubHandles( + qx: QueryExecutor, + lowercasedHandles: string[], +): Promise { + return qx.select( + ` + SELECT + mi.value AS "githubHandle", + mi."memberId", + m."displayName" + FROM "memberIdentities" mi + JOIN members m ON m.id = mi."memberId" + WHERE mi.platform = $(platform) + AND mi.type = $(type) + AND mi.verified = true + AND lower(mi.value) IN ($(lowercasedHandles:csv)) + AND mi."deletedAt" IS NULL + AND m."deletedAt" IS NULL + `, + { + platform: PlatformType.GITHUB, + type: MemberIdentityType.USERNAME, + lowercasedHandles, + }, + ) +} + +export async function findVerifiedEmailsByMemberIds( + qx: QueryExecutor, + memberIds: string[], +): Promise<{ memberId: string; email: string }[]> { + return qx.select( + ` + SELECT "memberId", value AS email + FROM "memberIdentities" + WHERE "memberId" IN ($(memberIds:csv)) + AND type = $(type) + AND verified = true + AND "deletedAt" IS NULL + `, + { + memberIds, + type: MemberIdentityType.EMAIL, + }, + ) +} + +export async function findWorkExperiencesByMemberIds( + qx: QueryExecutor, + memberIds: string[], +): Promise { + const rows: IDevStatsWorkExperience[] = await qx.select( + ` + SELECT + mo."memberId", + o."displayName" AS "organizationName", + mo.title, + mo."dateStart", + mo."dateEnd" + FROM "memberOrganizations" mo + JOIN organizations o ON mo."organizationId" = o.id + LEFT JOIN "memberOrganizationAffiliationOverrides" ovr ON ovr."memberOrganizationId" = mo.id + WHERE mo."memberId" IN ($(memberIds:csv)) + AND mo."deletedAt" IS NULL + AND COALESCE(ovr."allowAffiliation", true) = true + `, + { memberIds }, + ) + + return rows.filter( + (r) => + !r.title || !BLACKLISTED_TITLES.some((t) => r.title.toLowerCase().includes(t)), + ) +} diff --git a/services/libs/data-access-layer/src/index.ts b/services/libs/data-access-layer/src/index.ts index 639f0547b8..459fa15495 100644 --- a/services/libs/data-access-layer/src/index.ts +++ b/services/libs/data-access-layer/src/index.ts @@ -1,4 +1,5 @@ export * from './activities' +export * from './devStats' export * from './activityRelations' export * from './apiKeys' export * from './dashboards' From 744f8b57af8903618bd74c6067139f7dbb1e9f10 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Fri, 20 Mar 2026 12:04:09 +0100 Subject: [PATCH 11/26] feat: add affiliations Signed-off-by: Umberto Sgueglia --- .../public/v1/dev-stats/getAffiliations.ts | 45 +-- backend/src/api/public/v1/dev-stats/index.ts | 6 +- .../data-access-layer/src/devStats/index.ts | 305 +++++++++++++++++- 3 files changed, 311 insertions(+), 45 deletions(-) diff --git a/backend/src/api/public/v1/dev-stats/getAffiliations.ts b/backend/src/api/public/v1/dev-stats/getAffiliations.ts index 2420e90f84..4063fea2b0 100644 --- a/backend/src/api/public/v1/dev-stats/getAffiliations.ts +++ b/backend/src/api/public/v1/dev-stats/getAffiliations.ts @@ -4,8 +4,8 @@ import { z } from 'zod' import { findMembersByGithubHandles, findVerifiedEmailsByMemberIds, - findWorkExperiencesByMemberIds, optionsQx, + resolveAffiliationsByMemberIds, } from '@crowd/data-access-layer' import { getServiceChildLogger } from '@crowd/logging' @@ -66,45 +66,22 @@ export async function getAffiliations(req: Request, res: Response): Promise() - for (const row of workExperienceRows) { - const list = workExpByMember.get(row.memberId) ?? [] - list.push(row) - workExpByMember.set(row.memberId, list) - } - // Step 4: build response - const contributors = memberRows.map((member) => { - const workExperiences = workExpByMember.get(member.memberId) ?? [] - - const affiliations = workExperiences - .sort((a, b) => { - if (!a.dateStart) return 1 - if (!b.dateStart) return -1 - return new Date(b.dateStart).getTime() - new Date(a.dateStart).getTime() - }) - .map((we) => ({ - organization: we.organizationName, - startDate: we.dateStart ? new Date(we.dateStart).toISOString() : null, - endDate: we.dateEnd ? new Date(we.dateEnd).toISOString() : null, - })) - - return { - githubHandle: member.githubHandle, - name: member.displayName, - emails: emailsByMember.get(member.memberId) ?? [], - affiliations, - } - }) + const contributors = memberRows.map((member) => ({ + githubHandle: member.githubHandle, + name: member.displayName, + emails: emailsByMember.get(member.memberId) ?? [], + affiliations: affiliationsByMember.get(member.memberId) ?? [], + })) log.info( { diff --git a/backend/src/api/public/v1/dev-stats/index.ts b/backend/src/api/public/v1/dev-stats/index.ts index 3dc77716a3..6ed9a9a440 100644 --- a/backend/src/api/public/v1/dev-stats/index.ts +++ b/backend/src/api/public/v1/dev-stats/index.ts @@ -4,6 +4,8 @@ import { createRateLimiter } from '@/api/apiRateLimiter' import { requireScopes } from '@/api/public/middlewares/requireScopes' import { SCOPES } from '@/security/scopes' +import { getAffiliations } from './getAffiliations' + const rateLimiter = createRateLimiter({ max: 60, windowMs: 60 * 1000 }) export function devStatsRouter(): Router { @@ -11,9 +13,7 @@ export function devStatsRouter(): Router { router.use(rateLimiter) - router.post('/affiliations', requireScopes([SCOPES.READ_AFFILIATIONS]), (_req, res) => { - res.json({ status: 'ok' }) - }) + router.post('/affiliations', requireScopes([SCOPES.READ_AFFILIATIONS]), getAffiliations) return router } diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts index d573d311c8..f58a8cbb88 100644 --- a/services/libs/data-access-layer/src/devStats/index.ts +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -1,23 +1,46 @@ +import _ from 'lodash' + import { MemberIdentityType, PlatformType } from '@crowd/types' import { QueryExecutor } from '../queryExecutor' +// ─── Constants ──────────────────────────────────────────────────────────────── + const BLACKLISTED_TITLES = ['investor', 'mentor', 'board member'] +// ─── Public interfaces ──────────────────────────────────────────────────────── + export interface IDevStatsMemberRow { githubHandle: string memberId: string displayName: string | null } -export interface IDevStatsWorkExperience { +export interface IDevStatsAffiliation { + organization: string + startDate: string | null + endDate: string | null +} + +// ─── Internal row type (union of memberOrganizations + manual affiliations) ─── + +interface IDevStatsWorkRow { + id: string memberId: string + organizationId: string organizationName: string title: string | null dateStart: string | null dateEnd: string | null + createdAt: string + isPrimaryWorkExperience: boolean + memberCount: number + /** null for memberOrganizations rows; non-null for memberSegmentAffiliations rows */ + segmentId: string | null } +// ─── Step 1: member lookup ──────────────────────────────────────────────────── + export async function findMembersByGithubHandles( qx: QueryExecutor, lowercasedHandles: string[], @@ -45,6 +68,8 @@ export async function findMembersByGithubHandles( ) } +// ─── Step 2: verified emails ────────────────────────────────────────────────── + export async function findVerifiedEmailsByMemberIds( qx: QueryExecutor, memberIds: string[], @@ -65,21 +90,42 @@ export async function findVerifiedEmailsByMemberIds( ) } -export async function findWorkExperiencesByMemberIds( +// ─── Step 3a: regular work experiences (bulk) ───────────────────────────────── + +async function findWorkExperiencesBulk( qx: QueryExecutor, memberIds: string[], -): Promise { - const rows: IDevStatsWorkExperience[] = await qx.select( +): Promise { + const rows: IDevStatsWorkRow[] = await qx.select( ` + WITH aggs AS ( + SELECT + osa."organizationId", + sum(osa."memberCount") AS total_count + FROM "organizationSegmentsAgg" osa + WHERE osa."segmentId" IN ( + SELECT id FROM segments + WHERE "grandparentId" IS NOT NULL + AND "parentId" IS NOT NULL + ) + GROUP BY osa."organizationId" + ) SELECT + mo.id, mo."memberId", - o."displayName" AS "organizationName", + mo."organizationId", + o."displayName" AS "organizationName", mo.title, mo."dateStart", - mo."dateEnd" + mo."dateEnd", + mo."createdAt", + COALESCE(ovr."isPrimaryWorkExperience", false) AS "isPrimaryWorkExperience", + COALESCE(a.total_count, 0) AS "memberCount", + NULL::text AS "segmentId" FROM "memberOrganizations" mo JOIN organizations o ON mo."organizationId" = o.id LEFT JOIN "memberOrganizationAffiliationOverrides" ovr ON ovr."memberOrganizationId" = mo.id + LEFT JOIN aggs a ON a."organizationId" = mo."organizationId" WHERE mo."memberId" IN ($(memberIds:csv)) AND mo."deletedAt" IS NULL AND COALESCE(ovr."allowAffiliation", true) = true @@ -88,7 +134,250 @@ export async function findWorkExperiencesByMemberIds( ) return rows.filter( - (r) => - !r.title || !BLACKLISTED_TITLES.some((t) => r.title.toLowerCase().includes(t)), + (r) => !r.title || !BLACKLISTED_TITLES.some((t) => r.title!.toLowerCase().includes(t)), ) } + +// ─── Step 3b: manual affiliations (bulk) ───────────────────────────────────── + +async function findManualAffiliationsBulk( + qx: QueryExecutor, + memberIds: string[], +): Promise { + return qx.select( + ` + SELECT + msa.id, + msa."memberId", + msa."organizationId", + o."displayName" AS "organizationName", + NULL AS title, + msa."dateStart", + msa."dateEnd", + msa."createdAt", + false AS "isPrimaryWorkExperience", + 0 AS "memberCount", + msa."segmentId" + FROM "memberSegmentAffiliations" msa + JOIN organizations o ON msa."organizationId" = o.id + WHERE msa."memberId" IN ($(memberIds:csv)) + `, + { memberIds }, + ) +} + +// ─── Selection priority (mirrors selectPrimaryWorkExperience) ───────────────── + +function longestDateRange(orgs: IDevStatsWorkRow[]): IDevStatsWorkRow { + const withDates = orgs.filter((r) => r.dateStart) + if (withDates.length === 0) return orgs[0] + + return withDates.reduce((best, curr) => { + const bestMs = + new Date(best.dateEnd ?? '9999-12-31').getTime() - new Date(best.dateStart!).getTime() + const currMs = + new Date(curr.dateEnd ?? '9999-12-31').getTime() - new Date(curr.dateStart!).getTime() + return currMs > bestMs ? curr : best + }) +} + +function selectPrimaryWorkExperience(orgs: IDevStatsWorkRow[]): IDevStatsWorkRow { + if (orgs.length === 1) return orgs[0] + + // 1. Manual affiliations (segmentId non-null) always win + const manual = orgs.filter((r) => r.segmentId !== null) + if (manual.length > 0) { + if (manual.length === 1) return manual[0] + return longestDateRange(manual) + } + + // 2. isPrimaryWorkExperience = true — prefer those with a dateStart + const primary = orgs.filter((r) => r.isPrimaryWorkExperience) + if (primary.length > 0) { + const withDates = primary.filter((r) => r.dateStart) + if (withDates.length > 0) return withDates[0] + return primary[0] + } + + // 3. Only one org has a dateStart — pick it + const withDates = orgs.filter((r) => r.dateStart) + if (withDates.length === 1) return withDates[0] + + // 4. Org with strictly more members wins; if tied, fall through + const sorted = [...orgs].sort((a, b) => b.memberCount - a.memberCount) + if (sorted.length >= 2 && sorted[0].memberCount > sorted[1].memberCount) { + return sorted[0] + } + + // 5. Longest date range as final tiebreaker + return longestDateRange(orgs) +} + +// ─── Per-member affiliation resolution ─────────────────────────────────────── + +function resolveAffiliationsForMember(rows: IDevStatsWorkRow[]): IDevStatsAffiliation[] { + // Undated org cleanup: if one undated org is marked as primary, drop all other undated orgs + const primaryUndated = rows.find( + (r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd, + ) + const cleaned = primaryUndated + ? rows.filter((r) => r.dateStart || r.id === primaryUndated.id) + : rows + + // Fallback org: covers gaps and pre-history; primary undated takes precedence, + // otherwise use the earliest-created undated org + const fallbackOrg = + primaryUndated ?? + (_.chain(cleaned) + .filter((r) => !r.dateStart && !r.dateEnd) + .sortBy('createdAt') + .head() + .value() as IDevStatsWorkRow | undefined) ?? + null + + const datedRows = cleaned.filter((r) => r.dateStart) + if (datedRows.length === 0) { + return [] + } + + // Collect date boundaries: each dateStart and (dateEnd + 1 day) as interval edges + const today = new Date() + today.setHours(0, 0, 0, 0) + + const boundarySet = new Set() + for (const row of datedRows) { + const start = new Date(row.dateStart!) + start.setHours(0, 0, 0, 0) + if (start.getTime() <= today.getTime()) { + boundarySet.add(start.getTime()) + } + + if (row.dateEnd) { + const afterEnd = new Date(row.dateEnd) + afterEnd.setHours(0, 0, 0, 0) + afterEnd.setDate(afterEnd.getDate() + 1) + if (afterEnd.getTime() <= today.getTime()) { + boundarySet.add(afterEnd.getTime()) + } + } + } + boundarySet.add(today.getTime()) + + const boundaries = Array.from(boundarySet).sort((a, b) => a - b) + + const resolved: IDevStatsAffiliation[] = [] + let currentOrg: IDevStatsWorkRow | null = null + let currentStart: Date | null = null + let gapStart: Date | null = null + + for (let i = 0; i < boundaries.length - 1; i++) { + const intervalStart = new Date(boundaries[i]) + + // Orgs active at the start of this interval + const active = datedRows.filter((r) => { + const start = new Date(r.dateStart!) + start.setHours(0, 0, 0, 0) + const end = r.dateEnd ? new Date(r.dateEnd) : null + if (end) end.setHours(0, 0, 0, 0) + return intervalStart >= start && (!end || intervalStart <= end) + }) + + if (active.length === 0) { + // Gap — close current org segment if open + if (currentOrg && currentStart) { + const dayBefore = new Date(intervalStart) + dayBefore.setDate(dayBefore.getDate() - 1) + resolved.push({ + organization: currentOrg.organizationName, + startDate: currentStart.toISOString(), + endDate: dayBefore.toISOString(), + }) + currentOrg = null + currentStart = null + } + if (gapStart === null) gapStart = new Date(intervalStart) + } else { + // Close gap with fallback org if present + if (gapStart !== null) { + const dayBefore = new Date(intervalStart) + dayBefore.setDate(dayBefore.getDate() - 1) + if (fallbackOrg) { + resolved.push({ + organization: fallbackOrg.organizationName, + startDate: gapStart.toISOString(), + endDate: dayBefore.toISOString(), + }) + } + gapStart = null + } + + const winner = selectPrimaryWorkExperience(active) + + if (!currentOrg) { + currentOrg = winner + currentStart = new Date(intervalStart) + } else if (currentOrg.organizationId !== winner.organizationId) { + // Org changed — close previous segment and open a new one + const dayBefore = new Date(intervalStart) + dayBefore.setDate(dayBefore.getDate() - 1) + resolved.push({ + organization: currentOrg.organizationName, + startDate: currentStart!.toISOString(), + endDate: dayBefore.toISOString(), + }) + currentOrg = winner + currentStart = new Date(intervalStart) + } + } + } + + // Close the final open segment + if (currentOrg && currentStart) { + resolved.push({ + organization: currentOrg.organizationName, + startDate: currentStart.toISOString(), + endDate: currentOrg.dateEnd ? new Date(currentOrg.dateEnd).toISOString() : null, + }) + } + + // Close a trailing gap with the fallback org + if (gapStart !== null && fallbackOrg) { + resolved.push({ + organization: fallbackOrg.organizationName, + startDate: gapStart.toISOString(), + endDate: null, + }) + } + + // Most recent affiliations first + return resolved.sort((a, b) => { + if (!a.startDate) return 1 + if (!b.startDate) return -1 + return new Date(b.startDate).getTime() - new Date(a.startDate).getTime() + }) +} + +// ─── Public bulk resolver ───────────────────────────────────────────────────── + +export async function resolveAffiliationsByMemberIds( + qx: QueryExecutor, + memberIds: string[], +): Promise> { + const [workExperiences, manualAffiliations] = await Promise.all([ + findWorkExperiencesBulk(qx, memberIds), + findManualAffiliationsBulk(qx, memberIds), + ]) + + const byMember = new Map() + for (const row of [...workExperiences, ...manualAffiliations]) { + const list = byMember.get(row.memberId) ?? [] + list.push(row) + byMember.set(row.memberId, list) + } + + const result = new Map() + for (const id of memberIds) { + result.set(id, resolveAffiliationsForMember(byMember.get(id) ?? [])) + } + return result +} From f11239a065497ac76cbf916fffea318d87e7c80c Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Fri, 20 Mar 2026 16:19:39 +0100 Subject: [PATCH 12/26] feat: adding logs Signed-off-by: Umberto Sgueglia --- .../data-access-layer/src/devStats/index.ts | 243 +++++++++++------- 1 file changed, 144 insertions(+), 99 deletions(-) diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts index f58a8cbb88..eb54742ca5 100644 --- a/services/libs/data-access-layer/src/devStats/index.ts +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -1,9 +1,10 @@ -import _ from 'lodash' - +import { getServiceChildLogger } from '@crowd/logging' import { MemberIdentityType, PlatformType } from '@crowd/types' import { QueryExecutor } from '../queryExecutor' +const log = getServiceChildLogger('dev-stats:affiliations') + // ─── Constants ──────────────────────────────────────────────────────────────── const BLACKLISTED_TITLES = ['investor', 'mentor', 'board member'] @@ -215,142 +216,186 @@ function selectPrimaryWorkExperience(orgs: IDevStatsWorkRow[]): IDevStatsWorkRow // ─── Per-member affiliation resolution ─────────────────────────────────────── -function resolveAffiliationsForMember(rows: IDevStatsWorkRow[]): IDevStatsAffiliation[] { - // Undated org cleanup: if one undated org is marked as primary, drop all other undated orgs - const primaryUndated = rows.find( - (r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd, - ) - const cleaned = primaryUndated - ? rows.filter((r) => r.dateStart || r.id === primaryUndated.id) - : rows +/** Returns the org used to fill gaps — primary undated wins, then earliest-created undated. */ +function findFallbackOrg(rows: IDevStatsWorkRow[]): IDevStatsWorkRow | null { + const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) + if (primaryUndated) return primaryUndated - // Fallback org: covers gaps and pre-history; primary undated takes precedence, - // otherwise use the earliest-created undated org - const fallbackOrg = - primaryUndated ?? - (_.chain(cleaned) + return ( + rows .filter((r) => !r.dateStart && !r.dateEnd) - .sortBy('createdAt') - .head() - .value() as IDevStatsWorkRow | undefined) ?? - null + .sort((a, b) => a.createdAt.localeCompare(b.createdAt)) + .at(0) ?? null + ) +} - const datedRows = cleaned.filter((r) => r.dateStart) - if (datedRows.length === 0) { - return [] - } +/** + * Collects all date boundaries from the dated rows, capped at today. + * Each dateStart and (dateEnd + 1 day) marks a point where active orgs can change. + */ +function collectBoundaries(datedRows: IDevStatsWorkRow[]): Date[] { + const today = startOfDay(new Date()) - // Collect date boundaries: each dateStart and (dateEnd + 1 day) as interval edges - const today = new Date() - today.setHours(0, 0, 0, 0) + const ms = new Set([today.getTime()]) - const boundarySet = new Set() for (const row of datedRows) { - const start = new Date(row.dateStart!) - start.setHours(0, 0, 0, 0) - if (start.getTime() <= today.getTime()) { - boundarySet.add(start.getTime()) - } + const start = startOfDay(row.dateStart!) + if (start <= today) ms.add(start.getTime()) if (row.dateEnd) { - const afterEnd = new Date(row.dateEnd) - afterEnd.setHours(0, 0, 0, 0) + const afterEnd = startOfDay(row.dateEnd) afterEnd.setDate(afterEnd.getDate() + 1) - if (afterEnd.getTime() <= today.getTime()) { - boundarySet.add(afterEnd.getTime()) - } + if (afterEnd <= today) ms.add(afterEnd.getTime()) } } - boundarySet.add(today.getTime()) - const boundaries = Array.from(boundarySet).sort((a, b) => a - b) + return Array.from(ms) + .sort((a, b) => a - b) + .map((t) => new Date(t)) +} + +function orgsActiveAt(datedRows: IDevStatsWorkRow[], point: Date): IDevStatsWorkRow[] { + return datedRows.filter((r) => { + const start = startOfDay(r.dateStart!) + const end = r.dateEnd ? startOfDay(r.dateEnd) : null + return point >= start && (!end || point <= end) + }) +} + +function startOfDay(date: Date | string): Date { + const d = new Date(date) + d.setHours(0, 0, 0, 0) + return d +} + +function dayBefore(date: Date): Date { + const d = new Date(date) + d.setDate(d.getDate() - 1) + return d +} +/** Iterates boundary intervals and builds non-overlapping affiliation segments. */ +function buildTimeline( + memberId: string, + datedRows: IDevStatsWorkRow[], + fallbackOrg: IDevStatsWorkRow | null, + boundaries: Date[], +): IDevStatsAffiliation[] { const resolved: IDevStatsAffiliation[] = [] let currentOrg: IDevStatsWorkRow | null = null let currentStart: Date | null = null let gapStart: Date | null = null + const closeSegment = (org: IDevStatsWorkRow, start: Date, end: Date) => { + log.debug( + { memberId, org: org.organizationName, start: start.toISOString(), end: end.toISOString() }, + 'closing segment', + ) + resolved.push({ organization: org.organizationName, startDate: start.toISOString(), endDate: end.toISOString() }) + } + for (let i = 0; i < boundaries.length - 1; i++) { - const intervalStart = new Date(boundaries[i]) - - // Orgs active at the start of this interval - const active = datedRows.filter((r) => { - const start = new Date(r.dateStart!) - start.setHours(0, 0, 0, 0) - const end = r.dateEnd ? new Date(r.dateEnd) : null - if (end) end.setHours(0, 0, 0, 0) - return intervalStart >= start && (!end || intervalStart <= end) - }) + const point = boundaries[i] + const active = orgsActiveAt(datedRows, point) + + log.debug( + { memberId, point: point.toISOString(), activeOrgs: active.map((r) => r.organizationName) }, + 'processing boundary', + ) if (active.length === 0) { - // Gap — close current org segment if open if (currentOrg && currentStart) { - const dayBefore = new Date(intervalStart) - dayBefore.setDate(dayBefore.getDate() - 1) - resolved.push({ - organization: currentOrg.organizationName, - startDate: currentStart.toISOString(), - endDate: dayBefore.toISOString(), - }) + closeSegment(currentOrg, currentStart, dayBefore(point)) currentOrg = null currentStart = null } - if (gapStart === null) gapStart = new Date(intervalStart) - } else { - // Close gap with fallback org if present - if (gapStart !== null) { - const dayBefore = new Date(intervalStart) - dayBefore.setDate(dayBefore.getDate() - 1) - if (fallbackOrg) { - resolved.push({ - organization: fallbackOrg.organizationName, - startDate: gapStart.toISOString(), - endDate: dayBefore.toISOString(), - }) - } - gapStart = null + if (gapStart === null) { + gapStart = point + log.debug({ memberId, gapStart: point.toISOString() }, 'gap started') } + continue + } - const winner = selectPrimaryWorkExperience(active) - - if (!currentOrg) { - currentOrg = winner - currentStart = new Date(intervalStart) - } else if (currentOrg.organizationId !== winner.organizationId) { - // Org changed — close previous segment and open a new one - const dayBefore = new Date(intervalStart) - dayBefore.setDate(dayBefore.getDate() - 1) - resolved.push({ - organization: currentOrg.organizationName, - startDate: currentStart!.toISOString(), - endDate: dayBefore.toISOString(), - }) - currentOrg = winner - currentStart = new Date(intervalStart) - } + if (gapStart !== null) { + log.debug( + { memberId, fallback: fallbackOrg?.organizationName ?? null, gapStart: gapStart.toISOString(), gapEnd: dayBefore(point).toISOString() }, + 'closing gap with fallback org', + ) + if (fallbackOrg) closeSegment(fallbackOrg, gapStart, dayBefore(point)) + gapStart = null + } + + const winner = selectPrimaryWorkExperience(active) + + if (!currentOrg) { + log.debug({ memberId, org: winner.organizationName, from: point.toISOString() }, 'opening segment') + currentOrg = winner + currentStart = point + } else if (currentOrg.organizationId !== winner.organizationId) { + log.debug( + { memberId, from: currentOrg.organizationName, to: winner.organizationName, at: point.toISOString() }, + 'org changed', + ) + closeSegment(currentOrg, currentStart!, dayBefore(point)) + currentOrg = winner + currentStart = point } } - // Close the final open segment + // Close the final open segment using the org's actual endDate (null = ongoing) if (currentOrg && currentStart) { + const endDate = currentOrg.dateEnd ? new Date(currentOrg.dateEnd).toISOString() : null + log.debug({ memberId, org: currentOrg.organizationName, start: currentStart.toISOString(), endDate }, 'closing final segment') resolved.push({ organization: currentOrg.organizationName, startDate: currentStart.toISOString(), - endDate: currentOrg.dateEnd ? new Date(currentOrg.dateEnd).toISOString() : null, + endDate, }) } - // Close a trailing gap with the fallback org + // Close a trailing gap with the fallback org (ongoing, no endDate) if (gapStart !== null && fallbackOrg) { - resolved.push({ - organization: fallbackOrg.organizationName, - startDate: gapStart.toISOString(), - endDate: null, - }) + log.debug({ memberId, fallback: fallbackOrg.organizationName, gapStart: gapStart.toISOString() }, 'closing trailing gap with fallback org') + resolved.push({ organization: fallbackOrg.organizationName, startDate: gapStart.toISOString(), endDate: null }) } - // Most recent affiliations first - return resolved.sort((a, b) => { + return resolved +} + +function resolveAffiliationsForMember(memberId: string, rows: IDevStatsWorkRow[]): IDevStatsAffiliation[] { + log.debug({ memberId, totalRows: rows.length }, 'resolving affiliations') + + // If one undated org is marked primary, drop all other undated orgs to avoid infinite conflicts + const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) + const cleaned = primaryUndated + ? rows.filter((r) => r.dateStart || r.id === primaryUndated.id) + : rows + + if (cleaned.length < rows.length) { + log.debug({ memberId, dropped: rows.length - cleaned.length }, 'dropped undated orgs (primary undated exists)') + } + + const fallbackOrg = findFallbackOrg(cleaned) + const datedRows = cleaned.filter((r) => r.dateStart) + + log.debug( + { memberId, datedRows: datedRows.length, fallbackOrg: fallbackOrg?.organizationName ?? null }, + 'prepared rows', + ) + + if (datedRows.length === 0) { + log.debug({ memberId }, 'no dated rows — returning empty affiliations') + return [] + } + + const boundaries = collectBoundaries(datedRows) + log.debug({ memberId, boundaries: boundaries.length }, 'collected boundaries') + + const timeline = buildTimeline(memberId, datedRows, fallbackOrg, boundaries) + + log.debug({ memberId, affiliations: timeline.length }, 'timeline built') + + return timeline.sort((a, b) => { if (!a.startDate) return 1 if (!b.startDate) return -1 return new Date(b.startDate).getTime() - new Date(a.startDate).getTime() @@ -377,7 +422,7 @@ export async function resolveAffiliationsByMemberIds( const result = new Map() for (const id of memberIds) { - result.set(id, resolveAffiliationsForMember(byMember.get(id) ?? [])) + result.set(id, resolveAffiliationsForMember(id, byMember.get(id) ?? [])) } return result } From 801d5955ef76605696ad3db2f7375e510840b7a8 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Fri, 20 Mar 2026 16:22:28 +0100 Subject: [PATCH 13/26] fix: lint Signed-off-by: Umberto Sgueglia --- backend/src/api/public/middlewares/staticApiKeyMiddleware.ts | 1 + backend/src/api/public/v1/dev-stats/getAffiliations.ts | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/src/api/public/middlewares/staticApiKeyMiddleware.ts b/backend/src/api/public/middlewares/staticApiKeyMiddleware.ts index 76945608c1..76d928f8a8 100644 --- a/backend/src/api/public/middlewares/staticApiKeyMiddleware.ts +++ b/backend/src/api/public/middlewares/staticApiKeyMiddleware.ts @@ -1,5 +1,6 @@ import crypto from 'crypto' import type { NextFunction, Request, RequestHandler, Response } from 'express' + import { UnauthorizedError } from '@crowd/common' import { findApiKeyByHash, optionsQx, touchApiKeyLastUsed } from '@crowd/data-access-layer' diff --git a/backend/src/api/public/v1/dev-stats/getAffiliations.ts b/backend/src/api/public/v1/dev-stats/getAffiliations.ts index 4063fea2b0..1011526e4d 100644 --- a/backend/src/api/public/v1/dev-stats/getAffiliations.ts +++ b/backend/src/api/public/v1/dev-stats/getAffiliations.ts @@ -70,10 +70,7 @@ export async function getAffiliations(req: Request, res: Response): Promise ({ From 494e6700599c3fbb85a7ee3cd620a5b34280e2e3 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Fri, 20 Mar 2026 16:27:22 +0100 Subject: [PATCH 14/26] fix: lint Signed-off-by: Umberto Sgueglia --- .../data-access-layer/src/devStats/index.ts | 63 ++++++++++++++----- 1 file changed, 48 insertions(+), 15 deletions(-) diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts index eb54742ca5..452013335a 100644 --- a/services/libs/data-access-layer/src/devStats/index.ts +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -135,7 +135,7 @@ async function findWorkExperiencesBulk( ) return rows.filter( - (r) => !r.title || !BLACKLISTED_TITLES.some((t) => r.title!.toLowerCase().includes(t)), + (r) => !r.title || !BLACKLISTED_TITLES.some((t) => r.title?.toLowerCase().includes(t)), ) } @@ -175,9 +175,9 @@ function longestDateRange(orgs: IDevStatsWorkRow[]): IDevStatsWorkRow { return withDates.reduce((best, curr) => { const bestMs = - new Date(best.dateEnd ?? '9999-12-31').getTime() - new Date(best.dateStart!).getTime() + new Date(best.dateEnd ?? '9999-12-31').getTime() - new Date(best.dateStart ?? '').getTime() const currMs = - new Date(curr.dateEnd ?? '9999-12-31').getTime() - new Date(curr.dateStart!).getTime() + new Date(curr.dateEnd ?? '9999-12-31').getTime() - new Date(curr.dateStart ?? '').getTime() return currMs > bestMs ? curr : best }) } @@ -239,7 +239,7 @@ function collectBoundaries(datedRows: IDevStatsWorkRow[]): Date[] { const ms = new Set([today.getTime()]) for (const row of datedRows) { - const start = startOfDay(row.dateStart!) + const start = startOfDay(row.dateStart ?? '') if (start <= today) ms.add(start.getTime()) if (row.dateEnd) { @@ -256,7 +256,7 @@ function collectBoundaries(datedRows: IDevStatsWorkRow[]): Date[] { function orgsActiveAt(datedRows: IDevStatsWorkRow[], point: Date): IDevStatsWorkRow[] { return datedRows.filter((r) => { - const start = startOfDay(r.dateStart!) + const start = startOfDay(r.dateStart ?? '') const end = r.dateEnd ? startOfDay(r.dateEnd) : null return point >= start && (!end || point <= end) }) @@ -291,7 +291,11 @@ function buildTimeline( { memberId, org: org.organizationName, start: start.toISOString(), end: end.toISOString() }, 'closing segment', ) - resolved.push({ organization: org.organizationName, startDate: start.toISOString(), endDate: end.toISOString() }) + resolved.push({ + organization: org.organizationName, + startDate: start.toISOString(), + endDate: end.toISOString(), + }) } for (let i = 0; i < boundaries.length - 1; i++) { @@ -318,7 +322,12 @@ function buildTimeline( if (gapStart !== null) { log.debug( - { memberId, fallback: fallbackOrg?.organizationName ?? null, gapStart: gapStart.toISOString(), gapEnd: dayBefore(point).toISOString() }, + { + memberId, + fallback: fallbackOrg?.organizationName ?? null, + gapStart: gapStart.toISOString(), + gapEnd: dayBefore(point).toISOString(), + }, 'closing gap with fallback org', ) if (fallbackOrg) closeSegment(fallbackOrg, gapStart, dayBefore(point)) @@ -328,15 +337,23 @@ function buildTimeline( const winner = selectPrimaryWorkExperience(active) if (!currentOrg) { - log.debug({ memberId, org: winner.organizationName, from: point.toISOString() }, 'opening segment') + log.debug( + { memberId, org: winner.organizationName, from: point.toISOString() }, + 'opening segment', + ) currentOrg = winner currentStart = point } else if (currentOrg.organizationId !== winner.organizationId) { log.debug( - { memberId, from: currentOrg.organizationName, to: winner.organizationName, at: point.toISOString() }, + { + memberId, + from: currentOrg.organizationName, + to: winner.organizationName, + at: point.toISOString(), + }, 'org changed', ) - closeSegment(currentOrg, currentStart!, dayBefore(point)) + closeSegment(currentOrg, currentStart ?? point, dayBefore(point)) currentOrg = winner currentStart = point } @@ -345,7 +362,10 @@ function buildTimeline( // Close the final open segment using the org's actual endDate (null = ongoing) if (currentOrg && currentStart) { const endDate = currentOrg.dateEnd ? new Date(currentOrg.dateEnd).toISOString() : null - log.debug({ memberId, org: currentOrg.organizationName, start: currentStart.toISOString(), endDate }, 'closing final segment') + log.debug( + { memberId, org: currentOrg.organizationName, start: currentStart.toISOString(), endDate }, + 'closing final segment', + ) resolved.push({ organization: currentOrg.organizationName, startDate: currentStart.toISOString(), @@ -355,14 +375,24 @@ function buildTimeline( // Close a trailing gap with the fallback org (ongoing, no endDate) if (gapStart !== null && fallbackOrg) { - log.debug({ memberId, fallback: fallbackOrg.organizationName, gapStart: gapStart.toISOString() }, 'closing trailing gap with fallback org') - resolved.push({ organization: fallbackOrg.organizationName, startDate: gapStart.toISOString(), endDate: null }) + log.debug( + { memberId, fallback: fallbackOrg.organizationName, gapStart: gapStart.toISOString() }, + 'closing trailing gap with fallback org', + ) + resolved.push({ + organization: fallbackOrg.organizationName, + startDate: gapStart.toISOString(), + endDate: null, + }) } return resolved } -function resolveAffiliationsForMember(memberId: string, rows: IDevStatsWorkRow[]): IDevStatsAffiliation[] { +function resolveAffiliationsForMember( + memberId: string, + rows: IDevStatsWorkRow[], +): IDevStatsAffiliation[] { log.debug({ memberId, totalRows: rows.length }, 'resolving affiliations') // If one undated org is marked primary, drop all other undated orgs to avoid infinite conflicts @@ -372,7 +402,10 @@ function resolveAffiliationsForMember(memberId: string, rows: IDevStatsWorkRow[] : rows if (cleaned.length < rows.length) { - log.debug({ memberId, dropped: rows.length - cleaned.length }, 'dropped undated orgs (primary undated exists)') + log.debug( + { memberId, dropped: rows.length - cleaned.length }, + 'dropped undated orgs (primary undated exists)', + ) } const fallbackOrg = findFallbackOrg(cleaned) From 1782c9eccb587b47a5abcbcd6458e0146d326574 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Fri, 20 Mar 2026 16:39:12 +0100 Subject: [PATCH 15/26] fix: created at error Signed-off-by: Umberto Sgueglia --- services/libs/data-access-layer/src/devStats/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts index 452013335a..15e41c3aff 100644 --- a/services/libs/data-access-layer/src/devStats/index.ts +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -155,7 +155,7 @@ async function findManualAffiliationsBulk( NULL AS title, msa."dateStart", msa."dateEnd", - msa."createdAt", + NULL::timestamptz AS "createdAt", false AS "isPrimaryWorkExperience", 0 AS "memberCount", msa."segmentId" From 7342a9d88185da19481e76c81fbf3e7d38d3554c Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Fri, 20 Mar 2026 16:51:34 +0100 Subject: [PATCH 16/26] fix: createdAt as date Signed-off-by: Umberto Sgueglia --- services/libs/data-access-layer/src/devStats/index.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts index 15e41c3aff..beb9dfab89 100644 --- a/services/libs/data-access-layer/src/devStats/index.ts +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -33,7 +33,7 @@ interface IDevStatsWorkRow { title: string | null dateStart: string | null dateEnd: string | null - createdAt: string + createdAt: Date | string isPrimaryWorkExperience: boolean memberCount: number /** null for memberOrganizations rows; non-null for memberSegmentAffiliations rows */ @@ -224,7 +224,7 @@ function findFallbackOrg(rows: IDevStatsWorkRow[]): IDevStatsWorkRow | null { return ( rows .filter((r) => !r.dateStart && !r.dateEnd) - .sort((a, b) => a.createdAt.localeCompare(b.createdAt)) + .sort((a, b) => new Date(a.createdAt).getTime() - new Date(b.createdAt).getTime()) .at(0) ?? null ) } From debdae6e22c4da7030136e2e965d8dec98303f7d Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Mon, 23 Mar 2026 10:53:30 +0100 Subject: [PATCH 17/26] feat: adding logs Signed-off-by: Umberto Sgueglia --- .../data-access-layer/src/devStats/index.ts | 57 +++++++++++++++++-- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts index beb9dfab89..3795805afe 100644 --- a/services/libs/data-access-layer/src/devStats/index.ts +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -303,7 +303,18 @@ function buildTimeline( const active = orgsActiveAt(datedRows, point) log.debug( - { memberId, point: point.toISOString(), activeOrgs: active.map((r) => r.organizationName) }, + { + memberId, + point: point.toISOString(), + activeOrgs: active.map((r) => ({ + org: r.organizationName, + dateStart: r.dateStart, + dateEnd: r.dateEnd, + isPrimary: r.isPrimaryWorkExperience, + memberCount: r.memberCount, + isManual: r.segmentId !== null, + })), + }, 'processing boundary', ) @@ -393,7 +404,21 @@ function resolveAffiliationsForMember( memberId: string, rows: IDevStatsWorkRow[], ): IDevStatsAffiliation[] { - log.debug({ memberId, totalRows: rows.length }, 'resolving affiliations') + log.debug( + { + memberId, + totalRows: rows.length, + rows: rows.map((r) => ({ + org: r.organizationName, + dateStart: r.dateStart, + dateEnd: r.dateEnd, + isPrimary: r.isPrimaryWorkExperience, + memberCount: r.memberCount, + isManual: r.segmentId !== null, + })), + }, + 'resolving affiliations', + ) // If one undated org is marked primary, drop all other undated orgs to avoid infinite conflicts const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) @@ -403,7 +428,7 @@ function resolveAffiliationsForMember( if (cleaned.length < rows.length) { log.debug( - { memberId, dropped: rows.length - cleaned.length }, + { memberId, dropped: rows.length - cleaned.length, keptPrimaryUndated: primaryUndated?.organizationName }, 'dropped undated orgs (primary undated exists)', ) } @@ -412,7 +437,17 @@ function resolveAffiliationsForMember( const datedRows = cleaned.filter((r) => r.dateStart) log.debug( - { memberId, datedRows: datedRows.length, fallbackOrg: fallbackOrg?.organizationName ?? null }, + { + memberId, + datedRows: datedRows.length, + undatedRows: cleaned.length - datedRows.length, + fallbackOrg: fallbackOrg?.organizationName ?? null, + datedRowsList: datedRows.map((r) => ({ + org: r.organizationName, + dateStart: r.dateStart, + dateEnd: r.dateEnd, + })), + }, 'prepared rows', ) @@ -422,11 +457,21 @@ function resolveAffiliationsForMember( } const boundaries = collectBoundaries(datedRows) - log.debug({ memberId, boundaries: boundaries.length }, 'collected boundaries') + log.debug( + { memberId, boundaries: boundaries.length, boundaryDates: boundaries.map((b) => b.toISOString()) }, + 'collected boundaries', + ) const timeline = buildTimeline(memberId, datedRows, fallbackOrg, boundaries) - log.debug({ memberId, affiliations: timeline.length }, 'timeline built') + log.debug( + { + memberId, + affiliations: timeline.length, + result: timeline.map((a) => ({ org: a.organization, startDate: a.startDate, endDate: a.endDate })), + }, + 'timeline built', + ) return timeline.sort((a, b) => { if (!a.startDate) return 1 From 09ba86fcd94346a7ccef852d777bb623a7185407 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Mon, 23 Mar 2026 15:47:23 +0100 Subject: [PATCH 18/26] refactor: simplify buildTimeline Signed-off-by: Umberto Sgueglia --- .../data-access-layer/src/devStats/index.ts | 157 ++++++++++-------- 1 file changed, 92 insertions(+), 65 deletions(-) diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts index 3795805afe..9b0f8bb6a6 100644 --- a/services/libs/data-access-layer/src/devStats/index.ts +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -254,11 +254,13 @@ function collectBoundaries(datedRows: IDevStatsWorkRow[]): Date[] { .map((t) => new Date(t)) } -function orgsActiveAt(datedRows: IDevStatsWorkRow[], point: Date): IDevStatsWorkRow[] { - return datedRows.filter((r) => { - const start = startOfDay(r.dateStart ?? '') - const end = r.dateEnd ? startOfDay(r.dateEnd) : null - return point >= start && (!end || point <= end) +function orgsActiveAt(datedRows: IDevStatsWorkRow[], boundaryDate: Date): IDevStatsWorkRow[] { + return datedRows.filter((role) => { + const roleStart = startOfDay(role.dateStart ?? '') + const roleEnd = role.dateEnd ? startOfDay(role.dateEnd) : null + + // org is active if the boundary date falls within its employment period + return boundaryDate >= roleStart && (!roleEnd || boundaryDate <= roleEnd) }) } @@ -274,39 +276,50 @@ function dayBefore(date: Date): Date { return d } -/** Iterates boundary intervals and builds non-overlapping affiliation segments. */ +function closeAffiliationWindow( + memberId: string, + affiliations: IDevStatsAffiliation[], + org: IDevStatsWorkRow, + windowStart: Date, + windowEnd: Date, +): void { + log.debug( + { + memberId, + org: org.organizationName, + windowStart: windowStart.toISOString(), + windowEnd: windowEnd.toISOString(), + }, + 'closing affiliation window', + ) + affiliations.push({ + organization: org.organizationName, + startDate: windowStart.toISOString(), + endDate: windowEnd.toISOString(), + }) +} + +/** Iterates boundary intervals and builds non-overlapping affiliation windows. */ function buildTimeline( memberId: string, datedRows: IDevStatsWorkRow[], fallbackOrg: IDevStatsWorkRow | null, boundaries: Date[], ): IDevStatsAffiliation[] { - const resolved: IDevStatsAffiliation[] = [] - let currentOrg: IDevStatsWorkRow | null = null - let currentStart: Date | null = null - let gapStart: Date | null = null - - const closeSegment = (org: IDevStatsWorkRow, start: Date, end: Date) => { - log.debug( - { memberId, org: org.organizationName, start: start.toISOString(), end: end.toISOString() }, - 'closing segment', - ) - resolved.push({ - organization: org.organizationName, - startDate: start.toISOString(), - endDate: end.toISOString(), - }) - } + const affiliations: IDevStatsAffiliation[] = [] + let currentOrg: IDevStatsWorkRow = null + let currentWindowStart: Date = null + let uncoveredPeriodStart: Date = null for (let i = 0; i < boundaries.length - 1; i++) { - const point = boundaries[i] - const active = orgsActiveAt(datedRows, point) + const boundaryDate = boundaries[i] + const activeOrgsAtBoundary = orgsActiveAt(datedRows, boundaryDate) log.debug( { memberId, - point: point.toISOString(), - activeOrgs: active.map((r) => ({ + boundaryDate: boundaryDate.toISOString(), + orgsAtBoundary: activeOrgsAtBoundary.map((r) => ({ org: r.organizationName, dateStart: r.dateStart, dateEnd: r.dateEnd, @@ -318,86 +331,100 @@ function buildTimeline( 'processing boundary', ) - if (active.length === 0) { - if (currentOrg && currentStart) { - closeSegment(currentOrg, currentStart, dayBefore(point)) + // No orgs active at this boundary — close the current window and start tracking a gap + if (activeOrgsAtBoundary.length === 0) { + + if (currentOrg && currentWindowStart) { + closeAffiliationWindow(memberId, affiliations, currentOrg, currentWindowStart, dayBefore(boundaryDate)) currentOrg = null - currentStart = null + currentWindowStart = null } - if (gapStart === null) { - gapStart = point - log.debug({ memberId, gapStart: point.toISOString() }, 'gap started') + + if (uncoveredPeriodStart === null) { + uncoveredPeriodStart = boundaryDate + log.debug({ memberId, uncoveredPeriodStart: boundaryDate.toISOString() }, 'uncovered period started') } + continue } - if (gapStart !== null) { + // Orgs are active again — close the uncovered period using the fallback org if available + if (uncoveredPeriodStart !== null) { log.debug( { memberId, - fallback: fallbackOrg?.organizationName ?? null, - gapStart: gapStart.toISOString(), - gapEnd: dayBefore(point).toISOString(), + fallbackOrg: fallbackOrg?.organizationName ?? null, + uncoveredPeriodStart: uncoveredPeriodStart.toISOString(), + uncoveredPeriodEnd: dayBefore(boundaryDate).toISOString(), }, - 'closing gap with fallback org', + 'closing uncovered period with fallback org', ) - if (fallbackOrg) closeSegment(fallbackOrg, gapStart, dayBefore(point)) - gapStart = null + + if (fallbackOrg) { + closeAffiliationWindow(memberId, affiliations, fallbackOrg, uncoveredPeriodStart, dayBefore(boundaryDate)) + } + + uncoveredPeriodStart = null } - const winner = selectPrimaryWorkExperience(active) + const winningAffiliation = selectPrimaryWorkExperience(activeOrgsAtBoundary) + // No current window open — start a new one with the winning org if (!currentOrg) { log.debug( - { memberId, org: winner.organizationName, from: point.toISOString() }, - 'opening segment', + { memberId, org: winningAffiliation.organizationName, from: boundaryDate.toISOString() }, + 'opening affiliation window', ) - currentOrg = winner - currentStart = point - } else if (currentOrg.organizationId !== winner.organizationId) { + currentOrg = winningAffiliation + currentWindowStart = boundaryDate + continue + } + + // Winning org changed — close the current window and open a new one + if (currentOrg.organizationId !== winningAffiliation.organizationId) { log.debug( { memberId, from: currentOrg.organizationName, - to: winner.organizationName, - at: point.toISOString(), + to: winningAffiliation.organizationName, + at: boundaryDate.toISOString(), }, - 'org changed', + 'affiliation changed', ) - closeSegment(currentOrg, currentStart ?? point, dayBefore(point)) - currentOrg = winner - currentStart = point + closeAffiliationWindow(memberId, affiliations, currentOrg, currentWindowStart ?? boundaryDate, dayBefore(boundaryDate)) + currentOrg = winningAffiliation + currentWindowStart = boundaryDate } } - // Close the final open segment using the org's actual endDate (null = ongoing) - if (currentOrg && currentStart) { + // Close the last open window using the org's actual end date (null = ongoing) + if (currentOrg && currentWindowStart) { const endDate = currentOrg.dateEnd ? new Date(currentOrg.dateEnd).toISOString() : null log.debug( - { memberId, org: currentOrg.organizationName, start: currentStart.toISOString(), endDate }, - 'closing final segment', + { memberId, org: currentOrg.organizationName, start: currentWindowStart.toISOString(), endDate }, + 'closing final affiliation window', ) - resolved.push({ + affiliations.push({ organization: currentOrg.organizationName, - startDate: currentStart.toISOString(), + startDate: currentWindowStart.toISOString(), endDate, }) } - // Close a trailing gap with the fallback org (ongoing, no endDate) - if (gapStart !== null && fallbackOrg) { + // Close a trailing uncovered period using the fallback org (ongoing, no end date) + if (uncoveredPeriodStart !== null && fallbackOrg) { log.debug( - { memberId, fallback: fallbackOrg.organizationName, gapStart: gapStart.toISOString() }, - 'closing trailing gap with fallback org', + { memberId, fallbackOrg: fallbackOrg.organizationName, uncoveredPeriodStart: uncoveredPeriodStart.toISOString() }, + 'closing trailing uncovered period with fallback org', ) - resolved.push({ + affiliations.push({ organization: fallbackOrg.organizationName, - startDate: gapStart.toISOString(), + startDate: uncoveredPeriodStart.toISOString(), endDate: null, }) } - return resolved + return affiliations } function resolveAffiliationsForMember( From 895d9ed60d27105f46ae5d7b7042e01859620b05 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Mon, 23 Mar 2026 16:06:33 +0100 Subject: [PATCH 19/26] fix: lint Signed-off-by: Umberto Sgueglia --- .../data-access-layer/src/devStats/index.ts | 65 +++++++++++++++---- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts index 9b0f8bb6a6..00efefd57a 100644 --- a/services/libs/data-access-layer/src/devStats/index.ts +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -258,7 +258,7 @@ function orgsActiveAt(datedRows: IDevStatsWorkRow[], boundaryDate: Date): IDevSt return datedRows.filter((role) => { const roleStart = startOfDay(role.dateStart ?? '') const roleEnd = role.dateEnd ? startOfDay(role.dateEnd) : null - + // org is active if the boundary date falls within its employment period return boundaryDate >= roleStart && (!roleEnd || boundaryDate <= roleEnd) }) @@ -333,16 +333,24 @@ function buildTimeline( // No orgs active at this boundary — close the current window and start tracking a gap if (activeOrgsAtBoundary.length === 0) { - if (currentOrg && currentWindowStart) { - closeAffiliationWindow(memberId, affiliations, currentOrg, currentWindowStart, dayBefore(boundaryDate)) + closeAffiliationWindow( + memberId, + affiliations, + currentOrg, + currentWindowStart, + dayBefore(boundaryDate), + ) currentOrg = null currentWindowStart = null } if (uncoveredPeriodStart === null) { uncoveredPeriodStart = boundaryDate - log.debug({ memberId, uncoveredPeriodStart: boundaryDate.toISOString() }, 'uncovered period started') + log.debug( + { memberId, uncoveredPeriodStart: boundaryDate.toISOString() }, + 'uncovered period started', + ) } continue @@ -361,9 +369,15 @@ function buildTimeline( ) if (fallbackOrg) { - closeAffiliationWindow(memberId, affiliations, fallbackOrg, uncoveredPeriodStart, dayBefore(boundaryDate)) + closeAffiliationWindow( + memberId, + affiliations, + fallbackOrg, + uncoveredPeriodStart, + dayBefore(boundaryDate), + ) } - + uncoveredPeriodStart = null } @@ -391,7 +405,13 @@ function buildTimeline( }, 'affiliation changed', ) - closeAffiliationWindow(memberId, affiliations, currentOrg, currentWindowStart ?? boundaryDate, dayBefore(boundaryDate)) + closeAffiliationWindow( + memberId, + affiliations, + currentOrg, + currentWindowStart ?? boundaryDate, + dayBefore(boundaryDate), + ) currentOrg = winningAffiliation currentWindowStart = boundaryDate } @@ -401,7 +421,12 @@ function buildTimeline( if (currentOrg && currentWindowStart) { const endDate = currentOrg.dateEnd ? new Date(currentOrg.dateEnd).toISOString() : null log.debug( - { memberId, org: currentOrg.organizationName, start: currentWindowStart.toISOString(), endDate }, + { + memberId, + org: currentOrg.organizationName, + start: currentWindowStart.toISOString(), + endDate, + }, 'closing final affiliation window', ) affiliations.push({ @@ -414,7 +439,11 @@ function buildTimeline( // Close a trailing uncovered period using the fallback org (ongoing, no end date) if (uncoveredPeriodStart !== null && fallbackOrg) { log.debug( - { memberId, fallbackOrg: fallbackOrg.organizationName, uncoveredPeriodStart: uncoveredPeriodStart.toISOString() }, + { + memberId, + fallbackOrg: fallbackOrg.organizationName, + uncoveredPeriodStart: uncoveredPeriodStart.toISOString(), + }, 'closing trailing uncovered period with fallback org', ) affiliations.push({ @@ -455,7 +484,11 @@ function resolveAffiliationsForMember( if (cleaned.length < rows.length) { log.debug( - { memberId, dropped: rows.length - cleaned.length, keptPrimaryUndated: primaryUndated?.organizationName }, + { + memberId, + dropped: rows.length - cleaned.length, + keptPrimaryUndated: primaryUndated?.organizationName, + }, 'dropped undated orgs (primary undated exists)', ) } @@ -485,7 +518,11 @@ function resolveAffiliationsForMember( const boundaries = collectBoundaries(datedRows) log.debug( - { memberId, boundaries: boundaries.length, boundaryDates: boundaries.map((b) => b.toISOString()) }, + { + memberId, + boundaries: boundaries.length, + boundaryDates: boundaries.map((b) => b.toISOString()), + }, 'collected boundaries', ) @@ -495,7 +532,11 @@ function resolveAffiliationsForMember( { memberId, affiliations: timeline.length, - result: timeline.map((a) => ({ org: a.organization, startDate: a.startDate, endDate: a.endDate })), + result: timeline.map((a) => ({ + org: a.organization, + startDate: a.startDate, + endDate: a.endDate, + })), }, 'timeline built', ) From 64afb6459e6b1ffcae0b9485321fce1b8da2201f Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Mon, 23 Mar 2026 16:13:37 +0100 Subject: [PATCH 20/26] fix: remove logs Signed-off-by: Umberto Sgueglia --- .../public/v1/dev-stats/getAffiliations.ts | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/backend/src/api/public/v1/dev-stats/getAffiliations.ts b/backend/src/api/public/v1/dev-stats/getAffiliations.ts index 1011526e4d..b3f6107daa 100644 --- a/backend/src/api/public/v1/dev-stats/getAffiliations.ts +++ b/backend/src/api/public/v1/dev-stats/getAffiliations.ts @@ -7,13 +7,10 @@ import { optionsQx, resolveAffiliationsByMemberIds, } from '@crowd/data-access-layer' -import { getServiceChildLogger } from '@crowd/logging' import { ok } from '@/utils/api' import { validateOrThrow } from '@/utils/validation' -const log = getServiceChildLogger('dev-stats') - const MAX_HANDLES = 1000 const bodySchema = z.object({ @@ -27,19 +24,11 @@ export async function getAffiliations(req: Request, res: Response): Promise h.toLowerCase()) // Step 1: find verified members by github handles const memberRows = await findMembersByGithubHandles(qx, lowercasedHandles) - const t1 = performance.now() - log.info( - { handles: githubHandles.length, found: memberRows.length, ms: Math.round(t1 - t0) }, - 'Step 1: members lookup', - ) - const foundHandles = new Set(memberRows.map((r) => r.githubHandle.toLowerCase())) const notFound = githubHandles.filter((h) => !foundHandles.has(h.toLowerCase())) @@ -53,12 +42,6 @@ export async function getAffiliations(req: Request, res: Response): Promise() for (const row of emailRows) { const list = emailsByMember.get(row.memberId) ?? [] @@ -69,9 +52,6 @@ export async function getAffiliations(req: Request, res: Response): Promise ({ githubHandle: member.githubHandle, @@ -80,15 +60,5 @@ export async function getAffiliations(req: Request, res: Response): Promise Date: Mon, 23 Mar 2026 16:43:05 +0100 Subject: [PATCH 21/26] fix: remove comments Signed-off-by: Umberto Sgueglia --- .../libs/data-access-layer/src/devStats/index.ts | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts index 00efefd57a..1c4c403b49 100644 --- a/services/libs/data-access-layer/src/devStats/index.ts +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -5,12 +5,8 @@ import { QueryExecutor } from '../queryExecutor' const log = getServiceChildLogger('dev-stats:affiliations') -// ─── Constants ──────────────────────────────────────────────────────────────── - const BLACKLISTED_TITLES = ['investor', 'mentor', 'board member'] -// ─── Public interfaces ──────────────────────────────────────────────────────── - export interface IDevStatsMemberRow { githubHandle: string memberId: string @@ -23,8 +19,6 @@ export interface IDevStatsAffiliation { endDate: string | null } -// ─── Internal row type (union of memberOrganizations + manual affiliations) ─── - interface IDevStatsWorkRow { id: string memberId: string @@ -167,8 +161,6 @@ async function findManualAffiliationsBulk( ) } -// ─── Selection priority (mirrors selectPrimaryWorkExperience) ───────────────── - function longestDateRange(orgs: IDevStatsWorkRow[]): IDevStatsWorkRow { const withDates = orgs.filter((r) => r.dateStart) if (withDates.length === 0) return orgs[0] @@ -214,8 +206,6 @@ function selectPrimaryWorkExperience(orgs: IDevStatsWorkRow[]): IDevStatsWorkRow return longestDateRange(orgs) } -// ─── Per-member affiliation resolution ─────────────────────────────────────── - /** Returns the org used to fill gaps — primary undated wins, then earliest-created undated. */ function findFallbackOrg(rows: IDevStatsWorkRow[]): IDevStatsWorkRow | null { const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) @@ -548,8 +538,6 @@ function resolveAffiliationsForMember( }) } -// ─── Public bulk resolver ───────────────────────────────────────────────────── - export async function resolveAffiliationsByMemberIds( qx: QueryExecutor, memberIds: string[], From 0cdb48d1e264ad22d0bbc0582821cacde98ada56 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Mon, 23 Mar 2026 17:06:01 +0100 Subject: [PATCH 22/26] fix: change logging Signed-off-by: Umberto Sgueglia --- .../libs/data-access-layer/src/devStats/index.ts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts index 1c4c403b49..cc0e84eacc 100644 --- a/services/libs/data-access-layer/src/devStats/index.ts +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -305,7 +305,7 @@ function buildTimeline( const boundaryDate = boundaries[i] const activeOrgsAtBoundary = orgsActiveAt(datedRows, boundaryDate) - log.debug( + log.info( { memberId, boundaryDate: boundaryDate.toISOString(), @@ -337,7 +337,7 @@ function buildTimeline( if (uncoveredPeriodStart === null) { uncoveredPeriodStart = boundaryDate - log.debug( + log.info( { memberId, uncoveredPeriodStart: boundaryDate.toISOString() }, 'uncovered period started', ) @@ -348,7 +348,7 @@ function buildTimeline( // Orgs are active again — close the uncovered period using the fallback org if available if (uncoveredPeriodStart !== null) { - log.debug( + log.info( { memberId, fallbackOrg: fallbackOrg?.organizationName ?? null, @@ -375,7 +375,7 @@ function buildTimeline( // No current window open — start a new one with the winning org if (!currentOrg) { - log.debug( + log.info( { memberId, org: winningAffiliation.organizationName, from: boundaryDate.toISOString() }, 'opening affiliation window', ) @@ -386,7 +386,7 @@ function buildTimeline( // Winning org changed — close the current window and open a new one if (currentOrg.organizationId !== winningAffiliation.organizationId) { - log.debug( + log.info( { memberId, from: currentOrg.organizationName, @@ -410,7 +410,7 @@ function buildTimeline( // Close the last open window using the org's actual end date (null = ongoing) if (currentOrg && currentWindowStart) { const endDate = currentOrg.dateEnd ? new Date(currentOrg.dateEnd).toISOString() : null - log.debug( + log.info( { memberId, org: currentOrg.organizationName, @@ -428,7 +428,7 @@ function buildTimeline( // Close a trailing uncovered period using the fallback org (ongoing, no end date) if (uncoveredPeriodStart !== null && fallbackOrg) { - log.debug( + log.info( { memberId, fallbackOrg: fallbackOrg.organizationName, From a00037c39d06c95505d6bb4af74155f9d822cdae Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Mon, 23 Mar 2026 17:15:24 +0100 Subject: [PATCH 23/26] refactor: create dal for affiliations Signed-off-by: Umberto Sgueglia --- .../src/affiliations/index.ts | 1 + .../src/affiliations/resolve.ts | 516 ++++++++++++++++++ .../data-access-layer/src/devStats/index.ts | 504 +---------------- services/libs/data-access-layer/src/index.ts | 1 + 4 files changed, 520 insertions(+), 502 deletions(-) create mode 100644 services/libs/data-access-layer/src/affiliations/index.ts create mode 100644 services/libs/data-access-layer/src/affiliations/resolve.ts diff --git a/services/libs/data-access-layer/src/affiliations/index.ts b/services/libs/data-access-layer/src/affiliations/index.ts new file mode 100644 index 0000000000..d0104e83ca --- /dev/null +++ b/services/libs/data-access-layer/src/affiliations/index.ts @@ -0,0 +1 @@ +export * from './resolve' diff --git a/services/libs/data-access-layer/src/affiliations/resolve.ts b/services/libs/data-access-layer/src/affiliations/resolve.ts new file mode 100644 index 0000000000..374e879b1e --- /dev/null +++ b/services/libs/data-access-layer/src/affiliations/resolve.ts @@ -0,0 +1,516 @@ +import { getServiceChildLogger } from '@crowd/logging' +import { MemberIdentityType, PlatformType } from '@crowd/types' + +import { QueryExecutor } from '../queryExecutor' + +const log = getServiceChildLogger('affiliations:resolve') + +// ─── Constants ──────────────────────────────────────────────────────────────── + +const BLACKLISTED_TITLES = ['investor', 'mentor', 'board member'] + +// ─── Public interfaces ──────────────────────────────────────────────────────── + +export interface IAffiliationPeriod { + organization: string + startDate: string | null + endDate: string | null +} + +// ─── Internal row type (union of memberOrganizations + manual affiliations) ─── + +interface IWorkRow { + id: string + memberId: string + organizationId: string + organizationName: string + title: string | null + dateStart: string | null + dateEnd: string | null + createdAt: Date | string + isPrimaryWorkExperience: boolean + memberCount: number + /** null for memberOrganizations rows; non-null for memberSegmentAffiliations rows */ + segmentId: string | null +} + +// ─── Query: regular work experiences (bulk) ────────────────────────────────── + +export async function findWorkExperiencesBulk( + qx: QueryExecutor, + memberIds: string[], +): Promise { + const rows: IWorkRow[] = await qx.select( + ` + WITH aggs AS ( + SELECT + osa."organizationId", + sum(osa."memberCount") AS total_count + FROM "organizationSegmentsAgg" osa + WHERE osa."segmentId" IN ( + SELECT id FROM segments + WHERE "grandparentId" IS NOT NULL + AND "parentId" IS NOT NULL + ) + GROUP BY osa."organizationId" + ) + SELECT + mo.id, + mo."memberId", + mo."organizationId", + o."displayName" AS "organizationName", + mo.title, + mo."dateStart", + mo."dateEnd", + mo."createdAt", + COALESCE(ovr."isPrimaryWorkExperience", false) AS "isPrimaryWorkExperience", + COALESCE(a.total_count, 0) AS "memberCount", + NULL::text AS "segmentId" + FROM "memberOrganizations" mo + JOIN organizations o ON mo."organizationId" = o.id + LEFT JOIN "memberOrganizationAffiliationOverrides" ovr ON ovr."memberOrganizationId" = mo.id + LEFT JOIN aggs a ON a."organizationId" = mo."organizationId" + WHERE mo."memberId" IN ($(memberIds:csv)) + AND mo."deletedAt" IS NULL + AND COALESCE(ovr."allowAffiliation", true) = true + `, + { memberIds }, + ) + + return rows.filter( + (r) => !r.title || !BLACKLISTED_TITLES.some((t) => r.title?.toLowerCase().includes(t)), + ) +} + +// ─── Query: manual affiliations (bulk) ─────────────────────────────────────── + +export async function findManualAffiliationsBulk( + qx: QueryExecutor, + memberIds: string[], +): Promise { + return qx.select( + ` + SELECT + msa.id, + msa."memberId", + msa."organizationId", + o."displayName" AS "organizationName", + NULL AS title, + msa."dateStart", + msa."dateEnd", + NULL::timestamptz AS "createdAt", + false AS "isPrimaryWorkExperience", + 0 AS "memberCount", + msa."segmentId" + FROM "memberSegmentAffiliations" msa + JOIN organizations o ON msa."organizationId" = o.id + WHERE msa."memberId" IN ($(memberIds:csv)) + `, + { memberIds }, + ) +} + +// ─── Selection priority ─────────────────────────────────────────────────────── + +function longestDateRange(orgs: IWorkRow[]): IWorkRow { + const withDates = orgs.filter((r) => r.dateStart) + if (withDates.length === 0) return orgs[0] + + return withDates.reduce((best, curr) => { + const bestMs = + new Date(best.dateEnd ?? '9999-12-31').getTime() - new Date(best.dateStart ?? '').getTime() + const currMs = + new Date(curr.dateEnd ?? '9999-12-31').getTime() - new Date(curr.dateStart ?? '').getTime() + return currMs > bestMs ? curr : best + }) +} + +function selectPrimaryWorkExperience(orgs: IWorkRow[]): IWorkRow { + if (orgs.length === 1) return orgs[0] + + // 1. Manual affiliations (segmentId non-null) always win + const manual = orgs.filter((r) => r.segmentId !== null) + if (manual.length > 0) { + if (manual.length === 1) return manual[0] + return longestDateRange(manual) + } + + // 2. isPrimaryWorkExperience = true — prefer those with a dateStart + const primary = orgs.filter((r) => r.isPrimaryWorkExperience) + if (primary.length > 0) { + const withDates = primary.filter((r) => r.dateStart) + if (withDates.length > 0) return withDates[0] + return primary[0] + } + + // 3. Only one org has a dateStart — pick it + const withDates = orgs.filter((r) => r.dateStart) + if (withDates.length === 1) return withDates[0] + + // 4. Org with strictly more members wins; if tied, fall through + const sorted = [...orgs].sort((a, b) => b.memberCount - a.memberCount) + if (sorted.length >= 2 && sorted[0].memberCount > sorted[1].memberCount) { + return sorted[0] + } + + // 5. Longest date range as final tiebreaker + return longestDateRange(orgs) +} + +// ─── Timeline helpers ───────────────────────────────────────────────────────── + +/** Returns the org used to fill gaps — primary undated wins, then earliest-created undated. */ +function findFallbackOrg(rows: IWorkRow[]): IWorkRow | null { + const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) + if (primaryUndated) return primaryUndated + + return ( + rows + .filter((r) => !r.dateStart && !r.dateEnd) + .sort((a, b) => new Date(a.createdAt).getTime() - new Date(b.createdAt).getTime()) + .at(0) ?? null + ) +} + +/** + * Collects all date boundaries from the dated rows, capped at today. + * Each dateStart and (dateEnd + 1 day) marks a point where active orgs can change. + */ +function collectBoundaries(datedRows: IWorkRow[]): Date[] { + const today = startOfDay(new Date()) + + const ms = new Set([today.getTime()]) + + for (const row of datedRows) { + const start = startOfDay(row.dateStart ?? '') + if (start <= today) ms.add(start.getTime()) + + if (row.dateEnd) { + const afterEnd = startOfDay(row.dateEnd) + afterEnd.setDate(afterEnd.getDate() + 1) + if (afterEnd <= today) ms.add(afterEnd.getTime()) + } + } + + return Array.from(ms) + .sort((a, b) => a - b) + .map((t) => new Date(t)) +} + +function orgsActiveAt(datedRows: IWorkRow[], boundaryDate: Date): IWorkRow[] { + return datedRows.filter((role) => { + const roleStart = startOfDay(role.dateStart ?? '') + const roleEnd = role.dateEnd ? startOfDay(role.dateEnd) : null + + // org is active if the boundary date falls within its employment period + return boundaryDate >= roleStart && (!roleEnd || boundaryDate <= roleEnd) + }) +} + +function startOfDay(date: Date | string): Date { + const d = new Date(date) + d.setHours(0, 0, 0, 0) + return d +} + +function dayBefore(date: Date): Date { + const d = new Date(date) + d.setDate(d.getDate() - 1) + return d +} + +function closeAffiliationWindow( + memberId: string, + affiliations: IAffiliationPeriod[], + org: IWorkRow, + windowStart: Date, + windowEnd: Date, +): void { + log.debug( + { + memberId, + org: org.organizationName, + windowStart: windowStart.toISOString(), + windowEnd: windowEnd.toISOString(), + }, + 'closing affiliation window', + ) + affiliations.push({ + organization: org.organizationName, + startDate: windowStart.toISOString(), + endDate: windowEnd.toISOString(), + }) +} + +/** Iterates boundary intervals and builds non-overlapping affiliation windows. */ +function buildTimeline( + memberId: string, + datedRows: IWorkRow[], + fallbackOrg: IWorkRow | null, + boundaries: Date[], +): IAffiliationPeriod[] { + const affiliations: IAffiliationPeriod[] = [] + let currentOrg: IWorkRow = null + let currentWindowStart: Date = null + let uncoveredPeriodStart: Date = null + + for (let i = 0; i < boundaries.length - 1; i++) { + const boundaryDate = boundaries[i] + const activeOrgsAtBoundary = orgsActiveAt(datedRows, boundaryDate) + + log.info( + { + memberId, + boundaryDate: boundaryDate.toISOString(), + orgsAtBoundary: activeOrgsAtBoundary.map((r) => ({ + org: r.organizationName, + dateStart: r.dateStart, + dateEnd: r.dateEnd, + isPrimary: r.isPrimaryWorkExperience, + memberCount: r.memberCount, + isManual: r.segmentId !== null, + })), + }, + 'processing boundary', + ) + + // No orgs active at this boundary — close the current window and start tracking a gap + if (activeOrgsAtBoundary.length === 0) { + if (currentOrg && currentWindowStart) { + closeAffiliationWindow( + memberId, + affiliations, + currentOrg, + currentWindowStart, + dayBefore(boundaryDate), + ) + currentOrg = null + currentWindowStart = null + } + + if (uncoveredPeriodStart === null) { + uncoveredPeriodStart = boundaryDate + log.info( + { memberId, uncoveredPeriodStart: boundaryDate.toISOString() }, + 'uncovered period started', + ) + } + + continue + } + + // Orgs are active again — close the uncovered period using the fallback org if available + if (uncoveredPeriodStart !== null) { + log.info( + { + memberId, + fallbackOrg: fallbackOrg?.organizationName ?? null, + uncoveredPeriodStart: uncoveredPeriodStart.toISOString(), + uncoveredPeriodEnd: dayBefore(boundaryDate).toISOString(), + }, + 'closing uncovered period with fallback org', + ) + + if (fallbackOrg) { + closeAffiliationWindow( + memberId, + affiliations, + fallbackOrg, + uncoveredPeriodStart, + dayBefore(boundaryDate), + ) + } + + uncoveredPeriodStart = null + } + + const winningAffiliation = selectPrimaryWorkExperience(activeOrgsAtBoundary) + + // No current window open — start a new one with the winning org + if (!currentOrg) { + log.info( + { memberId, org: winningAffiliation.organizationName, from: boundaryDate.toISOString() }, + 'opening affiliation window', + ) + currentOrg = winningAffiliation + currentWindowStart = boundaryDate + continue + } + + // Winning org changed — close the current window and open a new one + if (currentOrg.organizationId !== winningAffiliation.organizationId) { + log.info( + { + memberId, + from: currentOrg.organizationName, + to: winningAffiliation.organizationName, + at: boundaryDate.toISOString(), + }, + 'affiliation changed', + ) + closeAffiliationWindow( + memberId, + affiliations, + currentOrg, + currentWindowStart ?? boundaryDate, + dayBefore(boundaryDate), + ) + currentOrg = winningAffiliation + currentWindowStart = boundaryDate + } + } + + // Close the last open window using the org's actual end date (null = ongoing) + if (currentOrg && currentWindowStart) { + const endDate = currentOrg.dateEnd ? new Date(currentOrg.dateEnd).toISOString() : null + log.info( + { + memberId, + org: currentOrg.organizationName, + start: currentWindowStart.toISOString(), + endDate, + }, + 'closing final affiliation window', + ) + affiliations.push({ + organization: currentOrg.organizationName, + startDate: currentWindowStart.toISOString(), + endDate, + }) + } + + // Close a trailing uncovered period using the fallback org (ongoing, no end date) + if (uncoveredPeriodStart !== null && fallbackOrg) { + log.info( + { + memberId, + fallbackOrg: fallbackOrg.organizationName, + uncoveredPeriodStart: uncoveredPeriodStart.toISOString(), + }, + 'closing trailing uncovered period with fallback org', + ) + affiliations.push({ + organization: fallbackOrg.organizationName, + startDate: uncoveredPeriodStart.toISOString(), + endDate: null, + }) + } + + return affiliations +} + +// ─── Per-member resolution ──────────────────────────────────────────────────── + +function resolveAffiliationsForMember(memberId: string, rows: IWorkRow[]): IAffiliationPeriod[] { + log.debug( + { + memberId, + totalRows: rows.length, + rows: rows.map((r) => ({ + org: r.organizationName, + dateStart: r.dateStart, + dateEnd: r.dateEnd, + isPrimary: r.isPrimaryWorkExperience, + memberCount: r.memberCount, + isManual: r.segmentId !== null, + })), + }, + 'resolving affiliations', + ) + + // If one undated org is marked primary, drop all other undated orgs to avoid infinite conflicts + const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) + const cleaned = primaryUndated + ? rows.filter((r) => r.dateStart || r.id === primaryUndated.id) + : rows + + if (cleaned.length < rows.length) { + log.debug( + { + memberId, + dropped: rows.length - cleaned.length, + keptPrimaryUndated: primaryUndated?.organizationName, + }, + 'dropped undated orgs (primary undated exists)', + ) + } + + const fallbackOrg = findFallbackOrg(cleaned) + const datedRows = cleaned.filter((r) => r.dateStart) + + log.debug( + { + memberId, + datedRows: datedRows.length, + undatedRows: cleaned.length - datedRows.length, + fallbackOrg: fallbackOrg?.organizationName ?? null, + datedRowsList: datedRows.map((r) => ({ + org: r.organizationName, + dateStart: r.dateStart, + dateEnd: r.dateEnd, + })), + }, + 'prepared rows', + ) + + if (datedRows.length === 0) { + log.debug({ memberId }, 'no dated rows — returning empty affiliations') + return [] + } + + const boundaries = collectBoundaries(datedRows) + log.debug( + { + memberId, + boundaries: boundaries.length, + boundaryDates: boundaries.map((b) => b.toISOString()), + }, + 'collected boundaries', + ) + + const timeline = buildTimeline(memberId, datedRows, fallbackOrg, boundaries) + + log.debug( + { + memberId, + affiliations: timeline.length, + result: timeline.map((a) => ({ + org: a.organization, + startDate: a.startDate, + endDate: a.endDate, + })), + }, + 'timeline built', + ) + + return timeline.sort((a, b) => { + if (!a.startDate) return 1 + if (!b.startDate) return -1 + return new Date(b.startDate).getTime() - new Date(a.startDate).getTime() + }) +} + +// ─── Public bulk resolver ───────────────────────────────────────────────────── + +export async function resolveAffiliationsByMemberIds( + qx: QueryExecutor, + memberIds: string[], +): Promise> { + const [workExperiences, manualAffiliations] = await Promise.all([ + findWorkExperiencesBulk(qx, memberIds), + findManualAffiliationsBulk(qx, memberIds), + ]) + + const byMember = new Map() + for (const row of [...workExperiences, ...manualAffiliations]) { + const list = byMember.get(row.memberId) ?? [] + list.push(row) + byMember.set(row.memberId, list) + } + + const result = new Map() + for (const id of memberIds) { + result.set(id, resolveAffiliationsForMember(id, byMember.get(id) ?? [])) + } + return result +} diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts index cc0e84eacc..01d1c267bd 100644 --- a/services/libs/data-access-layer/src/devStats/index.ts +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -1,11 +1,8 @@ -import { getServiceChildLogger } from '@crowd/logging' import { MemberIdentityType, PlatformType } from '@crowd/types' import { QueryExecutor } from '../queryExecutor' -const log = getServiceChildLogger('dev-stats:affiliations') - -const BLACKLISTED_TITLES = ['investor', 'mentor', 'board member'] +// ─── Public interfaces ──────────────────────────────────────────────────────── export interface IDevStatsMemberRow { githubHandle: string @@ -13,28 +10,7 @@ export interface IDevStatsMemberRow { displayName: string | null } -export interface IDevStatsAffiliation { - organization: string - startDate: string | null - endDate: string | null -} - -interface IDevStatsWorkRow { - id: string - memberId: string - organizationId: string - organizationName: string - title: string | null - dateStart: string | null - dateEnd: string | null - createdAt: Date | string - isPrimaryWorkExperience: boolean - memberCount: number - /** null for memberOrganizations rows; non-null for memberSegmentAffiliations rows */ - segmentId: string | null -} - -// ─── Step 1: member lookup ──────────────────────────────────────────────────── +// ─── Step 1: member lookup by GitHub handle ─────────────────────────────────── export async function findMembersByGithubHandles( qx: QueryExecutor, @@ -84,479 +60,3 @@ export async function findVerifiedEmailsByMemberIds( }, ) } - -// ─── Step 3a: regular work experiences (bulk) ───────────────────────────────── - -async function findWorkExperiencesBulk( - qx: QueryExecutor, - memberIds: string[], -): Promise { - const rows: IDevStatsWorkRow[] = await qx.select( - ` - WITH aggs AS ( - SELECT - osa."organizationId", - sum(osa."memberCount") AS total_count - FROM "organizationSegmentsAgg" osa - WHERE osa."segmentId" IN ( - SELECT id FROM segments - WHERE "grandparentId" IS NOT NULL - AND "parentId" IS NOT NULL - ) - GROUP BY osa."organizationId" - ) - SELECT - mo.id, - mo."memberId", - mo."organizationId", - o."displayName" AS "organizationName", - mo.title, - mo."dateStart", - mo."dateEnd", - mo."createdAt", - COALESCE(ovr."isPrimaryWorkExperience", false) AS "isPrimaryWorkExperience", - COALESCE(a.total_count, 0) AS "memberCount", - NULL::text AS "segmentId" - FROM "memberOrganizations" mo - JOIN organizations o ON mo."organizationId" = o.id - LEFT JOIN "memberOrganizationAffiliationOverrides" ovr ON ovr."memberOrganizationId" = mo.id - LEFT JOIN aggs a ON a."organizationId" = mo."organizationId" - WHERE mo."memberId" IN ($(memberIds:csv)) - AND mo."deletedAt" IS NULL - AND COALESCE(ovr."allowAffiliation", true) = true - `, - { memberIds }, - ) - - return rows.filter( - (r) => !r.title || !BLACKLISTED_TITLES.some((t) => r.title?.toLowerCase().includes(t)), - ) -} - -// ─── Step 3b: manual affiliations (bulk) ───────────────────────────────────── - -async function findManualAffiliationsBulk( - qx: QueryExecutor, - memberIds: string[], -): Promise { - return qx.select( - ` - SELECT - msa.id, - msa."memberId", - msa."organizationId", - o."displayName" AS "organizationName", - NULL AS title, - msa."dateStart", - msa."dateEnd", - NULL::timestamptz AS "createdAt", - false AS "isPrimaryWorkExperience", - 0 AS "memberCount", - msa."segmentId" - FROM "memberSegmentAffiliations" msa - JOIN organizations o ON msa."organizationId" = o.id - WHERE msa."memberId" IN ($(memberIds:csv)) - `, - { memberIds }, - ) -} - -function longestDateRange(orgs: IDevStatsWorkRow[]): IDevStatsWorkRow { - const withDates = orgs.filter((r) => r.dateStart) - if (withDates.length === 0) return orgs[0] - - return withDates.reduce((best, curr) => { - const bestMs = - new Date(best.dateEnd ?? '9999-12-31').getTime() - new Date(best.dateStart ?? '').getTime() - const currMs = - new Date(curr.dateEnd ?? '9999-12-31').getTime() - new Date(curr.dateStart ?? '').getTime() - return currMs > bestMs ? curr : best - }) -} - -function selectPrimaryWorkExperience(orgs: IDevStatsWorkRow[]): IDevStatsWorkRow { - if (orgs.length === 1) return orgs[0] - - // 1. Manual affiliations (segmentId non-null) always win - const manual = orgs.filter((r) => r.segmentId !== null) - if (manual.length > 0) { - if (manual.length === 1) return manual[0] - return longestDateRange(manual) - } - - // 2. isPrimaryWorkExperience = true — prefer those with a dateStart - const primary = orgs.filter((r) => r.isPrimaryWorkExperience) - if (primary.length > 0) { - const withDates = primary.filter((r) => r.dateStart) - if (withDates.length > 0) return withDates[0] - return primary[0] - } - - // 3. Only one org has a dateStart — pick it - const withDates = orgs.filter((r) => r.dateStart) - if (withDates.length === 1) return withDates[0] - - // 4. Org with strictly more members wins; if tied, fall through - const sorted = [...orgs].sort((a, b) => b.memberCount - a.memberCount) - if (sorted.length >= 2 && sorted[0].memberCount > sorted[1].memberCount) { - return sorted[0] - } - - // 5. Longest date range as final tiebreaker - return longestDateRange(orgs) -} - -/** Returns the org used to fill gaps — primary undated wins, then earliest-created undated. */ -function findFallbackOrg(rows: IDevStatsWorkRow[]): IDevStatsWorkRow | null { - const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) - if (primaryUndated) return primaryUndated - - return ( - rows - .filter((r) => !r.dateStart && !r.dateEnd) - .sort((a, b) => new Date(a.createdAt).getTime() - new Date(b.createdAt).getTime()) - .at(0) ?? null - ) -} - -/** - * Collects all date boundaries from the dated rows, capped at today. - * Each dateStart and (dateEnd + 1 day) marks a point where active orgs can change. - */ -function collectBoundaries(datedRows: IDevStatsWorkRow[]): Date[] { - const today = startOfDay(new Date()) - - const ms = new Set([today.getTime()]) - - for (const row of datedRows) { - const start = startOfDay(row.dateStart ?? '') - if (start <= today) ms.add(start.getTime()) - - if (row.dateEnd) { - const afterEnd = startOfDay(row.dateEnd) - afterEnd.setDate(afterEnd.getDate() + 1) - if (afterEnd <= today) ms.add(afterEnd.getTime()) - } - } - - return Array.from(ms) - .sort((a, b) => a - b) - .map((t) => new Date(t)) -} - -function orgsActiveAt(datedRows: IDevStatsWorkRow[], boundaryDate: Date): IDevStatsWorkRow[] { - return datedRows.filter((role) => { - const roleStart = startOfDay(role.dateStart ?? '') - const roleEnd = role.dateEnd ? startOfDay(role.dateEnd) : null - - // org is active if the boundary date falls within its employment period - return boundaryDate >= roleStart && (!roleEnd || boundaryDate <= roleEnd) - }) -} - -function startOfDay(date: Date | string): Date { - const d = new Date(date) - d.setHours(0, 0, 0, 0) - return d -} - -function dayBefore(date: Date): Date { - const d = new Date(date) - d.setDate(d.getDate() - 1) - return d -} - -function closeAffiliationWindow( - memberId: string, - affiliations: IDevStatsAffiliation[], - org: IDevStatsWorkRow, - windowStart: Date, - windowEnd: Date, -): void { - log.debug( - { - memberId, - org: org.organizationName, - windowStart: windowStart.toISOString(), - windowEnd: windowEnd.toISOString(), - }, - 'closing affiliation window', - ) - affiliations.push({ - organization: org.organizationName, - startDate: windowStart.toISOString(), - endDate: windowEnd.toISOString(), - }) -} - -/** Iterates boundary intervals and builds non-overlapping affiliation windows. */ -function buildTimeline( - memberId: string, - datedRows: IDevStatsWorkRow[], - fallbackOrg: IDevStatsWorkRow | null, - boundaries: Date[], -): IDevStatsAffiliation[] { - const affiliations: IDevStatsAffiliation[] = [] - let currentOrg: IDevStatsWorkRow = null - let currentWindowStart: Date = null - let uncoveredPeriodStart: Date = null - - for (let i = 0; i < boundaries.length - 1; i++) { - const boundaryDate = boundaries[i] - const activeOrgsAtBoundary = orgsActiveAt(datedRows, boundaryDate) - - log.info( - { - memberId, - boundaryDate: boundaryDate.toISOString(), - orgsAtBoundary: activeOrgsAtBoundary.map((r) => ({ - org: r.organizationName, - dateStart: r.dateStart, - dateEnd: r.dateEnd, - isPrimary: r.isPrimaryWorkExperience, - memberCount: r.memberCount, - isManual: r.segmentId !== null, - })), - }, - 'processing boundary', - ) - - // No orgs active at this boundary — close the current window and start tracking a gap - if (activeOrgsAtBoundary.length === 0) { - if (currentOrg && currentWindowStart) { - closeAffiliationWindow( - memberId, - affiliations, - currentOrg, - currentWindowStart, - dayBefore(boundaryDate), - ) - currentOrg = null - currentWindowStart = null - } - - if (uncoveredPeriodStart === null) { - uncoveredPeriodStart = boundaryDate - log.info( - { memberId, uncoveredPeriodStart: boundaryDate.toISOString() }, - 'uncovered period started', - ) - } - - continue - } - - // Orgs are active again — close the uncovered period using the fallback org if available - if (uncoveredPeriodStart !== null) { - log.info( - { - memberId, - fallbackOrg: fallbackOrg?.organizationName ?? null, - uncoveredPeriodStart: uncoveredPeriodStart.toISOString(), - uncoveredPeriodEnd: dayBefore(boundaryDate).toISOString(), - }, - 'closing uncovered period with fallback org', - ) - - if (fallbackOrg) { - closeAffiliationWindow( - memberId, - affiliations, - fallbackOrg, - uncoveredPeriodStart, - dayBefore(boundaryDate), - ) - } - - uncoveredPeriodStart = null - } - - const winningAffiliation = selectPrimaryWorkExperience(activeOrgsAtBoundary) - - // No current window open — start a new one with the winning org - if (!currentOrg) { - log.info( - { memberId, org: winningAffiliation.organizationName, from: boundaryDate.toISOString() }, - 'opening affiliation window', - ) - currentOrg = winningAffiliation - currentWindowStart = boundaryDate - continue - } - - // Winning org changed — close the current window and open a new one - if (currentOrg.organizationId !== winningAffiliation.organizationId) { - log.info( - { - memberId, - from: currentOrg.organizationName, - to: winningAffiliation.organizationName, - at: boundaryDate.toISOString(), - }, - 'affiliation changed', - ) - closeAffiliationWindow( - memberId, - affiliations, - currentOrg, - currentWindowStart ?? boundaryDate, - dayBefore(boundaryDate), - ) - currentOrg = winningAffiliation - currentWindowStart = boundaryDate - } - } - - // Close the last open window using the org's actual end date (null = ongoing) - if (currentOrg && currentWindowStart) { - const endDate = currentOrg.dateEnd ? new Date(currentOrg.dateEnd).toISOString() : null - log.info( - { - memberId, - org: currentOrg.organizationName, - start: currentWindowStart.toISOString(), - endDate, - }, - 'closing final affiliation window', - ) - affiliations.push({ - organization: currentOrg.organizationName, - startDate: currentWindowStart.toISOString(), - endDate, - }) - } - - // Close a trailing uncovered period using the fallback org (ongoing, no end date) - if (uncoveredPeriodStart !== null && fallbackOrg) { - log.info( - { - memberId, - fallbackOrg: fallbackOrg.organizationName, - uncoveredPeriodStart: uncoveredPeriodStart.toISOString(), - }, - 'closing trailing uncovered period with fallback org', - ) - affiliations.push({ - organization: fallbackOrg.organizationName, - startDate: uncoveredPeriodStart.toISOString(), - endDate: null, - }) - } - - return affiliations -} - -function resolveAffiliationsForMember( - memberId: string, - rows: IDevStatsWorkRow[], -): IDevStatsAffiliation[] { - log.debug( - { - memberId, - totalRows: rows.length, - rows: rows.map((r) => ({ - org: r.organizationName, - dateStart: r.dateStart, - dateEnd: r.dateEnd, - isPrimary: r.isPrimaryWorkExperience, - memberCount: r.memberCount, - isManual: r.segmentId !== null, - })), - }, - 'resolving affiliations', - ) - - // If one undated org is marked primary, drop all other undated orgs to avoid infinite conflicts - const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) - const cleaned = primaryUndated - ? rows.filter((r) => r.dateStart || r.id === primaryUndated.id) - : rows - - if (cleaned.length < rows.length) { - log.debug( - { - memberId, - dropped: rows.length - cleaned.length, - keptPrimaryUndated: primaryUndated?.organizationName, - }, - 'dropped undated orgs (primary undated exists)', - ) - } - - const fallbackOrg = findFallbackOrg(cleaned) - const datedRows = cleaned.filter((r) => r.dateStart) - - log.debug( - { - memberId, - datedRows: datedRows.length, - undatedRows: cleaned.length - datedRows.length, - fallbackOrg: fallbackOrg?.organizationName ?? null, - datedRowsList: datedRows.map((r) => ({ - org: r.organizationName, - dateStart: r.dateStart, - dateEnd: r.dateEnd, - })), - }, - 'prepared rows', - ) - - if (datedRows.length === 0) { - log.debug({ memberId }, 'no dated rows — returning empty affiliations') - return [] - } - - const boundaries = collectBoundaries(datedRows) - log.debug( - { - memberId, - boundaries: boundaries.length, - boundaryDates: boundaries.map((b) => b.toISOString()), - }, - 'collected boundaries', - ) - - const timeline = buildTimeline(memberId, datedRows, fallbackOrg, boundaries) - - log.debug( - { - memberId, - affiliations: timeline.length, - result: timeline.map((a) => ({ - org: a.organization, - startDate: a.startDate, - endDate: a.endDate, - })), - }, - 'timeline built', - ) - - return timeline.sort((a, b) => { - if (!a.startDate) return 1 - if (!b.startDate) return -1 - return new Date(b.startDate).getTime() - new Date(a.startDate).getTime() - }) -} - -export async function resolveAffiliationsByMemberIds( - qx: QueryExecutor, - memberIds: string[], -): Promise> { - const [workExperiences, manualAffiliations] = await Promise.all([ - findWorkExperiencesBulk(qx, memberIds), - findManualAffiliationsBulk(qx, memberIds), - ]) - - const byMember = new Map() - for (const row of [...workExperiences, ...manualAffiliations]) { - const list = byMember.get(row.memberId) ?? [] - list.push(row) - byMember.set(row.memberId, list) - } - - const result = new Map() - for (const id of memberIds) { - result.set(id, resolveAffiliationsForMember(id, byMember.get(id) ?? [])) - } - return result -} diff --git a/services/libs/data-access-layer/src/index.ts b/services/libs/data-access-layer/src/index.ts index 459fa15495..1d092b26b6 100644 --- a/services/libs/data-access-layer/src/index.ts +++ b/services/libs/data-access-layer/src/index.ts @@ -1,4 +1,5 @@ export * from './activities' +export * from './affiliations' export * from './devStats' export * from './activityRelations' export * from './apiKeys' From 4e9bee6e0544f57b1c6e55c490d865cfff9e6bc5 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Mon, 23 Mar 2026 17:18:16 +0100 Subject: [PATCH 24/26] fix: lint Signed-off-by: Umberto Sgueglia --- services/libs/data-access-layer/src/affiliations/resolve.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/services/libs/data-access-layer/src/affiliations/resolve.ts b/services/libs/data-access-layer/src/affiliations/resolve.ts index 374e879b1e..fcf64130ee 100644 --- a/services/libs/data-access-layer/src/affiliations/resolve.ts +++ b/services/libs/data-access-layer/src/affiliations/resolve.ts @@ -1,5 +1,4 @@ import { getServiceChildLogger } from '@crowd/logging' -import { MemberIdentityType, PlatformType } from '@crowd/types' import { QueryExecutor } from '../queryExecutor' From 67381e2f1a75a7c4360e0c50a39e8e8902211089 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Mon, 23 Mar 2026 17:36:29 +0100 Subject: [PATCH 25/26] refactor: export affiliation on dal Signed-off-by: Umberto Sgueglia --- .../src/affiliations/index.ts | 510 ++++++++++++++++- .../src/affiliations/resolve.ts | 515 ------------------ .../data-access-layer/src/devStats/index.ts | 6 - 3 files changed, 509 insertions(+), 522 deletions(-) delete mode 100644 services/libs/data-access-layer/src/affiliations/resolve.ts diff --git a/services/libs/data-access-layer/src/affiliations/index.ts b/services/libs/data-access-layer/src/affiliations/index.ts index d0104e83ca..5a73d00ff5 100644 --- a/services/libs/data-access-layer/src/affiliations/index.ts +++ b/services/libs/data-access-layer/src/affiliations/index.ts @@ -1 +1,509 @@ -export * from './resolve' +import { getServiceChildLogger } from '@crowd/logging' + +import { QueryExecutor } from '../queryExecutor' + +const log = getServiceChildLogger('affiliations:resolve') + +const BLACKLISTED_TITLES = ['investor', 'mentor', 'board member'] + +export interface IAffiliationPeriod { + organization: string + startDate: string | null + endDate: string | null +} + +interface IWorkRow { + id: string + memberId: string + organizationId: string + organizationName: string + title: string | null + dateStart: string | null + dateEnd: string | null + createdAt: Date | string + isPrimaryWorkExperience: boolean + memberCount: number + /** null for memberOrganizations rows; non-null for memberSegmentAffiliations rows */ + segmentId: string | null +} + +// ─── Query: regular work experiences (bulk) ────────────────────────────────── + +export async function findWorkExperiencesBulk( + qx: QueryExecutor, + memberIds: string[], +): Promise { + const rows: IWorkRow[] = await qx.select( + ` + WITH aggs AS ( + SELECT + osa."organizationId", + sum(osa."memberCount") AS total_count + FROM "organizationSegmentsAgg" osa + WHERE osa."segmentId" IN ( + SELECT id FROM segments + WHERE "grandparentId" IS NOT NULL + AND "parentId" IS NOT NULL + ) + GROUP BY osa."organizationId" + ) + SELECT + mo.id, + mo."memberId", + mo."organizationId", + o."displayName" AS "organizationName", + mo.title, + mo."dateStart", + mo."dateEnd", + mo."createdAt", + COALESCE(ovr."isPrimaryWorkExperience", false) AS "isPrimaryWorkExperience", + COALESCE(a.total_count, 0) AS "memberCount", + NULL::text AS "segmentId" + FROM "memberOrganizations" mo + JOIN organizations o ON mo."organizationId" = o.id + LEFT JOIN "memberOrganizationAffiliationOverrides" ovr ON ovr."memberOrganizationId" = mo.id + LEFT JOIN aggs a ON a."organizationId" = mo."organizationId" + WHERE mo."memberId" IN ($(memberIds:csv)) + AND mo."deletedAt" IS NULL + AND COALESCE(ovr."allowAffiliation", true) = true + `, + { memberIds }, + ) + + return rows.filter( + (r) => !r.title || !BLACKLISTED_TITLES.some((t) => r.title?.toLowerCase().includes(t)), + ) +} + +// ─── Query: manual affiliations (bulk) ─────────────────────────────────────── + +export async function findManualAffiliationsBulk( + qx: QueryExecutor, + memberIds: string[], +): Promise { + return qx.select( + ` + SELECT + msa.id, + msa."memberId", + msa."organizationId", + o."displayName" AS "organizationName", + NULL AS title, + msa."dateStart", + msa."dateEnd", + NULL::timestamptz AS "createdAt", + false AS "isPrimaryWorkExperience", + 0 AS "memberCount", + msa."segmentId" + FROM "memberSegmentAffiliations" msa + JOIN organizations o ON msa."organizationId" = o.id + WHERE msa."memberId" IN ($(memberIds:csv)) + `, + { memberIds }, + ) +} + +// ─── Selection priority ─────────────────────────────────────────────────────── + +function longestDateRange(orgs: IWorkRow[]): IWorkRow { + const withDates = orgs.filter((r) => r.dateStart) + if (withDates.length === 0) return orgs[0] + + return withDates.reduce((best, curr) => { + const bestMs = + new Date(best.dateEnd ?? '9999-12-31').getTime() - new Date(best.dateStart ?? '').getTime() + const currMs = + new Date(curr.dateEnd ?? '9999-12-31').getTime() - new Date(curr.dateStart ?? '').getTime() + return currMs > bestMs ? curr : best + }) +} + +function selectPrimaryWorkExperience(orgs: IWorkRow[]): IWorkRow { + if (orgs.length === 1) return orgs[0] + + // 1. Manual affiliations (segmentId non-null) always win + const manual = orgs.filter((r) => r.segmentId !== null) + if (manual.length > 0) { + if (manual.length === 1) return manual[0] + return longestDateRange(manual) + } + + // 2. isPrimaryWorkExperience = true — prefer those with a dateStart + const primary = orgs.filter((r) => r.isPrimaryWorkExperience) + if (primary.length > 0) { + const withDates = primary.filter((r) => r.dateStart) + if (withDates.length > 0) return withDates[0] + return primary[0] + } + + // 3. Only one org has a dateStart — pick it + const withDates = orgs.filter((r) => r.dateStart) + if (withDates.length === 1) return withDates[0] + + // 4. Org with strictly more members wins; if tied, fall through + const sorted = [...orgs].sort((a, b) => b.memberCount - a.memberCount) + if (sorted.length >= 2 && sorted[0].memberCount > sorted[1].memberCount) { + return sorted[0] + } + + // 5. Longest date range as final tiebreaker + return longestDateRange(orgs) +} + +// ─── Timeline helpers ───────────────────────────────────────────────────────── + +/** Returns the org used to fill gaps — primary undated wins, then earliest-created undated. */ +function findFallbackOrg(rows: IWorkRow[]): IWorkRow | null { + const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) + if (primaryUndated) return primaryUndated + + return ( + rows + .filter((r) => !r.dateStart && !r.dateEnd) + .sort((a, b) => new Date(a.createdAt).getTime() - new Date(b.createdAt).getTime()) + .at(0) ?? null + ) +} + +/** + * Collects all date boundaries from the dated rows, capped at today. + * Each dateStart and (dateEnd + 1 day) marks a point where active orgs can change. + */ +function collectBoundaries(datedRows: IWorkRow[]): Date[] { + const today = startOfDay(new Date()) + + const ms = new Set([today.getTime()]) + + for (const row of datedRows) { + const start = startOfDay(row.dateStart ?? '') + if (start <= today) ms.add(start.getTime()) + + if (row.dateEnd) { + const afterEnd = startOfDay(row.dateEnd) + afterEnd.setDate(afterEnd.getDate() + 1) + if (afterEnd <= today) ms.add(afterEnd.getTime()) + } + } + + return Array.from(ms) + .sort((a, b) => a - b) + .map((t) => new Date(t)) +} + +function orgsActiveAt(datedRows: IWorkRow[], boundaryDate: Date): IWorkRow[] { + return datedRows.filter((role) => { + const roleStart = startOfDay(role.dateStart ?? '') + const roleEnd = role.dateEnd ? startOfDay(role.dateEnd) : null + + // org is active if the boundary date falls within its employment period + return boundaryDate >= roleStart && (!roleEnd || boundaryDate <= roleEnd) + }) +} + +function startOfDay(date: Date | string): Date { + const d = new Date(date) + d.setHours(0, 0, 0, 0) + return d +} + +function dayBefore(date: Date): Date { + const d = new Date(date) + d.setDate(d.getDate() - 1) + return d +} + +function closeAffiliationWindow( + memberId: string, + affiliations: IAffiliationPeriod[], + org: IWorkRow, + windowStart: Date, + windowEnd: Date, +): void { + log.debug( + { + memberId, + org: org.organizationName, + windowStart: windowStart.toISOString(), + windowEnd: windowEnd.toISOString(), + }, + 'closing affiliation window', + ) + affiliations.push({ + organization: org.organizationName, + startDate: windowStart.toISOString(), + endDate: windowEnd.toISOString(), + }) +} + +/** Iterates boundary intervals and builds non-overlapping affiliation windows. */ +function buildTimeline( + memberId: string, + datedRows: IWorkRow[], + fallbackOrg: IWorkRow | null, + boundaries: Date[], +): IAffiliationPeriod[] { + const affiliations: IAffiliationPeriod[] = [] + let currentOrg: IWorkRow = null + let currentWindowStart: Date = null + let uncoveredPeriodStart: Date = null + + for (let i = 0; i < boundaries.length - 1; i++) { + const boundaryDate = boundaries[i] + const activeOrgsAtBoundary = orgsActiveAt(datedRows, boundaryDate) + + log.info( + { + memberId, + boundaryDate: boundaryDate.toISOString(), + orgsAtBoundary: activeOrgsAtBoundary.map((r) => ({ + org: r.organizationName, + dateStart: r.dateStart, + dateEnd: r.dateEnd, + isPrimary: r.isPrimaryWorkExperience, + memberCount: r.memberCount, + isManual: r.segmentId !== null, + })), + }, + 'processing boundary', + ) + + // No orgs active at this boundary — close the current window and start tracking a gap + if (activeOrgsAtBoundary.length === 0) { + if (currentOrg && currentWindowStart) { + closeAffiliationWindow( + memberId, + affiliations, + currentOrg, + currentWindowStart, + dayBefore(boundaryDate), + ) + currentOrg = null + currentWindowStart = null + } + + if (uncoveredPeriodStart === null) { + uncoveredPeriodStart = boundaryDate + log.info( + { memberId, uncoveredPeriodStart: boundaryDate.toISOString() }, + 'uncovered period started', + ) + } + + continue + } + + // Orgs are active again — close the uncovered period using the fallback org if available + if (uncoveredPeriodStart !== null) { + log.info( + { + memberId, + fallbackOrg: fallbackOrg?.organizationName ?? null, + uncoveredPeriodStart: uncoveredPeriodStart.toISOString(), + uncoveredPeriodEnd: dayBefore(boundaryDate).toISOString(), + }, + 'closing uncovered period with fallback org', + ) + + if (fallbackOrg) { + closeAffiliationWindow( + memberId, + affiliations, + fallbackOrg, + uncoveredPeriodStart, + dayBefore(boundaryDate), + ) + } + + uncoveredPeriodStart = null + } + + const winningAffiliation = selectPrimaryWorkExperience(activeOrgsAtBoundary) + + // No current window open — start a new one with the winning org + if (!currentOrg) { + log.info( + { memberId, org: winningAffiliation.organizationName, from: boundaryDate.toISOString() }, + 'opening affiliation window', + ) + currentOrg = winningAffiliation + currentWindowStart = boundaryDate + continue + } + + // Winning org changed — close the current window and open a new one + if (currentOrg.organizationId !== winningAffiliation.organizationId) { + log.info( + { + memberId, + from: currentOrg.organizationName, + to: winningAffiliation.organizationName, + at: boundaryDate.toISOString(), + }, + 'affiliation changed', + ) + closeAffiliationWindow( + memberId, + affiliations, + currentOrg, + currentWindowStart ?? boundaryDate, + dayBefore(boundaryDate), + ) + currentOrg = winningAffiliation + currentWindowStart = boundaryDate + } + } + + // Close the last open window using the org's actual end date (null = ongoing) + if (currentOrg && currentWindowStart) { + const endDate = currentOrg.dateEnd ? new Date(currentOrg.dateEnd).toISOString() : null + log.info( + { + memberId, + org: currentOrg.organizationName, + start: currentWindowStart.toISOString(), + endDate, + }, + 'closing final affiliation window', + ) + affiliations.push({ + organization: currentOrg.organizationName, + startDate: currentWindowStart.toISOString(), + endDate, + }) + } + + // Close a trailing uncovered period using the fallback org (ongoing, no end date) + if (uncoveredPeriodStart !== null && fallbackOrg) { + log.info( + { + memberId, + fallbackOrg: fallbackOrg.organizationName, + uncoveredPeriodStart: uncoveredPeriodStart.toISOString(), + }, + 'closing trailing uncovered period with fallback org', + ) + affiliations.push({ + organization: fallbackOrg.organizationName, + startDate: uncoveredPeriodStart.toISOString(), + endDate: null, + }) + } + + return affiliations +} + +// ─── Per-member resolution ──────────────────────────────────────────────────── + +function resolveAffiliationsForMember(memberId: string, rows: IWorkRow[]): IAffiliationPeriod[] { + log.debug( + { + memberId, + totalRows: rows.length, + rows: rows.map((r) => ({ + org: r.organizationName, + dateStart: r.dateStart, + dateEnd: r.dateEnd, + isPrimary: r.isPrimaryWorkExperience, + memberCount: r.memberCount, + isManual: r.segmentId !== null, + })), + }, + 'resolving affiliations', + ) + + // If one undated org is marked primary, drop all other undated orgs to avoid infinite conflicts + const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) + const cleaned = primaryUndated + ? rows.filter((r) => r.dateStart || r.id === primaryUndated.id) + : rows + + if (cleaned.length < rows.length) { + log.debug( + { + memberId, + dropped: rows.length - cleaned.length, + keptPrimaryUndated: primaryUndated?.organizationName, + }, + 'dropped undated orgs (primary undated exists)', + ) + } + + const fallbackOrg = findFallbackOrg(cleaned) + const datedRows = cleaned.filter((r) => r.dateStart) + + log.debug( + { + memberId, + datedRows: datedRows.length, + undatedRows: cleaned.length - datedRows.length, + fallbackOrg: fallbackOrg?.organizationName ?? null, + datedRowsList: datedRows.map((r) => ({ + org: r.organizationName, + dateStart: r.dateStart, + dateEnd: r.dateEnd, + })), + }, + 'prepared rows', + ) + + if (datedRows.length === 0) { + log.debug({ memberId }, 'no dated rows — returning empty affiliations') + return [] + } + + const boundaries = collectBoundaries(datedRows) + log.debug( + { + memberId, + boundaries: boundaries.length, + boundaryDates: boundaries.map((b) => b.toISOString()), + }, + 'collected boundaries', + ) + + const timeline = buildTimeline(memberId, datedRows, fallbackOrg, boundaries) + + log.debug( + { + memberId, + affiliations: timeline.length, + result: timeline.map((a) => ({ + org: a.organization, + startDate: a.startDate, + endDate: a.endDate, + })), + }, + 'timeline built', + ) + + return timeline.sort((a, b) => { + if (!a.startDate) return 1 + if (!b.startDate) return -1 + return new Date(b.startDate).getTime() - new Date(a.startDate).getTime() + }) +} + +// ─── Public bulk resolver ───────────────────────────────────────────────────── + +export async function resolveAffiliationsByMemberIds( + qx: QueryExecutor, + memberIds: string[], +): Promise> { + const [workExperiences, manualAffiliations] = await Promise.all([ + findWorkExperiencesBulk(qx, memberIds), + findManualAffiliationsBulk(qx, memberIds), + ]) + + const byMember = new Map() + for (const row of [...workExperiences, ...manualAffiliations]) { + const list = byMember.get(row.memberId) ?? [] + list.push(row) + byMember.set(row.memberId, list) + } + + const result = new Map() + for (const id of memberIds) { + result.set(id, resolveAffiliationsForMember(id, byMember.get(id) ?? [])) + } + return result +} diff --git a/services/libs/data-access-layer/src/affiliations/resolve.ts b/services/libs/data-access-layer/src/affiliations/resolve.ts deleted file mode 100644 index fcf64130ee..0000000000 --- a/services/libs/data-access-layer/src/affiliations/resolve.ts +++ /dev/null @@ -1,515 +0,0 @@ -import { getServiceChildLogger } from '@crowd/logging' - -import { QueryExecutor } from '../queryExecutor' - -const log = getServiceChildLogger('affiliations:resolve') - -// ─── Constants ──────────────────────────────────────────────────────────────── - -const BLACKLISTED_TITLES = ['investor', 'mentor', 'board member'] - -// ─── Public interfaces ──────────────────────────────────────────────────────── - -export interface IAffiliationPeriod { - organization: string - startDate: string | null - endDate: string | null -} - -// ─── Internal row type (union of memberOrganizations + manual affiliations) ─── - -interface IWorkRow { - id: string - memberId: string - organizationId: string - organizationName: string - title: string | null - dateStart: string | null - dateEnd: string | null - createdAt: Date | string - isPrimaryWorkExperience: boolean - memberCount: number - /** null for memberOrganizations rows; non-null for memberSegmentAffiliations rows */ - segmentId: string | null -} - -// ─── Query: regular work experiences (bulk) ────────────────────────────────── - -export async function findWorkExperiencesBulk( - qx: QueryExecutor, - memberIds: string[], -): Promise { - const rows: IWorkRow[] = await qx.select( - ` - WITH aggs AS ( - SELECT - osa."organizationId", - sum(osa."memberCount") AS total_count - FROM "organizationSegmentsAgg" osa - WHERE osa."segmentId" IN ( - SELECT id FROM segments - WHERE "grandparentId" IS NOT NULL - AND "parentId" IS NOT NULL - ) - GROUP BY osa."organizationId" - ) - SELECT - mo.id, - mo."memberId", - mo."organizationId", - o."displayName" AS "organizationName", - mo.title, - mo."dateStart", - mo."dateEnd", - mo."createdAt", - COALESCE(ovr."isPrimaryWorkExperience", false) AS "isPrimaryWorkExperience", - COALESCE(a.total_count, 0) AS "memberCount", - NULL::text AS "segmentId" - FROM "memberOrganizations" mo - JOIN organizations o ON mo."organizationId" = o.id - LEFT JOIN "memberOrganizationAffiliationOverrides" ovr ON ovr."memberOrganizationId" = mo.id - LEFT JOIN aggs a ON a."organizationId" = mo."organizationId" - WHERE mo."memberId" IN ($(memberIds:csv)) - AND mo."deletedAt" IS NULL - AND COALESCE(ovr."allowAffiliation", true) = true - `, - { memberIds }, - ) - - return rows.filter( - (r) => !r.title || !BLACKLISTED_TITLES.some((t) => r.title?.toLowerCase().includes(t)), - ) -} - -// ─── Query: manual affiliations (bulk) ─────────────────────────────────────── - -export async function findManualAffiliationsBulk( - qx: QueryExecutor, - memberIds: string[], -): Promise { - return qx.select( - ` - SELECT - msa.id, - msa."memberId", - msa."organizationId", - o."displayName" AS "organizationName", - NULL AS title, - msa."dateStart", - msa."dateEnd", - NULL::timestamptz AS "createdAt", - false AS "isPrimaryWorkExperience", - 0 AS "memberCount", - msa."segmentId" - FROM "memberSegmentAffiliations" msa - JOIN organizations o ON msa."organizationId" = o.id - WHERE msa."memberId" IN ($(memberIds:csv)) - `, - { memberIds }, - ) -} - -// ─── Selection priority ─────────────────────────────────────────────────────── - -function longestDateRange(orgs: IWorkRow[]): IWorkRow { - const withDates = orgs.filter((r) => r.dateStart) - if (withDates.length === 0) return orgs[0] - - return withDates.reduce((best, curr) => { - const bestMs = - new Date(best.dateEnd ?? '9999-12-31').getTime() - new Date(best.dateStart ?? '').getTime() - const currMs = - new Date(curr.dateEnd ?? '9999-12-31').getTime() - new Date(curr.dateStart ?? '').getTime() - return currMs > bestMs ? curr : best - }) -} - -function selectPrimaryWorkExperience(orgs: IWorkRow[]): IWorkRow { - if (orgs.length === 1) return orgs[0] - - // 1. Manual affiliations (segmentId non-null) always win - const manual = orgs.filter((r) => r.segmentId !== null) - if (manual.length > 0) { - if (manual.length === 1) return manual[0] - return longestDateRange(manual) - } - - // 2. isPrimaryWorkExperience = true — prefer those with a dateStart - const primary = orgs.filter((r) => r.isPrimaryWorkExperience) - if (primary.length > 0) { - const withDates = primary.filter((r) => r.dateStart) - if (withDates.length > 0) return withDates[0] - return primary[0] - } - - // 3. Only one org has a dateStart — pick it - const withDates = orgs.filter((r) => r.dateStart) - if (withDates.length === 1) return withDates[0] - - // 4. Org with strictly more members wins; if tied, fall through - const sorted = [...orgs].sort((a, b) => b.memberCount - a.memberCount) - if (sorted.length >= 2 && sorted[0].memberCount > sorted[1].memberCount) { - return sorted[0] - } - - // 5. Longest date range as final tiebreaker - return longestDateRange(orgs) -} - -// ─── Timeline helpers ───────────────────────────────────────────────────────── - -/** Returns the org used to fill gaps — primary undated wins, then earliest-created undated. */ -function findFallbackOrg(rows: IWorkRow[]): IWorkRow | null { - const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) - if (primaryUndated) return primaryUndated - - return ( - rows - .filter((r) => !r.dateStart && !r.dateEnd) - .sort((a, b) => new Date(a.createdAt).getTime() - new Date(b.createdAt).getTime()) - .at(0) ?? null - ) -} - -/** - * Collects all date boundaries from the dated rows, capped at today. - * Each dateStart and (dateEnd + 1 day) marks a point where active orgs can change. - */ -function collectBoundaries(datedRows: IWorkRow[]): Date[] { - const today = startOfDay(new Date()) - - const ms = new Set([today.getTime()]) - - for (const row of datedRows) { - const start = startOfDay(row.dateStart ?? '') - if (start <= today) ms.add(start.getTime()) - - if (row.dateEnd) { - const afterEnd = startOfDay(row.dateEnd) - afterEnd.setDate(afterEnd.getDate() + 1) - if (afterEnd <= today) ms.add(afterEnd.getTime()) - } - } - - return Array.from(ms) - .sort((a, b) => a - b) - .map((t) => new Date(t)) -} - -function orgsActiveAt(datedRows: IWorkRow[], boundaryDate: Date): IWorkRow[] { - return datedRows.filter((role) => { - const roleStart = startOfDay(role.dateStart ?? '') - const roleEnd = role.dateEnd ? startOfDay(role.dateEnd) : null - - // org is active if the boundary date falls within its employment period - return boundaryDate >= roleStart && (!roleEnd || boundaryDate <= roleEnd) - }) -} - -function startOfDay(date: Date | string): Date { - const d = new Date(date) - d.setHours(0, 0, 0, 0) - return d -} - -function dayBefore(date: Date): Date { - const d = new Date(date) - d.setDate(d.getDate() - 1) - return d -} - -function closeAffiliationWindow( - memberId: string, - affiliations: IAffiliationPeriod[], - org: IWorkRow, - windowStart: Date, - windowEnd: Date, -): void { - log.debug( - { - memberId, - org: org.organizationName, - windowStart: windowStart.toISOString(), - windowEnd: windowEnd.toISOString(), - }, - 'closing affiliation window', - ) - affiliations.push({ - organization: org.organizationName, - startDate: windowStart.toISOString(), - endDate: windowEnd.toISOString(), - }) -} - -/** Iterates boundary intervals and builds non-overlapping affiliation windows. */ -function buildTimeline( - memberId: string, - datedRows: IWorkRow[], - fallbackOrg: IWorkRow | null, - boundaries: Date[], -): IAffiliationPeriod[] { - const affiliations: IAffiliationPeriod[] = [] - let currentOrg: IWorkRow = null - let currentWindowStart: Date = null - let uncoveredPeriodStart: Date = null - - for (let i = 0; i < boundaries.length - 1; i++) { - const boundaryDate = boundaries[i] - const activeOrgsAtBoundary = orgsActiveAt(datedRows, boundaryDate) - - log.info( - { - memberId, - boundaryDate: boundaryDate.toISOString(), - orgsAtBoundary: activeOrgsAtBoundary.map((r) => ({ - org: r.organizationName, - dateStart: r.dateStart, - dateEnd: r.dateEnd, - isPrimary: r.isPrimaryWorkExperience, - memberCount: r.memberCount, - isManual: r.segmentId !== null, - })), - }, - 'processing boundary', - ) - - // No orgs active at this boundary — close the current window and start tracking a gap - if (activeOrgsAtBoundary.length === 0) { - if (currentOrg && currentWindowStart) { - closeAffiliationWindow( - memberId, - affiliations, - currentOrg, - currentWindowStart, - dayBefore(boundaryDate), - ) - currentOrg = null - currentWindowStart = null - } - - if (uncoveredPeriodStart === null) { - uncoveredPeriodStart = boundaryDate - log.info( - { memberId, uncoveredPeriodStart: boundaryDate.toISOString() }, - 'uncovered period started', - ) - } - - continue - } - - // Orgs are active again — close the uncovered period using the fallback org if available - if (uncoveredPeriodStart !== null) { - log.info( - { - memberId, - fallbackOrg: fallbackOrg?.organizationName ?? null, - uncoveredPeriodStart: uncoveredPeriodStart.toISOString(), - uncoveredPeriodEnd: dayBefore(boundaryDate).toISOString(), - }, - 'closing uncovered period with fallback org', - ) - - if (fallbackOrg) { - closeAffiliationWindow( - memberId, - affiliations, - fallbackOrg, - uncoveredPeriodStart, - dayBefore(boundaryDate), - ) - } - - uncoveredPeriodStart = null - } - - const winningAffiliation = selectPrimaryWorkExperience(activeOrgsAtBoundary) - - // No current window open — start a new one with the winning org - if (!currentOrg) { - log.info( - { memberId, org: winningAffiliation.organizationName, from: boundaryDate.toISOString() }, - 'opening affiliation window', - ) - currentOrg = winningAffiliation - currentWindowStart = boundaryDate - continue - } - - // Winning org changed — close the current window and open a new one - if (currentOrg.organizationId !== winningAffiliation.organizationId) { - log.info( - { - memberId, - from: currentOrg.organizationName, - to: winningAffiliation.organizationName, - at: boundaryDate.toISOString(), - }, - 'affiliation changed', - ) - closeAffiliationWindow( - memberId, - affiliations, - currentOrg, - currentWindowStart ?? boundaryDate, - dayBefore(boundaryDate), - ) - currentOrg = winningAffiliation - currentWindowStart = boundaryDate - } - } - - // Close the last open window using the org's actual end date (null = ongoing) - if (currentOrg && currentWindowStart) { - const endDate = currentOrg.dateEnd ? new Date(currentOrg.dateEnd).toISOString() : null - log.info( - { - memberId, - org: currentOrg.organizationName, - start: currentWindowStart.toISOString(), - endDate, - }, - 'closing final affiliation window', - ) - affiliations.push({ - organization: currentOrg.organizationName, - startDate: currentWindowStart.toISOString(), - endDate, - }) - } - - // Close a trailing uncovered period using the fallback org (ongoing, no end date) - if (uncoveredPeriodStart !== null && fallbackOrg) { - log.info( - { - memberId, - fallbackOrg: fallbackOrg.organizationName, - uncoveredPeriodStart: uncoveredPeriodStart.toISOString(), - }, - 'closing trailing uncovered period with fallback org', - ) - affiliations.push({ - organization: fallbackOrg.organizationName, - startDate: uncoveredPeriodStart.toISOString(), - endDate: null, - }) - } - - return affiliations -} - -// ─── Per-member resolution ──────────────────────────────────────────────────── - -function resolveAffiliationsForMember(memberId: string, rows: IWorkRow[]): IAffiliationPeriod[] { - log.debug( - { - memberId, - totalRows: rows.length, - rows: rows.map((r) => ({ - org: r.organizationName, - dateStart: r.dateStart, - dateEnd: r.dateEnd, - isPrimary: r.isPrimaryWorkExperience, - memberCount: r.memberCount, - isManual: r.segmentId !== null, - })), - }, - 'resolving affiliations', - ) - - // If one undated org is marked primary, drop all other undated orgs to avoid infinite conflicts - const primaryUndated = rows.find((r) => r.isPrimaryWorkExperience && !r.dateStart && !r.dateEnd) - const cleaned = primaryUndated - ? rows.filter((r) => r.dateStart || r.id === primaryUndated.id) - : rows - - if (cleaned.length < rows.length) { - log.debug( - { - memberId, - dropped: rows.length - cleaned.length, - keptPrimaryUndated: primaryUndated?.organizationName, - }, - 'dropped undated orgs (primary undated exists)', - ) - } - - const fallbackOrg = findFallbackOrg(cleaned) - const datedRows = cleaned.filter((r) => r.dateStart) - - log.debug( - { - memberId, - datedRows: datedRows.length, - undatedRows: cleaned.length - datedRows.length, - fallbackOrg: fallbackOrg?.organizationName ?? null, - datedRowsList: datedRows.map((r) => ({ - org: r.organizationName, - dateStart: r.dateStart, - dateEnd: r.dateEnd, - })), - }, - 'prepared rows', - ) - - if (datedRows.length === 0) { - log.debug({ memberId }, 'no dated rows — returning empty affiliations') - return [] - } - - const boundaries = collectBoundaries(datedRows) - log.debug( - { - memberId, - boundaries: boundaries.length, - boundaryDates: boundaries.map((b) => b.toISOString()), - }, - 'collected boundaries', - ) - - const timeline = buildTimeline(memberId, datedRows, fallbackOrg, boundaries) - - log.debug( - { - memberId, - affiliations: timeline.length, - result: timeline.map((a) => ({ - org: a.organization, - startDate: a.startDate, - endDate: a.endDate, - })), - }, - 'timeline built', - ) - - return timeline.sort((a, b) => { - if (!a.startDate) return 1 - if (!b.startDate) return -1 - return new Date(b.startDate).getTime() - new Date(a.startDate).getTime() - }) -} - -// ─── Public bulk resolver ───────────────────────────────────────────────────── - -export async function resolveAffiliationsByMemberIds( - qx: QueryExecutor, - memberIds: string[], -): Promise> { - const [workExperiences, manualAffiliations] = await Promise.all([ - findWorkExperiencesBulk(qx, memberIds), - findManualAffiliationsBulk(qx, memberIds), - ]) - - const byMember = new Map() - for (const row of [...workExperiences, ...manualAffiliations]) { - const list = byMember.get(row.memberId) ?? [] - list.push(row) - byMember.set(row.memberId, list) - } - - const result = new Map() - for (const id of memberIds) { - result.set(id, resolveAffiliationsForMember(id, byMember.get(id) ?? [])) - } - return result -} diff --git a/services/libs/data-access-layer/src/devStats/index.ts b/services/libs/data-access-layer/src/devStats/index.ts index 01d1c267bd..166d1b9992 100644 --- a/services/libs/data-access-layer/src/devStats/index.ts +++ b/services/libs/data-access-layer/src/devStats/index.ts @@ -2,16 +2,12 @@ import { MemberIdentityType, PlatformType } from '@crowd/types' import { QueryExecutor } from '../queryExecutor' -// ─── Public interfaces ──────────────────────────────────────────────────────── - export interface IDevStatsMemberRow { githubHandle: string memberId: string displayName: string | null } -// ─── Step 1: member lookup by GitHub handle ─────────────────────────────────── - export async function findMembersByGithubHandles( qx: QueryExecutor, lowercasedHandles: string[], @@ -39,8 +35,6 @@ export async function findMembersByGithubHandles( ) } -// ─── Step 2: verified emails ────────────────────────────────────────────────── - export async function findVerifiedEmailsByMemberIds( qx: QueryExecutor, memberIds: string[], From 9970c769917d724a2aa61e5fd5acca8c0dd8fc80 Mon Sep 17 00:00:00 2001 From: Umberto Sgueglia Date: Mon, 23 Mar 2026 17:47:29 +0100 Subject: [PATCH 26/26] refactor: simplify longestDateRange Signed-off-by: Umberto Sgueglia --- .../src/affiliations/index.ts | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/services/libs/data-access-layer/src/affiliations/index.ts b/services/libs/data-access-layer/src/affiliations/index.ts index 5a73d00ff5..3eaaf111df 100644 --- a/services/libs/data-access-layer/src/affiliations/index.ts +++ b/services/libs/data-access-layer/src/affiliations/index.ts @@ -27,8 +27,6 @@ interface IWorkRow { segmentId: string | null } -// ─── Query: regular work experiences (bulk) ────────────────────────────────── - export async function findWorkExperiencesBulk( qx: QueryExecutor, memberIds: string[], @@ -75,8 +73,6 @@ export async function findWorkExperiencesBulk( ) } -// ─── Query: manual affiliations (bulk) ─────────────────────────────────────── - export async function findManualAffiliationsBulk( qx: QueryExecutor, memberIds: string[], @@ -105,17 +101,23 @@ export async function findManualAffiliationsBulk( // ─── Selection priority ─────────────────────────────────────────────────────── +function durationMs(org: IWorkRow): number { + const start = new Date(org.dateStart ?? '').getTime() + const end = new Date(org.dateEnd ?? '9999-12-31').getTime() + return end - start +} + function longestDateRange(orgs: IWorkRow[]): IWorkRow { const withDates = orgs.filter((r) => r.dateStart) - if (withDates.length === 0) return orgs[0] - - return withDates.reduce((best, curr) => { - const bestMs = - new Date(best.dateEnd ?? '9999-12-31').getTime() - new Date(best.dateStart ?? '').getTime() - const currMs = - new Date(curr.dateEnd ?? '9999-12-31').getTime() - new Date(curr.dateStart ?? '').getTime() - return currMs > bestMs ? curr : best - }) + const candidates = withDates.length > 0 ? withDates : orgs + + let best = candidates[0] + + for (const org of candidates) { + if (durationMs(org) > durationMs(best)) best = org + } + + return best } function selectPrimaryWorkExperience(orgs: IWorkRow[]): IWorkRow {