From 86ca717faf9533f5236126da595a1098a5c6a46d Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 18:48:17 +0900 Subject: [PATCH 01/16] Add OTel metrics for key lookup and document fetch Introduce five new OpenTelemetry instruments in FederationMetrics, with bounded attribute taxonomies so operators get aggregate visibility into public-key lookups and remote JSON-LD document fetches without exploding metric cardinality: - activitypub.key.lookup (counter) - activitypub.key.lookup.duration (histogram) - activitypub.document.fetch (counter) - activitypub.document.fetch.duration (histogram) - activitypub.document.cache (counter) The taxonomy is encoded as the LookupKind and LookupResult union types, together with the narrower DocumentFetchKind / DocumentFetchResult / KeyLookupResult aliases so the type system rejects impossible combinations (e.g. recordDocumentFetch with kind public_key, or a key lookup ending in cache miss). recordKeyLookup, recordDocumentFetch, and recordDocumentCache take a remoteUrl: URL rather than a host string and derive activitypub.remote.host through the existing getRemoteHost() helper, so callers cannot accidentally smuggle key IDs, actor URLs, or full URLs into a metric attribute. classifyFetchError centralises the result classification, mapping FetchError responses to not_found (404 / 410) or error with their status code and treating FetchError without a Response, bare TypeError (the shape native fetch() raises on DNS / connect / TLS failures), and AbortError as network_error. Counter and histogram are always emitted together on document fetches so aggregate rate and latency views stay in sync. Only the instrument and helper plumbing lands in this commit; call-site wiring in fetchKey, the document loader, kvCache, and the Federation middleware follows in subsequent commits. https://github.com/fedify-dev/fedify/issues/316 https://github.com/fedify-dev/fedify/issues/738 Assisted-by: Claude Code:claude-opus-4-7 Assisted-by: Codex:gpt-5.5 --- .../fedify/src/federation/metrics.test.ts | 223 ++++++++++++ packages/fedify/src/federation/metrics.ts | 337 ++++++++++++++++++ 2 files changed, 560 insertions(+) diff --git a/packages/fedify/src/federation/metrics.test.ts b/packages/fedify/src/federation/metrics.test.ts index 61df566ca..c8c222e27 100644 --- a/packages/fedify/src/federation/metrics.test.ts +++ b/packages/fedify/src/federation/metrics.test.ts @@ -1,9 +1,14 @@ import { createTestMeterProvider, test } from "@fedify/fixture"; import { assertEquals } from "@std/assert"; +import { FetchError } from "@fedify/vocab-runtime"; import type { MessageQueue } from "./mq.ts"; import { + classifyFetchError, + recordDocumentCache, + recordDocumentFetch, recordFanoutRecipients, recordInboxActivity, + recordKeyLookup, recordOutboxActivity, recordOutboxEnqueue, } from "./metrics.ts"; @@ -153,3 +158,221 @@ test("recordOutboxActivity() records counter with result and activity type", () ["queued", "retried", "abandoned"], ); }); + +test("recordKeyLookup() records counter and duration with all attributes", () => { + const [meterProvider, recorder] = createTestMeterProvider(); + recordKeyLookup(meterProvider, { + durationMs: 42, + result: "fetched", + remoteUrl: new URL("https://example.com/users/alice#main-key"), + cacheEnabled: true, + statusCode: 200, + }); + + const counters = recorder.getMeasurements("activitypub.key.lookup"); + assertEquals(counters.length, 1); + assertEquals(counters[0].type, "counter"); + assertEquals(counters[0].value, 1); + assertEquals( + counters[0].attributes["activitypub.lookup.kind"], + "public_key", + ); + assertEquals( + counters[0].attributes["activitypub.lookup.result"], + "fetched", + ); + assertEquals( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + assertEquals(counters[0].attributes["activitypub.cache.enabled"], true); + assertEquals(counters[0].attributes["http.response.status_code"], 200); + + const durations = recorder.getMeasurements( + "activitypub.key.lookup.duration", + ); + assertEquals(durations.length, 1); + assertEquals(durations[0].type, "histogram"); + assertEquals(durations[0].value, 42); + assertEquals( + durations[0].attributes["activitypub.lookup.kind"], + "public_key", + ); + assertEquals( + durations[0].attributes["activitypub.lookup.result"], + "fetched", + ); + assertEquals( + durations[0].attributes["activitypub.remote.host"], + "example.com", + ); + assertEquals(durations[0].attributes["activitypub.cache.enabled"], true); + assertEquals(durations[0].attributes["http.response.status_code"], 200); +}); + +test("recordKeyLookup() omits optional attributes when not provided", () => { + const [meterProvider, recorder] = createTestMeterProvider(); + recordKeyLookup(meterProvider, { + durationMs: 0, + result: "error", + cacheEnabled: false, + }); + + const counter = recorder.getMeasurement("activitypub.key.lookup"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "error"); + assertEquals(counter?.attributes["activitypub.cache.enabled"], false); + assertEquals("activitypub.remote.host" in counter!.attributes, false); + assertEquals("http.response.status_code" in counter!.attributes, false); + + const duration = recorder.getMeasurement("activitypub.key.lookup.duration"); + assertEquals("activitypub.remote.host" in duration!.attributes, false); + assertEquals("http.response.status_code" in duration!.attributes, false); +}); + +test("recordDocumentFetch() records counter and duration with all attributes", () => { + const [meterProvider, recorder] = createTestMeterProvider(); + recordDocumentFetch(meterProvider, { + durationMs: 123, + kind: "object", + result: "not_found", + remoteUrl: new URL("https://remote.test/objects/123"), + cacheEnabled: true, + statusCode: 404, + }); + + const counter = recorder.getMeasurement("activitypub.document.fetch"); + assertEquals(counter?.type, "counter"); + assertEquals(counter?.value, 1); + assertEquals(counter?.attributes["activitypub.lookup.kind"], "object"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "not_found"); + assertEquals(counter?.attributes["activitypub.remote.host"], "remote.test"); + assertEquals(counter?.attributes["activitypub.cache.enabled"], true); + assertEquals(counter?.attributes["http.response.status_code"], 404); + + const duration = recorder.getMeasurement( + "activitypub.document.fetch.duration", + ); + assertEquals(duration?.type, "histogram"); + assertEquals(duration?.value, 123); + assertEquals(duration?.attributes["activitypub.lookup.kind"], "object"); + assertEquals(duration?.attributes["activitypub.lookup.result"], "not_found"); +}); + +test("recordDocumentFetch() omits optional attributes when not provided", () => { + const [meterProvider, recorder] = createTestMeterProvider(); + recordDocumentFetch(meterProvider, { + durationMs: 5, + kind: "context", + result: "fetched", + }); + + const counter = recorder.getMeasurement("activitypub.document.fetch"); + assertEquals(counter?.attributes["activitypub.lookup.kind"], "context"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "fetched"); + assertEquals("activitypub.remote.host" in counter!.attributes, false); + assertEquals("activitypub.cache.enabled" in counter!.attributes, false); + assertEquals("http.response.status_code" in counter!.attributes, false); + + const duration = recorder.getMeasurement( + "activitypub.document.fetch.duration", + ); + assertEquals(duration?.value, 5); +}); + +test("recordDocumentCache() records hit and miss as a counter", () => { + const [meterProvider, recorder] = createTestMeterProvider(); + recordDocumentCache(meterProvider, { + kind: "object", + result: "hit", + remoteUrl: new URL("https://remote.test/objects/1"), + }); + recordDocumentCache(meterProvider, { + kind: "context", + result: "miss", + remoteUrl: new URL("https://w3id.org/security/v1"), + }); + + const measurements = recorder.getMeasurements("activitypub.document.cache"); + assertEquals(measurements.length, 2); + for (const m of measurements) { + assertEquals(m.type, "counter"); + assertEquals(m.value, 1); + } + assertEquals(measurements[0].attributes["activitypub.lookup.kind"], "object"); + assertEquals(measurements[0].attributes["activitypub.lookup.result"], "hit"); + assertEquals( + measurements[0].attributes["activitypub.remote.host"], + "remote.test", + ); + assertEquals( + measurements[1].attributes["activitypub.lookup.kind"], + "context", + ); + assertEquals(measurements[1].attributes["activitypub.lookup.result"], "miss"); + assertEquals( + measurements[1].attributes["activitypub.remote.host"], + "w3id.org", + ); +}); + +test("classifyFetchError() classifies FetchError with 404 as not_found", () => { + const response = new Response("", { status: 404 }); + const error = new FetchError( + "https://example.com/k", + "not found", + response, + ); + assertEquals(classifyFetchError(error), { + result: "not_found", + statusCode: 404, + }); +}); + +test("classifyFetchError() classifies FetchError with 410 as not_found", () => { + const response = new Response("", { status: 410 }); + const error = new FetchError( + "https://example.com/k", + "gone", + response, + ); + assertEquals(classifyFetchError(error), { + result: "not_found", + statusCode: 410, + }); +}); + +test("classifyFetchError() classifies FetchError with 500 as error", () => { + const response = new Response("", { status: 500 }); + const error = new FetchError( + "https://example.com/k", + "server error", + response, + ); + assertEquals(classifyFetchError(error), { + result: "error", + statusCode: 500, + }); +}); + +test("classifyFetchError() classifies FetchError without response as network_error", () => { + const error = new FetchError("https://example.com/k", "boom"); + assertEquals(classifyFetchError(error), { result: "network_error" }); +}); + +test("classifyFetchError() classifies a bare TypeError as network_error", () => { + assertEquals(classifyFetchError(new TypeError("connect failed")), { + result: "network_error", + }); +}); + +test("classifyFetchError() classifies an AbortError as network_error", () => { + const abort = new Error("aborted"); + abort.name = "AbortError"; + assertEquals(classifyFetchError(abort), { result: "network_error" }); +}); + +test("classifyFetchError() classifies any other thrown value as error", () => { + assertEquals(classifyFetchError(new Error("nope")), { result: "error" }); + assertEquals(classifyFetchError("string error"), { result: "error" }); + assertEquals(classifyFetchError(undefined), { result: "error" }); +}); diff --git a/packages/fedify/src/federation/metrics.ts b/packages/fedify/src/federation/metrics.ts index 49a0ceddb..044ec67c8 100644 --- a/packages/fedify/src/federation/metrics.ts +++ b/packages/fedify/src/federation/metrics.ts @@ -1,3 +1,4 @@ +import { FetchError } from "@fedify/vocab-runtime"; import { type Attributes, type Counter, @@ -186,6 +187,134 @@ export type LinkedDataSignatureMetricType = "RsaSignature2017"; */ export type ObjectIntegrityProofMetricCryptosuite = "eddsa-jcs-2022"; +/** + * The kind of remote ActivityPub lookup, recorded as + * `activitypub.lookup.kind` on the public-key lookup and remote document + * fetch metric families. + * + * - `public_key`: a public key lookup performed by `fetchKey` / + * `fetchKeyDetailed` (always recorded on `activitypub.key.lookup*`). + * - `actor`: a document fetch whose resolved value is an Actor. The + * bucket exists in the taxonomy for future actor-aware call sites; + * today, actor documents fetched through Fedify's generic document + * loader are still classified as `object` because the kind is decided + * at the loader boundary, before the response is parsed. + * - `object`: a generic ActivityPub object fetch through Fedify's + * document loader. This is the default classification for + * `documentLoader` invocations that do not match a more specific + * bucket. + * - `context`: a JSON-LD `@context` document fetch through Fedify's + * context loader. + * - `other`: a fetch that does not fit any of the above classifications. + * @since 2.3.0 + */ +export type LookupKind = + | "public_key" + | "actor" + | "object" + | "context" + | "other"; + +/** + * The terminal classification of a public-key lookup or remote document + * fetch, recorded as `activitypub.lookup.result` on the lookup metric + * families. + * + * - `hit`: served from a cache without going to the network. + * - `miss`: a cache was consulted and returned no entry; only used on + * `activitypub.document.cache`. + * - `fetched`: the remote document or key was loaded successfully. + * - `not_found`: the remote responded with HTTP `404 Not Found` or + * `410 Gone`, or otherwise reported the resource is absent. + * - `invalid`: the remote responded with content Fedify could not parse + * into the expected shape. + * - `network_error`: no HTTP response was received (DNS failure, connect + * timeout, abort, redirect loop, etc.). + * - `error`: any other unexpected failure. + * @since 2.3.0 + */ +export type LookupResult = + | "hit" + | "miss" + | "fetched" + | "not_found" + | "invalid" + | "network_error" + | "error"; + +/** + * The {@link LookupKind} values that can appear on remote document fetch + * metrics. `public_key` lookups are reported on the + * `activitypub.key.lookup` metric family instead, so it is excluded here. + * @since 2.3.0 + */ +export type DocumentFetchKind = Exclude; + +/** + * The {@link LookupResult} values that can appear on the + * `activitypub.document.fetch` and `activitypub.document.fetch.duration` + * metrics. Cache `hit` / `miss` outcomes are reported on + * `activitypub.document.cache`, so they are excluded here. + * @since 2.3.0 + */ +export type DocumentFetchResult = Exclude; + +/** + * The {@link LookupResult} values that can appear on the + * `activitypub.key.lookup` and `activitypub.key.lookup.duration` metrics. + * `miss` is a cache-internal classification that surfaces on + * `activitypub.document.cache` only and is not a terminal key lookup + * outcome, so it is excluded here. + * @since 2.3.0 + */ +export type KeyLookupResult = Exclude; + +/** + * Attributes accepted by {@link recordKeyLookup}. `remoteUrl` is taken as + * a `URL` so that the helper can derive the hostname-only + * `activitypub.remote.host` attribute internally and refuse to record + * high-cardinality values such as full key IDs or actor URLs. + * @since 2.3.0 + */ +export interface KeyLookupAttributes { + /** The terminal lookup result. */ + result: KeyLookupResult; + /** Elapsed lookup duration in milliseconds. */ + durationMs: number; + /** URL of the key, used to derive `activitypub.remote.host`. */ + remoteUrl?: URL; + /** Whether the lookup path had a `KeyCache` configured. */ + cacheEnabled: boolean; + /** The HTTP response status code, when an HTTP response was received. */ + statusCode?: number; +} + +/** + * Attributes accepted by {@link recordDocumentFetch}. + * @since 2.3.0 + */ +export interface DocumentFetchAttributes { + kind: DocumentFetchKind; + result: DocumentFetchResult; + /** URL of the fetched document, used to derive `activitypub.remote.host`. */ + remoteUrl?: URL; + /** Elapsed fetch duration in milliseconds. */ + durationMs: number; + cacheEnabled?: boolean; + statusCode?: number; +} + +/** + * Attributes accepted by {@link recordDocumentCache}. + * @since 2.3.0 + */ +export interface DocumentCacheAttributes { + kind: DocumentFetchKind; + result: "hit" | "miss"; + /** URL of the looked-up document, used to derive `activitypub.remote.host`. */ + remoteUrl?: URL; +} + /** * Optional attributes recorded alongside an * `activitypub.signature.verification.duration` measurement. Each field is @@ -228,6 +357,11 @@ class FederationMetrics { readonly fanoutRecipients: Histogram; readonly inboxActivity: Counter; readonly outboxActivity: Counter; + readonly keyLookup: Counter; + readonly keyLookupDuration: Histogram; + readonly documentFetch: Counter; + readonly documentFetchDuration: Histogram; + readonly documentCache: Counter; constructor(meterProvider: MeterProvider) { const meter = meterProvider.getMeter(metadata.name, metadata.version); @@ -392,6 +526,81 @@ class FederationMetrics { "live on `activitypub.delivery.*`.", unit: "{activity}", }); + this.keyLookup = meter.createCounter("activitypub.key.lookup", { + description: + "Public-key lookup attempts performed by Fedify, including both " + + "cache hits and remote fetches.", + unit: "{lookup}", + }); + this.keyLookupDuration = meter.createHistogram( + "activitypub.key.lookup.duration", + { + description: + "Duration of public-key lookups performed by Fedify, including " + + "any remote fetch.", + unit: "ms", + advice: { + // Reuse the OpenTelemetry HTTP server semantic-conventions buckets + // since key lookups span the same 5 ms to 10 s range that other + // network-bound histograms in this package already use. + explicitBucketBoundaries: [ + 5, + 10, + 25, + 50, + 75, + 100, + 250, + 500, + 750, + 1000, + 2500, + 5000, + 7500, + 10000, + ], + }, + }, + ); + this.documentFetch = meter.createCounter("activitypub.document.fetch", { + description: + "Remote JSON-LD document loader invocations made by Fedify-wrapped " + + "loaders.", + unit: "{fetch}", + }); + this.documentFetchDuration = meter.createHistogram( + "activitypub.document.fetch.duration", + { + description: + "Duration of remote JSON-LD document loader invocations made by " + + "Fedify-wrapped loaders.", + unit: "ms", + advice: { + explicitBucketBoundaries: [ + 5, + 10, + 25, + 50, + 75, + 100, + 250, + 500, + 750, + 1000, + 2500, + 5000, + 7500, + 10000, + ], + }, + }, + ); + this.documentCache = meter.createCounter("activitypub.document.cache", { + description: + "KV-backed document loader cache lookups, with `hit` or `miss` " + + "classification.", + unit: "{lookup}", + }); } recordDelivery( @@ -559,6 +768,51 @@ class FederationMetrics { buildActivityLifecycleAttributes(result, activityType), ); } + + recordKeyLookup(attrs: KeyLookupAttributes): void { + const attributes: Attributes = { + "activitypub.lookup.kind": "public_key", + "activitypub.lookup.result": attrs.result, + "activitypub.cache.enabled": attrs.cacheEnabled, + }; + if (attrs.remoteUrl != null) { + attributes["activitypub.remote.host"] = getRemoteHost(attrs.remoteUrl); + } + if (attrs.statusCode != null) { + attributes["http.response.status_code"] = attrs.statusCode; + } + this.keyLookup.add(1, attributes); + this.keyLookupDuration.record(attrs.durationMs, attributes); + } + + recordDocumentFetch(attrs: DocumentFetchAttributes): void { + const attributes: Attributes = { + "activitypub.lookup.kind": attrs.kind, + "activitypub.lookup.result": attrs.result, + }; + if (attrs.remoteUrl != null) { + attributes["activitypub.remote.host"] = getRemoteHost(attrs.remoteUrl); + } + if (attrs.cacheEnabled != null) { + attributes["activitypub.cache.enabled"] = attrs.cacheEnabled; + } + if (attrs.statusCode != null) { + attributes["http.response.status_code"] = attrs.statusCode; + } + this.documentFetch.add(1, attributes); + this.documentFetchDuration.record(attrs.durationMs, attributes); + } + + recordDocumentCache(attrs: DocumentCacheAttributes): void { + const attributes: Attributes = { + "activitypub.lookup.kind": attrs.kind, + "activitypub.lookup.result": attrs.result, + }; + if (attrs.remoteUrl != null) { + attributes["activitypub.remote.host"] = getRemoteHost(attrs.remoteUrl); + } + this.documentCache.add(1, attributes); + } } function buildActivityLifecycleAttributes( @@ -701,6 +955,89 @@ export function recordOutboxActivity( ); } +/** + * Records one measurement on `activitypub.key.lookup` (counter) and + * `activitypub.key.lookup.duration` (histogram) for a public-key lookup. + * + * `activitypub.lookup.kind` is always recorded as `public_key`; the result + * classification, remote host, HTTP status code (when an HTTP response was + * received), and `activitypub.cache.enabled` are recorded as attributes on + * both measurements. Full key URLs and key IDs are deliberately omitted to + * keep cardinality bounded. + * @since 2.3.0 + */ +export function recordKeyLookup( + meterProvider: MeterProvider | undefined, + attrs: KeyLookupAttributes, +): void { + getFederationMetrics(meterProvider).recordKeyLookup(attrs); +} + +/** + * Records one measurement each on `activitypub.document.fetch` (counter) + * and `activitypub.document.fetch.duration` (histogram) for one remote + * JSON-LD document loader invocation, with bounded + * `activitypub.lookup.kind` and `activitypub.lookup.result` attributes + * plus the optional remote-host, cache-enabled, and HTTP status-code + * attributes. Counter and histogram are always recorded together so + * aggregate rate and latency views stay in sync. + * @since 2.3.0 + */ +export function recordDocumentFetch( + meterProvider: MeterProvider | undefined, + attrs: DocumentFetchAttributes, +): void { + getFederationMetrics(meterProvider).recordDocumentFetch(attrs); +} + +/** + * Records one `activitypub.document.cache` measurement, classifying the + * lookup as `hit` (the cache returned an entry) or `miss` (the cache was + * consulted and returned nothing, prompting a delegate fetch). + * @since 2.3.0 + */ +export function recordDocumentCache( + meterProvider: MeterProvider | undefined, + attrs: DocumentCacheAttributes, +): void { + getFederationMetrics(meterProvider).recordDocumentCache(attrs); +} + +/** + * Classifies a thrown value from a key or document fetch into the bounded + * {@link LookupResult} taxonomy and, when an HTTP response was received, + * surfaces its status code. + * + * - `FetchError` with a `Response` whose status is `404` or `410`: + * `result=not_found` and the response status code. + * - `FetchError` with any other `Response`: `result=error` and the + * response status code. + * - `FetchError` without a `Response`: `result=network_error`. + * - An `AbortError` (typically from a cancelled fetch): `result=network_error`. + * - A bare `TypeError` (the shape native `fetch()` raises on DNS, connect, + * and TLS failures before any response is observed): + * `result=network_error`. + * - Any other value: `result=error`. + * @since 2.3.0 + */ +export function classifyFetchError( + error: unknown, +): { result: LookupResult; statusCode?: number } { + if (error instanceof FetchError) { + if (error.response != null) { + const status = error.response.status; + const result: LookupResult = status === 404 || status === 410 + ? "not_found" + : "error"; + return { result, statusCode: status }; + } + return { result: "network_error" }; + } + if (isAbortError(error)) return { result: "network_error" }; + if (error instanceof TypeError) return { result: "network_error" }; + return { result: "error" }; +} + /** * Times an awaited public key fetch and records exactly one * `activitypub.signature.key_fetch.duration` measurement, classifying the From f5895200778c3c38ffa4f1227fa1844c7ecfdcf3 Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 18:55:46 +0900 Subject: [PATCH 02/16] Record activitypub.document.cache hit/miss in kvCache Extend `kvCache()` with optional `meterProvider` and `kind` parameters so it can emit one `activitypub.document.cache` measurement each time it consults the KV store: `result=miss` when it falls through to the inner loader and `result=hit` when the KV store returns a cached `RemoteDocument`. The metric is recorded through `recordDocumentCache()` from Step 1, which derives `activitypub.remote.host` from a `URL` and keeps the bounded `activitypub.lookup.kind` / `activitypub.lookup.result` attribute taxonomy. `kind` defaults to `"object"` so the generic document-loader case does not need a new option, and context loaders opt in by passing `kind: "context"`. Preloaded-context short-circuits and rule-less calls do not consult the KV cache and therefore do not emit a measurement. The next commit wires this option through the Federation middleware's document and context loader factories. https://github.com/fedify-dev/fedify/issues/316 https://github.com/fedify-dev/fedify/issues/738 Assisted-by: Claude Code:claude-opus-4-7 Assisted-by: Codex:gpt-5.5 --- packages/fedify/src/utils/kv-cache.test.ts | 122 ++++++++++++++++++++- packages/fedify/src/utils/kv-cache.ts | 48 +++++++- 2 files changed, 168 insertions(+), 2 deletions(-) diff --git a/packages/fedify/src/utils/kv-cache.test.ts b/packages/fedify/src/utils/kv-cache.test.ts index 8b9090ff8..4642aa9c2 100644 --- a/packages/fedify/src/utils/kv-cache.test.ts +++ b/packages/fedify/src/utils/kv-cache.test.ts @@ -1,4 +1,8 @@ -import { mockDocumentLoader, test } from "@fedify/fixture"; +import { + createTestMeterProvider, + mockDocumentLoader, + test, +} from "@fedify/fixture"; import type { DocumentLoader } from "@fedify/vocab-runtime"; import { preloadedContexts } from "@fedify/vocab-runtime"; import { deepStrictEqual, throws } from "node:assert"; @@ -190,6 +194,122 @@ test("kvCache()", async (t) => { }); }); + await t.step( + "records activitypub.document.cache with miss then hit", + async () => { + const kv = new MockKvStore(); + const [meterProvider, recorder] = createTestMeterProvider(); + const loader = kvCache({ + kv, + loader: mockDocumentLoader, + rules: [["https://example.com/object", { days: 1 }]], + prefix: ["_test", "doc-cache"], + meterProvider, + kind: "object", + }); + + await loader("https://example.com/object"); + const afterMiss = recorder.getMeasurements("activitypub.document.cache"); + deepStrictEqual(afterMiss.length, 1); + deepStrictEqual(afterMiss[0].type, "counter"); + deepStrictEqual(afterMiss[0].value, 1); + deepStrictEqual( + afterMiss[0].attributes["activitypub.lookup.kind"], + "object", + ); + deepStrictEqual( + afterMiss[0].attributes["activitypub.lookup.result"], + "miss", + ); + deepStrictEqual( + afterMiss[0].attributes["activitypub.remote.host"], + "example.com", + ); + + await loader("https://example.com/object"); + const all = recorder.getMeasurements("activitypub.document.cache"); + deepStrictEqual(all.length, 2); + deepStrictEqual(all[1].attributes["activitypub.lookup.result"], "hit"); + deepStrictEqual( + all[1].attributes["activitypub.lookup.kind"], + "object", + ); + deepStrictEqual( + all[1].attributes["activitypub.remote.host"], + "example.com", + ); + }, + ); + + await t.step( + "preloaded contexts emit no activitypub.document.cache", + async () => { + const kv = new MockKvStore(); + const [meterProvider, recorder] = createTestMeterProvider(); + const loader = kvCache({ + kv, + loader: mockDocumentLoader, + prefix: ["_test", "doc-cache-preloaded"], + meterProvider, + kind: "context", + }); + + await loader("https://www.w3.org/ns/activitystreams"); + deepStrictEqual( + recorder.getMeasurements("activitypub.document.cache").length, + 0, + "preloaded contexts must bypass the KV cache and the cache metric", + ); + }, + ); + + await t.step( + "no cache rule match emits no activitypub.document.cache", + async () => { + const kv = new MockKvStore(); + const [meterProvider, recorder] = createTestMeterProvider(); + const loader = kvCache({ + kv, + loader: mockDocumentLoader, + rules: [], + prefix: ["_test", "doc-cache-no-rule"], + meterProvider, + kind: "object", + }); + + await loader("https://example.com/object"); + deepStrictEqual( + recorder.getMeasurements("activitypub.document.cache").length, + 0, + ); + }, + ); + + await t.step( + "omitting meterProvider records no cache measurements", + async () => { + const kv = new MockKvStore(); + const [meterProvider, recorder] = createTestMeterProvider(); + // Do not pass meterProvider; the meterProvider here is only used to + // assert that NOTHING was recorded against this isolated test provider. + const loader = kvCache({ + kv, + loader: mockDocumentLoader, + rules: [["https://example.com/object", { days: 1 }]], + prefix: ["_test", "doc-cache-no-mp"], + }); + + await loader("https://example.com/object"); + await loader("https://example.com/object"); + deepStrictEqual( + recorder.getMeasurements("activitypub.document.cache").length, + 0, + ); + // Reference meterProvider to avoid unused-variable lint complaints. + deepStrictEqual(typeof meterProvider.getMeter, "function"); + }, + ); + await t.step("preloaded contexts bypass cache", async () => { const kv = new MockKvStore(); let loaderCalled = false; diff --git a/packages/fedify/src/utils/kv-cache.ts b/packages/fedify/src/utils/kv-cache.ts index f004c4a58..998825523 100644 --- a/packages/fedify/src/utils/kv-cache.ts +++ b/packages/fedify/src/utils/kv-cache.ts @@ -5,12 +5,17 @@ import type { } from "@fedify/vocab-runtime"; import { preloadedContexts } from "@fedify/vocab-runtime"; import { getLogger } from "@logtape/logtape"; +import type { MeterProvider } from "@opentelemetry/api"; import type { KvKey, KvStore, KvStoreListEntry, KvStoreSetOptions, } from "../federation/kv.ts"; +import { + type DocumentFetchKind, + recordDocumentCache, +} from "../federation/metrics.ts"; const logger = getLogger(["fedify", "utils", "kv-cache"]); @@ -80,6 +85,26 @@ export interface KvCacheParameters { string | URL | URLPattern, Temporal.Duration | Temporal.DurationLike, ][]; + + /** + * The OpenTelemetry meter provider used to record + * `activitypub.document.cache` measurements. When omitted, the wrapper + * does not emit any metric measurements, preserving the previous + * unobserved-cache behavior. + * @since 2.3.0 + */ + readonly meterProvider?: MeterProvider; + + /** + * The lookup kind to record on the `activitypub.lookup.kind` attribute of + * `activitypub.document.cache` measurements. Defaults to `"object"` so + * the generic document loader case does not require an explicit option. + * Set to `"context"` for context-loader wrappers; the + * authenticated-loader path does not use `kvCache()` and is therefore + * out of scope. + * @since 2.3.0 + */ + readonly kind?: DocumentFetchKind; } /** @@ -88,7 +113,7 @@ export interface KvCacheParameters { * @returns The decorated document loader which is cache-enabled. */ export function kvCache( - { loader, kv, prefix, rules }: KvCacheParameters, + { loader, kv, prefix, rules, meterProvider, kind }: KvCacheParameters, ): DocumentLoader { const keyPrefix = prefix ?? ["_fedify", "remoteDocument"]; rules ??= [ @@ -104,6 +129,25 @@ export function kvCache( ); } } + const lookupKind: DocumentFetchKind = kind ?? "object"; + + function emitCacheMetric( + url: string, + result: "hit" | "miss", + ): void { + if (meterProvider == null) return; + let remoteUrl: URL | undefined; + try { + remoteUrl = new URL(url); + } catch { + remoteUrl = undefined; + } + recordDocumentCache(meterProvider, { + kind: lookupKind, + result, + remoteUrl, + }); + } return async ( url: string, @@ -132,6 +176,7 @@ export function kvCache( } } if (cache == null) { + emitCacheMetric(url, "miss"); const remoteDoc = await loader(url, options); try { await kv.set(key, remoteDoc, { ttl: match }); @@ -143,6 +188,7 @@ export function kvCache( } return remoteDoc; } + emitCacheMetric(url, "hit"); return cache; }; } From 8d337a385431fcff2f619f4800af04052908e500 Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 19:14:05 +0900 Subject: [PATCH 03/16] Add instrumentDocumentLoader() wrapper Add `instrumentDocumentLoader()` to the federation metrics module so any `DocumentLoader` Fedify wraps can record one `activitypub.document.fetch` + one `activitypub.document.fetch.duration` measurement per call. The wrapper - takes a meter provider, a bounded `DocumentFetchKind`, and an optional `cacheEnabled` flag, - parses the requested URL once and derives `activitypub.remote.host` via `getRemoteHost()`, omitting the attribute when the URL does not parse, - records `result=fetched` on success and runs `classifyFetchError()` on rejection (rethrowing the original error), so non-2xx HTTP responses surface as `not_found` or `error` with `http.response.status_code`, and native fetch failures (`TypeError`, `AbortError`, `FetchError` without a response) surface as `network_error`, and - short-circuits to the inner loader untouched when no meter provider is supplied, so opt-out is the default and applications without OpenTelemetry pay nothing. `classifyFetchError()`'s return type is narrowed from `LookupResult` to `DocumentFetchResult` since it never returns `hit` or `miss`; that also makes it directly assignable to both `DocumentFetchAttributes` and `KeyLookupAttributes`. The Federation middleware wiring that uses this wrapper follows in the next commit. https://github.com/fedify-dev/fedify/issues/316 https://github.com/fedify-dev/fedify/issues/738 Assisted-by: Claude Code:claude-opus-4-7 Assisted-by: Codex:gpt-5.5 --- .../fedify/src/federation/metrics.test.ts | 141 +++++++++++++++++- packages/fedify/src/federation/metrics.ts | 90 ++++++++++- 2 files changed, 228 insertions(+), 3 deletions(-) diff --git a/packages/fedify/src/federation/metrics.test.ts b/packages/fedify/src/federation/metrics.test.ts index c8c222e27..e98675209 100644 --- a/packages/fedify/src/federation/metrics.test.ts +++ b/packages/fedify/src/federation/metrics.test.ts @@ -1,9 +1,11 @@ import { createTestMeterProvider, test } from "@fedify/fixture"; -import { assertEquals } from "@std/assert"; +import { assertEquals, assertRejects } from "@std/assert"; +import type { DocumentLoader, RemoteDocument } from "@fedify/vocab-runtime"; import { FetchError } from "@fedify/vocab-runtime"; import type { MessageQueue } from "./mq.ts"; import { classifyFetchError, + instrumentDocumentLoader, recordDocumentCache, recordDocumentFetch, recordFanoutRecipients, @@ -376,3 +378,140 @@ test("classifyFetchError() classifies any other thrown value as error", () => { assertEquals(classifyFetchError("string error"), { result: "error" }); assertEquals(classifyFetchError(undefined), { result: "error" }); }); + +test("instrumentDocumentLoader() records fetched on success", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const inner: DocumentLoader = (url) => + Promise.resolve( + { + contextUrl: null, + documentUrl: url, + document: { ok: true }, + } satisfies RemoteDocument, + ); + const wrapped = instrumentDocumentLoader(inner, { + meterProvider, + kind: "object", + cacheEnabled: true, + }); + + const result = await wrapped("https://example.com/o"); + assertEquals(result.document, { ok: true }); + + const counter = recorder.getMeasurement("activitypub.document.fetch"); + assertEquals(counter?.attributes["activitypub.lookup.kind"], "object"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "fetched"); + assertEquals(counter?.attributes["activitypub.remote.host"], "example.com"); + assertEquals(counter?.attributes["activitypub.cache.enabled"], true); + assertEquals("http.response.status_code" in counter!.attributes, false); + + const duration = recorder.getMeasurement( + "activitypub.document.fetch.duration", + ); + assertEquals(duration?.type, "histogram"); + assertEquals(duration?.attributes["activitypub.lookup.result"], "fetched"); +}); + +test("instrumentDocumentLoader() records not_found on FetchError 404", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const inner: DocumentLoader = (url) => + Promise.reject( + new FetchError( + url, + "HTTP 404", + new Response("", { status: 404 }), + ), + ); + const wrapped = instrumentDocumentLoader(inner, { + meterProvider, + kind: "context", + cacheEnabled: false, + }); + + await assertRejects( + () => wrapped("https://example.com/missing"), + FetchError, + ); + + const counter = recorder.getMeasurement("activitypub.document.fetch"); + assertEquals(counter?.attributes["activitypub.lookup.kind"], "context"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "not_found"); + assertEquals(counter?.attributes["activitypub.remote.host"], "example.com"); + assertEquals(counter?.attributes["activitypub.cache.enabled"], false); + assertEquals(counter?.attributes["http.response.status_code"], 404); +}); + +test("instrumentDocumentLoader() records network_error on TypeError", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const inner: DocumentLoader = () => + Promise.reject(new TypeError("fetch failed")); + const wrapped = instrumentDocumentLoader(inner, { + meterProvider, + kind: "object", + }); + + await assertRejects( + () => wrapped("https://example.com/o"), + TypeError, + ); + + const counter = recorder.getMeasurement("activitypub.document.fetch"); + assertEquals( + counter?.attributes["activitypub.lookup.result"], + "network_error", + ); + assertEquals("activitypub.cache.enabled" in counter!.attributes, false); +}); + +test("instrumentDocumentLoader() returns inner loader unchanged when meterProvider is omitted", async () => { + const [, recorder] = createTestMeterProvider(); + let callCount = 0; + const inner: DocumentLoader = (url) => { + callCount++; + return Promise.resolve( + { + contextUrl: null, + documentUrl: url, + document: { ok: true }, + } satisfies RemoteDocument, + ); + }; + const wrapped = instrumentDocumentLoader(inner, { kind: "object" }); + + // No-instrumentation short-circuit returns the original function reference. + assertEquals(wrapped, inner); + + await wrapped("https://example.com/o"); + assertEquals(callCount, 1); + assertEquals( + recorder.getMeasurements("activitypub.document.fetch").length, + 0, + ); + assertEquals( + recorder.getMeasurements("activitypub.document.fetch.duration").length, + 0, + ); +}); + +test("instrumentDocumentLoader() omits remote.host when URL is unparseable", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const inner: DocumentLoader = (url) => + Promise.resolve( + { + contextUrl: null, + documentUrl: url, + document: {}, + } satisfies RemoteDocument, + ); + const wrapped = instrumentDocumentLoader(inner, { + meterProvider, + kind: "other", + }); + + await wrapped("not a url"); + + const counter = recorder.getMeasurement("activitypub.document.fetch"); + assertEquals(counter?.attributes["activitypub.lookup.kind"], "other"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "fetched"); + assertEquals("activitypub.remote.host" in counter!.attributes, false); +}); diff --git a/packages/fedify/src/federation/metrics.ts b/packages/fedify/src/federation/metrics.ts index 044ec67c8..75a16b4d5 100644 --- a/packages/fedify/src/federation/metrics.ts +++ b/packages/fedify/src/federation/metrics.ts @@ -1,3 +1,4 @@ +import type { DocumentLoader } from "@fedify/vocab-runtime"; import { FetchError } from "@fedify/vocab-runtime"; import { type Attributes, @@ -1022,11 +1023,11 @@ export function recordDocumentCache( */ export function classifyFetchError( error: unknown, -): { result: LookupResult; statusCode?: number } { +): { result: DocumentFetchResult; statusCode?: number } { if (error instanceof FetchError) { if (error.response != null) { const status = error.response.status; - const result: LookupResult = status === 404 || status === 410 + const result: DocumentFetchResult = status === 404 || status === 410 ? "not_found" : "error"; return { result, statusCode: status }; @@ -1038,6 +1039,91 @@ export function classifyFetchError( return { result: "error" }; } +/** + * Options for {@link instrumentDocumentLoader}. + * @since 2.3.0 + */ +export interface InstrumentDocumentLoaderOptions { + /** + * The OpenTelemetry meter provider used to record + * `activitypub.document.fetch` and `activitypub.document.fetch.duration` + * measurements. When omitted, the wrapper records nothing and simply + * delegates to the wrapped loader. + */ + meterProvider?: MeterProvider; + + /** + * The lookup kind recorded on `activitypub.lookup.kind`. Set to + * `"object"` for the generic document loader, `"context"` for the + * context loader, and `"other"` for callers that do not fit the + * generic-object classification. + */ + kind: DocumentFetchKind; + + /** + * Whether the wrapped loader is cache-backed (for example via + * {@link import("../utils/kv-cache.ts").kvCache}). Recorded as + * `activitypub.cache.enabled` on every measurement; omitted from the + * attribute set when the option is not set. + */ + cacheEnabled?: boolean; +} + +/** + * Wraps a {@link DocumentLoader} so each invocation records one + * measurement on `activitypub.document.fetch` (counter) and one on + * `activitypub.document.fetch.duration` (histogram), classifying the + * outcome via {@link classifyFetchError} when the wrapped loader throws + * and as `fetched` on success. The wrapper rethrows whatever the + * wrapped loader throws so caller behavior is unchanged. + * + * The wrapper records the hostname of the requested URL on + * `activitypub.remote.host` when the URL parses; full URLs, paths, and + * query strings are deliberately excluded to keep cardinality bounded. + * HTTP status codes are recorded only when the failure carries a + * `Response` (currently, when the wrapped loader throws a + * {@link FetchError} with a non-`null` `response`). + * @since 2.3.0 + */ +export function instrumentDocumentLoader( + loader: DocumentLoader, + options: InstrumentDocumentLoaderOptions, +): DocumentLoader { + const meterProvider = options.meterProvider; + if (meterProvider == null) return loader; + return async (url, opts) => { + const start = performance.now(); + let remoteUrl: URL | undefined; + try { + remoteUrl = new URL(url); + } catch { + remoteUrl = undefined; + } + try { + const result = await loader(url, opts); + recordDocumentFetch(meterProvider, { + durationMs: getDurationMs(start), + kind: options.kind, + result: "fetched", + remoteUrl, + cacheEnabled: options.cacheEnabled, + }); + return result; + } catch (error) { + const classified = classifyFetchError(error); + recordDocumentFetch(meterProvider, { + durationMs: getDurationMs(start), + kind: options.kind, + result: classified.result, + remoteUrl, + cacheEnabled: options.cacheEnabled, + statusCode: classified.statusCode, + }); + throw error; + } + }; +} + /** * Times an awaited public key fetch and records exactly one * `activitypub.signature.key_fetch.duration` measurement, classifying the From b04f4fc14122864dc9ae0c02067d42bb14b99b2e Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 19:30:32 +0900 Subject: [PATCH 04/16] Record activitypub.key.lookup in fetchKey() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an optional `meterProvider` to `FetchKeyOptions` and instrument the shared `fetchKeyWithResult()` helper so every `fetchKey()` and `fetchKeyDetailed()` call now records exactly one `activitypub.key.lookup` + `activitypub.key.lookup.duration` measurement, classified by outcome: - cached valid key or cached negative entry → result=hit - fresh fetch + parseable key → result=fetched - fresh fetch + no usable key (resolveFetchedKey returns null) → result=invalid - documentLoader threw → classified via `classifyFetchError()`, surfacing `http.response.status_code` on `FetchError` rows and mapping 404 / 410 to not_found, FetchError without a response / TypeError / AbortError to network_error - any unexpected throw upstream → result=error (default outcome is never overridden before the failing line) Each measurement carries `activitypub.lookup.kind=public_key`, `activitypub.cache.enabled` (true when `options.keyCache` is set), the key URL's hostname under `activitypub.remote.host`, and the HTTP status code when an HTTP response was observed. The outcome is set only on the successful return paths, so a throw from `onCachedUnavailable` or `onFetchError` leaves the default `error` outcome rather than mislabelling a partial failure as `hit`. The HTTP signature verifier (`sig/http.ts`) had two `fetchKeyDetailed` call sites — the draft-cavage path and the RFC 9421 path — that only forwarded `tracerProvider` and not `meterProvider`. Both now thread `meterProvider` through, and the verifier-path test asserts the new `activitypub.key.lookup` measurement lands on the configured test provider. The Linked Data Signatures and Object Integrity Proofs paths already forward the full `options` object, so spreading `options.meterProvider` is enough there. https://github.com/fedify-dev/fedify/issues/316 https://github.com/fedify-dev/fedify/issues/738 Assisted-by: Claude Code:claude-opus-4-7 Assisted-by: Codex:gpt-5.5 --- packages/fedify/src/sig/http.test.ts | 15 +++ packages/fedify/src/sig/http.ts | 2 + packages/fedify/src/sig/key.test.ts | 160 ++++++++++++++++++++++++++- packages/fedify/src/sig/key.ts | 106 ++++++++++++------ 4 files changed, 251 insertions(+), 32 deletions(-) diff --git a/packages/fedify/src/sig/http.test.ts b/packages/fedify/src/sig/http.test.ts index b05417e7c..c902e048e 100644 --- a/packages/fedify/src/sig/http.test.ts +++ b/packages/fedify/src/sig/http.test.ts @@ -421,6 +421,21 @@ test("verifyRequestDetailed() records verification duration metric", async (t) = assertFalse( "http_signatures.failure_reason" in measurement.attributes, ); + + // The HTTP draft-cavage verifier must also forward `meterProvider` + // through to `fetchKeyDetailed` so the generic + // `activitypub.key.lookup*` metrics land on the test provider rather + // than the global default. + const keyLookups = recorder.getMeasurements("activitypub.key.lookup"); + assertEquals(keyLookups.length, 1); + assertEquals( + keyLookups[0].attributes["activitypub.lookup.kind"], + "public_key", + ); + assertEquals( + keyLookups[0].attributes["activitypub.lookup.result"], + "fetched", + ); }); await t.step("missing signature is recorded as result=missing", async () => { diff --git a/packages/fedify/src/sig/http.ts b/packages/fedify/src/sig/http.ts index 3b534dbda..d79cd27a6 100644 --- a/packages/fedify/src/sig/http.ts +++ b/packages/fedify/src/sig/http.ts @@ -1157,6 +1157,7 @@ async function verifyRequestDraft( contextLoader, keyCache, tracerProvider, + meterProvider, }), ); const { key, cached, fetchError } = fetchResult; @@ -1508,6 +1509,7 @@ async function verifyRequestRfc9421( contextLoader, keyCache, tracerProvider, + meterProvider, }), ); const { key, cached, fetchError } = rfcFetchResult; diff --git a/packages/fedify/src/sig/key.test.ts b/packages/fedify/src/sig/key.test.ts index f80def098..b759c21c4 100644 --- a/packages/fedify/src/sig/key.test.ts +++ b/packages/fedify/src/sig/key.test.ts @@ -1,10 +1,11 @@ import { + createTestMeterProvider, createTestTracerProvider, mockDocumentLoader, test, } from "@fedify/fixture"; import { CryptographicKey, Multikey } from "@fedify/vocab"; -import { FetchError } from "@fedify/vocab-runtime"; +import { type DocumentLoader, FetchError } from "@fedify/vocab-runtime"; import { assertEquals, assertRejects, assertThrows } from "@std/assert"; import { ed25519Multikey, @@ -457,3 +458,160 @@ test("fetchKeyDetailed() returns detailed fetch errors", async () => { } assertEquals(detailedError.error, failure); }); + +test("fetchKey() records activitypub.key.lookup with hit on cached key", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const cache: Record = {}; + const keyCache: KeyCache = { + get(keyId) { + return Promise.resolve(cache[keyId.href]); + }, + set(keyId, key) { + cache[keyId.href] = key; + return Promise.resolve(); + }, + }; + const options: FetchKeyOptions = { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + keyCache, + meterProvider, + }; + + // Warm the cache. + await fetchKey("https://example.com/key", CryptographicKey, options); + recorder.clear(); + + // Subsequent call should hit the cache. + const result = await fetchKey( + "https://example.com/key", + CryptographicKey, + options, + ); + assertEquals(result.cached, true); + + const counters = recorder.getMeasurements("activitypub.key.lookup"); + assertEquals(counters.length, 1); + assertEquals(counters[0].attributes["activitypub.lookup.kind"], "public_key"); + assertEquals(counters[0].attributes["activitypub.lookup.result"], "hit"); + assertEquals(counters[0].attributes["activitypub.cache.enabled"], true); + assertEquals( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + + const duration = recorder.getMeasurement("activitypub.key.lookup.duration"); + assertEquals(duration?.type, "histogram"); + assertEquals(duration?.attributes["activitypub.lookup.result"], "hit"); +}); + +test("fetchKey() records activitypub.key.lookup with fetched on cache miss", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const result = await fetchKey( + "https://example.com/key", + CryptographicKey, + { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }, + ); + assertEquals(result.cached, false); + + const counter = recorder.getMeasurement("activitypub.key.lookup"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "fetched"); + assertEquals(counter?.attributes["activitypub.cache.enabled"], false); + assertEquals(counter?.attributes["activitypub.remote.host"], "example.com"); + + const duration = recorder.getMeasurement("activitypub.key.lookup.duration"); + assertEquals(duration?.attributes["activitypub.lookup.result"], "fetched"); +}); + +test("fetchKey() records not_found and status code on HTTP 404", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const missingKeyId = new URL("https://example.com/missing-key"); + const documentLoader: DocumentLoader = (url) => { + if (url === missingKeyId.href) { + throw new FetchError( + missingKeyId, + `HTTP 404: ${missingKeyId.href}`, + new Response(null, { status: 404 }), + ); + } + return mockDocumentLoader(url); + }; + + const result = await fetchKey(missingKeyId, CryptographicKey, { + documentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + assertEquals(result.key, null); + + const counter = recorder.getMeasurement("activitypub.key.lookup"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "not_found"); + assertEquals(counter?.attributes["http.response.status_code"], 404); + assertEquals(counter?.attributes["activitypub.cache.enabled"], false); + assertEquals( + counter?.attributes["activitypub.remote.host"], + "example.com", + ); +}); + +test("fetchKey() records network_error on TypeError from the document loader", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const documentLoader: DocumentLoader = () => { + throw new TypeError("connect failed"); + }; + + await fetchKey("https://example.com/key", CryptographicKey, { + documentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + + const counter = recorder.getMeasurement("activitypub.key.lookup"); + assertEquals( + counter?.attributes["activitypub.lookup.result"], + "network_error", + ); + assertEquals("http.response.status_code" in counter!.attributes, false); +}); + +test("fetchKeyDetailed() records activitypub.key.lookup with the same taxonomy", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const goneKeyId = new URL("https://example.com/gone-key"); + const documentLoader: DocumentLoader = (url) => { + if (url === goneKeyId.href) { + throw new FetchError( + goneKeyId, + `HTTP 410: ${goneKeyId.href}`, + new Response(null, { status: 410 }), + ); + } + return mockDocumentLoader(url); + }; + + await fetchKeyDetailed(goneKeyId, CryptographicKey, { + documentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + + const counter = recorder.getMeasurement("activitypub.key.lookup"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "not_found"); + assertEquals(counter?.attributes["http.response.status_code"], 410); +}); + +test("fetchKey() omits the meter provider has no effect on behavior", async () => { + // Sanity: omitting meterProvider keeps fetchKey functional. + const result = await fetchKey( + "https://example.com/key", + CryptographicKey, + { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + }, + ); + assertEquals(result.cached, false); +}); diff --git a/packages/fedify/src/sig/key.ts b/packages/fedify/src/sig/key.ts index 4d97dfdda..49be57cd4 100644 --- a/packages/fedify/src/sig/key.ts +++ b/packages/fedify/src/sig/key.ts @@ -11,12 +11,19 @@ import { } from "@fedify/vocab-runtime"; import { getLogger } from "@logtape/logtape"; import { + type MeterProvider, SpanKind, SpanStatusCode, trace, type TracerProvider, } from "@opentelemetry/api"; import metadata from "../../deno.json" with { type: "json" }; +import { + classifyFetchError, + getDurationMs, + type KeyLookupResult, + recordKeyLookup, +} from "../federation/metrics.ts"; /** * Checks if the given key is valid and supported. No-op if the key is valid, @@ -176,6 +183,14 @@ export interface FetchKeyOptions { * @since 1.3.0 */ tracerProvider?: TracerProvider; + + /** + * The OpenTelemetry meter provider to use for recording + * `activitypub.key.lookup` and `activitypub.key.lookup.duration`. If + * omitted, the global meter provider is used. + * @since 2.3.0 + */ + meterProvider?: MeterProvider; } /** @@ -538,45 +553,74 @@ async function fetchKeyWithResult< logger: ReturnType, ) => Promise | TResult, ): Promise { - const logger = getLogger(["fedify", "sig", "key"]); - const keyId = cacheKey.href; - const keyCache = options.keyCache as FetchErrorMetadataCache | undefined; - const cached = await getCachedFetchKey( - cacheKey, - keyId, - cls, - keyCache, - logger, - ); - if (cached?.key === null && cached.cached) { - return await onCachedUnavailable(cacheKey, keyId, keyCache, logger); - } - if (cached != null) return cached as TResult; - logger.debug("Fetching key {keyId} to verify signature...", { keyId }); - let document: unknown; + const start = performance.now(); + let outcome: { result: KeyLookupResult; statusCode?: number } = { + result: "error", + }; try { - const remoteDocument = - await (options.documentLoader ?? getDocumentLoader())( + const logger = getLogger(["fedify", "sig", "key"]); + const keyId = cacheKey.href; + const keyCache = options.keyCache as FetchErrorMetadataCache | undefined; + const cached = await getCachedFetchKey( + cacheKey, + keyId, + cls, + keyCache, + logger, + ); + if (cached?.key === null && cached.cached) { + const cachedUnavailable = await onCachedUnavailable( + cacheKey, + keyId, + keyCache, + logger, + ); + outcome = { result: "hit" }; + return cachedUnavailable; + } + if (cached != null) { + outcome = { result: "hit" }; + return cached as TResult; + } + logger.debug("Fetching key {keyId} to verify signature...", { keyId }); + let document: unknown; + try { + const remoteDocument = + await (options.documentLoader ?? getDocumentLoader())( + keyId, + ); + document = remoteDocument.document; + } catch (error) { + const classified = classifyFetchError(error); + const errored = await onFetchError( + error, + cacheKey, keyId, + keyCache, + logger, ); - document = remoteDocument.document; - } catch (error) { - return await onFetchError( - error, + outcome = classified; + return errored; + } + const resolved = await resolveFetchedKey( + document, cacheKey, keyId, - keyCache, + cls, + options, logger, ); + outcome = { result: resolved.key != null ? "fetched" : "invalid" }; + return resolved as TResult; + } finally { + recordKeyLookup(options.meterProvider, { + durationMs: getDurationMs(start), + result: outcome.result, + remoteUrl: cacheKey, + cacheEnabled: options.keyCache != null, + statusCode: outcome.statusCode, + }); } - return await resolveFetchedKey( - document, - cacheKey, - keyId, - cls, - options, - logger, - ) as TResult; } async function fetchKeyInternal( From 88a83e2bd0a6e55e877682367267e7a795948580 Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 19:48:49 +0900 Subject: [PATCH 05/16] Wire document/context loader metrics into FederationImpl Layer the new OpenTelemetry instrumentation onto the loader factories that `FederationImpl` constructs. The default `documentLoaderFactory` now produces a `kvCache()` configured with `meterProvider` and `kind: "object"`, and a separate default `contextLoaderFactory` does the same with `kind: "context"`. Every factory's output is wrapped in `instrumentDocumentLoader()` so each call records exactly one `activitypub.document.fetch` plus `activitypub.document.fetch.duration`, and the `kvCache()` layer emits `activitypub.document.cache` hit/miss. The authenticated document loader factory is also wrapped, with `cacheEnabled: false` since it intentionally bypasses the cache. `activitypub.cache.enabled` is set to `true` only for the built-in factories where Fedify can guarantee `kvCache()` is in the path; for user-supplied factories caching is opaque, so the attribute is omitted rather than recorded as a confident `true`/`false`. When no `meterProvider` is configured, `instrumentDocumentLoader()` short- circuits and returns the inner loader unchanged, so `ctx.documentLoader` and `ctx.contextLoader` keep strict reference equality with user-supplied loaders for callers that do not use OpenTelemetry. The historical fallback where omitting `contextLoaderFactory` reuses the (possibly user-customised) `documentLoaderFactory` is preserved; the wrapping just relabels the metric attributes to `kind=context` at the call site. The `authenticatedDocumentLoaderFactory` wrapper now also forwards the optional `DocumentLoaderFactoryOptions` second argument so user-supplied authenticated factories still receive their per-call `allowPrivateAddress` / `userAgent` overrides. Three new tests in `middleware.test.ts` cover: - `ctx.documentLoader` records `kind=object` + omitted `cache.enabled` when the user supplies a custom factory. - `ctx.contextLoader` records `kind=context`. - `ctx.documentLoader` and `ctx.contextLoader` are strict-equal to the user-supplied loader when no `meterProvider` is configured. A fourth test verifies that a user-supplied `authenticatedDocumentLoaderFactory` still receives its second `DocumentLoaderFactoryOptions` argument. https://github.com/fedify-dev/fedify/issues/316 https://github.com/fedify-dev/fedify/issues/738 Assisted-by: Claude Code:claude-opus-4-7 Assisted-by: Codex:gpt-5.5 --- .../fedify/src/federation/middleware.test.ts | 94 ++++++++++++++++++ packages/fedify/src/federation/middleware.ts | 98 +++++++++++++++---- 2 files changed, 174 insertions(+), 18 deletions(-) diff --git a/packages/fedify/src/federation/middleware.test.ts b/packages/fedify/src/federation/middleware.test.ts index f2663e94e..39bb286a3 100644 --- a/packages/fedify/src/federation/middleware.test.ts +++ b/packages/fedify/src/federation/middleware.test.ts @@ -6491,3 +6491,97 @@ test("KvSpecDeterminer", async (t) => { assertEquals(spec, "rfc9421"); }); }); + +test("createFederation() instruments documentLoader with activitypub.document.fetch", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const kv = new MemoryKvStore(); + const federation = createFederation({ + kv, + meterProvider, + documentLoaderFactory: () => mockDocumentLoader, + contextLoaderFactory: () => mockDocumentLoader, + }); + const ctx = federation.createContext( + new URL("https://example.com/"), + undefined, + ); + await ctx.documentLoader("https://example.com/object"); + + const counters = recorder.getMeasurements("activitypub.document.fetch"); + assertEquals(counters.length, 1); + assertEquals(counters[0].attributes["activitypub.lookup.kind"], "object"); + assertEquals(counters[0].attributes["activitypub.lookup.result"], "fetched"); + assertEquals( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + // User-supplied factory: cacheEnabled is unknown, attribute is omitted. + assertFalse("activitypub.cache.enabled" in counters[0].attributes); +}); + +test("createFederation() records kind=context on contextLoader fetches", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const kv = new MemoryKvStore(); + const federation = createFederation({ + kv, + meterProvider, + documentLoaderFactory: () => mockDocumentLoader, + contextLoaderFactory: () => mockDocumentLoader, + }); + const ctx = federation.createContext( + new URL("https://example.com/"), + undefined, + ); + await ctx.contextLoader("https://example.com/object"); + + const counters = recorder.getMeasurements("activitypub.document.fetch"); + assertEquals(counters.length, 1); + assertEquals(counters[0].attributes["activitypub.lookup.kind"], "context"); +}); + +test("createFederation() forwards DocumentLoaderFactoryOptions to a user-supplied authenticatedDocumentLoaderFactory", () => { + const kv = new MemoryKvStore(); + const seen: Array = []; + const federation = createFederation({ + kv, + authenticatedDocumentLoaderFactory: (_identity, opts) => { + seen.push(opts); + return mockDocumentLoader; + }, + }); + // FederationImpl exposes the factory directly on the instance. + const impl = federation as unknown as { + authenticatedDocumentLoaderFactory: ( + identity: { keyId: URL; privateKey: CryptoKey }, + opts?: { allowPrivateAddress?: boolean; userAgent?: string }, + ) => unknown; + }; + impl.authenticatedDocumentLoaderFactory( + { + keyId: new URL("https://example.com/users/alice#main-key"), + // deno-lint-ignore no-explicit-any + privateKey: {} as any, + }, + { allowPrivateAddress: true, userAgent: "test-ua" }, + ); + assertEquals(seen.length, 1); + assertEquals(seen[0], { allowPrivateAddress: true, userAgent: "test-ua" }); +}); + +test("createFederation() omits instrumentation when no meterProvider is set", () => { + // Sanity: without a meterProvider, ctx.documentLoader must be the same + // function reference as the user-supplied loader, so the wrapper is a + // true no-op for non-OTel users. + const kv = new MemoryKvStore(); + const federation = createFederation({ + kv, + documentLoaderFactory: () => mockDocumentLoader, + contextLoaderFactory: () => mockDocumentLoader, + }); + const ctx = federation.createContext( + new URL("https://example.com/"), + undefined, + ); + assertStrictEquals(ctx.documentLoader, mockDocumentLoader); + assertStrictEquals(ctx.contextLoader, mockDocumentLoader); +}); diff --git a/packages/fedify/src/federation/middleware.ts b/packages/fedify/src/federation/middleware.ts index ec5d2629d..80f94c15c 100644 --- a/packages/fedify/src/federation/middleware.ts +++ b/packages/fedify/src/federation/middleware.ts @@ -107,6 +107,7 @@ import { getDurationMs, getFederationMetrics, getRemoteHost, + instrumentDocumentLoader, isAbortError, type QueueTaskCommonAttributes, type QueueTaskResult, @@ -369,24 +370,78 @@ export class FederationImpl } const { allowPrivateAddress, userAgent } = options; this.allowPrivateAddress = allowPrivateAddress ?? false; - this.documentLoaderFactory = options.documentLoaderFactory ?? - ((opts) => { - return kvCache({ - loader: getDocumentLoader({ - allowPrivateAddress: opts?.allowPrivateAddress ?? - allowPrivateAddress, - userAgent: opts?.userAgent ?? userAgent, - }), - kv: options.kv, - prefix: this.kvPrefixes.remoteDocument, - }); + // The loader factory closures below read `this._meterProvider` at + // call time, not when they are created. Factories are only invoked + // after the constructor has assigned `_meterProvider` (see below), so + // the lookup is safe; no eager assignment is needed here. + const userDocumentLoaderFactory = options.documentLoaderFactory; + const userContextLoaderFactory = options.contextLoaderFactory; + const userAuthFactory = options.authenticatedDocumentLoaderFactory; + const builtinDocumentLoaderFactory: DocumentLoaderFactory = (opts) => + kvCache({ + loader: getDocumentLoader({ + allowPrivateAddress: opts?.allowPrivateAddress ?? + allowPrivateAddress, + userAgent: opts?.userAgent ?? userAgent, + }), + kv: options.kv, + prefix: this.kvPrefixes.remoteDocument, + meterProvider: this._meterProvider, + kind: "object", }); - this.contextLoaderFactory = options.contextLoaderFactory ?? - this.documentLoaderFactory; - this.authenticatedDocumentLoaderFactory = - options.authenticatedDocumentLoaderFactory ?? - ((identity) => - getAuthenticatedDocumentLoader(identity, { + const builtinContextLoaderFactory: DocumentLoaderFactory = (opts) => + kvCache({ + loader: getDocumentLoader({ + allowPrivateAddress: opts?.allowPrivateAddress ?? + allowPrivateAddress, + userAgent: opts?.userAgent ?? userAgent, + }), + kv: options.kv, + prefix: this.kvPrefixes.remoteDocument, + meterProvider: this._meterProvider, + kind: "context", + }); + // Only the built-in factories use `kvCache()`, so we can confidently + // record `activitypub.cache.enabled=true` for them; user-supplied + // factories may or may not cache, so the attribute is omitted. + this.documentLoaderFactory = (opts) => + instrumentDocumentLoader( + (userDocumentLoaderFactory ?? builtinDocumentLoaderFactory)(opts), + { + meterProvider: this._meterProvider, + kind: "object", + cacheEnabled: userDocumentLoaderFactory == null ? true : undefined, + }, + ); + // When the user customises `documentLoaderFactory` but not + // `contextLoaderFactory`, Fedify has historically fallen back to the + // (customised) document factory rather than the built-in one so + // context fetches inherit the user's settings. Preserve that + // semantic, just with a separate instrumentation kind so the metric + // attributes reflect the call-site intent. + const resolvedContextLoaderFactory: DocumentLoaderFactory = + userContextLoaderFactory ?? userDocumentLoaderFactory ?? + builtinContextLoaderFactory; + this.contextLoaderFactory = (opts) => + instrumentDocumentLoader( + resolvedContextLoaderFactory(opts), + { + meterProvider: this._meterProvider, + kind: "context", + cacheEnabled: (userContextLoaderFactory == null && + userDocumentLoaderFactory == null) + ? true + : undefined, + }, + ); + this.authenticatedDocumentLoaderFactory = (identity, factoryOpts) => + instrumentDocumentLoader( + userAuthFactory != null + // Forward `factoryOpts` so user-supplied factories receive the + // per-call `DocumentLoaderFactoryOptions` (allowPrivateAddress, + // userAgent) just like they did before this wrapper was added. + ? userAuthFactory(identity, factoryOpts) + : getAuthenticatedDocumentLoader(identity, { allowPrivateAddress, userAgent, specDeterminer: new KvSpecDeterminer( @@ -395,7 +450,14 @@ export class FederationImpl options.firstKnock, ), tracerProvider: this.tracerProvider, - })); + }), + { + meterProvider: this._meterProvider, + kind: "object", + // The authenticated document loader does not cache. + cacheEnabled: userAuthFactory == null ? false : undefined, + }, + ); this.userAgent = userAgent; this.onOutboxError = options.onOutboxError; this.permanentFailureStatusCodes = options.permanentFailureStatusCodes ?? From 26e238b9f633de59c58d2e347441ae7d4cab2d4f Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 20:11:38 +0900 Subject: [PATCH 06/16] Add activitypub.object.lookup counter to lookupObject() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Record a new `activitypub.object.lookup` counter at every `lookupObject()` call so operators can see how many lookups resolved to an Actor, to a non-actor Object, or to nothing — without having to mix that classification into the loader-level `activitypub.document.fetch` counter (which would over-count unique remote fetches). The counter takes `activitypub.lookup.kind` ∈ `{actor, object, other}` and an optional `activitypub.remote.host` (hostname only). The host-extraction helper handles `URL` instances, plain `https://…` strings, `acct:user@host` URIs (both `URL` and string forms), and bare fediverse handles (`@user@host`, `user@host`). Anything that does not reduce cleanly to an authority — paths, query strings, fragments, or whitespace mixed in with the handle suffix — is dropped from the metric attribute rather than recorded, so adversarial inputs cannot inflate cardinality on `activitypub.remote.host`. The metric is opt-in: `lookupObject()` only emits when its caller passes a `meterProvider`. `Context.lookupObject()` threads `this.meterProvider` through so Federation-managed call sites get the counter automatically; direct callers of `lookupObject()` that do not set up OpenTelemetry pay nothing. The instrument lives in `@fedify/vocab` (the same package as `lookupObject` itself), so the recording site does not introduce a circular dependency on `@fedify/fedify`. The counter is created lazily per `MeterProvider` via a `WeakMap` cache that mirrors the `getFederationMetrics` pattern. https://github.com/fedify-dev/fedify/issues/316 https://github.com/fedify-dev/fedify/issues/738 Assisted-by: Claude Code:claude-opus-4-7 Assisted-by: Codex:gpt-5.5 --- packages/fedify/src/federation/middleware.ts | 1 + packages/vocab/src/lookup.test.ts | 164 +++++++++++++++++++ packages/vocab/src/lookup.ts | 104 +++++++++++- 3 files changed, 268 insertions(+), 1 deletion(-) diff --git a/packages/fedify/src/federation/middleware.ts b/packages/fedify/src/federation/middleware.ts index 80f94c15c..1fe2d62ad 100644 --- a/packages/fedify/src/federation/middleware.ts +++ b/packages/fedify/src/federation/middleware.ts @@ -2497,6 +2497,7 @@ export class ContextImpl implements Context { contextLoader: options.contextLoader ?? this.contextLoader, userAgent: options.userAgent ?? this.federation.userAgent, tracerProvider: options.tracerProvider ?? this.tracerProvider, + meterProvider: options.meterProvider ?? this.meterProvider, // @ts-ignore: `allowPrivateAddress` is not in the type definition. allowPrivateAddress: this.federation.allowPrivateAddress, }); diff --git a/packages/vocab/src/lookup.test.ts b/packages/vocab/src/lookup.test.ts index 6429bdb93..46b42a62e 100644 --- a/packages/vocab/src/lookup.test.ts +++ b/packages/vocab/src/lookup.test.ts @@ -1,4 +1,5 @@ import { + createTestMeterProvider, createTestTracerProvider, mockDocumentLoader, test, @@ -693,4 +694,167 @@ test("lookupObject() records OpenTelemetry span events", async () => { deepStrictEqual(recordedObject.id, "https://example.com/object"); }); +test("lookupObject() records activitypub.object.lookup counter", { + sanitizeResources: false, + sanitizeOps: false, +}, async (t) => { + fetchMock.spyGlobal(); + fetchMock.removeRoutes(); + fetchMock.get( + "begin:https://example.com/.well-known/webfinger", + { + subject: "acct:johndoe@example.com", + links: [ + { + rel: "self", + href: "https://example.com/person", + type: "application/activity+json", + }, + ], + }, + ); + + await t.step("records kind=actor for an actor result", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const person = await lookupObject("@johndoe@example.com", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + assertInstanceOf(person, Person); + const counters = recorder.getMeasurements("activitypub.object.lookup"); + deepStrictEqual(counters.length, 1); + deepStrictEqual(counters[0].type, "counter"); + deepStrictEqual(counters[0].value, 1); + deepStrictEqual( + counters[0].attributes["activitypub.lookup.kind"], + "actor", + ); + deepStrictEqual( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + // No `activitypub.document.fetch` measurement should be recorded by + // lookupObject itself; that lives on the document-loader path. + deepStrictEqual( + recorder.getMeasurements("activitypub.document.fetch").length, + 0, + ); + }); + + await t.step("records kind=object for a non-actor object", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const object = await lookupObject("https://example.com/object", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + assertInstanceOf(object, Object); + const counters = recorder.getMeasurements("activitypub.object.lookup"); + deepStrictEqual(counters.length, 1); + deepStrictEqual( + counters[0].attributes["activitypub.lookup.kind"], + "object", + ); + deepStrictEqual( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + }); + + await t.step("records kind=other on null result", async () => { + fetchMock.removeRoutes(); + fetchMock.get("begin:https://example.com/.well-known/webfinger", { + subject: "acct:janedoe@example.com", + links: [ + { + rel: "self", + href: "https://example.com/404", + type: "application/activity+json", + }, + ], + }); + const [meterProvider, recorder] = createTestMeterProvider(); + const result = await lookupObject("janedoe@example.com", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + deepStrictEqual(result, null); + const counters = recorder.getMeasurements("activitypub.object.lookup"); + deepStrictEqual(counters.length, 1); + deepStrictEqual( + counters[0].attributes["activitypub.lookup.kind"], + "other", + ); + deepStrictEqual( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + }); + + await t.step("omits counter when no meterProvider is provided", async () => { + const [_unused, recorder] = createTestMeterProvider(); + await lookupObject("https://example.com/object", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + }); + deepStrictEqual( + recorder.getMeasurements("activitypub.object.lookup").length, + 0, + ); + }); + + await t.step( + "extracts host from a URL acct: instance", + async () => { + fetchMock.removeRoutes(); + fetchMock.get("begin:https://example.com/.well-known/webfinger", { + subject: "acct:johndoe@example.com", + links: [ + { + rel: "self", + href: "https://example.com/person", + type: "application/activity+json", + }, + ], + }); + const [meterProvider, recorder] = createTestMeterProvider(); + await lookupObject(new URL("acct:johndoe@example.com"), { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + const counter = recorder.getMeasurement("activitypub.object.lookup"); + deepStrictEqual( + counter?.attributes["activitypub.remote.host"], + "example.com", + ); + }, + ); + + await t.step( + "omits host when the identifier carries path/query characters", + async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + // The bare-handle parser must refuse to put a path-bearing string + // like "example.com/leak" into the metric attribute. + await lookupObject("johndoe@example.com/leak", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + const counter = recorder.getMeasurement("activitypub.object.lookup"); + ok(counter != null); + deepStrictEqual( + "activitypub.remote.host" in counter.attributes, + false, + "high-cardinality handle suffixes must not be recorded as remote.host", + ); + }, + ); + + fetchMock.removeRoutes(); +}); + // cSpell: ignore gildong diff --git a/packages/vocab/src/lookup.ts b/packages/vocab/src/lookup.ts index 43262a3c7..502d059b0 100644 --- a/packages/vocab/src/lookup.ts +++ b/packages/vocab/src/lookup.ts @@ -6,13 +6,95 @@ import { } from "@fedify/vocab-runtime"; import { lookupWebFinger } from "@fedify/webfinger"; import { getLogger } from "@logtape/logtape"; -import { SpanStatusCode, trace, type TracerProvider } from "@opentelemetry/api"; +import { + type Attributes, + type Counter, + type MeterProvider, + SpanStatusCode, + trace, + type TracerProvider, +} from "@opentelemetry/api"; import { delay } from "es-toolkit"; import metadata from "../deno.json" with { type: "json" }; +import { isActor } from "./actor.ts"; import { toAcctUrl } from "./handle.ts"; import { getTypeId } from "./type.ts"; import { type Collection, type Link, Object } from "./vocab.ts"; +/** + * The classification of an ActivityStreams lookup performed by + * {@link lookupObject}, recorded as `activitypub.lookup.kind` on the + * `activitypub.object.lookup` counter. + * + * - `actor`: the resolved object is an {@link import("./actor.ts").Actor}. + * - `object`: the resolved object is a non-actor ActivityStreams object. + * - `other`: the lookup did not resolve to an object (not found, network + * failure, parse failure). + * @since 2.3.0 + */ +export type ObjectLookupKind = "actor" | "object" | "other"; + +const objectLookupCounters = new WeakMap(); + +function getObjectLookupCounter(meterProvider: MeterProvider): Counter { + let counter = objectLookupCounters.get(meterProvider); + if (counter == null) { + counter = meterProvider + .getMeter(metadata.name, metadata.version) + .createCounter("activitypub.object.lookup", { + description: + "ActivityStreams object lookups via lookupObject(), classified " + + "by whether the resolved value is an Actor.", + unit: "{lookup}", + }); + objectLookupCounters.set(meterProvider, counter); + } + return counter; +} + +function getLookupRemoteHost(identifier: string | URL): string | undefined { + let url: URL | undefined; + if (identifier instanceof URL) { + url = identifier; + } else { + try { + url = new URL(identifier); + } catch { + // Not a URL — try to interpret as a bare fediverse handle below. + const stripped = identifier.startsWith("@") + ? identifier.slice(1) + : identifier; + return extractHandleHost(stripped); + } + } + if (url.hostname !== "") return url.hostname; + // `acct:` URIs are opaque (no `//host` form), so the URL hostname is + // empty. The user and authority live in `url.pathname` as + // `user@host`; reuse the same handle-extraction logic, which both + // takes only the substring after the last `@` and refuses to record + // anything that looks like a path / query / fragment rather than a + // bare hostname. + if (url.protocol === "acct:") return extractHandleHost(url.pathname); + return undefined; +} + +function extractHandleHost(handle: string): string | undefined { + const at = handle.lastIndexOf("@"); + if (at < 0 || at >= handle.length - 1) return undefined; + const candidate = handle.slice(at + 1); + // Reject anything that is not just an authority — paths, queries, + // fragments, and spaces would all leak high-cardinality metadata into + // the metric attribute, so we drop the host entirely in those cases. + if (/[/?#\s]/.test(candidate)) return undefined; + // Round-trip through `URL` so the parser validates the authority and + // strips any port/userinfo before we record it. + try { + return new URL(`https://${candidate}`).hostname || undefined; + } catch { + return undefined; + } +} + const logger = getLogger(["fedify", "vocab", "lookup"]); /** @@ -67,6 +149,15 @@ export interface LookupObjectOptions { */ tracerProvider?: TracerProvider; + /** + * The OpenTelemetry meter provider used to record the + * `activitypub.object.lookup` counter. If omitted, the counter is not + * emitted at all (the helper is opt-in to avoid touching the global + * meter provider for callers that do not use OpenTelemetry). + * @since 2.3.0 + */ + meterProvider?: MeterProvider; + /** * AbortSignal for cancelling the request. * @since 1.8.0 @@ -118,6 +209,7 @@ export async function lookupObject( return await tracer.startActiveSpan( "activitypub.lookup_object", async (span) => { + let kind: ObjectLookupKind = "other"; try { const result = await lookupObjectInternal(identifier, options); if (result == null) span.setStatus({ code: SpanStatusCode.ERROR }); @@ -140,6 +232,8 @@ export async function lookupObject( await result.toJsonLd(options), ), }); + + kind = isActor(result) ? "actor" : "object"; } return result; } catch (error) { @@ -150,6 +244,14 @@ export async function lookupObject( }); throw error; } finally { + if (options.meterProvider != null) { + const attributes: Attributes = { + "activitypub.lookup.kind": kind, + }; + const host = getLookupRemoteHost(identifier); + if (host != null) attributes["activitypub.remote.host"] = host; + getObjectLookupCounter(options.meterProvider).add(1, attributes); + } span.end(); } }, From 2fce111d183ef9ad91e80059696d48f273dc027a Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 20:24:26 +0900 Subject: [PATCH 07/16] Document new key/document/object lookup metrics Extend `docs/manual/opentelemetry.md` with the new instruments introduced for issue #738. The "Instrumented metrics" table gains six rows for `activitypub.key.lookup`, `activitypub.key.lookup.duration`, `activitypub.document.fetch`, `activitypub.document.fetch.duration`, `activitypub.document.cache`, and `activitypub.object.lookup`, and each receives its own attribute subsection. The prose explains the shared bounded `activitypub.lookup.kind` and `activitypub.lookup.result` taxonomy, when `activitypub.cache.enabled` and `http.response.status_code` are recorded, what is deliberately excluded from the metric attributes for cardinality discipline, and how the new `activitypub.key.lookup.duration` complements the signature-scoped `activitypub.signature.key_fetch.duration` histogram that already covers the HTTP / Linked Data / Object Integrity slice. `CHANGES.md` gets a 2.3.0 entry under `@fedify/fedify` summarising the new instruments, the attribute discipline, and the relationship to the existing signature-scoped key-fetch histogram, with a link reference to issue #738. https://github.com/fedify-dev/fedify/issues/316 https://github.com/fedify-dev/fedify/issues/738 Assisted-by: Claude Code:claude-opus-4-7 Assisted-by: Codex:gpt-5.5 --- CHANGES.md | 39 +++++++++++ docs/manual/opentelemetry.md | 125 +++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index a0fb2cad7..8ae4080b3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -113,11 +113,50 @@ To be released. and other high-cardinality identifiers are deliberately excluded from the fanout histogram. [[#316], [#742], [#770]] + - Added OpenTelemetry metrics for public key lookups, remote JSON-LD + document fetches, and `lookupObject()` calls so operators can + observe how often Fedify hits the cache, how long remote fetches + take, and how `lookupObject()` resolutions split between actors, + non-actor objects, and unresolved lookups: + + - `activitypub.key.lookup` (counter) and + `activitypub.key.lookup.duration` (histogram) cover every + public key lookup performed by `fetchKey()` / + `fetchKeyDetailed()`, including signature verification paths. + - `activitypub.document.fetch` (counter) and + `activitypub.document.fetch.duration` (histogram) cover every + Fedify-wrapped document or context loader invocation, including + the authenticated loader. + - `activitypub.document.cache` (counter) records `hit` or `miss` + for each `kvCache()`-backed cache lookup. + - `activitypub.object.lookup` (counter) records the + parsed-result classification of every `lookupObject()` call as + `actor`, `object`, or `other`. + + Instruments share an `activitypub.lookup.kind` and (where + applicable) `activitypub.lookup.result` attribute drawn from small, + spec-bounded enumerations. `activitypub.remote.host` records the + URL hostname only; `http.response.status_code` is recorded when an + HTTP response was observed; `activitypub.cache.enabled` is + recorded on the key and document fetch metrics whenever Fedify can + confidently report the cache layer's presence. Key IDs, actor + IDs, object IDs, JSON-LD context URLs, full URLs, and fediverse + handles are deliberately excluded so attacker-controlled remotes + cannot inflate metric cardinality. The existing + `activitypub.signature.key_fetch.duration` histogram (introduced in + Fedify 2.3 for signature-scoped key-fetch latency, sliced by + `activitypub.signature.kind`) remains in place; the new + `activitypub.key.lookup.duration` is the general-purpose + histogram that covers non-signature key fetches as well and adds + `http.response.status_code` and a richer + `activitypub.lookup.result` taxonomy. [[#316], [#738]] + [#316]: https://github.com/fedify-dev/fedify/issues/316 [#619]: https://github.com/fedify-dev/fedify/issues/619 [#735]: https://github.com/fedify-dev/fedify/issues/735 [#736]: https://github.com/fedify-dev/fedify/issues/736 [#737]: https://github.com/fedify-dev/fedify/issues/737 +[#738]: https://github.com/fedify-dev/fedify/issues/738 [#740]: https://github.com/fedify-dev/fedify/issues/740 [#742]: https://github.com/fedify-dev/fedify/issues/742 [#748]: https://github.com/fedify-dev/fedify/pull/748 diff --git a/docs/manual/opentelemetry.md b/docs/manual/opentelemetry.md index fe38b3d78..507a525e2 100644 --- a/docs/manual/opentelemetry.md +++ b/docs/manual/opentelemetry.md @@ -308,6 +308,12 @@ Fedify records the following OpenTelemetry metrics: | `activitypub.signature.verification_failure` | Counter | `{failure}` | Counts failed signature verification for inbox requests. | | `activitypub.signature.verification.duration` | Histogram | `ms` | Measures signature verification duration across HTTP, Linked Data, and Object Integrity Proofs. | | `activitypub.signature.key_fetch.duration` | Histogram | `ms` | Measures public key lookup duration during signature verification. | +| `activitypub.key.lookup` | Counter | `{lookup}` | Counts public key lookups performed by `fetchKey()` / `fetchKeyDetailed()`. | +| `activitypub.key.lookup.duration` | Histogram | `ms` | Measures public key lookup duration, including cache hits and remote fetches. | +| `activitypub.document.fetch` | Counter | `{fetch}` | Counts remote JSON-LD document loader invocations made by Fedify-wrapped loaders. | +| `activitypub.document.fetch.duration` | Histogram | `ms` | Measures remote JSON-LD document loader invocation duration. | +| `activitypub.document.cache` | Counter | `{lookup}` | Counts KV-backed document loader cache lookups, classified as `hit` or `miss`. | +| `activitypub.object.lookup` | Counter | `{lookup}` | Counts `lookupObject()` calls, classified by whether the resolved value is an Actor. | | `fedify.http.server.request.count` | Counter | `{request}` | Counts inbound HTTP requests handled by `Federation.fetch()`. | | `fedify.http.server.request.duration` | Histogram | `ms` | Measures inbound HTTP request duration in `Federation.fetch()`. | | `fedify.queue.task.enqueued` | Counter | `{task}` | Counts inbox, outbox, and fanout tasks Fedify enqueued. | @@ -473,6 +479,113 @@ Fedify records the following OpenTelemetry metrics: for the stale attempt and one `fetched` for the freshly fetched retry) alongside the single verification measurement that covers both. +`activitypub.key.lookup` and `activitypub.key.lookup.duration` +: `activitypub.lookup.kind` is always `public_key` on these metrics; the + enumeration also covers `actor`, `object`, `context`, and `other` for + the document-fetch and lookup-object families described below. + `activitypub.lookup.result` is always present and is one of: + + - `hit`: the key was served from the configured `KeyCache` — either + a valid cached key or a cached negative entry recording a prior + failed fetch. + - `fetched`: the key was not in the cache and was loaded through + the document loader, returning a usable key. + - `not_found`: the remote responded with `404 Not Found` or + `410 Gone`. Recorded together with `http.response.status_code`. + - `invalid`: the remote responded with a payload Fedify could not + parse into a `CryptographicKey` or `Multikey`. + - `network_error`: no HTTP response was received — DNS, connect, + TLS, redirect-loop, or aborted-fetch failures all fall into this + bucket via the shared error classifier. + - `error`: any other unexpected failure (non-2xx HTTP response that + is neither `404` nor `410`, thrown exceptions that are not + recognised as transport failures, etc.). + + `activitypub.cache.enabled` is always present and is `true` when the + caller passed a `KeyCache`, `false` otherwise. `activitypub.remote.host` + is the hostname of the key URL. `http.response.status_code` is + present only when an HTTP response was observed. Key IDs, full key + URLs, and actor IDs are deliberately excluded from these metrics — + they remain on the `activitypub.fetch_key` span for trace-level + investigation. + + These metrics complement + [`activitypub.signature.key_fetch.duration`](#instrumented-metrics). + The signature-scoped histogram keeps an `activitypub.signature.kind` + dimension and is the right metric to slice signature verification + latency by `http` / `linked_data` / `object_integrity`; the new + `activitypub.key.lookup*` metrics cover *every* key lookup performed + by Fedify (including non-signature uses such as direct `fetchKey()` + calls) and add a bounded HTTP `status_code` and richer + `lookup.result` taxonomy. + +`activitypub.document.fetch` and `activitypub.document.fetch.duration` +: `activitypub.lookup.kind` is always present and is one of `object` + (Fedify's generic document loader), `context` (the JSON-LD context + loader), or `other` (callers that supply a custom kind hint). + Actor documents fetched through the generic loader are still + classified as `object` at this layer because the kind is decided at + the loader boundary, *before* the response is parsed; the + [`activitypub.object.lookup`](#instrumented-metrics) counter + provides the parsed-result actor / object split. + + `activitypub.lookup.result` is always present and is one of + `fetched`, `not_found` (with `http.response.status_code`), + `network_error`, or `error`. The shared error classifier only + surfaces these four values at the loader boundary — `invalid` is + reserved for the key lookup metrics, where the parser can decide + that a successful HTTP response still does not contain a usable + key. `activitypub.remote.host` records the hostname of the + fetched URL when the URL parses; otherwise it is omitted. + `activitypub.cache.enabled` is `true` for Fedify's built-in + `kvCache()`-backed document and context loaders and `false` for the + authenticated document loader; for user-supplied factories Fedify + cannot introspect caching behavior, so the attribute is omitted + rather than recorded as a confident `true` or `false`. + + Counter and histogram are always emitted together for one wrapped + loader call, so dashboards can compute average duration as + `duration_sum / counter`. Document IDs, JSON-LD context URLs, and + full request URLs are deliberately excluded; the + `activitypub.fetch_document` span keeps the full URL for sampled + traces. + +`activitypub.document.cache` +: `activitypub.lookup.kind` is always present (same values as + `activitypub.document.fetch`). `activitypub.lookup.result` is + `hit` when the KV cache returned a `RemoteDocument` and `miss` + when it did not. Cache lookups that bypass the KV cache entirely — + preloaded JSON-LD contexts and call sites without a matching cache + rule — emit no measurement. `activitypub.remote.host` records the + hostname of the looked-up URL when it parses. + +`activitypub.object.lookup` +: `activitypub.lookup.kind` is always present and is one of: + + - `actor`: `lookupObject()` resolved to an `Actor` subtype + (`Application`, `Group`, `Organization`, `Person`, `Service`). + - `object`: `lookupObject()` resolved to a non-actor + `Object` subtype. + - `other`: `lookupObject()` returned `null` (the document could + not be fetched, the response could not be parsed, or the + cross-origin check rejected the resolved object) **or** the + call threw before resolving an object. The metric is emitted + in a `finally` block, so a thrown error is still counted with + `kind=other`. + + `activitypub.remote.host` is the hostname extracted from the + identifier — a parsed `URL`, an `acct:user@host` URI, or a bare + `@user@host` / `user@host` handle. Inputs that do not reduce + cleanly to an authority (paths, query strings, fragments, or + whitespace mixed in with the handle suffix) result in the + attribute being omitted, rather than recording a high-cardinality + value. This counter has no companion histogram: `lookupObject()` + drives `activitypub.document.fetch.duration` through the document + loader, and emitting another duration here would double-count + latency. Use `activitypub.object.lookup` for the parsed-result + classification and `activitypub.document.fetch[.duration]` for + the loader-level rate and latency. + `fedify.http.server.request.count` and `fedify.http.server.request.duration` : `http.request.method` and `fedify.endpoint` are always present. `http.request.method` is normalized to one of the standard HTTP methods @@ -557,6 +670,18 @@ and query strings are deliberately excluded to keep metric cardinality bounded. Activity types use the same qualified URI form as Fedify's trace attributes, for example `https://www.w3.org/ns/activitystreams#Create`. +The key lookup, document fetch, document cache, and object lookup metrics +share an `activitypub.lookup.kind` and (where applicable) +`activitypub.lookup.result` attribute taxonomy. Both are drawn from small +fixed enumerations — `kind` ∈ `{public_key, actor, object, context, other}` +and `result` ∈ `{hit, miss, fetched, not_found, invalid, network_error, error}` +— so an attacker-controlled remote cannot inflate cardinality by returning +arbitrary status codes, content types, or thrown exceptions. Full URLs, key +IDs, actor IDs, object IDs, JSON-LD context URLs, and fediverse handles are +deliberately excluded; they remain on the corresponding spans +(`activitypub.fetch_key`, `activitypub.fetch_document`, +`activitypub.lookup_object`) for trace-level investigation. + The HTTP server request metrics deliberately exclude high-cardinality fields such as the full URL, raw path, query string, actor identifier, and inbox URL. Use the request span's `url.full` attribute when you need the exact URL From 47d1246301546405445a9cbc9df4d3e127d24d11 Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 20:39:09 +0900 Subject: [PATCH 08/16] Clarify opt-in vs global-fallback metric semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a `[!NOTE]` block to the "Explicit MeterProvider configuration" section that calls out which metrics are opt-in (only emit when an explicit `meterProvider` is configured) and which follow the standard "fall back to the global `MeterProvider`" behavior. Only `activitypub.document.fetch[.duration]` and `activitypub.document.cache` are opt-in inside Fedify, because the `instrumentDocumentLoader()` wrapper short-circuits to the original loader when no meter provider is supplied so that `ctx.documentLoader === userLoader` continues to hold and existing reference-identity tests do not break. Every other metric — including `activitypub.key.lookup[.duration]` and `activitypub.object.lookup` when reached through `Context.lookupObject()` — uses Fedify's standard global-provider fallback because the recording helpers do not short-circuit and `Context.meterProvider` resolves to the global default when nothing was configured. A trailing sentence clarifies that direct `lookupObject()` calls from `@fedify/vocab` (bypassing the Federation context) still require an explicit `LookupObjectOptions.meterProvider` since they have no Federation-level provider to fall back on. https://github.com/fedify-dev/fedify/issues/316 https://github.com/fedify-dev/fedify/issues/738 Assisted-by: Claude Code:claude-opus-4-7 Assisted-by: Codex:gpt-5.5 --- docs/manual/opentelemetry.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/manual/opentelemetry.md b/docs/manual/opentelemetry.md index 507a525e2..fb73ba30c 100644 --- a/docs/manual/opentelemetry.md +++ b/docs/manual/opentelemetry.md @@ -173,6 +173,26 @@ const federation = createFederation({ }); ~~~~ +> [!NOTE] +> The document and context loader metrics +> (`activitypub.document.fetch[.duration]` and +> `activitypub.document.cache`) are opt-in inside Fedify: they are +> emitted only when `meterProvider` is explicitly configured on +> `createFederation()`. Omitting it preserves strict reference identity +> for `Context.documentLoader`, `Context.contextLoader`, and the +> authenticated document loader (`ctx.documentLoader === userLoader`), +> so existing test code that asserts identity on a user-supplied +> factory's output continues to work. The other metrics (delivery, +> inbox, outbox, fanout, queue, HTTP server, signature verification, +> signature key fetch, public key lookup, and `lookupObject` actor +> classification) follow the standard “fall back to the global +> [`MeterProvider`]” behavior described above. Calling +> `lookupObject()` directly from `@fedify/vocab` (without going through +> a `Context`) still requires an explicit +> `LookupObjectOptions.meterProvider` to emit +> `activitypub.object.lookup`; `Context.lookupObject()` threads the +> Federation's meter provider through automatically. + [`MeterProvider`]: https://open-telemetry.github.io/opentelemetry-js/interfaces/_opentelemetry_api._opentelemetry_api.MeterProvider.html From 3df79b5176e6cfe140a3089633f9b871ad29e46d Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 20:48:09 +0900 Subject: [PATCH 09/16] Add a PR link to the changelog --- CHANGES.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 8ae4080b3..7ddf9d76e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -149,7 +149,7 @@ To be released. `activitypub.key.lookup.duration` is the general-purpose histogram that covers non-signature key fetches as well and adds `http.response.status_code` and a richer - `activitypub.lookup.result` taxonomy. [[#316], [#738]] + `activitypub.lookup.result` taxonomy. [[#316], [#738], [#771]] [#316]: https://github.com/fedify-dev/fedify/issues/316 [#619]: https://github.com/fedify-dev/fedify/issues/619 @@ -167,6 +167,7 @@ To be released. [#759]: https://github.com/fedify-dev/fedify/pull/759 [#769]: https://github.com/fedify-dev/fedify/pull/769 [#770]: https://github.com/fedify-dev/fedify/pull/770 +[#771]: https://github.com/fedify-dev/fedify/pull/771 ### @fedify/fixture From 1ddf69ab5db685936b9a26bde281132d8de679db Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 21:16:47 +0900 Subject: [PATCH 10/16] Reset fetchMock global spy in lookup.test.ts The new `lookupObject() records activitypub.object.lookup counter` test block installs a `fetchMock.spyGlobal()` spy but previously only called `fetchMock.removeRoutes()` in teardown, leaving the global `fetch` spy installed after the block finished. Call `fetchMock.hardReset()` after `removeRoutes()` so the global `fetch` is restored, matching the pattern the file's other `spyGlobal()` blocks already use. https://github.com/fedify-dev/fedify/pull/771#discussion_r3258709646 Assisted-by: Claude Code:claude-opus-4-7 --- packages/vocab/src/lookup.test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/vocab/src/lookup.test.ts b/packages/vocab/src/lookup.test.ts index 46b42a62e..2d78f47da 100644 --- a/packages/vocab/src/lookup.test.ts +++ b/packages/vocab/src/lookup.test.ts @@ -855,6 +855,7 @@ test("lookupObject() records activitypub.object.lookup counter", { ); fetchMock.removeRoutes(); + fetchMock.hardReset(); }); // cSpell: ignore gildong From 326c94ad128cefef943587e5cb34f2e8e5a31195 Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 21:50:05 +0900 Subject: [PATCH 11/16] Always tear down fetchMock spy in lookup test The new `lookupObject() records activitypub.object.lookup counter` test block calls `fetchMock.spyGlobal()` at the top and pairs it with `fetchMock.removeRoutes(); fetchMock.hardReset();` at the bottom, but the cleanup was unconditional only on the happy path: if any `await t.step(...)` propagated a rejection, the global `fetch` spy stayed installed and leaked into later tests. Wrap the block body in `try { ... } finally { ... }` so the spy is always torn down, even when an assertion inside a step fails. `t.after()` was not used because Deno's `TestContext` does not expose an `after` hook and the fixture wrapper is built around the Deno-native context shape. Existing test steps were re-indented one level deeper to fit inside the try block; no test logic changed. https://github.com/fedify-dev/fedify/pull/771#discussion_r3258830982 Assisted-by: Claude Code:claude-opus-4-7 --- packages/vocab/src/lookup.test.ts | 267 +++++++++++++++--------------- 1 file changed, 137 insertions(+), 130 deletions(-) diff --git a/packages/vocab/src/lookup.test.ts b/packages/vocab/src/lookup.test.ts index 2d78f47da..807da59e0 100644 --- a/packages/vocab/src/lookup.test.ts +++ b/packages/vocab/src/lookup.test.ts @@ -699,117 +699,13 @@ test("lookupObject() records activitypub.object.lookup counter", { sanitizeOps: false, }, async (t) => { fetchMock.spyGlobal(); - fetchMock.removeRoutes(); - fetchMock.get( - "begin:https://example.com/.well-known/webfinger", - { - subject: "acct:johndoe@example.com", - links: [ - { - rel: "self", - href: "https://example.com/person", - type: "application/activity+json", - }, - ], - }, - ); - - await t.step("records kind=actor for an actor result", async () => { - const [meterProvider, recorder] = createTestMeterProvider(); - const person = await lookupObject("@johndoe@example.com", { - documentLoader: mockDocumentLoader, - contextLoader: mockDocumentLoader, - meterProvider, - }); - assertInstanceOf(person, Person); - const counters = recorder.getMeasurements("activitypub.object.lookup"); - deepStrictEqual(counters.length, 1); - deepStrictEqual(counters[0].type, "counter"); - deepStrictEqual(counters[0].value, 1); - deepStrictEqual( - counters[0].attributes["activitypub.lookup.kind"], - "actor", - ); - deepStrictEqual( - counters[0].attributes["activitypub.remote.host"], - "example.com", - ); - // No `activitypub.document.fetch` measurement should be recorded by - // lookupObject itself; that lives on the document-loader path. - deepStrictEqual( - recorder.getMeasurements("activitypub.document.fetch").length, - 0, - ); - }); - - await t.step("records kind=object for a non-actor object", async () => { - const [meterProvider, recorder] = createTestMeterProvider(); - const object = await lookupObject("https://example.com/object", { - documentLoader: mockDocumentLoader, - contextLoader: mockDocumentLoader, - meterProvider, - }); - assertInstanceOf(object, Object); - const counters = recorder.getMeasurements("activitypub.object.lookup"); - deepStrictEqual(counters.length, 1); - deepStrictEqual( - counters[0].attributes["activitypub.lookup.kind"], - "object", - ); - deepStrictEqual( - counters[0].attributes["activitypub.remote.host"], - "example.com", - ); - }); - - await t.step("records kind=other on null result", async () => { + // Cleanup must run even if a test step throws so the global `fetch` + // spy does not leak into subsequent tests. + try { fetchMock.removeRoutes(); - fetchMock.get("begin:https://example.com/.well-known/webfinger", { - subject: "acct:janedoe@example.com", - links: [ - { - rel: "self", - href: "https://example.com/404", - type: "application/activity+json", - }, - ], - }); - const [meterProvider, recorder] = createTestMeterProvider(); - const result = await lookupObject("janedoe@example.com", { - documentLoader: mockDocumentLoader, - contextLoader: mockDocumentLoader, - meterProvider, - }); - deepStrictEqual(result, null); - const counters = recorder.getMeasurements("activitypub.object.lookup"); - deepStrictEqual(counters.length, 1); - deepStrictEqual( - counters[0].attributes["activitypub.lookup.kind"], - "other", - ); - deepStrictEqual( - counters[0].attributes["activitypub.remote.host"], - "example.com", - ); - }); - - await t.step("omits counter when no meterProvider is provided", async () => { - const [_unused, recorder] = createTestMeterProvider(); - await lookupObject("https://example.com/object", { - documentLoader: mockDocumentLoader, - contextLoader: mockDocumentLoader, - }); - deepStrictEqual( - recorder.getMeasurements("activitypub.object.lookup").length, - 0, - ); - }); - - await t.step( - "extracts host from a URL acct: instance", - async () => { - fetchMock.removeRoutes(); - fetchMock.get("begin:https://example.com/.well-known/webfinger", { + fetchMock.get( + "begin:https://example.com/.well-known/webfinger", + { subject: "acct:johndoe@example.com", links: [ { @@ -818,44 +714,155 @@ test("lookupObject() records activitypub.object.lookup counter", { type: "application/activity+json", }, ], + }, + ); + + await t.step("records kind=actor for an actor result", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const person = await lookupObject("@johndoe@example.com", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, }); + assertInstanceOf(person, Person); + const counters = recorder.getMeasurements("activitypub.object.lookup"); + deepStrictEqual(counters.length, 1); + deepStrictEqual(counters[0].type, "counter"); + deepStrictEqual(counters[0].value, 1); + deepStrictEqual( + counters[0].attributes["activitypub.lookup.kind"], + "actor", + ); + deepStrictEqual( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + // No `activitypub.document.fetch` measurement should be recorded by + // lookupObject itself; that lives on the document-loader path. + deepStrictEqual( + recorder.getMeasurements("activitypub.document.fetch").length, + 0, + ); + }); + + await t.step("records kind=object for a non-actor object", async () => { const [meterProvider, recorder] = createTestMeterProvider(); - await lookupObject(new URL("acct:johndoe@example.com"), { + const object = await lookupObject("https://example.com/object", { documentLoader: mockDocumentLoader, contextLoader: mockDocumentLoader, meterProvider, }); - const counter = recorder.getMeasurement("activitypub.object.lookup"); + assertInstanceOf(object, Object); + const counters = recorder.getMeasurements("activitypub.object.lookup"); + deepStrictEqual(counters.length, 1); + deepStrictEqual( + counters[0].attributes["activitypub.lookup.kind"], + "object", + ); deepStrictEqual( - counter?.attributes["activitypub.remote.host"], + counters[0].attributes["activitypub.remote.host"], "example.com", ); - }, - ); + }); - await t.step( - "omits host when the identifier carries path/query characters", - async () => { + await t.step("records kind=other on null result", async () => { + fetchMock.removeRoutes(); + fetchMock.get("begin:https://example.com/.well-known/webfinger", { + subject: "acct:janedoe@example.com", + links: [ + { + rel: "self", + href: "https://example.com/404", + type: "application/activity+json", + }, + ], + }); const [meterProvider, recorder] = createTestMeterProvider(); - // The bare-handle parser must refuse to put a path-bearing string - // like "example.com/leak" into the metric attribute. - await lookupObject("johndoe@example.com/leak", { + const result = await lookupObject("janedoe@example.com", { documentLoader: mockDocumentLoader, contextLoader: mockDocumentLoader, meterProvider, }); - const counter = recorder.getMeasurement("activitypub.object.lookup"); - ok(counter != null); + deepStrictEqual(result, null); + const counters = recorder.getMeasurements("activitypub.object.lookup"); + deepStrictEqual(counters.length, 1); deepStrictEqual( - "activitypub.remote.host" in counter.attributes, - false, - "high-cardinality handle suffixes must not be recorded as remote.host", + counters[0].attributes["activitypub.lookup.kind"], + "other", ); - }, - ); + deepStrictEqual( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + }); - fetchMock.removeRoutes(); - fetchMock.hardReset(); + await t.step( + "omits counter when no meterProvider is provided", + async () => { + const [_unused, recorder] = createTestMeterProvider(); + await lookupObject("https://example.com/object", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + }); + deepStrictEqual( + recorder.getMeasurements("activitypub.object.lookup").length, + 0, + ); + }, + ); + + await t.step( + "extracts host from a URL acct: instance", + async () => { + fetchMock.removeRoutes(); + fetchMock.get("begin:https://example.com/.well-known/webfinger", { + subject: "acct:johndoe@example.com", + links: [ + { + rel: "self", + href: "https://example.com/person", + type: "application/activity+json", + }, + ], + }); + const [meterProvider, recorder] = createTestMeterProvider(); + await lookupObject(new URL("acct:johndoe@example.com"), { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + const counter = recorder.getMeasurement("activitypub.object.lookup"); + deepStrictEqual( + counter?.attributes["activitypub.remote.host"], + "example.com", + ); + }, + ); + + await t.step( + "omits host when the identifier carries path/query characters", + async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + // The bare-handle parser must refuse to put a path-bearing string + // like "example.com/leak" into the metric attribute. + await lookupObject("johndoe@example.com/leak", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + const counter = recorder.getMeasurement("activitypub.object.lookup"); + ok(counter != null); + deepStrictEqual( + "activitypub.remote.host" in counter.attributes, + false, + "high-cardinality handle suffixes must not be recorded as remote.host", + ); + }, + ); + } finally { + fetchMock.removeRoutes(); + fetchMock.hardReset(); + } }); // cSpell: ignore gildong From 00f7b77606816168555699dad828a2241812a038 Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 21:53:08 +0900 Subject: [PATCH 12/16] Rewrite em dashes in opentelemetry.md prose Seven em-dashes in narrative paragraphs of the OpenTelemetry guide used surrounding spaces, which the project style guide explicitly forbids in running prose ("em dashes in narrative text should not have surrounding spaces"). Gemini's code-assist bot flagged all seven on the PR. Rather than just removing the spaces around each em-dash, rewrite the punctuation so the prose does not use em-dashes at all. Em dashes read as LLM-style in this codebase, and the replacements (commas, semicolons, colons, parentheses, or a sentence split) all read better in context. No semantic content changed. https://github.com/fedify-dev/fedify/pull/771#discussion_r3258859469 https://github.com/fedify-dev/fedify/pull/771#discussion_r3258859490 https://github.com/fedify-dev/fedify/pull/771#discussion_r3258859496 https://github.com/fedify-dev/fedify/pull/771#discussion_r3258859502 https://github.com/fedify-dev/fedify/pull/771#discussion_r3258859507 https://github.com/fedify-dev/fedify/pull/771#discussion_r3258859515 https://github.com/fedify-dev/fedify/pull/771#discussion_r3258859526 Assisted-by: Claude Code:claude-opus-4-7 --- docs/manual/opentelemetry.md | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/docs/manual/opentelemetry.md b/docs/manual/opentelemetry.md index fb73ba30c..ed66d95bc 100644 --- a/docs/manual/opentelemetry.md +++ b/docs/manual/opentelemetry.md @@ -505,7 +505,7 @@ Fedify records the following OpenTelemetry metrics: the document-fetch and lookup-object families described below. `activitypub.lookup.result` is always present and is one of: - - `hit`: the key was served from the configured `KeyCache` — either + - `hit`: the key was served from the configured `KeyCache`, either a valid cached key or a cached negative entry recording a prior failed fetch. - `fetched`: the key was not in the cache and was loaded through @@ -514,7 +514,7 @@ Fedify records the following OpenTelemetry metrics: `410 Gone`. Recorded together with `http.response.status_code`. - `invalid`: the remote responded with a payload Fedify could not parse into a `CryptographicKey` or `Multikey`. - - `network_error`: no HTTP response was received — DNS, connect, + - `network_error`: no HTTP response was received. DNS, connect, TLS, redirect-loop, or aborted-fetch failures all fall into this bucket via the shared error classifier. - `error`: any other unexpected failure (non-2xx HTTP response that @@ -525,7 +525,7 @@ Fedify records the following OpenTelemetry metrics: caller passed a `KeyCache`, `false` otherwise. `activitypub.remote.host` is the hostname of the key URL. `http.response.status_code` is present only when an HTTP response was observed. Key IDs, full key - URLs, and actor IDs are deliberately excluded from these metrics — + URLs, and actor IDs are deliberately excluded from these metrics; they remain on the `activitypub.fetch_key` span for trace-level investigation. @@ -552,7 +552,7 @@ Fedify records the following OpenTelemetry metrics: `activitypub.lookup.result` is always present and is one of `fetched`, `not_found` (with `http.response.status_code`), `network_error`, or `error`. The shared error classifier only - surfaces these four values at the loader boundary — `invalid` is + surfaces these four values at the loader boundary; `invalid` is reserved for the key lookup metrics, where the parser can decide that a successful HTTP response still does not contain a usable key. `activitypub.remote.host` records the hostname of the @@ -574,9 +574,9 @@ Fedify records the following OpenTelemetry metrics: : `activitypub.lookup.kind` is always present (same values as `activitypub.document.fetch`). `activitypub.lookup.result` is `hit` when the KV cache returned a `RemoteDocument` and `miss` - when it did not. Cache lookups that bypass the KV cache entirely — - preloaded JSON-LD contexts and call sites without a matching cache - rule — emit no measurement. `activitypub.remote.host` records the + when it did not. Cache lookups that bypass the KV cache entirely + (preloaded JSON-LD contexts and call sites without a matching cache + rule) emit no measurement. `activitypub.remote.host` records the hostname of the looked-up URL when it parses. `activitypub.object.lookup` @@ -594,7 +594,7 @@ Fedify records the following OpenTelemetry metrics: `kind=other`. `activitypub.remote.host` is the hostname extracted from the - identifier — a parsed `URL`, an `acct:user@host` URI, or a bare + identifier: a parsed `URL`, an `acct:user@host` URI, or a bare `@user@host` / `user@host` handle. Inputs that do not reduce cleanly to an authority (paths, query strings, fragments, or whitespace mixed in with the handle suffix) result in the @@ -693,14 +693,15 @@ for example `https://www.w3.org/ns/activitystreams#Create`. The key lookup, document fetch, document cache, and object lookup metrics share an `activitypub.lookup.kind` and (where applicable) `activitypub.lookup.result` attribute taxonomy. Both are drawn from small -fixed enumerations — `kind` ∈ `{public_key, actor, object, context, other}` -and `result` ∈ `{hit, miss, fetched, not_found, invalid, network_error, error}` -— so an attacker-controlled remote cannot inflate cardinality by returning -arbitrary status codes, content types, or thrown exceptions. Full URLs, key -IDs, actor IDs, object IDs, JSON-LD context URLs, and fediverse handles are -deliberately excluded; they remain on the corresponding spans -(`activitypub.fetch_key`, `activitypub.fetch_document`, -`activitypub.lookup_object`) for trace-level investigation. +fixed enumerations (`kind` ∈ `{public_key, actor, object, context, other}` +and `result` ∈ +`{hit, miss, fetched, not_found, invalid, network_error, error}`), so an +attacker-controlled remote cannot inflate cardinality by returning arbitrary +status codes, content types, or thrown exceptions. Full URLs, key IDs, actor +IDs, object IDs, JSON-LD context URLs, and fediverse handles are deliberately +excluded; they remain on the corresponding spans (`activitypub.fetch_key`, +`activitypub.fetch_document`, `activitypub.lookup_object`) for trace-level +investigation. The HTTP server request metrics deliberately exclude high-cardinality fields such as the full URL, raw path, query string, actor identifier, and inbox From 0d5e2c5809ee21f50985114b5b866f22e04f7f1a Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Mon, 18 May 2026 22:23:58 +0900 Subject: [PATCH 13/16] Honor factoryOpts in default auth loader factory The built-in default `authenticatedDocumentLoaderFactory` previously ignored its second `factoryOpts: DocumentLoaderFactoryOptions` argument and always built the authenticated loader with the constructor-level `allowPrivateAddress` / `userAgent`. That was inconsistent with the regular `documentLoaderFactory` and `contextLoaderFactory` defaults a few lines up, which already use `opts?.allowPrivateAddress ?? allowPrivateAddress` and `opts?.userAgent ?? userAgent` to honor per-call overrides. Mirror that pattern in the authenticated default so the constructor defaults remain the fallback but per-call overrides now win. The user-supplied auth factory branch already forwards `factoryOpts` directly via `userAuthFactory(identity, factoryOpts)` (added earlier in this PR), so both branches now behave consistently. https://github.com/fedify-dev/fedify/pull/771#discussion_r3259084935 Assisted-by: Claude Code:claude-opus-4-7 --- packages/fedify/src/federation/middleware.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/fedify/src/federation/middleware.ts b/packages/fedify/src/federation/middleware.ts index 1fe2d62ad..cf12b134d 100644 --- a/packages/fedify/src/federation/middleware.ts +++ b/packages/fedify/src/federation/middleware.ts @@ -441,9 +441,14 @@ export class FederationImpl // per-call `DocumentLoaderFactoryOptions` (allowPrivateAddress, // userAgent) just like they did before this wrapper was added. ? userAuthFactory(identity, factoryOpts) + // The built-in default honors per-call `factoryOpts` overrides + // the same way `documentLoaderFactory` / `contextLoaderFactory` + // do above, falling back to the constructor-level settings when + // the caller did not supply an override. : getAuthenticatedDocumentLoader(identity, { - allowPrivateAddress, - userAgent, + allowPrivateAddress: factoryOpts?.allowPrivateAddress ?? + allowPrivateAddress, + userAgent: factoryOpts?.userAgent ?? userAgent, specDeterminer: new KvSpecDeterminer( this.kv, this.kvPrefixes.httpMessageSignaturesSpec, From c825ad962b7ac3b8c892d9498dbc1f3e238b8e87 Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Wed, 20 May 2026 01:13:32 +0900 Subject: [PATCH 14/16] Classify object.lookup kind before span events The `activitypub.object.lookup` measurement's `kind` was assigned after `lookupObjectInternal` returned, but only past several lines that can throw, including `result.toJsonLd(options)` inside the `span.addEvent("activitypub.object.fetched", ...)` payload. If any of those threw, `kind` stayed at its initial `"other"` value, so the `finally` block recorded `kind=other` even when `lookupObjectInternal` had returned a non-null Actor or Object. Classify the result with `kind = isActor(result) ? "actor" : "object"` immediately after `lookupObjectInternal` returns (before the span attributes and span event work). A thrown error past that point now still records the correct `kind` value on the metric. https://github.com/fedify-dev/fedify/pull/771#discussion_r3267739396 Assisted-by: Claude Code:claude-opus-4-7 --- packages/vocab/src/lookup.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/vocab/src/lookup.ts b/packages/vocab/src/lookup.ts index 502d059b0..da9159c42 100644 --- a/packages/vocab/src/lookup.ts +++ b/packages/vocab/src/lookup.ts @@ -212,6 +212,13 @@ export async function lookupObject( let kind: ObjectLookupKind = "other"; try { const result = await lookupObjectInternal(identifier, options); + // Classify the result as soon as `lookupObjectInternal` returns, + // so that any subsequent throw (for example from + // `result.toJsonLd(options)` while building the span event) does + // not roll `kind` back to `"other"` in the `finally` block. + if (result != null) { + kind = isActor(result) ? "actor" : "object"; + } if (result == null) span.setStatus({ code: SpanStatusCode.ERROR }); else { if (result.id != null) { @@ -232,8 +239,6 @@ export async function lookupObject( await result.toJsonLd(options), ), }); - - kind = isActor(result) ? "actor" : "object"; } return result; } catch (error) { From 22e3557122a0b6367cdd862026645c73f2fb0831 Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Wed, 20 May 2026 01:14:08 +0900 Subject: [PATCH 15/16] Fix grammar of fetchKey meter-omission test name The test was named "fetchKey() omits the meter provider has no effect on behavior", which reads as if the subject is "fetchKey() omits the meter provider" and the verb is "has no effect" (it is not a complete clause). Rename it to "fetchKey() works when meterProvider is omitted", which matches the test's actual purpose: a sanity check that omitting the new optional `meterProvider` keeps `fetchKey()` functional. No test logic changed. https://github.com/fedify-dev/fedify/pull/771#discussion_r3267606398 Assisted-by: Claude Code:claude-opus-4-7 --- packages/fedify/src/sig/key.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/fedify/src/sig/key.test.ts b/packages/fedify/src/sig/key.test.ts index b759c21c4..7d865324f 100644 --- a/packages/fedify/src/sig/key.test.ts +++ b/packages/fedify/src/sig/key.test.ts @@ -603,7 +603,7 @@ test("fetchKeyDetailed() records activitypub.key.lookup with the same taxonomy", assertEquals(counter?.attributes["http.response.status_code"], 410); }); -test("fetchKey() omits the meter provider has no effect on behavior", async () => { +test("fetchKey() works when meterProvider is omitted", async () => { // Sanity: omitting meterProvider keeps fetchKey functional. const result = await fetchKey( "https://example.com/key", From 389552e441cc369f5bcd1c72535ff1bc859910f9 Mon Sep 17 00:00:00 2001 From: Hong Minhee Date: Wed, 20 May 2026 01:54:54 +0900 Subject: [PATCH 16/16] Narrow DocumentFetchResult to drop unused invalid `DocumentFetchResult` was previously defined as `Exclude`, which still left `"invalid"` in the type union. But the document-loader instrumentation never produces `"invalid"`: the success path always emits `"fetched"`, and `classifyFetchError()` only returns `"not_found"`, `"network_error"`, or `"error"`. `"invalid"` is reserved for the key-lookup family, where the parser can decide that a successful HTTP response still does not contain a usable key, which the existing `activitypub.document.fetch` docs already call out. Narrow `DocumentFetchResult` to `Exclude` so the unreachable variant becomes a compile-time error if any future code path tries to record `result: "invalid"` on the document-fetch family. JSDoc updated to mention the third exclusion and why. No runtime behavior changes; this is a type-safety tightening. https://github.com/fedify-dev/fedify/pull/771#discussion_r3267877406 Assisted-by: Claude Code:claude-opus-4-7 --- packages/fedify/src/federation/metrics.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/fedify/src/federation/metrics.ts b/packages/fedify/src/federation/metrics.ts index 75a16b4d5..448c6d5fa 100644 --- a/packages/fedify/src/federation/metrics.ts +++ b/packages/fedify/src/federation/metrics.ts @@ -255,10 +255,16 @@ export type DocumentFetchKind = Exclude; * The {@link LookupResult} values that can appear on the * `activitypub.document.fetch` and `activitypub.document.fetch.duration` * metrics. Cache `hit` / `miss` outcomes are reported on - * `activitypub.document.cache`, so they are excluded here. + * `activitypub.document.cache`, and `invalid` is reserved for the + * key-lookup metrics (where the parser can decide that a successful + * HTTP response still does not contain a usable key), so all three are + * excluded here. * @since 2.3.0 */ -export type DocumentFetchResult = Exclude; +export type DocumentFetchResult = Exclude< + LookupResult, + "hit" | "miss" | "invalid" +>; /** * The {@link LookupResult} values that can appear on the