diff --git a/CHANGES.md b/CHANGES.md index a0fb2cad7..7ddf9d76e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -113,11 +113,50 @@ To be released. and other high-cardinality identifiers are deliberately excluded from the fanout histogram. [[#316], [#742], [#770]] + - Added OpenTelemetry metrics for public key lookups, remote JSON-LD + document fetches, and `lookupObject()` calls so operators can + observe how often Fedify hits the cache, how long remote fetches + take, and how `lookupObject()` resolutions split between actors, + non-actor objects, and unresolved lookups: + + - `activitypub.key.lookup` (counter) and + `activitypub.key.lookup.duration` (histogram) cover every + public key lookup performed by `fetchKey()` / + `fetchKeyDetailed()`, including signature verification paths. + - `activitypub.document.fetch` (counter) and + `activitypub.document.fetch.duration` (histogram) cover every + Fedify-wrapped document or context loader invocation, including + the authenticated loader. + - `activitypub.document.cache` (counter) records `hit` or `miss` + for each `kvCache()`-backed cache lookup. + - `activitypub.object.lookup` (counter) records the + parsed-result classification of every `lookupObject()` call as + `actor`, `object`, or `other`. + + Instruments share an `activitypub.lookup.kind` and (where + applicable) `activitypub.lookup.result` attribute drawn from small, + spec-bounded enumerations. `activitypub.remote.host` records the + URL hostname only; `http.response.status_code` is recorded when an + HTTP response was observed; `activitypub.cache.enabled` is + recorded on the key and document fetch metrics whenever Fedify can + confidently report the cache layer's presence. Key IDs, actor + IDs, object IDs, JSON-LD context URLs, full URLs, and fediverse + handles are deliberately excluded so attacker-controlled remotes + cannot inflate metric cardinality. The existing + `activitypub.signature.key_fetch.duration` histogram (introduced in + Fedify 2.3 for signature-scoped key-fetch latency, sliced by + `activitypub.signature.kind`) remains in place; the new + `activitypub.key.lookup.duration` is the general-purpose + histogram that covers non-signature key fetches as well and adds + `http.response.status_code` and a richer + `activitypub.lookup.result` taxonomy. [[#316], [#738], [#771]] + [#316]: https://github.com/fedify-dev/fedify/issues/316 [#619]: https://github.com/fedify-dev/fedify/issues/619 [#735]: https://github.com/fedify-dev/fedify/issues/735 [#736]: https://github.com/fedify-dev/fedify/issues/736 [#737]: https://github.com/fedify-dev/fedify/issues/737 +[#738]: https://github.com/fedify-dev/fedify/issues/738 [#740]: https://github.com/fedify-dev/fedify/issues/740 [#742]: https://github.com/fedify-dev/fedify/issues/742 [#748]: https://github.com/fedify-dev/fedify/pull/748 @@ -128,6 +167,7 @@ To be released. [#759]: https://github.com/fedify-dev/fedify/pull/759 [#769]: https://github.com/fedify-dev/fedify/pull/769 [#770]: https://github.com/fedify-dev/fedify/pull/770 +[#771]: https://github.com/fedify-dev/fedify/pull/771 ### @fedify/fixture diff --git a/docs/manual/opentelemetry.md b/docs/manual/opentelemetry.md index fe38b3d78..ed66d95bc 100644 --- a/docs/manual/opentelemetry.md +++ b/docs/manual/opentelemetry.md @@ -173,6 +173,26 @@ const federation = createFederation({ }); ~~~~ +> [!NOTE] +> The document and context loader metrics +> (`activitypub.document.fetch[.duration]` and +> `activitypub.document.cache`) are opt-in inside Fedify: they are +> emitted only when `meterProvider` is explicitly configured on +> `createFederation()`. Omitting it preserves strict reference identity +> for `Context.documentLoader`, `Context.contextLoader`, and the +> authenticated document loader (`ctx.documentLoader === userLoader`), +> so existing test code that asserts identity on a user-supplied +> factory's output continues to work. The other metrics (delivery, +> inbox, outbox, fanout, queue, HTTP server, signature verification, +> signature key fetch, public key lookup, and `lookupObject` actor +> classification) follow the standard “fall back to the global +> [`MeterProvider`]” behavior described above. Calling +> `lookupObject()` directly from `@fedify/vocab` (without going through +> a `Context`) still requires an explicit +> `LookupObjectOptions.meterProvider` to emit +> `activitypub.object.lookup`; `Context.lookupObject()` threads the +> Federation's meter provider through automatically. + [`MeterProvider`]: https://open-telemetry.github.io/opentelemetry-js/interfaces/_opentelemetry_api._opentelemetry_api.MeterProvider.html @@ -308,6 +328,12 @@ Fedify records the following OpenTelemetry metrics: | `activitypub.signature.verification_failure` | Counter | `{failure}` | Counts failed signature verification for inbox requests. | | `activitypub.signature.verification.duration` | Histogram | `ms` | Measures signature verification duration across HTTP, Linked Data, and Object Integrity Proofs. | | `activitypub.signature.key_fetch.duration` | Histogram | `ms` | Measures public key lookup duration during signature verification. | +| `activitypub.key.lookup` | Counter | `{lookup}` | Counts public key lookups performed by `fetchKey()` / `fetchKeyDetailed()`. | +| `activitypub.key.lookup.duration` | Histogram | `ms` | Measures public key lookup duration, including cache hits and remote fetches. | +| `activitypub.document.fetch` | Counter | `{fetch}` | Counts remote JSON-LD document loader invocations made by Fedify-wrapped loaders. | +| `activitypub.document.fetch.duration` | Histogram | `ms` | Measures remote JSON-LD document loader invocation duration. | +| `activitypub.document.cache` | Counter | `{lookup}` | Counts KV-backed document loader cache lookups, classified as `hit` or `miss`. | +| `activitypub.object.lookup` | Counter | `{lookup}` | Counts `lookupObject()` calls, classified by whether the resolved value is an Actor. | | `fedify.http.server.request.count` | Counter | `{request}` | Counts inbound HTTP requests handled by `Federation.fetch()`. | | `fedify.http.server.request.duration` | Histogram | `ms` | Measures inbound HTTP request duration in `Federation.fetch()`. | | `fedify.queue.task.enqueued` | Counter | `{task}` | Counts inbox, outbox, and fanout tasks Fedify enqueued. | @@ -473,6 +499,113 @@ Fedify records the following OpenTelemetry metrics: for the stale attempt and one `fetched` for the freshly fetched retry) alongside the single verification measurement that covers both. +`activitypub.key.lookup` and `activitypub.key.lookup.duration` +: `activitypub.lookup.kind` is always `public_key` on these metrics; the + enumeration also covers `actor`, `object`, `context`, and `other` for + the document-fetch and lookup-object families described below. + `activitypub.lookup.result` is always present and is one of: + + - `hit`: the key was served from the configured `KeyCache`, either + a valid cached key or a cached negative entry recording a prior + failed fetch. + - `fetched`: the key was not in the cache and was loaded through + the document loader, returning a usable key. + - `not_found`: the remote responded with `404 Not Found` or + `410 Gone`. Recorded together with `http.response.status_code`. + - `invalid`: the remote responded with a payload Fedify could not + parse into a `CryptographicKey` or `Multikey`. + - `network_error`: no HTTP response was received. DNS, connect, + TLS, redirect-loop, or aborted-fetch failures all fall into this + bucket via the shared error classifier. + - `error`: any other unexpected failure (non-2xx HTTP response that + is neither `404` nor `410`, thrown exceptions that are not + recognised as transport failures, etc.). + + `activitypub.cache.enabled` is always present and is `true` when the + caller passed a `KeyCache`, `false` otherwise. `activitypub.remote.host` + is the hostname of the key URL. `http.response.status_code` is + present only when an HTTP response was observed. Key IDs, full key + URLs, and actor IDs are deliberately excluded from these metrics; + they remain on the `activitypub.fetch_key` span for trace-level + investigation. + + These metrics complement + [`activitypub.signature.key_fetch.duration`](#instrumented-metrics). + The signature-scoped histogram keeps an `activitypub.signature.kind` + dimension and is the right metric to slice signature verification + latency by `http` / `linked_data` / `object_integrity`; the new + `activitypub.key.lookup*` metrics cover *every* key lookup performed + by Fedify (including non-signature uses such as direct `fetchKey()` + calls) and add a bounded HTTP `status_code` and richer + `lookup.result` taxonomy. + +`activitypub.document.fetch` and `activitypub.document.fetch.duration` +: `activitypub.lookup.kind` is always present and is one of `object` + (Fedify's generic document loader), `context` (the JSON-LD context + loader), or `other` (callers that supply a custom kind hint). + Actor documents fetched through the generic loader are still + classified as `object` at this layer because the kind is decided at + the loader boundary, *before* the response is parsed; the + [`activitypub.object.lookup`](#instrumented-metrics) counter + provides the parsed-result actor / object split. + + `activitypub.lookup.result` is always present and is one of + `fetched`, `not_found` (with `http.response.status_code`), + `network_error`, or `error`. The shared error classifier only + surfaces these four values at the loader boundary; `invalid` is + reserved for the key lookup metrics, where the parser can decide + that a successful HTTP response still does not contain a usable + key. `activitypub.remote.host` records the hostname of the + fetched URL when the URL parses; otherwise it is omitted. + `activitypub.cache.enabled` is `true` for Fedify's built-in + `kvCache()`-backed document and context loaders and `false` for the + authenticated document loader; for user-supplied factories Fedify + cannot introspect caching behavior, so the attribute is omitted + rather than recorded as a confident `true` or `false`. + + Counter and histogram are always emitted together for one wrapped + loader call, so dashboards can compute average duration as + `duration_sum / counter`. Document IDs, JSON-LD context URLs, and + full request URLs are deliberately excluded; the + `activitypub.fetch_document` span keeps the full URL for sampled + traces. + +`activitypub.document.cache` +: `activitypub.lookup.kind` is always present (same values as + `activitypub.document.fetch`). `activitypub.lookup.result` is + `hit` when the KV cache returned a `RemoteDocument` and `miss` + when it did not. Cache lookups that bypass the KV cache entirely + (preloaded JSON-LD contexts and call sites without a matching cache + rule) emit no measurement. `activitypub.remote.host` records the + hostname of the looked-up URL when it parses. + +`activitypub.object.lookup` +: `activitypub.lookup.kind` is always present and is one of: + + - `actor`: `lookupObject()` resolved to an `Actor` subtype + (`Application`, `Group`, `Organization`, `Person`, `Service`). + - `object`: `lookupObject()` resolved to a non-actor + `Object` subtype. + - `other`: `lookupObject()` returned `null` (the document could + not be fetched, the response could not be parsed, or the + cross-origin check rejected the resolved object) **or** the + call threw before resolving an object. The metric is emitted + in a `finally` block, so a thrown error is still counted with + `kind=other`. + + `activitypub.remote.host` is the hostname extracted from the + identifier: a parsed `URL`, an `acct:user@host` URI, or a bare + `@user@host` / `user@host` handle. Inputs that do not reduce + cleanly to an authority (paths, query strings, fragments, or + whitespace mixed in with the handle suffix) result in the + attribute being omitted, rather than recording a high-cardinality + value. This counter has no companion histogram: `lookupObject()` + drives `activitypub.document.fetch.duration` through the document + loader, and emitting another duration here would double-count + latency. Use `activitypub.object.lookup` for the parsed-result + classification and `activitypub.document.fetch[.duration]` for + the loader-level rate and latency. + `fedify.http.server.request.count` and `fedify.http.server.request.duration` : `http.request.method` and `fedify.endpoint` are always present. `http.request.method` is normalized to one of the standard HTTP methods @@ -557,6 +690,19 @@ and query strings are deliberately excluded to keep metric cardinality bounded. Activity types use the same qualified URI form as Fedify's trace attributes, for example `https://www.w3.org/ns/activitystreams#Create`. +The key lookup, document fetch, document cache, and object lookup metrics +share an `activitypub.lookup.kind` and (where applicable) +`activitypub.lookup.result` attribute taxonomy. Both are drawn from small +fixed enumerations (`kind` ∈ `{public_key, actor, object, context, other}` +and `result` ∈ +`{hit, miss, fetched, not_found, invalid, network_error, error}`), so an +attacker-controlled remote cannot inflate cardinality by returning arbitrary +status codes, content types, or thrown exceptions. Full URLs, key IDs, actor +IDs, object IDs, JSON-LD context URLs, and fediverse handles are deliberately +excluded; they remain on the corresponding spans (`activitypub.fetch_key`, +`activitypub.fetch_document`, `activitypub.lookup_object`) for trace-level +investigation. + The HTTP server request metrics deliberately exclude high-cardinality fields such as the full URL, raw path, query string, actor identifier, and inbox URL. Use the request span's `url.full` attribute when you need the exact URL diff --git a/packages/fedify/src/federation/metrics.test.ts b/packages/fedify/src/federation/metrics.test.ts index 61df566ca..e98675209 100644 --- a/packages/fedify/src/federation/metrics.test.ts +++ b/packages/fedify/src/federation/metrics.test.ts @@ -1,9 +1,16 @@ import { createTestMeterProvider, test } from "@fedify/fixture"; -import { assertEquals } from "@std/assert"; +import { assertEquals, assertRejects } from "@std/assert"; +import type { DocumentLoader, RemoteDocument } from "@fedify/vocab-runtime"; +import { FetchError } from "@fedify/vocab-runtime"; import type { MessageQueue } from "./mq.ts"; import { + classifyFetchError, + instrumentDocumentLoader, + recordDocumentCache, + recordDocumentFetch, recordFanoutRecipients, recordInboxActivity, + recordKeyLookup, recordOutboxActivity, recordOutboxEnqueue, } from "./metrics.ts"; @@ -153,3 +160,358 @@ test("recordOutboxActivity() records counter with result and activity type", () ["queued", "retried", "abandoned"], ); }); + +test("recordKeyLookup() records counter and duration with all attributes", () => { + const [meterProvider, recorder] = createTestMeterProvider(); + recordKeyLookup(meterProvider, { + durationMs: 42, + result: "fetched", + remoteUrl: new URL("https://example.com/users/alice#main-key"), + cacheEnabled: true, + statusCode: 200, + }); + + const counters = recorder.getMeasurements("activitypub.key.lookup"); + assertEquals(counters.length, 1); + assertEquals(counters[0].type, "counter"); + assertEquals(counters[0].value, 1); + assertEquals( + counters[0].attributes["activitypub.lookup.kind"], + "public_key", + ); + assertEquals( + counters[0].attributes["activitypub.lookup.result"], + "fetched", + ); + assertEquals( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + assertEquals(counters[0].attributes["activitypub.cache.enabled"], true); + assertEquals(counters[0].attributes["http.response.status_code"], 200); + + const durations = recorder.getMeasurements( + "activitypub.key.lookup.duration", + ); + assertEquals(durations.length, 1); + assertEquals(durations[0].type, "histogram"); + assertEquals(durations[0].value, 42); + assertEquals( + durations[0].attributes["activitypub.lookup.kind"], + "public_key", + ); + assertEquals( + durations[0].attributes["activitypub.lookup.result"], + "fetched", + ); + assertEquals( + durations[0].attributes["activitypub.remote.host"], + "example.com", + ); + assertEquals(durations[0].attributes["activitypub.cache.enabled"], true); + assertEquals(durations[0].attributes["http.response.status_code"], 200); +}); + +test("recordKeyLookup() omits optional attributes when not provided", () => { + const [meterProvider, recorder] = createTestMeterProvider(); + recordKeyLookup(meterProvider, { + durationMs: 0, + result: "error", + cacheEnabled: false, + }); + + const counter = recorder.getMeasurement("activitypub.key.lookup"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "error"); + assertEquals(counter?.attributes["activitypub.cache.enabled"], false); + assertEquals("activitypub.remote.host" in counter!.attributes, false); + assertEquals("http.response.status_code" in counter!.attributes, false); + + const duration = recorder.getMeasurement("activitypub.key.lookup.duration"); + assertEquals("activitypub.remote.host" in duration!.attributes, false); + assertEquals("http.response.status_code" in duration!.attributes, false); +}); + +test("recordDocumentFetch() records counter and duration with all attributes", () => { + const [meterProvider, recorder] = createTestMeterProvider(); + recordDocumentFetch(meterProvider, { + durationMs: 123, + kind: "object", + result: "not_found", + remoteUrl: new URL("https://remote.test/objects/123"), + cacheEnabled: true, + statusCode: 404, + }); + + const counter = recorder.getMeasurement("activitypub.document.fetch"); + assertEquals(counter?.type, "counter"); + assertEquals(counter?.value, 1); + assertEquals(counter?.attributes["activitypub.lookup.kind"], "object"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "not_found"); + assertEquals(counter?.attributes["activitypub.remote.host"], "remote.test"); + assertEquals(counter?.attributes["activitypub.cache.enabled"], true); + assertEquals(counter?.attributes["http.response.status_code"], 404); + + const duration = recorder.getMeasurement( + "activitypub.document.fetch.duration", + ); + assertEquals(duration?.type, "histogram"); + assertEquals(duration?.value, 123); + assertEquals(duration?.attributes["activitypub.lookup.kind"], "object"); + assertEquals(duration?.attributes["activitypub.lookup.result"], "not_found"); +}); + +test("recordDocumentFetch() omits optional attributes when not provided", () => { + const [meterProvider, recorder] = createTestMeterProvider(); + recordDocumentFetch(meterProvider, { + durationMs: 5, + kind: "context", + result: "fetched", + }); + + const counter = recorder.getMeasurement("activitypub.document.fetch"); + assertEquals(counter?.attributes["activitypub.lookup.kind"], "context"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "fetched"); + assertEquals("activitypub.remote.host" in counter!.attributes, false); + assertEquals("activitypub.cache.enabled" in counter!.attributes, false); + assertEquals("http.response.status_code" in counter!.attributes, false); + + const duration = recorder.getMeasurement( + "activitypub.document.fetch.duration", + ); + assertEquals(duration?.value, 5); +}); + +test("recordDocumentCache() records hit and miss as a counter", () => { + const [meterProvider, recorder] = createTestMeterProvider(); + recordDocumentCache(meterProvider, { + kind: "object", + result: "hit", + remoteUrl: new URL("https://remote.test/objects/1"), + }); + recordDocumentCache(meterProvider, { + kind: "context", + result: "miss", + remoteUrl: new URL("https://w3id.org/security/v1"), + }); + + const measurements = recorder.getMeasurements("activitypub.document.cache"); + assertEquals(measurements.length, 2); + for (const m of measurements) { + assertEquals(m.type, "counter"); + assertEquals(m.value, 1); + } + assertEquals(measurements[0].attributes["activitypub.lookup.kind"], "object"); + assertEquals(measurements[0].attributes["activitypub.lookup.result"], "hit"); + assertEquals( + measurements[0].attributes["activitypub.remote.host"], + "remote.test", + ); + assertEquals( + measurements[1].attributes["activitypub.lookup.kind"], + "context", + ); + assertEquals(measurements[1].attributes["activitypub.lookup.result"], "miss"); + assertEquals( + measurements[1].attributes["activitypub.remote.host"], + "w3id.org", + ); +}); + +test("classifyFetchError() classifies FetchError with 404 as not_found", () => { + const response = new Response("", { status: 404 }); + const error = new FetchError( + "https://example.com/k", + "not found", + response, + ); + assertEquals(classifyFetchError(error), { + result: "not_found", + statusCode: 404, + }); +}); + +test("classifyFetchError() classifies FetchError with 410 as not_found", () => { + const response = new Response("", { status: 410 }); + const error = new FetchError( + "https://example.com/k", + "gone", + response, + ); + assertEquals(classifyFetchError(error), { + result: "not_found", + statusCode: 410, + }); +}); + +test("classifyFetchError() classifies FetchError with 500 as error", () => { + const response = new Response("", { status: 500 }); + const error = new FetchError( + "https://example.com/k", + "server error", + response, + ); + assertEquals(classifyFetchError(error), { + result: "error", + statusCode: 500, + }); +}); + +test("classifyFetchError() classifies FetchError without response as network_error", () => { + const error = new FetchError("https://example.com/k", "boom"); + assertEquals(classifyFetchError(error), { result: "network_error" }); +}); + +test("classifyFetchError() classifies a bare TypeError as network_error", () => { + assertEquals(classifyFetchError(new TypeError("connect failed")), { + result: "network_error", + }); +}); + +test("classifyFetchError() classifies an AbortError as network_error", () => { + const abort = new Error("aborted"); + abort.name = "AbortError"; + assertEquals(classifyFetchError(abort), { result: "network_error" }); +}); + +test("classifyFetchError() classifies any other thrown value as error", () => { + assertEquals(classifyFetchError(new Error("nope")), { result: "error" }); + assertEquals(classifyFetchError("string error"), { result: "error" }); + assertEquals(classifyFetchError(undefined), { result: "error" }); +}); + +test("instrumentDocumentLoader() records fetched on success", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const inner: DocumentLoader = (url) => + Promise.resolve( + { + contextUrl: null, + documentUrl: url, + document: { ok: true }, + } satisfies RemoteDocument, + ); + const wrapped = instrumentDocumentLoader(inner, { + meterProvider, + kind: "object", + cacheEnabled: true, + }); + + const result = await wrapped("https://example.com/o"); + assertEquals(result.document, { ok: true }); + + const counter = recorder.getMeasurement("activitypub.document.fetch"); + assertEquals(counter?.attributes["activitypub.lookup.kind"], "object"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "fetched"); + assertEquals(counter?.attributes["activitypub.remote.host"], "example.com"); + assertEquals(counter?.attributes["activitypub.cache.enabled"], true); + assertEquals("http.response.status_code" in counter!.attributes, false); + + const duration = recorder.getMeasurement( + "activitypub.document.fetch.duration", + ); + assertEquals(duration?.type, "histogram"); + assertEquals(duration?.attributes["activitypub.lookup.result"], "fetched"); +}); + +test("instrumentDocumentLoader() records not_found on FetchError 404", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const inner: DocumentLoader = (url) => + Promise.reject( + new FetchError( + url, + "HTTP 404", + new Response("", { status: 404 }), + ), + ); + const wrapped = instrumentDocumentLoader(inner, { + meterProvider, + kind: "context", + cacheEnabled: false, + }); + + await assertRejects( + () => wrapped("https://example.com/missing"), + FetchError, + ); + + const counter = recorder.getMeasurement("activitypub.document.fetch"); + assertEquals(counter?.attributes["activitypub.lookup.kind"], "context"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "not_found"); + assertEquals(counter?.attributes["activitypub.remote.host"], "example.com"); + assertEquals(counter?.attributes["activitypub.cache.enabled"], false); + assertEquals(counter?.attributes["http.response.status_code"], 404); +}); + +test("instrumentDocumentLoader() records network_error on TypeError", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const inner: DocumentLoader = () => + Promise.reject(new TypeError("fetch failed")); + const wrapped = instrumentDocumentLoader(inner, { + meterProvider, + kind: "object", + }); + + await assertRejects( + () => wrapped("https://example.com/o"), + TypeError, + ); + + const counter = recorder.getMeasurement("activitypub.document.fetch"); + assertEquals( + counter?.attributes["activitypub.lookup.result"], + "network_error", + ); + assertEquals("activitypub.cache.enabled" in counter!.attributes, false); +}); + +test("instrumentDocumentLoader() returns inner loader unchanged when meterProvider is omitted", async () => { + const [, recorder] = createTestMeterProvider(); + let callCount = 0; + const inner: DocumentLoader = (url) => { + callCount++; + return Promise.resolve( + { + contextUrl: null, + documentUrl: url, + document: { ok: true }, + } satisfies RemoteDocument, + ); + }; + const wrapped = instrumentDocumentLoader(inner, { kind: "object" }); + + // No-instrumentation short-circuit returns the original function reference. + assertEquals(wrapped, inner); + + await wrapped("https://example.com/o"); + assertEquals(callCount, 1); + assertEquals( + recorder.getMeasurements("activitypub.document.fetch").length, + 0, + ); + assertEquals( + recorder.getMeasurements("activitypub.document.fetch.duration").length, + 0, + ); +}); + +test("instrumentDocumentLoader() omits remote.host when URL is unparseable", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const inner: DocumentLoader = (url) => + Promise.resolve( + { + contextUrl: null, + documentUrl: url, + document: {}, + } satisfies RemoteDocument, + ); + const wrapped = instrumentDocumentLoader(inner, { + meterProvider, + kind: "other", + }); + + await wrapped("not a url"); + + const counter = recorder.getMeasurement("activitypub.document.fetch"); + assertEquals(counter?.attributes["activitypub.lookup.kind"], "other"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "fetched"); + assertEquals("activitypub.remote.host" in counter!.attributes, false); +}); diff --git a/packages/fedify/src/federation/metrics.ts b/packages/fedify/src/federation/metrics.ts index 49a0ceddb..448c6d5fa 100644 --- a/packages/fedify/src/federation/metrics.ts +++ b/packages/fedify/src/federation/metrics.ts @@ -1,3 +1,5 @@ +import type { DocumentLoader } from "@fedify/vocab-runtime"; +import { FetchError } from "@fedify/vocab-runtime"; import { type Attributes, type Counter, @@ -186,6 +188,140 @@ export type LinkedDataSignatureMetricType = "RsaSignature2017"; */ export type ObjectIntegrityProofMetricCryptosuite = "eddsa-jcs-2022"; +/** + * The kind of remote ActivityPub lookup, recorded as + * `activitypub.lookup.kind` on the public-key lookup and remote document + * fetch metric families. + * + * - `public_key`: a public key lookup performed by `fetchKey` / + * `fetchKeyDetailed` (always recorded on `activitypub.key.lookup*`). + * - `actor`: a document fetch whose resolved value is an Actor. The + * bucket exists in the taxonomy for future actor-aware call sites; + * today, actor documents fetched through Fedify's generic document + * loader are still classified as `object` because the kind is decided + * at the loader boundary, before the response is parsed. + * - `object`: a generic ActivityPub object fetch through Fedify's + * document loader. This is the default classification for + * `documentLoader` invocations that do not match a more specific + * bucket. + * - `context`: a JSON-LD `@context` document fetch through Fedify's + * context loader. + * - `other`: a fetch that does not fit any of the above classifications. + * @since 2.3.0 + */ +export type LookupKind = + | "public_key" + | "actor" + | "object" + | "context" + | "other"; + +/** + * The terminal classification of a public-key lookup or remote document + * fetch, recorded as `activitypub.lookup.result` on the lookup metric + * families. + * + * - `hit`: served from a cache without going to the network. + * - `miss`: a cache was consulted and returned no entry; only used on + * `activitypub.document.cache`. + * - `fetched`: the remote document or key was loaded successfully. + * - `not_found`: the remote responded with HTTP `404 Not Found` or + * `410 Gone`, or otherwise reported the resource is absent. + * - `invalid`: the remote responded with content Fedify could not parse + * into the expected shape. + * - `network_error`: no HTTP response was received (DNS failure, connect + * timeout, abort, redirect loop, etc.). + * - `error`: any other unexpected failure. + * @since 2.3.0 + */ +export type LookupResult = + | "hit" + | "miss" + | "fetched" + | "not_found" + | "invalid" + | "network_error" + | "error"; + +/** + * The {@link LookupKind} values that can appear on remote document fetch + * metrics. `public_key` lookups are reported on the + * `activitypub.key.lookup` metric family instead, so it is excluded here. + * @since 2.3.0 + */ +export type DocumentFetchKind = Exclude; + +/** + * The {@link LookupResult} values that can appear on the + * `activitypub.document.fetch` and `activitypub.document.fetch.duration` + * metrics. Cache `hit` / `miss` outcomes are reported on + * `activitypub.document.cache`, and `invalid` is reserved for the + * key-lookup metrics (where the parser can decide that a successful + * HTTP response still does not contain a usable key), so all three are + * excluded here. + * @since 2.3.0 + */ +export type DocumentFetchResult = Exclude< + LookupResult, + "hit" | "miss" | "invalid" +>; + +/** + * The {@link LookupResult} values that can appear on the + * `activitypub.key.lookup` and `activitypub.key.lookup.duration` metrics. + * `miss` is a cache-internal classification that surfaces on + * `activitypub.document.cache` only and is not a terminal key lookup + * outcome, so it is excluded here. + * @since 2.3.0 + */ +export type KeyLookupResult = Exclude; + +/** + * Attributes accepted by {@link recordKeyLookup}. `remoteUrl` is taken as + * a `URL` so that the helper can derive the hostname-only + * `activitypub.remote.host` attribute internally and refuse to record + * high-cardinality values such as full key IDs or actor URLs. + * @since 2.3.0 + */ +export interface KeyLookupAttributes { + /** The terminal lookup result. */ + result: KeyLookupResult; + /** Elapsed lookup duration in milliseconds. */ + durationMs: number; + /** URL of the key, used to derive `activitypub.remote.host`. */ + remoteUrl?: URL; + /** Whether the lookup path had a `KeyCache` configured. */ + cacheEnabled: boolean; + /** The HTTP response status code, when an HTTP response was received. */ + statusCode?: number; +} + +/** + * Attributes accepted by {@link recordDocumentFetch}. + * @since 2.3.0 + */ +export interface DocumentFetchAttributes { + kind: DocumentFetchKind; + result: DocumentFetchResult; + /** URL of the fetched document, used to derive `activitypub.remote.host`. */ + remoteUrl?: URL; + /** Elapsed fetch duration in milliseconds. */ + durationMs: number; + cacheEnabled?: boolean; + statusCode?: number; +} + +/** + * Attributes accepted by {@link recordDocumentCache}. + * @since 2.3.0 + */ +export interface DocumentCacheAttributes { + kind: DocumentFetchKind; + result: "hit" | "miss"; + /** URL of the looked-up document, used to derive `activitypub.remote.host`. */ + remoteUrl?: URL; +} + /** * Optional attributes recorded alongside an * `activitypub.signature.verification.duration` measurement. Each field is @@ -228,6 +364,11 @@ class FederationMetrics { readonly fanoutRecipients: Histogram; readonly inboxActivity: Counter; readonly outboxActivity: Counter; + readonly keyLookup: Counter; + readonly keyLookupDuration: Histogram; + readonly documentFetch: Counter; + readonly documentFetchDuration: Histogram; + readonly documentCache: Counter; constructor(meterProvider: MeterProvider) { const meter = meterProvider.getMeter(metadata.name, metadata.version); @@ -392,6 +533,81 @@ class FederationMetrics { "live on `activitypub.delivery.*`.", unit: "{activity}", }); + this.keyLookup = meter.createCounter("activitypub.key.lookup", { + description: + "Public-key lookup attempts performed by Fedify, including both " + + "cache hits and remote fetches.", + unit: "{lookup}", + }); + this.keyLookupDuration = meter.createHistogram( + "activitypub.key.lookup.duration", + { + description: + "Duration of public-key lookups performed by Fedify, including " + + "any remote fetch.", + unit: "ms", + advice: { + // Reuse the OpenTelemetry HTTP server semantic-conventions buckets + // since key lookups span the same 5 ms to 10 s range that other + // network-bound histograms in this package already use. + explicitBucketBoundaries: [ + 5, + 10, + 25, + 50, + 75, + 100, + 250, + 500, + 750, + 1000, + 2500, + 5000, + 7500, + 10000, + ], + }, + }, + ); + this.documentFetch = meter.createCounter("activitypub.document.fetch", { + description: + "Remote JSON-LD document loader invocations made by Fedify-wrapped " + + "loaders.", + unit: "{fetch}", + }); + this.documentFetchDuration = meter.createHistogram( + "activitypub.document.fetch.duration", + { + description: + "Duration of remote JSON-LD document loader invocations made by " + + "Fedify-wrapped loaders.", + unit: "ms", + advice: { + explicitBucketBoundaries: [ + 5, + 10, + 25, + 50, + 75, + 100, + 250, + 500, + 750, + 1000, + 2500, + 5000, + 7500, + 10000, + ], + }, + }, + ); + this.documentCache = meter.createCounter("activitypub.document.cache", { + description: + "KV-backed document loader cache lookups, with `hit` or `miss` " + + "classification.", + unit: "{lookup}", + }); } recordDelivery( @@ -559,6 +775,51 @@ class FederationMetrics { buildActivityLifecycleAttributes(result, activityType), ); } + + recordKeyLookup(attrs: KeyLookupAttributes): void { + const attributes: Attributes = { + "activitypub.lookup.kind": "public_key", + "activitypub.lookup.result": attrs.result, + "activitypub.cache.enabled": attrs.cacheEnabled, + }; + if (attrs.remoteUrl != null) { + attributes["activitypub.remote.host"] = getRemoteHost(attrs.remoteUrl); + } + if (attrs.statusCode != null) { + attributes["http.response.status_code"] = attrs.statusCode; + } + this.keyLookup.add(1, attributes); + this.keyLookupDuration.record(attrs.durationMs, attributes); + } + + recordDocumentFetch(attrs: DocumentFetchAttributes): void { + const attributes: Attributes = { + "activitypub.lookup.kind": attrs.kind, + "activitypub.lookup.result": attrs.result, + }; + if (attrs.remoteUrl != null) { + attributes["activitypub.remote.host"] = getRemoteHost(attrs.remoteUrl); + } + if (attrs.cacheEnabled != null) { + attributes["activitypub.cache.enabled"] = attrs.cacheEnabled; + } + if (attrs.statusCode != null) { + attributes["http.response.status_code"] = attrs.statusCode; + } + this.documentFetch.add(1, attributes); + this.documentFetchDuration.record(attrs.durationMs, attributes); + } + + recordDocumentCache(attrs: DocumentCacheAttributes): void { + const attributes: Attributes = { + "activitypub.lookup.kind": attrs.kind, + "activitypub.lookup.result": attrs.result, + }; + if (attrs.remoteUrl != null) { + attributes["activitypub.remote.host"] = getRemoteHost(attrs.remoteUrl); + } + this.documentCache.add(1, attributes); + } } function buildActivityLifecycleAttributes( @@ -701,6 +962,174 @@ export function recordOutboxActivity( ); } +/** + * Records one measurement on `activitypub.key.lookup` (counter) and + * `activitypub.key.lookup.duration` (histogram) for a public-key lookup. + * + * `activitypub.lookup.kind` is always recorded as `public_key`; the result + * classification, remote host, HTTP status code (when an HTTP response was + * received), and `activitypub.cache.enabled` are recorded as attributes on + * both measurements. Full key URLs and key IDs are deliberately omitted to + * keep cardinality bounded. + * @since 2.3.0 + */ +export function recordKeyLookup( + meterProvider: MeterProvider | undefined, + attrs: KeyLookupAttributes, +): void { + getFederationMetrics(meterProvider).recordKeyLookup(attrs); +} + +/** + * Records one measurement each on `activitypub.document.fetch` (counter) + * and `activitypub.document.fetch.duration` (histogram) for one remote + * JSON-LD document loader invocation, with bounded + * `activitypub.lookup.kind` and `activitypub.lookup.result` attributes + * plus the optional remote-host, cache-enabled, and HTTP status-code + * attributes. Counter and histogram are always recorded together so + * aggregate rate and latency views stay in sync. + * @since 2.3.0 + */ +export function recordDocumentFetch( + meterProvider: MeterProvider | undefined, + attrs: DocumentFetchAttributes, +): void { + getFederationMetrics(meterProvider).recordDocumentFetch(attrs); +} + +/** + * Records one `activitypub.document.cache` measurement, classifying the + * lookup as `hit` (the cache returned an entry) or `miss` (the cache was + * consulted and returned nothing, prompting a delegate fetch). + * @since 2.3.0 + */ +export function recordDocumentCache( + meterProvider: MeterProvider | undefined, + attrs: DocumentCacheAttributes, +): void { + getFederationMetrics(meterProvider).recordDocumentCache(attrs); +} + +/** + * Classifies a thrown value from a key or document fetch into the bounded + * {@link LookupResult} taxonomy and, when an HTTP response was received, + * surfaces its status code. + * + * - `FetchError` with a `Response` whose status is `404` or `410`: + * `result=not_found` and the response status code. + * - `FetchError` with any other `Response`: `result=error` and the + * response status code. + * - `FetchError` without a `Response`: `result=network_error`. + * - An `AbortError` (typically from a cancelled fetch): `result=network_error`. + * - A bare `TypeError` (the shape native `fetch()` raises on DNS, connect, + * and TLS failures before any response is observed): + * `result=network_error`. + * - Any other value: `result=error`. + * @since 2.3.0 + */ +export function classifyFetchError( + error: unknown, +): { result: DocumentFetchResult; statusCode?: number } { + if (error instanceof FetchError) { + if (error.response != null) { + const status = error.response.status; + const result: DocumentFetchResult = status === 404 || status === 410 + ? "not_found" + : "error"; + return { result, statusCode: status }; + } + return { result: "network_error" }; + } + if (isAbortError(error)) return { result: "network_error" }; + if (error instanceof TypeError) return { result: "network_error" }; + return { result: "error" }; +} + +/** + * Options for {@link instrumentDocumentLoader}. + * @since 2.3.0 + */ +export interface InstrumentDocumentLoaderOptions { + /** + * The OpenTelemetry meter provider used to record + * `activitypub.document.fetch` and `activitypub.document.fetch.duration` + * measurements. When omitted, the wrapper records nothing and simply + * delegates to the wrapped loader. + */ + meterProvider?: MeterProvider; + + /** + * The lookup kind recorded on `activitypub.lookup.kind`. Set to + * `"object"` for the generic document loader, `"context"` for the + * context loader, and `"other"` for callers that do not fit the + * generic-object classification. + */ + kind: DocumentFetchKind; + + /** + * Whether the wrapped loader is cache-backed (for example via + * {@link import("../utils/kv-cache.ts").kvCache}). Recorded as + * `activitypub.cache.enabled` on every measurement; omitted from the + * attribute set when the option is not set. + */ + cacheEnabled?: boolean; +} + +/** + * Wraps a {@link DocumentLoader} so each invocation records one + * measurement on `activitypub.document.fetch` (counter) and one on + * `activitypub.document.fetch.duration` (histogram), classifying the + * outcome via {@link classifyFetchError} when the wrapped loader throws + * and as `fetched` on success. The wrapper rethrows whatever the + * wrapped loader throws so caller behavior is unchanged. + * + * The wrapper records the hostname of the requested URL on + * `activitypub.remote.host` when the URL parses; full URLs, paths, and + * query strings are deliberately excluded to keep cardinality bounded. + * HTTP status codes are recorded only when the failure carries a + * `Response` (currently, when the wrapped loader throws a + * {@link FetchError} with a non-`null` `response`). + * @since 2.3.0 + */ +export function instrumentDocumentLoader( + loader: DocumentLoader, + options: InstrumentDocumentLoaderOptions, +): DocumentLoader { + const meterProvider = options.meterProvider; + if (meterProvider == null) return loader; + return async (url, opts) => { + const start = performance.now(); + let remoteUrl: URL | undefined; + try { + remoteUrl = new URL(url); + } catch { + remoteUrl = undefined; + } + try { + const result = await loader(url, opts); + recordDocumentFetch(meterProvider, { + durationMs: getDurationMs(start), + kind: options.kind, + result: "fetched", + remoteUrl, + cacheEnabled: options.cacheEnabled, + }); + return result; + } catch (error) { + const classified = classifyFetchError(error); + recordDocumentFetch(meterProvider, { + durationMs: getDurationMs(start), + kind: options.kind, + result: classified.result, + remoteUrl, + cacheEnabled: options.cacheEnabled, + statusCode: classified.statusCode, + }); + throw error; + } + }; +} + /** * Times an awaited public key fetch and records exactly one * `activitypub.signature.key_fetch.duration` measurement, classifying the diff --git a/packages/fedify/src/federation/middleware.test.ts b/packages/fedify/src/federation/middleware.test.ts index f2663e94e..39bb286a3 100644 --- a/packages/fedify/src/federation/middleware.test.ts +++ b/packages/fedify/src/federation/middleware.test.ts @@ -6491,3 +6491,97 @@ test("KvSpecDeterminer", async (t) => { assertEquals(spec, "rfc9421"); }); }); + +test("createFederation() instruments documentLoader with activitypub.document.fetch", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const kv = new MemoryKvStore(); + const federation = createFederation({ + kv, + meterProvider, + documentLoaderFactory: () => mockDocumentLoader, + contextLoaderFactory: () => mockDocumentLoader, + }); + const ctx = federation.createContext( + new URL("https://example.com/"), + undefined, + ); + await ctx.documentLoader("https://example.com/object"); + + const counters = recorder.getMeasurements("activitypub.document.fetch"); + assertEquals(counters.length, 1); + assertEquals(counters[0].attributes["activitypub.lookup.kind"], "object"); + assertEquals(counters[0].attributes["activitypub.lookup.result"], "fetched"); + assertEquals( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + // User-supplied factory: cacheEnabled is unknown, attribute is omitted. + assertFalse("activitypub.cache.enabled" in counters[0].attributes); +}); + +test("createFederation() records kind=context on contextLoader fetches", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const kv = new MemoryKvStore(); + const federation = createFederation({ + kv, + meterProvider, + documentLoaderFactory: () => mockDocumentLoader, + contextLoaderFactory: () => mockDocumentLoader, + }); + const ctx = federation.createContext( + new URL("https://example.com/"), + undefined, + ); + await ctx.contextLoader("https://example.com/object"); + + const counters = recorder.getMeasurements("activitypub.document.fetch"); + assertEquals(counters.length, 1); + assertEquals(counters[0].attributes["activitypub.lookup.kind"], "context"); +}); + +test("createFederation() forwards DocumentLoaderFactoryOptions to a user-supplied authenticatedDocumentLoaderFactory", () => { + const kv = new MemoryKvStore(); + const seen: Array = []; + const federation = createFederation({ + kv, + authenticatedDocumentLoaderFactory: (_identity, opts) => { + seen.push(opts); + return mockDocumentLoader; + }, + }); + // FederationImpl exposes the factory directly on the instance. + const impl = federation as unknown as { + authenticatedDocumentLoaderFactory: ( + identity: { keyId: URL; privateKey: CryptoKey }, + opts?: { allowPrivateAddress?: boolean; userAgent?: string }, + ) => unknown; + }; + impl.authenticatedDocumentLoaderFactory( + { + keyId: new URL("https://example.com/users/alice#main-key"), + // deno-lint-ignore no-explicit-any + privateKey: {} as any, + }, + { allowPrivateAddress: true, userAgent: "test-ua" }, + ); + assertEquals(seen.length, 1); + assertEquals(seen[0], { allowPrivateAddress: true, userAgent: "test-ua" }); +}); + +test("createFederation() omits instrumentation when no meterProvider is set", () => { + // Sanity: without a meterProvider, ctx.documentLoader must be the same + // function reference as the user-supplied loader, so the wrapper is a + // true no-op for non-OTel users. + const kv = new MemoryKvStore(); + const federation = createFederation({ + kv, + documentLoaderFactory: () => mockDocumentLoader, + contextLoaderFactory: () => mockDocumentLoader, + }); + const ctx = federation.createContext( + new URL("https://example.com/"), + undefined, + ); + assertStrictEquals(ctx.documentLoader, mockDocumentLoader); + assertStrictEquals(ctx.contextLoader, mockDocumentLoader); +}); diff --git a/packages/fedify/src/federation/middleware.ts b/packages/fedify/src/federation/middleware.ts index ec5d2629d..cf12b134d 100644 --- a/packages/fedify/src/federation/middleware.ts +++ b/packages/fedify/src/federation/middleware.ts @@ -107,6 +107,7 @@ import { getDurationMs, getFederationMetrics, getRemoteHost, + instrumentDocumentLoader, isAbortError, type QueueTaskCommonAttributes, type QueueTaskResult, @@ -369,33 +370,99 @@ export class FederationImpl } const { allowPrivateAddress, userAgent } = options; this.allowPrivateAddress = allowPrivateAddress ?? false; - this.documentLoaderFactory = options.documentLoaderFactory ?? - ((opts) => { - return kvCache({ - loader: getDocumentLoader({ - allowPrivateAddress: opts?.allowPrivateAddress ?? - allowPrivateAddress, - userAgent: opts?.userAgent ?? userAgent, - }), - kv: options.kv, - prefix: this.kvPrefixes.remoteDocument, - }); + // The loader factory closures below read `this._meterProvider` at + // call time, not when they are created. Factories are only invoked + // after the constructor has assigned `_meterProvider` (see below), so + // the lookup is safe; no eager assignment is needed here. + const userDocumentLoaderFactory = options.documentLoaderFactory; + const userContextLoaderFactory = options.contextLoaderFactory; + const userAuthFactory = options.authenticatedDocumentLoaderFactory; + const builtinDocumentLoaderFactory: DocumentLoaderFactory = (opts) => + kvCache({ + loader: getDocumentLoader({ + allowPrivateAddress: opts?.allowPrivateAddress ?? + allowPrivateAddress, + userAgent: opts?.userAgent ?? userAgent, + }), + kv: options.kv, + prefix: this.kvPrefixes.remoteDocument, + meterProvider: this._meterProvider, + kind: "object", }); - this.contextLoaderFactory = options.contextLoaderFactory ?? - this.documentLoaderFactory; - this.authenticatedDocumentLoaderFactory = - options.authenticatedDocumentLoaderFactory ?? - ((identity) => - getAuthenticatedDocumentLoader(identity, { + const builtinContextLoaderFactory: DocumentLoaderFactory = (opts) => + kvCache({ + loader: getDocumentLoader({ + allowPrivateAddress: opts?.allowPrivateAddress ?? allowPrivateAddress, - userAgent, + userAgent: opts?.userAgent ?? userAgent, + }), + kv: options.kv, + prefix: this.kvPrefixes.remoteDocument, + meterProvider: this._meterProvider, + kind: "context", + }); + // Only the built-in factories use `kvCache()`, so we can confidently + // record `activitypub.cache.enabled=true` for them; user-supplied + // factories may or may not cache, so the attribute is omitted. + this.documentLoaderFactory = (opts) => + instrumentDocumentLoader( + (userDocumentLoaderFactory ?? builtinDocumentLoaderFactory)(opts), + { + meterProvider: this._meterProvider, + kind: "object", + cacheEnabled: userDocumentLoaderFactory == null ? true : undefined, + }, + ); + // When the user customises `documentLoaderFactory` but not + // `contextLoaderFactory`, Fedify has historically fallen back to the + // (customised) document factory rather than the built-in one so + // context fetches inherit the user's settings. Preserve that + // semantic, just with a separate instrumentation kind so the metric + // attributes reflect the call-site intent. + const resolvedContextLoaderFactory: DocumentLoaderFactory = + userContextLoaderFactory ?? userDocumentLoaderFactory ?? + builtinContextLoaderFactory; + this.contextLoaderFactory = (opts) => + instrumentDocumentLoader( + resolvedContextLoaderFactory(opts), + { + meterProvider: this._meterProvider, + kind: "context", + cacheEnabled: (userContextLoaderFactory == null && + userDocumentLoaderFactory == null) + ? true + : undefined, + }, + ); + this.authenticatedDocumentLoaderFactory = (identity, factoryOpts) => + instrumentDocumentLoader( + userAuthFactory != null + // Forward `factoryOpts` so user-supplied factories receive the + // per-call `DocumentLoaderFactoryOptions` (allowPrivateAddress, + // userAgent) just like they did before this wrapper was added. + ? userAuthFactory(identity, factoryOpts) + // The built-in default honors per-call `factoryOpts` overrides + // the same way `documentLoaderFactory` / `contextLoaderFactory` + // do above, falling back to the constructor-level settings when + // the caller did not supply an override. + : getAuthenticatedDocumentLoader(identity, { + allowPrivateAddress: factoryOpts?.allowPrivateAddress ?? + allowPrivateAddress, + userAgent: factoryOpts?.userAgent ?? userAgent, specDeterminer: new KvSpecDeterminer( this.kv, this.kvPrefixes.httpMessageSignaturesSpec, options.firstKnock, ), tracerProvider: this.tracerProvider, - })); + }), + { + meterProvider: this._meterProvider, + kind: "object", + // The authenticated document loader does not cache. + cacheEnabled: userAuthFactory == null ? false : undefined, + }, + ); this.userAgent = userAgent; this.onOutboxError = options.onOutboxError; this.permanentFailureStatusCodes = options.permanentFailureStatusCodes ?? @@ -2435,6 +2502,7 @@ export class ContextImpl implements Context { contextLoader: options.contextLoader ?? this.contextLoader, userAgent: options.userAgent ?? this.federation.userAgent, tracerProvider: options.tracerProvider ?? this.tracerProvider, + meterProvider: options.meterProvider ?? this.meterProvider, // @ts-ignore: `allowPrivateAddress` is not in the type definition. allowPrivateAddress: this.federation.allowPrivateAddress, }); diff --git a/packages/fedify/src/sig/http.test.ts b/packages/fedify/src/sig/http.test.ts index b05417e7c..c902e048e 100644 --- a/packages/fedify/src/sig/http.test.ts +++ b/packages/fedify/src/sig/http.test.ts @@ -421,6 +421,21 @@ test("verifyRequestDetailed() records verification duration metric", async (t) = assertFalse( "http_signatures.failure_reason" in measurement.attributes, ); + + // The HTTP draft-cavage verifier must also forward `meterProvider` + // through to `fetchKeyDetailed` so the generic + // `activitypub.key.lookup*` metrics land on the test provider rather + // than the global default. + const keyLookups = recorder.getMeasurements("activitypub.key.lookup"); + assertEquals(keyLookups.length, 1); + assertEquals( + keyLookups[0].attributes["activitypub.lookup.kind"], + "public_key", + ); + assertEquals( + keyLookups[0].attributes["activitypub.lookup.result"], + "fetched", + ); }); await t.step("missing signature is recorded as result=missing", async () => { diff --git a/packages/fedify/src/sig/http.ts b/packages/fedify/src/sig/http.ts index 3b534dbda..d79cd27a6 100644 --- a/packages/fedify/src/sig/http.ts +++ b/packages/fedify/src/sig/http.ts @@ -1157,6 +1157,7 @@ async function verifyRequestDraft( contextLoader, keyCache, tracerProvider, + meterProvider, }), ); const { key, cached, fetchError } = fetchResult; @@ -1508,6 +1509,7 @@ async function verifyRequestRfc9421( contextLoader, keyCache, tracerProvider, + meterProvider, }), ); const { key, cached, fetchError } = rfcFetchResult; diff --git a/packages/fedify/src/sig/key.test.ts b/packages/fedify/src/sig/key.test.ts index f80def098..7d865324f 100644 --- a/packages/fedify/src/sig/key.test.ts +++ b/packages/fedify/src/sig/key.test.ts @@ -1,10 +1,11 @@ import { + createTestMeterProvider, createTestTracerProvider, mockDocumentLoader, test, } from "@fedify/fixture"; import { CryptographicKey, Multikey } from "@fedify/vocab"; -import { FetchError } from "@fedify/vocab-runtime"; +import { type DocumentLoader, FetchError } from "@fedify/vocab-runtime"; import { assertEquals, assertRejects, assertThrows } from "@std/assert"; import { ed25519Multikey, @@ -457,3 +458,160 @@ test("fetchKeyDetailed() returns detailed fetch errors", async () => { } assertEquals(detailedError.error, failure); }); + +test("fetchKey() records activitypub.key.lookup with hit on cached key", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const cache: Record = {}; + const keyCache: KeyCache = { + get(keyId) { + return Promise.resolve(cache[keyId.href]); + }, + set(keyId, key) { + cache[keyId.href] = key; + return Promise.resolve(); + }, + }; + const options: FetchKeyOptions = { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + keyCache, + meterProvider, + }; + + // Warm the cache. + await fetchKey("https://example.com/key", CryptographicKey, options); + recorder.clear(); + + // Subsequent call should hit the cache. + const result = await fetchKey( + "https://example.com/key", + CryptographicKey, + options, + ); + assertEquals(result.cached, true); + + const counters = recorder.getMeasurements("activitypub.key.lookup"); + assertEquals(counters.length, 1); + assertEquals(counters[0].attributes["activitypub.lookup.kind"], "public_key"); + assertEquals(counters[0].attributes["activitypub.lookup.result"], "hit"); + assertEquals(counters[0].attributes["activitypub.cache.enabled"], true); + assertEquals( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + + const duration = recorder.getMeasurement("activitypub.key.lookup.duration"); + assertEquals(duration?.type, "histogram"); + assertEquals(duration?.attributes["activitypub.lookup.result"], "hit"); +}); + +test("fetchKey() records activitypub.key.lookup with fetched on cache miss", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const result = await fetchKey( + "https://example.com/key", + CryptographicKey, + { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }, + ); + assertEquals(result.cached, false); + + const counter = recorder.getMeasurement("activitypub.key.lookup"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "fetched"); + assertEquals(counter?.attributes["activitypub.cache.enabled"], false); + assertEquals(counter?.attributes["activitypub.remote.host"], "example.com"); + + const duration = recorder.getMeasurement("activitypub.key.lookup.duration"); + assertEquals(duration?.attributes["activitypub.lookup.result"], "fetched"); +}); + +test("fetchKey() records not_found and status code on HTTP 404", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const missingKeyId = new URL("https://example.com/missing-key"); + const documentLoader: DocumentLoader = (url) => { + if (url === missingKeyId.href) { + throw new FetchError( + missingKeyId, + `HTTP 404: ${missingKeyId.href}`, + new Response(null, { status: 404 }), + ); + } + return mockDocumentLoader(url); + }; + + const result = await fetchKey(missingKeyId, CryptographicKey, { + documentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + assertEquals(result.key, null); + + const counter = recorder.getMeasurement("activitypub.key.lookup"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "not_found"); + assertEquals(counter?.attributes["http.response.status_code"], 404); + assertEquals(counter?.attributes["activitypub.cache.enabled"], false); + assertEquals( + counter?.attributes["activitypub.remote.host"], + "example.com", + ); +}); + +test("fetchKey() records network_error on TypeError from the document loader", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const documentLoader: DocumentLoader = () => { + throw new TypeError("connect failed"); + }; + + await fetchKey("https://example.com/key", CryptographicKey, { + documentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + + const counter = recorder.getMeasurement("activitypub.key.lookup"); + assertEquals( + counter?.attributes["activitypub.lookup.result"], + "network_error", + ); + assertEquals("http.response.status_code" in counter!.attributes, false); +}); + +test("fetchKeyDetailed() records activitypub.key.lookup with the same taxonomy", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const goneKeyId = new URL("https://example.com/gone-key"); + const documentLoader: DocumentLoader = (url) => { + if (url === goneKeyId.href) { + throw new FetchError( + goneKeyId, + `HTTP 410: ${goneKeyId.href}`, + new Response(null, { status: 410 }), + ); + } + return mockDocumentLoader(url); + }; + + await fetchKeyDetailed(goneKeyId, CryptographicKey, { + documentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + + const counter = recorder.getMeasurement("activitypub.key.lookup"); + assertEquals(counter?.attributes["activitypub.lookup.result"], "not_found"); + assertEquals(counter?.attributes["http.response.status_code"], 410); +}); + +test("fetchKey() works when meterProvider is omitted", async () => { + // Sanity: omitting meterProvider keeps fetchKey functional. + const result = await fetchKey( + "https://example.com/key", + CryptographicKey, + { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + }, + ); + assertEquals(result.cached, false); +}); diff --git a/packages/fedify/src/sig/key.ts b/packages/fedify/src/sig/key.ts index 4d97dfdda..49be57cd4 100644 --- a/packages/fedify/src/sig/key.ts +++ b/packages/fedify/src/sig/key.ts @@ -11,12 +11,19 @@ import { } from "@fedify/vocab-runtime"; import { getLogger } from "@logtape/logtape"; import { + type MeterProvider, SpanKind, SpanStatusCode, trace, type TracerProvider, } from "@opentelemetry/api"; import metadata from "../../deno.json" with { type: "json" }; +import { + classifyFetchError, + getDurationMs, + type KeyLookupResult, + recordKeyLookup, +} from "../federation/metrics.ts"; /** * Checks if the given key is valid and supported. No-op if the key is valid, @@ -176,6 +183,14 @@ export interface FetchKeyOptions { * @since 1.3.0 */ tracerProvider?: TracerProvider; + + /** + * The OpenTelemetry meter provider to use for recording + * `activitypub.key.lookup` and `activitypub.key.lookup.duration`. If + * omitted, the global meter provider is used. + * @since 2.3.0 + */ + meterProvider?: MeterProvider; } /** @@ -538,45 +553,74 @@ async function fetchKeyWithResult< logger: ReturnType, ) => Promise | TResult, ): Promise { - const logger = getLogger(["fedify", "sig", "key"]); - const keyId = cacheKey.href; - const keyCache = options.keyCache as FetchErrorMetadataCache | undefined; - const cached = await getCachedFetchKey( - cacheKey, - keyId, - cls, - keyCache, - logger, - ); - if (cached?.key === null && cached.cached) { - return await onCachedUnavailable(cacheKey, keyId, keyCache, logger); - } - if (cached != null) return cached as TResult; - logger.debug("Fetching key {keyId} to verify signature...", { keyId }); - let document: unknown; + const start = performance.now(); + let outcome: { result: KeyLookupResult; statusCode?: number } = { + result: "error", + }; try { - const remoteDocument = - await (options.documentLoader ?? getDocumentLoader())( + const logger = getLogger(["fedify", "sig", "key"]); + const keyId = cacheKey.href; + const keyCache = options.keyCache as FetchErrorMetadataCache | undefined; + const cached = await getCachedFetchKey( + cacheKey, + keyId, + cls, + keyCache, + logger, + ); + if (cached?.key === null && cached.cached) { + const cachedUnavailable = await onCachedUnavailable( + cacheKey, + keyId, + keyCache, + logger, + ); + outcome = { result: "hit" }; + return cachedUnavailable; + } + if (cached != null) { + outcome = { result: "hit" }; + return cached as TResult; + } + logger.debug("Fetching key {keyId} to verify signature...", { keyId }); + let document: unknown; + try { + const remoteDocument = + await (options.documentLoader ?? getDocumentLoader())( + keyId, + ); + document = remoteDocument.document; + } catch (error) { + const classified = classifyFetchError(error); + const errored = await onFetchError( + error, + cacheKey, keyId, + keyCache, + logger, ); - document = remoteDocument.document; - } catch (error) { - return await onFetchError( - error, + outcome = classified; + return errored; + } + const resolved = await resolveFetchedKey( + document, cacheKey, keyId, - keyCache, + cls, + options, logger, ); + outcome = { result: resolved.key != null ? "fetched" : "invalid" }; + return resolved as TResult; + } finally { + recordKeyLookup(options.meterProvider, { + durationMs: getDurationMs(start), + result: outcome.result, + remoteUrl: cacheKey, + cacheEnabled: options.keyCache != null, + statusCode: outcome.statusCode, + }); } - return await resolveFetchedKey( - document, - cacheKey, - keyId, - cls, - options, - logger, - ) as TResult; } async function fetchKeyInternal( diff --git a/packages/fedify/src/utils/kv-cache.test.ts b/packages/fedify/src/utils/kv-cache.test.ts index 8b9090ff8..4642aa9c2 100644 --- a/packages/fedify/src/utils/kv-cache.test.ts +++ b/packages/fedify/src/utils/kv-cache.test.ts @@ -1,4 +1,8 @@ -import { mockDocumentLoader, test } from "@fedify/fixture"; +import { + createTestMeterProvider, + mockDocumentLoader, + test, +} from "@fedify/fixture"; import type { DocumentLoader } from "@fedify/vocab-runtime"; import { preloadedContexts } from "@fedify/vocab-runtime"; import { deepStrictEqual, throws } from "node:assert"; @@ -190,6 +194,122 @@ test("kvCache()", async (t) => { }); }); + await t.step( + "records activitypub.document.cache with miss then hit", + async () => { + const kv = new MockKvStore(); + const [meterProvider, recorder] = createTestMeterProvider(); + const loader = kvCache({ + kv, + loader: mockDocumentLoader, + rules: [["https://example.com/object", { days: 1 }]], + prefix: ["_test", "doc-cache"], + meterProvider, + kind: "object", + }); + + await loader("https://example.com/object"); + const afterMiss = recorder.getMeasurements("activitypub.document.cache"); + deepStrictEqual(afterMiss.length, 1); + deepStrictEqual(afterMiss[0].type, "counter"); + deepStrictEqual(afterMiss[0].value, 1); + deepStrictEqual( + afterMiss[0].attributes["activitypub.lookup.kind"], + "object", + ); + deepStrictEqual( + afterMiss[0].attributes["activitypub.lookup.result"], + "miss", + ); + deepStrictEqual( + afterMiss[0].attributes["activitypub.remote.host"], + "example.com", + ); + + await loader("https://example.com/object"); + const all = recorder.getMeasurements("activitypub.document.cache"); + deepStrictEqual(all.length, 2); + deepStrictEqual(all[1].attributes["activitypub.lookup.result"], "hit"); + deepStrictEqual( + all[1].attributes["activitypub.lookup.kind"], + "object", + ); + deepStrictEqual( + all[1].attributes["activitypub.remote.host"], + "example.com", + ); + }, + ); + + await t.step( + "preloaded contexts emit no activitypub.document.cache", + async () => { + const kv = new MockKvStore(); + const [meterProvider, recorder] = createTestMeterProvider(); + const loader = kvCache({ + kv, + loader: mockDocumentLoader, + prefix: ["_test", "doc-cache-preloaded"], + meterProvider, + kind: "context", + }); + + await loader("https://www.w3.org/ns/activitystreams"); + deepStrictEqual( + recorder.getMeasurements("activitypub.document.cache").length, + 0, + "preloaded contexts must bypass the KV cache and the cache metric", + ); + }, + ); + + await t.step( + "no cache rule match emits no activitypub.document.cache", + async () => { + const kv = new MockKvStore(); + const [meterProvider, recorder] = createTestMeterProvider(); + const loader = kvCache({ + kv, + loader: mockDocumentLoader, + rules: [], + prefix: ["_test", "doc-cache-no-rule"], + meterProvider, + kind: "object", + }); + + await loader("https://example.com/object"); + deepStrictEqual( + recorder.getMeasurements("activitypub.document.cache").length, + 0, + ); + }, + ); + + await t.step( + "omitting meterProvider records no cache measurements", + async () => { + const kv = new MockKvStore(); + const [meterProvider, recorder] = createTestMeterProvider(); + // Do not pass meterProvider; the meterProvider here is only used to + // assert that NOTHING was recorded against this isolated test provider. + const loader = kvCache({ + kv, + loader: mockDocumentLoader, + rules: [["https://example.com/object", { days: 1 }]], + prefix: ["_test", "doc-cache-no-mp"], + }); + + await loader("https://example.com/object"); + await loader("https://example.com/object"); + deepStrictEqual( + recorder.getMeasurements("activitypub.document.cache").length, + 0, + ); + // Reference meterProvider to avoid unused-variable lint complaints. + deepStrictEqual(typeof meterProvider.getMeter, "function"); + }, + ); + await t.step("preloaded contexts bypass cache", async () => { const kv = new MockKvStore(); let loaderCalled = false; diff --git a/packages/fedify/src/utils/kv-cache.ts b/packages/fedify/src/utils/kv-cache.ts index f004c4a58..998825523 100644 --- a/packages/fedify/src/utils/kv-cache.ts +++ b/packages/fedify/src/utils/kv-cache.ts @@ -5,12 +5,17 @@ import type { } from "@fedify/vocab-runtime"; import { preloadedContexts } from "@fedify/vocab-runtime"; import { getLogger } from "@logtape/logtape"; +import type { MeterProvider } from "@opentelemetry/api"; import type { KvKey, KvStore, KvStoreListEntry, KvStoreSetOptions, } from "../federation/kv.ts"; +import { + type DocumentFetchKind, + recordDocumentCache, +} from "../federation/metrics.ts"; const logger = getLogger(["fedify", "utils", "kv-cache"]); @@ -80,6 +85,26 @@ export interface KvCacheParameters { string | URL | URLPattern, Temporal.Duration | Temporal.DurationLike, ][]; + + /** + * The OpenTelemetry meter provider used to record + * `activitypub.document.cache` measurements. When omitted, the wrapper + * does not emit any metric measurements, preserving the previous + * unobserved-cache behavior. + * @since 2.3.0 + */ + readonly meterProvider?: MeterProvider; + + /** + * The lookup kind to record on the `activitypub.lookup.kind` attribute of + * `activitypub.document.cache` measurements. Defaults to `"object"` so + * the generic document loader case does not require an explicit option. + * Set to `"context"` for context-loader wrappers; the + * authenticated-loader path does not use `kvCache()` and is therefore + * out of scope. + * @since 2.3.0 + */ + readonly kind?: DocumentFetchKind; } /** @@ -88,7 +113,7 @@ export interface KvCacheParameters { * @returns The decorated document loader which is cache-enabled. */ export function kvCache( - { loader, kv, prefix, rules }: KvCacheParameters, + { loader, kv, prefix, rules, meterProvider, kind }: KvCacheParameters, ): DocumentLoader { const keyPrefix = prefix ?? ["_fedify", "remoteDocument"]; rules ??= [ @@ -104,6 +129,25 @@ export function kvCache( ); } } + const lookupKind: DocumentFetchKind = kind ?? "object"; + + function emitCacheMetric( + url: string, + result: "hit" | "miss", + ): void { + if (meterProvider == null) return; + let remoteUrl: URL | undefined; + try { + remoteUrl = new URL(url); + } catch { + remoteUrl = undefined; + } + recordDocumentCache(meterProvider, { + kind: lookupKind, + result, + remoteUrl, + }); + } return async ( url: string, @@ -132,6 +176,7 @@ export function kvCache( } } if (cache == null) { + emitCacheMetric(url, "miss"); const remoteDoc = await loader(url, options); try { await kv.set(key, remoteDoc, { ttl: match }); @@ -143,6 +188,7 @@ export function kvCache( } return remoteDoc; } + emitCacheMetric(url, "hit"); return cache; }; } diff --git a/packages/vocab/src/lookup.test.ts b/packages/vocab/src/lookup.test.ts index 6429bdb93..807da59e0 100644 --- a/packages/vocab/src/lookup.test.ts +++ b/packages/vocab/src/lookup.test.ts @@ -1,4 +1,5 @@ import { + createTestMeterProvider, createTestTracerProvider, mockDocumentLoader, test, @@ -693,4 +694,175 @@ test("lookupObject() records OpenTelemetry span events", async () => { deepStrictEqual(recordedObject.id, "https://example.com/object"); }); +test("lookupObject() records activitypub.object.lookup counter", { + sanitizeResources: false, + sanitizeOps: false, +}, async (t) => { + fetchMock.spyGlobal(); + // Cleanup must run even if a test step throws so the global `fetch` + // spy does not leak into subsequent tests. + try { + fetchMock.removeRoutes(); + fetchMock.get( + "begin:https://example.com/.well-known/webfinger", + { + subject: "acct:johndoe@example.com", + links: [ + { + rel: "self", + href: "https://example.com/person", + type: "application/activity+json", + }, + ], + }, + ); + + await t.step("records kind=actor for an actor result", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const person = await lookupObject("@johndoe@example.com", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + assertInstanceOf(person, Person); + const counters = recorder.getMeasurements("activitypub.object.lookup"); + deepStrictEqual(counters.length, 1); + deepStrictEqual(counters[0].type, "counter"); + deepStrictEqual(counters[0].value, 1); + deepStrictEqual( + counters[0].attributes["activitypub.lookup.kind"], + "actor", + ); + deepStrictEqual( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + // No `activitypub.document.fetch` measurement should be recorded by + // lookupObject itself; that lives on the document-loader path. + deepStrictEqual( + recorder.getMeasurements("activitypub.document.fetch").length, + 0, + ); + }); + + await t.step("records kind=object for a non-actor object", async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + const object = await lookupObject("https://example.com/object", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + assertInstanceOf(object, Object); + const counters = recorder.getMeasurements("activitypub.object.lookup"); + deepStrictEqual(counters.length, 1); + deepStrictEqual( + counters[0].attributes["activitypub.lookup.kind"], + "object", + ); + deepStrictEqual( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + }); + + await t.step("records kind=other on null result", async () => { + fetchMock.removeRoutes(); + fetchMock.get("begin:https://example.com/.well-known/webfinger", { + subject: "acct:janedoe@example.com", + links: [ + { + rel: "self", + href: "https://example.com/404", + type: "application/activity+json", + }, + ], + }); + const [meterProvider, recorder] = createTestMeterProvider(); + const result = await lookupObject("janedoe@example.com", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + deepStrictEqual(result, null); + const counters = recorder.getMeasurements("activitypub.object.lookup"); + deepStrictEqual(counters.length, 1); + deepStrictEqual( + counters[0].attributes["activitypub.lookup.kind"], + "other", + ); + deepStrictEqual( + counters[0].attributes["activitypub.remote.host"], + "example.com", + ); + }); + + await t.step( + "omits counter when no meterProvider is provided", + async () => { + const [_unused, recorder] = createTestMeterProvider(); + await lookupObject("https://example.com/object", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + }); + deepStrictEqual( + recorder.getMeasurements("activitypub.object.lookup").length, + 0, + ); + }, + ); + + await t.step( + "extracts host from a URL acct: instance", + async () => { + fetchMock.removeRoutes(); + fetchMock.get("begin:https://example.com/.well-known/webfinger", { + subject: "acct:johndoe@example.com", + links: [ + { + rel: "self", + href: "https://example.com/person", + type: "application/activity+json", + }, + ], + }); + const [meterProvider, recorder] = createTestMeterProvider(); + await lookupObject(new URL("acct:johndoe@example.com"), { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + const counter = recorder.getMeasurement("activitypub.object.lookup"); + deepStrictEqual( + counter?.attributes["activitypub.remote.host"], + "example.com", + ); + }, + ); + + await t.step( + "omits host when the identifier carries path/query characters", + async () => { + const [meterProvider, recorder] = createTestMeterProvider(); + // The bare-handle parser must refuse to put a path-bearing string + // like "example.com/leak" into the metric attribute. + await lookupObject("johndoe@example.com/leak", { + documentLoader: mockDocumentLoader, + contextLoader: mockDocumentLoader, + meterProvider, + }); + const counter = recorder.getMeasurement("activitypub.object.lookup"); + ok(counter != null); + deepStrictEqual( + "activitypub.remote.host" in counter.attributes, + false, + "high-cardinality handle suffixes must not be recorded as remote.host", + ); + }, + ); + } finally { + fetchMock.removeRoutes(); + fetchMock.hardReset(); + } +}); + // cSpell: ignore gildong diff --git a/packages/vocab/src/lookup.ts b/packages/vocab/src/lookup.ts index 43262a3c7..da9159c42 100644 --- a/packages/vocab/src/lookup.ts +++ b/packages/vocab/src/lookup.ts @@ -6,13 +6,95 @@ import { } from "@fedify/vocab-runtime"; import { lookupWebFinger } from "@fedify/webfinger"; import { getLogger } from "@logtape/logtape"; -import { SpanStatusCode, trace, type TracerProvider } from "@opentelemetry/api"; +import { + type Attributes, + type Counter, + type MeterProvider, + SpanStatusCode, + trace, + type TracerProvider, +} from "@opentelemetry/api"; import { delay } from "es-toolkit"; import metadata from "../deno.json" with { type: "json" }; +import { isActor } from "./actor.ts"; import { toAcctUrl } from "./handle.ts"; import { getTypeId } from "./type.ts"; import { type Collection, type Link, Object } from "./vocab.ts"; +/** + * The classification of an ActivityStreams lookup performed by + * {@link lookupObject}, recorded as `activitypub.lookup.kind` on the + * `activitypub.object.lookup` counter. + * + * - `actor`: the resolved object is an {@link import("./actor.ts").Actor}. + * - `object`: the resolved object is a non-actor ActivityStreams object. + * - `other`: the lookup did not resolve to an object (not found, network + * failure, parse failure). + * @since 2.3.0 + */ +export type ObjectLookupKind = "actor" | "object" | "other"; + +const objectLookupCounters = new WeakMap(); + +function getObjectLookupCounter(meterProvider: MeterProvider): Counter { + let counter = objectLookupCounters.get(meterProvider); + if (counter == null) { + counter = meterProvider + .getMeter(metadata.name, metadata.version) + .createCounter("activitypub.object.lookup", { + description: + "ActivityStreams object lookups via lookupObject(), classified " + + "by whether the resolved value is an Actor.", + unit: "{lookup}", + }); + objectLookupCounters.set(meterProvider, counter); + } + return counter; +} + +function getLookupRemoteHost(identifier: string | URL): string | undefined { + let url: URL | undefined; + if (identifier instanceof URL) { + url = identifier; + } else { + try { + url = new URL(identifier); + } catch { + // Not a URL — try to interpret as a bare fediverse handle below. + const stripped = identifier.startsWith("@") + ? identifier.slice(1) + : identifier; + return extractHandleHost(stripped); + } + } + if (url.hostname !== "") return url.hostname; + // `acct:` URIs are opaque (no `//host` form), so the URL hostname is + // empty. The user and authority live in `url.pathname` as + // `user@host`; reuse the same handle-extraction logic, which both + // takes only the substring after the last `@` and refuses to record + // anything that looks like a path / query / fragment rather than a + // bare hostname. + if (url.protocol === "acct:") return extractHandleHost(url.pathname); + return undefined; +} + +function extractHandleHost(handle: string): string | undefined { + const at = handle.lastIndexOf("@"); + if (at < 0 || at >= handle.length - 1) return undefined; + const candidate = handle.slice(at + 1); + // Reject anything that is not just an authority — paths, queries, + // fragments, and spaces would all leak high-cardinality metadata into + // the metric attribute, so we drop the host entirely in those cases. + if (/[/?#\s]/.test(candidate)) return undefined; + // Round-trip through `URL` so the parser validates the authority and + // strips any port/userinfo before we record it. + try { + return new URL(`https://${candidate}`).hostname || undefined; + } catch { + return undefined; + } +} + const logger = getLogger(["fedify", "vocab", "lookup"]); /** @@ -67,6 +149,15 @@ export interface LookupObjectOptions { */ tracerProvider?: TracerProvider; + /** + * The OpenTelemetry meter provider used to record the + * `activitypub.object.lookup` counter. If omitted, the counter is not + * emitted at all (the helper is opt-in to avoid touching the global + * meter provider for callers that do not use OpenTelemetry). + * @since 2.3.0 + */ + meterProvider?: MeterProvider; + /** * AbortSignal for cancelling the request. * @since 1.8.0 @@ -118,8 +209,16 @@ export async function lookupObject( return await tracer.startActiveSpan( "activitypub.lookup_object", async (span) => { + let kind: ObjectLookupKind = "other"; try { const result = await lookupObjectInternal(identifier, options); + // Classify the result as soon as `lookupObjectInternal` returns, + // so that any subsequent throw (for example from + // `result.toJsonLd(options)` while building the span event) does + // not roll `kind` back to `"other"` in the `finally` block. + if (result != null) { + kind = isActor(result) ? "actor" : "object"; + } if (result == null) span.setStatus({ code: SpanStatusCode.ERROR }); else { if (result.id != null) { @@ -150,6 +249,14 @@ export async function lookupObject( }); throw error; } finally { + if (options.meterProvider != null) { + const attributes: Attributes = { + "activitypub.lookup.kind": kind, + }; + const host = getLookupRemoteHost(identifier); + if (host != null) attributes["activitypub.remote.host"] = host; + getObjectLookupCounter(options.meterProvider).add(1, attributes); + } span.end(); } },