diff --git a/public/images/docs/observe/1.png b/public/images/docs/observe/1.png deleted file mode 100644 index 4cf8d841..00000000 Binary files a/public/images/docs/observe/1.png and /dev/null differ diff --git a/public/images/docs/observe/2.png b/public/images/docs/observe/2.png deleted file mode 100644 index e5b66386..00000000 Binary files a/public/images/docs/observe/2.png and /dev/null differ diff --git a/public/images/docs/observe/3.png b/public/images/docs/observe/3.png deleted file mode 100644 index 94a4ada7..00000000 Binary files a/public/images/docs/observe/3.png and /dev/null differ diff --git a/public/images/docs/observe/4.png b/public/images/docs/observe/4.png deleted file mode 100644 index 25ba68de..00000000 Binary files a/public/images/docs/observe/4.png and /dev/null differ diff --git a/public/images/docs/observe/5.png b/public/images/docs/observe/5.png deleted file mode 100644 index 598c228e..00000000 Binary files a/public/images/docs/observe/5.png and /dev/null differ diff --git a/public/images/docs/observe/5.webp b/public/images/docs/observe/5.webp deleted file mode 100644 index d4577521..00000000 Binary files a/public/images/docs/observe/5.webp and /dev/null differ diff --git a/public/images/docs/observe/alerts-create.png b/public/images/docs/observe/alerts-create.png new file mode 100644 index 00000000..92ed57d4 Binary files /dev/null and b/public/images/docs/observe/alerts-create.png differ diff --git a/public/images/docs/observe/alerts-create.webp b/public/images/docs/observe/alerts-create.webp new file mode 100644 index 00000000..c3b34b03 Binary files /dev/null and b/public/images/docs/observe/alerts-create.webp differ diff --git a/public/images/docs/observe/alerts-overview.png b/public/images/docs/observe/alerts-overview.png new file mode 100644 index 00000000..8732d1b5 Binary files /dev/null and b/public/images/docs/observe/alerts-overview.png differ diff --git a/public/images/docs/observe/alerts-overview.webp 
b/public/images/docs/observe/alerts-overview.webp new file mode 100644 index 00000000..618ad48c Binary files /dev/null and b/public/images/docs/observe/alerts-overview.webp differ diff --git a/public/images/docs/observe/dashboard-add-widget.png b/public/images/docs/observe/dashboard-add-widget.png new file mode 100644 index 00000000..816fa177 Binary files /dev/null and b/public/images/docs/observe/dashboard-add-widget.png differ diff --git a/public/images/docs/observe/dashboard-add-widget.webp b/public/images/docs/observe/dashboard-add-widget.webp new file mode 100644 index 00000000..495f0d65 Binary files /dev/null and b/public/images/docs/observe/dashboard-add-widget.webp differ diff --git a/public/images/docs/observe/dashboard-overview.png b/public/images/docs/observe/dashboard-overview.png new file mode 100644 index 00000000..0ef53489 Binary files /dev/null and b/public/images/docs/observe/dashboard-overview.png differ diff --git a/public/images/docs/observe/dashboard-overview.webp b/public/images/docs/observe/dashboard-overview.webp new file mode 100644 index 00000000..6df319da Binary files /dev/null and b/public/images/docs/observe/dashboard-overview.webp differ diff --git a/public/images/docs/observe/dashboard-populated.png b/public/images/docs/observe/dashboard-populated.png new file mode 100644 index 00000000..1c990723 Binary files /dev/null and b/public/images/docs/observe/dashboard-populated.png differ diff --git a/public/images/docs/observe/dashboard-populated.webp b/public/images/docs/observe/dashboard-populated.webp new file mode 100644 index 00000000..7154473f Binary files /dev/null and b/public/images/docs/observe/dashboard-populated.webp differ diff --git a/public/images/docs/observe/evals-create.png b/public/images/docs/observe/evals-create.png new file mode 100644 index 00000000..c616928d Binary files /dev/null and b/public/images/docs/observe/evals-create.png differ diff --git a/public/images/docs/observe/evals-create.webp 
b/public/images/docs/observe/evals-create.webp new file mode 100644 index 00000000..26fab78a Binary files /dev/null and b/public/images/docs/observe/evals-create.webp differ diff --git a/public/images/docs/observe/evals-overview.png b/public/images/docs/observe/evals-overview.png new file mode 100644 index 00000000..f45b9633 Binary files /dev/null and b/public/images/docs/observe/evals-overview.png differ diff --git a/public/images/docs/observe/evals-overview.webp b/public/images/docs/observe/evals-overview.webp new file mode 100644 index 00000000..fe423838 Binary files /dev/null and b/public/images/docs/observe/evals-overview.webp differ diff --git a/public/images/docs/observe/llm-tracing-agent-graph.png b/public/images/docs/observe/llm-tracing-agent-graph.png new file mode 100644 index 00000000..a9587b75 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-agent-graph.png differ diff --git a/public/images/docs/observe/llm-tracing-agent-path.png b/public/images/docs/observe/llm-tracing-agent-path.png new file mode 100644 index 00000000..7e7ffbba Binary files /dev/null and b/public/images/docs/observe/llm-tracing-agent-path.png differ diff --git a/public/images/docs/observe/llm-tracing-bulk-actions.png b/public/images/docs/observe/llm-tracing-bulk-actions.png new file mode 100644 index 00000000..55d0ed9d Binary files /dev/null and b/public/images/docs/observe/llm-tracing-bulk-actions.png differ diff --git a/public/images/docs/observe/llm-tracing-bulk-actions.webp b/public/images/docs/observe/llm-tracing-bulk-actions.webp new file mode 100644 index 00000000..203817dd Binary files /dev/null and b/public/images/docs/observe/llm-tracing-bulk-actions.webp differ diff --git a/public/images/docs/observe/llm-tracing-date-range.png b/public/images/docs/observe/llm-tracing-date-range.png new file mode 100644 index 00000000..5067316d Binary files /dev/null and b/public/images/docs/observe/llm-tracing-date-range.png differ diff --git 
a/public/images/docs/observe/llm-tracing-date-range.webp b/public/images/docs/observe/llm-tracing-date-range.webp new file mode 100644 index 00000000..3a1fbc81 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-date-range.webp differ diff --git a/public/images/docs/observe/llm-tracing-detail-drawer.png b/public/images/docs/observe/llm-tracing-detail-drawer.png new file mode 100644 index 00000000..2378c742 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-detail-drawer.png differ diff --git a/public/images/docs/observe/llm-tracing-detail-drawer.webp b/public/images/docs/observe/llm-tracing-detail-drawer.webp new file mode 100644 index 00000000..980e23bd Binary files /dev/null and b/public/images/docs/observe/llm-tracing-detail-drawer.webp differ diff --git a/public/images/docs/observe/llm-tracing-display.png b/public/images/docs/observe/llm-tracing-display.png new file mode 100644 index 00000000..9ea95e6a Binary files /dev/null and b/public/images/docs/observe/llm-tracing-display.png differ diff --git a/public/images/docs/observe/llm-tracing-display.webp b/public/images/docs/observe/llm-tracing-display.webp new file mode 100644 index 00000000..ae32fa6b Binary files /dev/null and b/public/images/docs/observe/llm-tracing-display.webp differ diff --git a/public/images/docs/observe/llm-tracing-filter.png b/public/images/docs/observe/llm-tracing-filter.png new file mode 100644 index 00000000..c67fd6db Binary files /dev/null and b/public/images/docs/observe/llm-tracing-filter.png differ diff --git a/public/images/docs/observe/llm-tracing-filter.webp b/public/images/docs/observe/llm-tracing-filter.webp new file mode 100644 index 00000000..73b72116 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-filter.webp differ diff --git a/public/images/docs/observe/llm-tracing-overview.png b/public/images/docs/observe/llm-tracing-overview.png new file mode 100644 index 00000000..b8538b3f Binary files /dev/null and 
b/public/images/docs/observe/llm-tracing-overview.png differ diff --git a/public/images/docs/observe/llm-tracing-overview.webp b/public/images/docs/observe/llm-tracing-overview.webp new file mode 100644 index 00000000..81015fc8 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-overview.webp differ diff --git a/public/images/docs/observe/llm-tracing-sessions-tab.png b/public/images/docs/observe/llm-tracing-sessions-tab.png new file mode 100644 index 00000000..48ffa061 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-sessions-tab.png differ diff --git a/public/images/docs/observe/llm-tracing-users-tab.png b/public/images/docs/observe/llm-tracing-users-tab.png new file mode 100644 index 00000000..e5ea255e Binary files /dev/null and b/public/images/docs/observe/llm-tracing-users-tab.png differ diff --git a/public/images/docs/observe/llm-tracing-voice-detail.png b/public/images/docs/observe/llm-tracing-voice-detail.png new file mode 100644 index 00000000..de0f1079 Binary files /dev/null and b/public/images/docs/observe/llm-tracing-voice-detail.png differ diff --git a/public/images/docs/observe/llm-tracing-voice-overview.png b/public/images/docs/observe/llm-tracing-voice-overview.png new file mode 100644 index 00000000..cf935b6a Binary files /dev/null and b/public/images/docs/observe/llm-tracing-voice-overview.png differ diff --git a/public/images/docs/observe/sessions-bulk-actions.png b/public/images/docs/observe/sessions-bulk-actions.png new file mode 100644 index 00000000..b301ecc5 Binary files /dev/null and b/public/images/docs/observe/sessions-bulk-actions.png differ diff --git a/public/images/docs/observe/sessions-date-range.png b/public/images/docs/observe/sessions-date-range.png new file mode 100644 index 00000000..9cc71673 Binary files /dev/null and b/public/images/docs/observe/sessions-date-range.png differ diff --git a/public/images/docs/observe/sessions-detail.png b/public/images/docs/observe/sessions-detail.png new file 
mode 100644 index 00000000..a5e08cbd Binary files /dev/null and b/public/images/docs/observe/sessions-detail.png differ diff --git a/public/images/docs/observe/sessions-display.png b/public/images/docs/observe/sessions-display.png new file mode 100644 index 00000000..2c3750f5 Binary files /dev/null and b/public/images/docs/observe/sessions-display.png differ diff --git a/public/images/docs/observe/sessions-filter.png b/public/images/docs/observe/sessions-filter.png new file mode 100644 index 00000000..a90eda0d Binary files /dev/null and b/public/images/docs/observe/sessions-filter.png differ diff --git a/public/images/docs/observe/sessions-overview.png b/public/images/docs/observe/sessions-overview.png new file mode 100644 index 00000000..9143d4cf Binary files /dev/null and b/public/images/docs/observe/sessions-overview.png differ diff --git a/public/images/docs/observe/sessions-overview.webp b/public/images/docs/observe/sessions-overview.webp new file mode 100644 index 00000000..bfc4285e Binary files /dev/null and b/public/images/docs/observe/sessions-overview.webp differ diff --git a/public/images/docs/observe/sessions-replay-config.png b/public/images/docs/observe/sessions-replay-config.png new file mode 100644 index 00000000..16ce3464 Binary files /dev/null and b/public/images/docs/observe/sessions-replay-config.png differ diff --git a/public/images/docs/observe/users-date-range.png b/public/images/docs/observe/users-date-range.png new file mode 100644 index 00000000..1d5a0e0a Binary files /dev/null and b/public/images/docs/observe/users-date-range.png differ diff --git a/public/images/docs/observe/users-detail.png b/public/images/docs/observe/users-detail.png new file mode 100644 index 00000000..c88cf233 Binary files /dev/null and b/public/images/docs/observe/users-detail.png differ diff --git a/public/images/docs/observe/users-detail.webp b/public/images/docs/observe/users-detail.webp new file mode 100644 index 00000000..54106965 Binary files /dev/null and 
b/public/images/docs/observe/users-detail.webp differ diff --git a/public/images/docs/observe/users-display.png b/public/images/docs/observe/users-display.png new file mode 100644 index 00000000..adbffb56 Binary files /dev/null and b/public/images/docs/observe/users-display.png differ diff --git a/public/images/docs/observe/users-filter.png b/public/images/docs/observe/users-filter.png new file mode 100644 index 00000000..f488c9b7 Binary files /dev/null and b/public/images/docs/observe/users-filter.png differ diff --git a/public/images/docs/observe/users-overview.png b/public/images/docs/observe/users-overview.png new file mode 100644 index 00000000..e1f11a11 Binary files /dev/null and b/public/images/docs/observe/users-overview.png differ diff --git a/public/images/docs/observe/users-overview.webp b/public/images/docs/observe/users-overview.webp new file mode 100644 index 00000000..0a2c365e Binary files /dev/null and b/public/images/docs/observe/users-overview.webp differ diff --git a/public/images/docs/observe/voice-agent-definitions.png b/public/images/docs/observe/voice-agent-definitions.png new file mode 100644 index 00000000..33bb2d4a Binary files /dev/null and b/public/images/docs/observe/voice-agent-definitions.png differ diff --git a/public/images/docs/observe/voice-agent-definitions.webp b/public/images/docs/observe/voice-agent-definitions.webp new file mode 100644 index 00000000..85a46a86 Binary files /dev/null and b/public/images/docs/observe/voice-agent-definitions.webp differ diff --git a/public/images/docs/observe/voice-call-detail.png b/public/images/docs/observe/voice-call-detail.png new file mode 100644 index 00000000..ffc058e0 Binary files /dev/null and b/public/images/docs/observe/voice-call-detail.png differ diff --git a/public/images/docs/observe/voice-call-detail.webp b/public/images/docs/observe/voice-call-detail.webp new file mode 100644 index 00000000..ae571fce Binary files /dev/null and b/public/images/docs/observe/voice-call-detail.webp 
differ diff --git a/public/images/docs/observe/voice-create-form.png b/public/images/docs/observe/voice-create-form.png new file mode 100644 index 00000000..6aa525bf Binary files /dev/null and b/public/images/docs/observe/voice-create-form.png differ diff --git a/public/images/docs/observe/voice-create-form.webp b/public/images/docs/observe/voice-create-form.webp new file mode 100644 index 00000000..acfb4c38 Binary files /dev/null and b/public/images/docs/observe/voice-create-form.webp differ diff --git a/public/images/docs/observe/voice-projects-list.png b/public/images/docs/observe/voice-projects-list.png new file mode 100644 index 00000000..23d19398 Binary files /dev/null and b/public/images/docs/observe/voice-projects-list.png differ diff --git a/public/images/docs/observe/voice-tracing-overview.png b/public/images/docs/observe/voice-tracing-overview.png new file mode 100644 index 00000000..c94d0d73 Binary files /dev/null and b/public/images/docs/observe/voice-tracing-overview.png differ diff --git a/public/images/docs/observe/voice-tracing-overview.webp b/public/images/docs/observe/voice-tracing-overview.webp new file mode 100644 index 00000000..3759c6d2 Binary files /dev/null and b/public/images/docs/observe/voice-tracing-overview.webp differ diff --git a/src/components/docs/CodeGroup.astro b/src/components/docs/CodeGroup.astro index 7193f41a..95d82d7c 100644 --- a/src/components/docs/CodeGroup.astro +++ b/src/components/docs/CodeGroup.astro @@ -40,6 +40,11 @@ const id = `code-group-${Math.random().toString(36).slice(2, 9)}`; + + diff --git a/src/lib/navigation.ts b/src/lib/navigation.ts index f8c26b39..8a47f289 100644 --- a/src/lib/navigation.ts +++ b/src/lib/navigation.ts @@ -348,7 +348,7 @@ export const tabNavigation: NavTab[] = [ ] }, { - group: 'Observability', + group: 'traceAI', icon: 'eye', items: [ { title: 'Overview', href: '/docs/observe' }, @@ -358,6 +358,7 @@ export const tabNavigation: NavTab[] = [ { title: 'Understanding Observability', href: 
'/docs/tracing/concepts' }, { title: 'What are Traces?', href: '/docs/tracing/concepts/traces' }, { title: 'What are Spans?', href: '/docs/tracing/concepts/spans' }, + { title: 'Sessions and Users', href: '/docs/tracing/concepts/sessions-and-users' }, { title: 'What is OpenTelemetry?', href: '/docs/tracing/concepts/otel' }, { title: 'What is traceAI?', href: '/docs/tracing/concepts/traceai' }, ] @@ -366,12 +367,13 @@ export const tabNavigation: NavTab[] = [ title: 'Features', items: [ { title: 'Set Up Observability', href: '/docs/observe/features/quickstart' }, - { title: 'Run Evals on Traces', href: '/docs/observe/features/evals' }, + { title: 'LLM Tracing', href: '/docs/observe/features/llm-tracing' }, { title: 'Sessions', href: '/docs/observe/features/session' }, { title: 'Users', href: '/docs/observe/features/users' }, + { title: 'Run Evals on Traces', href: '/docs/observe/features/evals' }, + { title: 'Dashboards', href: '/docs/observe/features/dashboard' }, { title: 'Alerts & Monitors', href: '/docs/observe/features/alerts' }, { title: 'Voice Observability', href: '/docs/observe/features/voice' }, - { title: 'Dashboards', href: '/docs/observe/features/dashboard' }, { title: 'Manual Tracing', items: [ @@ -394,7 +396,24 @@ export const tabNavigation: NavTab[] = [ ] }, { - title: 'Integration', + title: 'Reference', + items: [ + { title: 'Trace Filter Syntax', href: '/docs/observe/reference/trace-filter-syntax' }, + { title: 'Dashboard Metric Definitions', href: '/docs/observe/reference/dashboard-metric-definitions' }, + { title: 'Trace Export and Endpoints', href: '/docs/observe/reference/export-formats' }, + ] + }, + { + title: 'Troubleshooting', + items: [ + { title: 'No traces appear', href: '/docs/observe/troubleshooting/no-traces-appearing' }, + { title: 'Missing spans or attributes', href: '/docs/observe/troubleshooting/missing-attributes' }, + { title: 'Dashboard numbers look wrong', href: '/docs/observe/troubleshooting/dashboard-numbers-look-wrong' }, + 
{ title: 'An alert did not fire', href: '/docs/observe/troubleshooting/alerts-did-not-fire' }, + ] + }, + { + title: 'Framework integrations', items: [ { title: 'Overview', href: '/docs/tracing/auto' }, { diff --git a/src/pages/docs/observe/features/alerts.mdx b/src/pages/docs/observe/features/alerts.mdx index b86f7f2c..1935161f 100644 --- a/src/pages/docs/observe/features/alerts.mdx +++ b/src/pages/docs/observe/features/alerts.mdx @@ -1,84 +1,127 @@ --- -title: "Alerts and Monitors: Observe Metric Threshold Notifications" -description: "Define monitors on Observe project metrics (system or evaluation) and get notified by email or Slack when values cross a threshold." +title: "Alerts and Monitors: Threshold Notifications" +description: "Define monitors on Observe metrics — error rate, latency, cost, or eval scores — and get notified by email or Slack when a value crosses a threshold." +page_type: "feature-deep-dive" +products: ["traceAI"] +feature: "Alerts and monitors" +feature_status: "stable" +ui_surfaces: ["Observe > Alerts"] +audience: "engineer" +difficulty: "beginner" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +last_screenshotted: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "llm observability alerts and monitors" +geo: + direct_answer: true +related: + feature: "/docs/observe/features/dashboard" + concept: "/docs/tracing/concepts/traces" + how_to: "/docs/observe/features/evals" --- ## About -**Alerts and monitors** notify you when a metric goes above or below a value you set. Pick a metric (error rate, latency, cost, or an eval score), define a threshold, and choose where to get notified: email, Slack, or both. Monitors check the metric on a schedule. If the threshold is breached, you get an alert. You can review past alerts, mark them resolved, or mute a monitor without deleting it. +A monitor watches one Observe metric on a schedule and notifies you when it crosses a threshold. 
Pick a metric — error rate, latency, cost, or an eval score — set a threshold and direction, and choose where alerts go: email, Slack, or both. When the threshold is breached the monitor creates an alert log and sends the notification. You can review past alerts, mark them resolved, or mute a monitor without deleting it. Monitors are how Observe tells *you* something broke instead of you watching a dashboard. --- ## When to use -- **Catch errors early**: Get notified when error rate or API failure rate spikes after a deployment. -- **Stay within latency limits**: Alert when response time goes above your target. -- **Control costs**: Track token usage and get a warning before you hit your budget. -- **Monitor eval quality**: Know when a pass/fail eval like toxicity starts failing more often. -- **Stay informed without watching dashboards**: Send alerts to email, Slack, or both. +- **Catch errors early** — alert when error rate or LLM API failure rate spikes after a deploy. +- **Hold latency limits** — alert when response time goes above your target. +- **Control cost** — warn before token usage hits your budget. +- **Guard quality** — alert when a pass/fail eval (e.g. toxicity) starts failing more often. --- -## How to +## When not to use + +- **Exploring trends** — a monitor is a tripwire, not a chart; use [Dashboards](/docs/observe/features/dashboard). +- **Debugging one request** — use the [trace explorer](/docs/observe/features/llm-tracing). + +--- + +## Set up a monitor - Create a monitor for an Observe project and select the **metric type**: - ![Choose the metric](/screenshot/product/observe/1.png) + Create a monitor for an Observe project and pick the metric type. - - **System metrics**: count of errors, error-free session rates, LLM API failure rates, span response time, LLM response time, token usage, daily/monthly tokens spent. - - **Evaluation metrics**: attach an eval config for that project. 
For pass/fail or choice evals you can set **threshold_metric_value** to the specific value to monitor (e.g. fail rate or a choice label). + Create-monitor form selecting a metric, threshold, and notification channels + *Building a monitor: metric → threshold → notifications, in one form.* - The monitor is scoped to one project (Observe projects only). + - **System metrics:** error count, error-free session rate, LLM API failure rate, span response time, LLM response time, token usage, daily/monthly tokens spent. + - **Evaluation metrics:** attach an eval config for the project. For pass/fail or choice evals, set `threshold_metric_value` to the value to watch (e.g. a fail rate or a choice label). - Set how the alert is triggered: - ![Define the threshold](/screenshot/product/observe/2.png) - - - **threshold_operator**: **Greater than** or **Less than** (the current metric value is compared to the threshold). - - **threshold_type**: how the threshold is determined: - - **Static**: you set fixed **critical_threshold_value** and optionally **warning_threshold_value**. Alert fires when the metric is greater than (or less than) these values. - - **Percentage change**: threshold is based on percentage change from a baseline (e.g. historical mean over a time window). You set **critical_threshold_value** and optionally **warning_threshold_value** as percentage values. **auto_threshold_time_window** (default one week, in minutes) defines the window used to compute the baseline. - - When the condition is met, the system creates an alert log (critical or warning) and triggers notifications. + - **`threshold_operator`** — `Greater than` or `Less than`. + - **`threshold_type`** — `Static` (fixed `critical_threshold_value`, optional `warning_threshold_value`) or `Percentage change` (compared to a baseline; `auto_threshold_time_window` sets the baseline window, default one week). - - **alert_frequency** is how often the monitor is evaluated, in minutes (minimum 5, default 60). 
The monitor runs on this schedule and checks the metric over the relevant time window. If the threshold is breached, an alert is created and notifications are sent. + + `alert_frequency` is how often the monitor runs, in minutes — **minimum 5, default 60**. Each run checks the metric over its window and fires an alert if the threshold is breached. - - **Email**: add up to five addresses in **notification_emails**. They receive an email when an alert is triggered (subject and body include alert name, message, and type). - - **Slack**: set **slack_webhook_url** to your Slack incoming webhook. Optional **slack_notes** are included in the message. - ![Configure notifications](/screenshot/product/observe/3.png) - You can use email only, Slack only, or both. Mute a monitor with **is_mute** to stop notifications without deleting it. + - **Email** — up to **5** addresses in `notification_emails`. + - **Slack** — set `slack_webhook_url` (an incoming webhook); optional `slack_notes` are included. + + Use email only, Slack only, or both. Mute with `is_mute` to pause notifications without deleting the monitor. - Alert history is stored as **UserAlertMonitorLog** records (critical/warning, message, time window, link). You can list logs for a monitor, see when each alert fired, and mark them resolved. Use the monitor detail view in the UI to see trend data and unresolved count. + Alert history is stored as `UserAlertMonitorLog` records (critical or warning, with message, time window, and a link). List them per monitor, see when each fired, and mark them resolved. + + Alerts list showing past alerts with severity, message, and resolved status + *Alert history. The unresolved count is your live to-do list.* - - Monitors are only available for projects with **trace_type** `observe`. Optional **filters** (same structure as eval-task filters) can narrow which spans are included when computing the metric. 
- +--- + +## Inputs and parameters + +| Parameter | Detail | +|---|---| +| `metric type` | System metric or an attached evaluation metric. | +| `threshold_operator` | `Greater than` / `Less than`. | +| `threshold_type` | `Static` or `Percentage change`. | +| `critical_threshold_value` / `warning_threshold_value` | The trigger values (warning optional). | +| `auto_threshold_time_window` | Baseline window for percentage-change, default one week (minutes). | +| `alert_frequency` | Evaluation cadence, min 5 / default 60 minutes. | +| `notification_emails` | Up to 5 recipients. | +| `slack_webhook_url`, `slack_notes` | Slack channel + optional message. | +| `is_mute` | Pause notifications without deleting. | +| `filters` | Optional; same structure as eval-task filters, to narrow the spans. | + +--- + +## Edge cases and limits + +- Monitors are available **only for `observe` projects**. +- A `Percentage change` monitor needs enough history in its baseline window to compute against — a brand-new project may not alert until data accumulates. +- Muting (`is_mute`) stops notifications but the monitor keeps evaluating and logging. --- -## Next Steps +## Related features - - Connect the SDK and start capturing traces. + + Chart the same metrics you alert on. - - Run evaluations on your traced spans to score quality. + + Produce the eval scores a monitor can watch. - - Group traces into sessions for multi-turn analysis. + + Investigate the requests behind a breached threshold. - - View activity and metrics per end user. + + Check whether a spike is concentrated in specific users. 
diff --git a/src/pages/docs/observe/features/dashboard.mdx b/src/pages/docs/observe/features/dashboard.mdx index 2ef91ad5..9d9c2ae9 100644 --- a/src/pages/docs/observe/features/dashboard.mdx +++ b/src/pages/docs/observe/features/dashboard.mdx @@ -1,85 +1,115 @@ --- -title: "Dashboards: Custom Metric Visualization in Observe" -description: "Build custom dashboards with widgets to visualize your Observe project metrics, traces, and performance data in one place." +title: "Dashboards: Custom Charts Over Trace Metrics" +description: "Build dashboards with widgets to visualize Observe metrics — latency, cost, tokens, error rate, and eval scores — over time, grouped and aggregated how you choose." +page_type: "feature-deep-dive" +products: ["traceAI"] +feature: "Dashboards" +feature_status: "stable" +ui_surfaces: ["Observe > Dashboards"] +audience: "engineer" +difficulty: "beginner" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +last_screenshotted: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "llm observability dashboards" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/traces" + feature: "/docs/observe/features/llm-tracing" + how_to: "/docs/observe/features/alerts" --- ## About -**Dashboards** let you create custom views of your project data. Each dashboard is a collection of widgets, and each widget runs a query against your data and displays the result as a chart or table. You can track error rates, latency, token usage, eval scores, or any metric from your spans and experiments. Dashboards work across project types and are shareable across your team. +A dashboard is a set of widgets that chart your Observe metrics over time. Where the [trace explorer](/docs/observe/features/llm-tracing) shows individual requests, a dashboard shows the roll-up — latency trends, daily cost, error rate, token usage, eval pass-rate — so you can watch production health at a glance and compare periods. 
Each widget runs a query and renders it as a chart or table; you choose its metric, chart type, aggregation, grouping, filters, and time range. Dashboards are shared across the project. --- ## When to use -- **You want a single view of key metrics**: Combine error rate, latency, cost, and eval scores into one dashboard instead of switching between pages. -- **You need to monitor a deployment**: Create a dashboard with widgets that show the metrics you care about, then filter by time range to see how things changed after a release. -- **Your team needs a shared overview**: Build a dashboard that everyone on the team can open to see the current state of the project. -- **You want to compare metrics side by side**: Place multiple widgets on the same dashboard to spot correlations between latency spikes and error rate increases. -- **You need to export or present data**: Use table widgets to view raw data and export it as CSV. +- **Watch production health** — keep latency, error rate, and cost on one screen. +- **Monitor a deployment** — build a dashboard of the metrics you care about, then filter by time to see how a release changed them. +- **Share an overview** — give the team one page instead of asking everyone to run queries. +- **Spot correlations** — place widgets side by side to see a latency spike line up with an error-rate jump. +- **Export** — use a table widget to view and export raw data as CSV. --- -## How to +## When not to use + +- **Debugging one request** — use the [trace explorer](/docs/observe/features/llm-tracing); a chart won't show a single span. +- **Getting notified** — a dashboard is passive; for threshold breaches use [Alerts](/docs/observe/features/alerts). +- **Per-user breakdowns** — use the [Users](/docs/observe/features/users) view. + +--- + +## Build a dashboard - Open the **Dashboards** section and click **Create Dashboard**. Give it a name and optional description. 
- ![Create dashboard](/screenshot/product/observe/dashboard/1.png) + Open **Dashboards** and create a new one with a name and optional description. + + Observe Dashboards view with existing dashboards and the create action + *Start here — one dashboard holds many widgets.* - Click **Add Widget** and configure the query: - ![Add widget](/screenshot/product/observe/dashboard/2.png) - - - **Chart type**: line, stacked line, column, stacked column, bar, stacked bar, pie, table, or metric (single number). - - **Metric**: select from available metrics (e.g. span count, error count, latency, token usage, eval scores). - - **Aggregation**: sum, average, median, count, distinct count, min, or max. - - **Granularity**: minute, hour, day, week, or month (options adjust based on the time range). - - **Filters**: narrow the query to specific spans. - - **Group by**: break down the metric by a span attribute (e.g. model, user, status). + Add a widget and configure its query: pick the **metric**, **chart type**, **aggregation**, optional **filters** and **group-by**, and the **granularity**. Preview before saving. - Preview the result before saving. + Widget editor with chart type, metric, aggregation, group-by, and granularity controls + *The widget editor. The same metric reads very differently as a line over time vs. a single metric tile — pick the shape that answers your question.* - - Choose a global time range that applies to all widgets on the dashboard: - ![Time range](/screenshot/product/observe/dashboard/3.png) + + Choose a global time range (it applies to every widget), then drag and resize widgets into a layout. - - **Presets**: 30 mins, 6 hrs, Today, Yesterday, 7D, 30D, 3M, 6M, 12M. - - **Custom**: pick a specific start and end date. + A populated dashboard with widgets for latency, cost, and request volume + *A populated dashboard. 
Put the widgets you check most often at the top.* + - - Resize and reorder widgets to build your layout: - ![Arrange widgets](/screenshot/product/observe/dashboard/4.png) - Drag and drop to reorder. - +--- - - Use the menu on each widget to **edit**, **duplicate**, **resize**, or **delete** it. - ![Widget menu](/screenshot/product/observe/dashboard/5.png) - - +## Widget options + +| Option | Values | +|---|---| +| **Chart type** | Line, stacked line, column, stacked column, bar, stacked bar, pie, table, metric (single value). | +| **Metric** | Span count, error count, latency, token usage, cost, eval scores, and more. | +| **Aggregation** | Sum, average, median, count, distinct count, min, max. | +| **Granularity** | Minute, hour, day, week, month (adjusts to the time range). | +| **Group by** | Break a metric down by a span attribute (model, user, status). | +| **Time range** | 30 minutes, 6 hours, Today, Yesterday, 7D, 30D, 3M, 6M, 12M, or custom. | + +Pick aggregation and granularity together: *average latency per hour* and *sum of cost per day* ask different questions of the same traces. + +--- + +## Edge cases and limits - - Dashboards are scoped to your organization and project. All team members with access to the project can view and edit dashboards. - +- Dashboards visualize **aggregates**; they don't drill to a single span — click through to the [trace explorer](/docs/observe/features/llm-tracing) for that. +- Numbers reflect the **selected time range, granularity, and filters** — if a number looks wrong, check those three first. +- Very long ranges at fine granularity return more data and render more slowly. --- -## Next Steps +## Related features - - Connect the SDK and start capturing traces. + + Turn a dashboard metric into a threshold notification. - - Run evaluations on your traced spans to score quality. + + Drill from a trend down to the request behind it. - - Group traces into sessions for multi-turn analysis. 
+ + Produce the eval scores you can chart here. - - Get notified when metrics cross a threshold. + + Break metrics down per end user. diff --git a/src/pages/docs/observe/features/evals.mdx b/src/pages/docs/observe/features/evals.mdx index 24225eca..6d958c14 100644 --- a/src/pages/docs/observe/features/evals.mdx +++ b/src/pages/docs/observe/features/evals.mdx @@ -1,95 +1,142 @@ --- -title: "Run Evals on Traces in Future AGI Observe" -description: "Run automated quality checks on traced spans in Observe: filter spans, choose historic or continuous runs, set sampling, and attach preset or custom evals." +title: "Running Evals" +description: "Score traced spans for hallucination, tone, bias, and more — filter which spans, run historically or continuously, sample to control cost, and read results per span." +page_type: "feature-deep-dive" +products: ["fi.evals", "traceAI"] +feature: "Production eval overlays" +feature_status: "stable" +ui_surfaces: ["Observe > Evals"] +audience: "engineer" +difficulty: "intermediate" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +last_screenshotted: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "run evals on production llm traces" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/traces" + reference: "/docs/evaluation/builtin" + feature: "/docs/observe/features/alerts" --- ## About -Evals run automated quality checks on your production traces, scoring every LLM response for hallucination, tone, bias, toxicity, and more. You configure which checks to run, filter which spans they apply to, and choose whether to evaluate historical data or new spans as they arrive. Results appear per span in the Observe dashboard and can trigger alerts when quality drops. - -{/* ARCADE EMBED START */} - -
-{/* ARCADE EMBED END */} +Evals run automated quality checks on your production traces, scoring each response for hallucination, tone, bias, toxicity, and more. You pick which checks to run, filter which spans they apply to, and choose whether to score existing traffic (historical) or new spans as they arrive (continuous). Scores attach to the span in the [trace explorer](/docs/observe/features/llm-tracing) and can feed [alerts](/docs/observe/features/alerts) when quality drops. --- ## When to use -- **Scoring production output quality**: Run historic evals after a release to check for hallucinations, bias, or unsafe content across real traffic. -- **Catching regressions in production**: Set up a continuous eval task so new spans are scored automatically and you see quality drops before users report them. -- **Spot-checking a specific time window**: Filter by date range or session to evaluate only the spans from an incident or a specific user flow. -- **Controlling eval cost**: Use sampling rate and span limits to evaluate a representative subset instead of every span. -- **Running multiple quality checks at once**: Attach several evals to one task so each span gets scored for tone, safety, and accuracy in a single run. +- **Score production quality** — after a release, run a historical eval for hallucinations, bias, or unsafe content across real traffic. +- **Catch regressions automatically** — a continuous task scores new spans so you see quality drops before users report them. +- **Investigate an incident** — filter by date range or session to evaluate only the spans from one window or flow. +- **Run several checks at once** — attach multiple evals so each span is scored for tone, safety, and accuracy in one run. + +--- + +## When not to use + +- **A one-off check while developing** — run an eval inline at the span; see [in-line evaluations](/docs/observe/features/manual-tracing/in-line-evals). 
+- **Watching a score trend over time** — chart it on a [Dashboard](/docs/observe/features/dashboard). +- **Defining a new evaluator** — that lives in the evaluation section; this page only *runs* evals against traces. --- -## How to +## What you can evaluate: span, trace, and session + +An eval can run at three levels, depending on what you want to judge: + +- **Span level** — scores a single step on its own: one LLM call, one tool call, one retrieval. Use it to check the quality of an individual operation, such as whether a generation was faithful or whether a tool received the right arguments. +- **Trace level** — scores one whole request end to end. The eval looks at the agent's internal steps and how they produced the final output, so you can catch issues that only appear across steps, like a hallucination introduced mid-chain or a tool-calling mistake. At this level the eval can read any span's data through `spans.0.*`, `spans.1.*`, … paths (the same attribute keys you see in a span's attribute section), so you can point it at the exact field to judge. +- **Session level** — scores a full multi-turn conversation between a user and your agent. Instead of one request, the eval looks at the whole exchange: how the agent handled several inputs in one conversation, and whether it stayed consistent — in both its answers and its actions — across turns. Use it for conversation-level behavior that no single trace captures. + +You pick the level by where you launch the eval from: the **Traces** view for span- and trace-level evals, the **Sessions** view for session-level. + +### Adding context to an eval + +When you configure an eval, you can attach **context** — extra trace or span data the evaluator sees beyond the field being scored. For example, you can hand a faithfulness eval the documents a retriever span returned, so it can judge the answer against the right source. Context lets an eval reason about one step in light of the rest of the request. 
+ +--- + +## How to run evals - Define filters so the task runs only on the spans you care about. + Filter so the task runs only on the spans you care about. - ![Set filters](/images/docs/observe/1.png) + Eval task configuration showing span filters, run type, and sampling + *Scope the task before you run it — filtering down keeps cost and runtime predictable.* | Filter | Description | - |--------|-------------| - | `observation_type` | Node/span type (e.g. `llm`, `chain`, `agent`). | - | `date_range` | Time range: `[start_date, end_date]` applied to `created_at`. | - | `created_at` | Minimum creation time (spans at or after this value). | + |---|---| + | `observation_type` | Span type (`llm`, `chain`, `agent`, …). | + | `date_range` | `[start_date, end_date]` applied to `created_at`. | + | `created_at` | Minimum creation time. | | `project_id` | Restrict to a specific Observe project. | - | `session_id` | Restrict to traces in a given session. | - | `span_attributes_filters` | List of span-attribute conditions. | + | `session_id` | Restrict to one session. | + | `span_attributes_filters` | A list of span-attribute conditions. | + - Filters are stored in the task's `filters` field and applied when the task runs. + + - **Historical** — runs on existing rows matching the filters, up to the sampling rate and row limit, then completes. + - **Continuous** — runs on new spans as they arrive; each run processes only spans created since the last, and the task stays active. - - Set the **run type**: + + - **`sampling_rate`** — the percentage of matching rows to evaluate (0–100). + - **`spans_limit`** — the maximum number of matching rows the task processes (**default 1000**), shown in the UI as **Row limit**. The run stops at whichever cap is hit first. + - ![Choose run type](/images/docs/observe/2.png) + + Attach one or more eval configs. For evals that need an input (e.g. 
Bias Detection), set the **input key** to a span-attribute path like `gen_ai.output.messages.0.message.content` so the eval reads the right field. Then run the task. - - **Historical**: Run on existing spans matching the filters, up to the sampling cap and span limit. The task completes after processing. - - **Continuous**: Run on new spans as they arrive. Each run only processes spans created after the last run; the task stays active for ongoing evaluation. + Selecting eval templates to attach to a task and running it + *Attach the checks, map their inputs, run. See [built-in evals](/docs/evaluation/builtin) for what each one needs.* + - - ![Set sampling rate and span limit](/images/docs/observe/3.png) +--- - - **sampling_rate**: Percentage of matching spans to evaluate (0-100). For example, `50` evaluates 50% of filtered spans per run. - - **spans_limit**: Maximum number of spans to process per run (default 1000). The task stops when either the sampled count or this limit is reached. - +## Outputs and interpretation - - Attach one or more eval configs to the task. The task runs each selected eval on every span it processes. For evals that need an input (e.g. Bias Detection), set the **input key** to a span attribute path (e.g. `gen_ai.output.messages.0.message.content`) so the eval reads the right field from each span. See [built-in evals](/docs/evaluation/builtin) for supported evaluations and their required inputs. - +Scores attach to each evaluated span and appear under the span's **Evals** tab in the trace explorer; you can also alert on them. A task moves through these statuses: - - ![run](/images/docs/observe/4.png) +| Status | Meaning | +|---|---| +| `pending` | Created, not yet started. | +| `running` | Processing spans. | +| `completed` | Historical run finished. | +| `paused` | Temporarily stopped. | +| `failed` | The run errored. | +| `deleted` | Removed. | - Create or update the eval task via the API or UI, then run it. 
You can test the configuration before saving. Task status values: `pending`, `running`, `completed`, `failed`, `paused`, `deleted`. Results appear on the spans in the Observe dashboard and can be used for alerts. - - +--- + +## Edge cases and limits - - Eval tasks are processed asynchronously. Status and results update as runs complete. For continuous tasks, new spans are picked up on subsequent runs. - +- Eval tasks are **asynchronous** — status and results update as runs complete. +- **Continuous** tasks only pick up spans created after their last run; historical spans need a separate historical task. +- Model-based evals call a judge model, so the sampling rate and row limit directly control how much you spend — start small on real traffic. --- -## Next Steps +## Related features - - Connect the SDK and start capturing traces. + + Alert when an eval score crosses a threshold. - - Group traces into sessions for multi-turn analysis. + + Score a response inside a span as it runs. - - View activity and metrics per end user. + + The available checks and their required inputs. - - Get notified when metrics cross a threshold. + + Read eval scores attached to a span. diff --git a/src/pages/docs/observe/features/llm-tracing.mdx b/src/pages/docs/observe/features/llm-tracing.mdx new file mode 100644 index 00000000..ee691a6b --- /dev/null +++ b/src/pages/docs/observe/features/llm-tracing.mdx @@ -0,0 +1,200 @@ +--- +title: "Trace Explorer: Inspect Production AI Requests" +description: "Use the Observe trace explorer to inspect every production AI request — read the span tree, check latency, cost, and errors, and open any step's input and output." 
+page_type: "feature-deep-dive" +products: ["traceAI"] +feature: "Trace explorer" +feature_status: "stable" +ui_surfaces: ["Observe > Tracing"] +audience: "engineer" +difficulty: "beginner" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +last_screenshotted: "2026-05-26" +schema_type: "TechArticle" +seo: + primary_query: "view llm traces in production" +geo: + direct_answer: true +canonical: "/docs/observe/features/llm-tracing" +related: + concept: "/docs/tracing/concepts/traces" + reference: "/docs/tracing/concepts/spans" + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" +--- + +## About + +The trace explorer is the page in Observe where you read every request your AI app handled. A **trace** is the complete record of one request — the user input, every model and tool call made along the way, the final output, the latency and token cost of each step, and whether anything failed. The explorer lists those traces newest-first and lets you open any one to see the exact steps inside it. Use it when you need to debug a specific request, find slow or failing requests, or pull real production data into a dataset. + +Observe trace explorer listing production traces with name, input, output, status, latency, and token columns +*The trace list, newest request first. Each row is one full request; the Status and Latency columns are the fastest way to spot what broke or ran slow.* + +--- + +## When to use + +- **Something went wrong for one user** — find their exact request and read what the AI did, step by step. +- **The app feels slow** — sort by the Latency column to see which requests, and which steps inside them, take the longest. +- **You're chasing an error pattern** — show only failed requests and look for what they share (a model, a tool, an input shape). +- **You're reviewing how an agent decided** — open the span tree or Agent Graph to follow the full decision path. 
+- **You need real data for testing** — select traces in bulk and move them into a dataset for evals or fine-tuning. + +--- + +## When not to use + +The trace explorer is for inspecting individual requests. Reach for a different surface when: + +- **You want aggregate trends** (error rate over time, p95 latency, daily cost) — build a [Dashboard](/docs/observe/features/dashboard) instead; the explorer shows requests, not roll-ups. +- **You need to change what a span captures** — that is instrumentation, not inspection. See [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). +- **You want a quality score on every response at scale** — run [Evals](/docs/observe/features/evals); the explorer only displays scores that already exist. +- **You're grouping by conversation or end user** — use the [Sessions](/docs/observe/features/session) or [Users](/docs/observe/features/users) views. + +--- + +## How it works internally + +Your app sends spans to FutureAGI through traceAI. A span is one operation — a model call, a tool call, a retrieval step. Spans that belong to the same request share a trace ID, and Observe reassembles them into one trace: a tree with the top-level request at the root and each step nested underneath. The explorer reads that tree. + +|traceAI spans| B[Observe ingest] + B -->|group by trace ID| C[Span tree per request] + C --> D[Trace list rows] + D -->|open a row| E[Detail panel: span tree + step details]`} /> + +Traces appear within seconds of a request finishing. A short-lived script must flush its spans before it exits, or the last trace may never arrive — see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing) for the flush step. + + + Auto-refresh (in the header) re-checks for new traces every 10 seconds, and the date picker defaults to the past 7 days. If a request you recently sent is missing, widen the time window and confirm auto-refresh is on before assuming the trace was dropped. 
+ + +Date-range dropdown showing Today, Yesterday, Past 7D / 30D / 3M / 6M / 12M, and Custom range +*The date-range picker (top-right) controls how far back the list and graphs look.* + +--- + +## The trace list + +The **Trace** tab is the default view — one row per request. The columns are how you scan for trouble without opening anything: + +| Column | What it tells you | +|---|---| +| **Trace Name** | The name of the top-level task, e.g. `support_agent.run`. | +| **Input** | A preview of what the user sent. | +| **Output** | A preview of what your AI replied. | +| **Timestamp** | When the request happened. | +| **Status** | **OK** (green) or **ERROR** (red — a step failed). | +| **Latency** | Total time from request to response. | +| **Tokens** | Total tokens used across the whole request. | + + + In a **voice project** the columns differ — Call Details, Status, Duration, Avg Latency, Turn Count, and Tokens — because calls are measured per turn, not per text exchange. See [Voice observability](/docs/observe/features/voice). + + +--- + +## Filtering + +Click **Filter** to narrow the trace list. There are three modes: + +Filter panel with the AI search bar, Basic and Query tabs, and the property list +*The filter panel. Stack several Basic conditions and they apply together (AND).* + +- **AI search** — describe what you want in plain English (*"errors on gpt-4o today"*) and the filter is built for you. +- **Basic** — pick a property, a condition, and a value; add as many as you need. +- **Query** — write a filter expression directly. + +You can filter on Trace Name, Status, Model, Span Kind, User ID, Provider, eval scores, and more — see [Trace filter syntax](/docs/observe/reference/trace-filter-syntax) for the full list. + +--- + +## Opening a trace: the detail panel + +Click any row and a panel slides in, split into two sides. Use the ↑ ↓ buttons to move between traces without closing it. 
+ +Trace detail panel with the span tree on the left and the selected step's input, output, and timing on the right +*Left: the ordered steps the AI took. Right: everything about the step you clicked. Start at the top of the tree and walk down until a step's timing or output looks wrong.* + +**Left — the span tree.** Every step, in order, each with its name, duration, and pass/fail state. A support agent might show `llm.intent_classification` → `tool.check_order_status` → `llm.response_generation`. Click any step to inspect it. + +**Right — the step details.** For the selected step: + +- **Header**: type, status, start time, duration, prompt/completion/total tokens, and cost. LLM steps also show the model, e.g. `gpt-4o`. +- **Preview**: the exact input and output text, plus the full attribute list (model, provider, token counts). +- **Log View**: raw logs for the step. +- **Evals**: any quality scores attached to the step. +- **Annotations**: notes a human reviewer added. +- **Events**: anything that fired during the step. + +--- + +## Graph views + +Above the list, three views reshape the same data — switch with the **Display** panel. + +Display panel open on the Graph View tab with Rows, Columns, Metrics, Group, and Graph sections +*The Display panel controls the graph at the top and the columns below. "Set default for everyone" saves the layout for the whole team.* + +| View | What you see | +|---|---| +| **Graph View** | Latency and request volume over time — good for spotting spikes. | +| **Agent Graph** | How the AI's steps connect — useful for complex agent flows. | +| **Agent Path** | The same flow drawn as paths rather than a graph. | +--- + +## Bulk actions + +Tick the checkboxes on the left of each row to act on many traces at once; a toolbar shows how many are selected. + +Bulk action toolbar showing five selected traces and the Actions menu with Move to dataset, Add tags, and Add to annotation queue +*Select traces, then Actions. 
"Move to dataset" is how raw production requests become test or fine-tuning data.* + +| Action | What it does | +|---|---| +| **Move to dataset** | Saves the selected traces to a dataset for testing or fine-tuning. | +| **Add tags** | Labels all selected traces at once — useful for grouping by issue type. | +| **Add to annotation queue** | Sends them to a human-review queue for scoring. | + +--- + +## Saved views + +Set up the filters and columns you want, then click the **+** in the top-right to save them as a named view. Edit a saved view and a **Save view** button appears to update it. Saved views are shared across the project — anyone on the team can open them. + +--- + +## Performance, cost, and limits + +| Behavior | Detail | +|---|---| +| **Trace freshness** | Traces appear within seconds; auto-refresh polls every 10 seconds. | +| **Default time window** | Past 7 days; selectable up to the past 12 months or a custom range. | +| **Export** | The download icon exports the current view — the active filters and time range. | +| **Retention** | Trace retention depends on your plan. | + + + Span input and output can contain customer data. If you redact at the SDK with `TraceConfig` or the `FI_HIDE_*` environment variables, those fields show as hidden here — that is expected, not a missing trace. See [Mask span attributes](/docs/observe/features/manual-tracing/mask-span-attributes). + + +--- + +## Related features + + + + Group traces into multi-turn conversations and read per-session metrics. + + + See traces, sessions, and cost broken down per end user. + + + Attach automated quality scores to production traces. + + + Instrument your app so requests show up here in the first place. 
+ + diff --git a/src/pages/docs/observe/features/manual-tracing/add-attributes-metadata-tags.mdx b/src/pages/docs/observe/features/manual-tracing/add-attributes-metadata-tags.mdx index 6ba8979d..03dac5d4 100644 --- a/src/pages/docs/observe/features/manual-tracing/add-attributes-metadata-tags.mdx +++ b/src/pages/docs/observe/features/manual-tracing/add-attributes-metadata-tags.mdx @@ -1,6 +1,24 @@ --- -title: "Enriching Spans with Attributes, Metadata, and Tags" -description: "Enrich spans with custom attributes, metadata, tags, session IDs, user IDs, and prompt templates beyond what standard auto-instrumentation captures." +title: "Enrich Spans with Attributes, Metadata, and Tags" +description: "Add custom attributes, metadata, tags, session and user IDs, and prompt templates to spans using set_attribute, semantic conventions, or context helpers." +page_type: "how-to" +products: ["traceAI"] +task_intent: "Attach custom data to spans" +audience: "engineer" +difficulty: "intermediate" +time_to_complete: "10 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "add custom attributes to spans" +geo: + direct_answer: true +related: + reference: "/docs/observe/features/manual-tracing/semantic-conventions" + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + concept: "/docs/tracing/concepts/spans" --- ## About @@ -19,6 +37,13 @@ A trace with only timing and status tells what happened, but not why. Without at --- +## Prerequisites + +- Tracing set up — see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). +- The attribute keys you want to set — prefer the [semantic conventions](/docs/observe/features/manual-tracing/semantic-conventions) so spans stay queryable. + +--- + ## How to @@ -460,6 +485,25 @@ A trace with only timing and status tells what happened, but not why. 
Without at --- +## Verify + +Set an attribute, run a request, then open the trace: + +- The attribute appears in the span's **attributes** list. +- You can **filter** the trace list by it (for standard keys). + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Attribute not on the span | Set after the span closed, or on the wrong span. | Set it while the span is active (inside its block). | +| Can't filter by the attribute | Used a custom key the UI doesn't index. | Use a [semantic-convention](/docs/observe/features/manual-tracing/semantic-conventions) key where one exists. | +| Value dropped | Unsupported value type. | Use string, bool, int, float, or an array of those. | + +--- + ## Key concepts - **`set_attribute()`**:Attaches a key/value pair directly to the active span. Supports strings, numbers, and booleans. Prefix custom attributes with your company name to avoid naming conflicts. diff --git a/src/pages/docs/observe/features/manual-tracing/add-events-exceptions-status.mdx b/src/pages/docs/observe/features/manual-tracing/add-events-exceptions-status.mdx index c1ba95ba..99d43a7c 100644 --- a/src/pages/docs/observe/features/manual-tracing/add-events-exceptions-status.mdx +++ b/src/pages/docs/observe/features/manual-tracing/add-events-exceptions-status.mdx @@ -1,6 +1,24 @@ --- -title: "Integrate Events, Exceptions, and Status into Spans" -description: "Add OpenTelemetry events, exceptions, and status codes to spans in Future AGI to capture structured lifecycle information and error diagnostics." +title: "Add Events, Exceptions, and Status to Spans" +description: "Mark key moments with events, flag failures with an ERROR status, and attach full exception details to spans so errors are visible in traces and alerts." 
+page_type: "how-to" +products: ["traceAI"] +task_intent: "Record events, status, and exceptions on a span" +audience: "engineer" +difficulty: "intermediate" +time_to_complete: "5 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "span events exceptions status opentelemetry" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/spans" + how_to: "/docs/observe/features/manual-tracing/get-current-span-context" + feature: "/docs/observe/features/llm-tracing" --- ## About @@ -21,6 +39,13 @@ Spans capture timing and attributes, but they do not automatically record what h --- +## Prerequisites + +- Tracing set up — see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). +- Code running inside an active span, so `get_current_span()` returns a real span. + +--- + ## How to @@ -157,6 +182,25 @@ Spans capture timing and attributes, but they do not automatically record what h --- +## Verify + +Trigger the span on both a success and a failure path, then open the trace: + +- Events appear on the span's **Events** tab in the detail panel, with timestamps. +- A failed run shows **ERROR** status on the span, with the exception (type, message, stack trace) recorded under Events. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Events or status don't show | `get_current_span()` returned a no-op span. | Guard with `is_recording()` and run inside an active span. | +| Error isn't visible in the trace list | Status left at the default. | Call `set_status(Status(StatusCode.ERROR, ...))` on failure. | +| Stack trace missing | `record_exception()` wasn't called. | Pair `record_exception(ex)` with the ERROR status. | + +--- + ## Key concepts - **`add_event()` / `addEvent()`**:Attaches a timestamped message to the span at the moment it's called. Useful for logging discrete actions without creating a new span. 
diff --git a/src/pages/docs/observe/features/manual-tracing/advanced-tracing-examples.mdx b/src/pages/docs/observe/features/manual-tracing/advanced-tracing-examples.mdx index d3ba6745..741da8ff 100644 --- a/src/pages/docs/observe/features/manual-tracing/advanced-tracing-examples.mdx +++ b/src/pages/docs/observe/features/manual-tracing/advanced-tracing-examples.mdx @@ -1,6 +1,24 @@ --- -title: "Advanced OTEL Tracing: Context, Decorators, and Sampling" -description: "Explore manual context propagation, custom decorators, and sampling techniques for real-world async, multi-service, and high-volume tracing scenarios." +title: "Advanced Tracing: Context, Decorators, and Sampling" +description: "Manual context propagation, custom decorators, and sampling for async, multi-service, and high-volume tracing scenarios with OpenTelemetry and traceAI." +page_type: "how-to" +products: ["traceAI"] +task_intent: "Apply advanced tracing patterns for async, multi-service, and sampling" +audience: "engineer" +difficulty: "advanced" +time_to_complete: "15 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "advanced opentelemetry tracing async sampling" +geo: + direct_answer: true +related: + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + concept: "/docs/tracing/concepts/spans" + feature: "/docs/observe/features/llm-tracing" --- ## About @@ -19,6 +37,13 @@ Basic span creation works for synchronous, single-service code. But real applica --- +## Prerequisites + +- Comfortable with [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing) and manual spans. +- An async, multi-service, or high-volume scenario where default tracing isn't enough. + +--- + ## How to @@ -661,6 +686,25 @@ Basic span creation works for synchronous, single-service code. 
But real applica --- +## Verify + +Run the scenario once, then open the trace: + +- Spans created across async boundaries or services share the **same trace** (correct parent/child links). +- With sampling on, the expected fraction of traces is recorded. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Spans split across multiple traces | Context wasn't propagated across the async/service boundary. | Propagate context explicitly (carrier inject/extract) as shown. | +| Everything or nothing sampled | Sampler misconfigured. | Check the sampler ratio and that it's set on the provider. | +| Lost spans under load | Exporter queue saturated. | Tune the batch processor, or reduce sampling. | + +--- + ## Key concepts - **`attach()` / `detach()`**:Python functions to manually bind a captured context to the current thread or async task. Always call `detach(token)` in a `finally` block to avoid context leaks. diff --git a/src/pages/docs/observe/features/manual-tracing/annotating-using-api.mdx b/src/pages/docs/observe/features/manual-tracing/annotating-using-api.mdx index b2d1d2b2..c0e56359 100644 --- a/src/pages/docs/observe/features/manual-tracing/annotating-using-api.mdx +++ b/src/pages/docs/observe/features/manual-tracing/annotating-using-api.mdx @@ -1,6 +1,24 @@ --- -title: "Adding Annotations to Spans Using the Bulk API" -description: "Label spans with custom tags, human feedback, and notes using the Future AGI bulk-annotation API for systematic trace enrichment." +title: "Annotate Spans with the Bulk API" +description: "Label spans with tags, human feedback, and notes using the FutureAGI bulk-annotation API for systematic trace enrichment and review." 
+page_type: "how-to" +products: ["traceAI"] +task_intent: "Add annotations to spans via the bulk API" +audience: "engineer" +difficulty: "intermediate" +time_to_complete: "10 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "annotate spans bulk api" +geo: + direct_answer: true +related: + feature: "/docs/observe/features/llm-tracing" + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + concept: "/docs/tracing/concepts/spans" --- @@ -22,6 +40,13 @@ Traces show what happened but not whether the result was correct, helpful, or sa --- +## Prerequisites + +- Existing spans in a project to annotate. +- `FI_API_KEY` and `FI_SECRET_KEY` for API access. + +--- + ## How to @@ -275,6 +300,25 @@ Traces show what happened but not whether the result was correct, helpful, or sa --- +## Verify + +Send one annotation request, then open the annotated span: + +- The tag/feedback/note appears under the span's **Annotations** tab. +- A successful call returns a 2xx with the updated annotation record. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Annotation not visible | Wrong span or trace ID. | Confirm the IDs match the target span. | +| 401 / 403 | Missing or wrong keys. | Send `FI_API_KEY`/`FI_SECRET_KEY` for this workspace. | +| Looking for managed review | This is the bulk API, not the queue. | Use the [Annotations](/docs/annotations) system for queues and the Scores API. 
| + +--- + ## Key concepts **Response object** diff --git a/src/pages/docs/observe/features/manual-tracing/create-tool-spans.mdx b/src/pages/docs/observe/features/manual-tracing/create-tool-spans.mdx index c78dd39f..f7b75d22 100644 --- a/src/pages/docs/observe/features/manual-tracing/create-tool-spans.mdx +++ b/src/pages/docs/observe/features/manual-tracing/create-tool-spans.mdx @@ -1,6 +1,24 @@ --- -title: "Tool Spans Creation: Manually Trace Function Calls" -description: "Manually trace tool functions alongside LLM calls by creating spans that capture inputs, outputs, and key events in Future AGI." +title: "Create Tool Spans Manually" +description: "Trace a tool call as a TOOL span with its function name, arguments, and output, and nest the LLM response span underneath to see the full chain." +page_type: "how-to" +products: ["traceAI"] +task_intent: "Trace a tool/function call and its nested LLM response" +audience: "engineer" +difficulty: "intermediate" +time_to_complete: "10 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "trace tool function calls opentelemetry" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/spans" + reference: "/docs/observe/features/manual-tracing/semantic-conventions" + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" --- ## About @@ -17,6 +35,13 @@ LLM agents often call external tools (APIs, databases, code interpreters), but t --- +## Prerequisites + +- Tracing set up with a tracer — see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). +- A tool or function your agent calls that you want visible in traces. + +--- + ## How to @@ -88,7 +113,7 @@ LLM agents often call external tools (APIs, databases, code interpreters), but t # Ensure 'tracer' is defined from the setup section above. 
# Ensure 'openai_client' is defined, e.g., from openai library - # Placeholder definitions for the example + # Example definitions for this snippet question = "What is the weather like in London?" def example_tool_function(input_args): print(f"Tool received: {input_args}") @@ -150,10 +175,10 @@ LLM agents often call external tools (APIs, databases, code interpreters), but t // import OpenAI from 'openai'; // const openaiClient = new OpenAI(); // Example // const model_version_ts = "gpt-4o"; - // const current_user_message_ts = [{ role: "user", content: "Placeholder" }]; + // const current_user_message_ts = [{ role: "user", content: "example" }]; // const TEMPERATURE_ts = 0.7; - // Placeholder definitions for the example + // Example definitions for this snippet const questionTs = "What is the weather like in Berlin?"; interface ToolArgs { city: string; } const exampleToolFunctionTs = async (inputArgs: ToolArgs): Promise => { @@ -227,6 +252,26 @@ LLM agents often call external tools (APIs, databases, code interpreters), but t --- +## Verify + +Run the tool function once, then open the trace in Observe: + +- A **TOOL** span appears with the function name, arguments, and output attributes. +- An **LLM** span is nested **underneath** it, showing the model input and output. +- The parent/child nesting matches the call order. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| LLM span isn't nested under the tool span | The LLM span was started outside the tool span's active scope. | Create the LLM span **inside** the tool span's `with` / `startActiveSpan` block. | +| Tool attributes missing when the tool fails | Attributes were set after the tool raised. | Set `fi.span.kind`, name, and arguments **before** invoking the tool, as the example does. | +| Spans don't appear | A short script exited before the exporter flushed. | Call `force_flush()` before exit, or use a real `register()` provider instead of the console exporter. 
| + +--- + ## Key concepts - **`fi.span.kind: "TOOL"`**:Marks the span as a tool call so it renders with the correct icon and label in the Future AGI dashboard. diff --git a/src/pages/docs/observe/features/manual-tracing/get-current-span-context.mdx b/src/pages/docs/observe/features/manual-tracing/get-current-span-context.mdx index d4157bcf..e0e6488e 100644 --- a/src/pages/docs/observe/features/manual-tracing/get-current-span-context.mdx +++ b/src/pages/docs/observe/features/manual-tracing/get-current-span-context.mdx @@ -1,6 +1,24 @@ --- -title: "Get Current Tracer and Span: Access Active Context" -description: "Access the active span or tracer at any point in your code to enrich traces with additional attributes and context in Future AGI." +title: "Get the Current Span or Tracer" +description: "Access the active span or tracer from anywhere in your code to add attributes or start child spans, without passing references through every function." +page_type: "how-to" +products: ["traceAI"] +task_intent: "Read the active span or tracer deep in the call stack" +audience: "engineer" +difficulty: "intermediate" +time_to_complete: "5 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "get current span opentelemetry" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/spans" + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + feature: "/docs/observe/features/llm-tracing" --- ## About @@ -20,6 +38,13 @@ Spans and tracers are usually created at the top of a request, but the functions --- +## Prerequisites + +- Tracing already set up — see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). +- Code running **inside an active span** (an auto-instrumented request or a span you started), so there is a current span to read. 
+ +--- + ## How to @@ -122,6 +147,24 @@ Spans and tracers are usually created at the top of a request, but the functions --- +## Verify + +From a helper function, set an attribute on the current span (or start a child span), then run one request and open the trace: + +- The attribute appears on the active span's **attributes** list in the detail panel. +- A tracer-created span shows up as a **child** under the active request. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Attributes don't appear | `get_current_span()` returned a no-op span — nothing was active. | Call it inside an active span (within a `start_as_current_span` block or an auto-instrumented request). | +| JS: setting an attribute throws | `trace.getSpan(context.active())` returned `undefined`. | Guard with `if (currentSpan)` before setting attributes, as the example does. | + +--- + ## Key concepts - **`trace.get_current_span()`**: Returns the span that is currently active in the context. If no span is active, returns a no-op span. diff --git a/src/pages/docs/observe/features/manual-tracing/in-line-evals.mdx b/src/pages/docs/observe/features/manual-tracing/in-line-evals.mdx index 2aa5b933..65a6f9d5 100644 --- a/src/pages/docs/observe/features/manual-tracing/in-line-evals.mdx +++ b/src/pages/docs/observe/features/manual-tracing/in-line-evals.mdx @@ -1,6 +1,24 @@ --- -title: "In-line Evaluations: Attach Evals to Spans in Future AGI" -description: "Run evaluations directly inside a traced span so results are automatically attached to that span in the Future AGI dashboard." +title: "Run In-line Evaluations on Spans" +description: "Run an eval inside an active span with trace_eval=True so the score attaches to that span, putting trace data and eval result side by side in the dashboard." 
+page_type: "how-to" +products: ["fi.evals", "traceAI"] +task_intent: "Attach an eval result to the active span" +audience: "engineer" +difficulty: "intermediate" +time_to_complete: "10 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "inline eval attach to span" +geo: + direct_answer: true +related: + feature: "/docs/observe/features/evals" + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + concept: "/docs/tracing/concepts/spans" --- ## About @@ -17,6 +35,14 @@ Evaluation results are most useful when they sit next to the data that produced --- +## Prerequisites + +- Tracing set up — see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). +- `fi.evals` available, with `FI_API_KEY` and `FI_SECRET_KEY` set. +- The eval template you want to run (e.g. `groundedness`). + +--- + ## How to @@ -85,6 +111,25 @@ Evaluation results are most useful when they sit next to the data that produced --- +## Verify + +Run the block, then open the trace: + +- The span shows the eval under its **Evals** tab, labelled with your `custom_eval_name`. +- `print(eval_result1)` shows the score and reason in the terminal. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Eval not attached to the span | `evaluate()` ran outside an active span. | Call it inside the `with tracer.start_as_current_span(...)` block. | +| `trace_eval` has no effect | No active span, or the global provider isn't set. | Register with `set_global_tracer_provider=True` and run inside a span. | +| Method or parameter not found | Installed SDK version differs from this example. | Confirm the `fi.evals` `Evaluator.evaluate()` signature for your installed version. | + +--- + ## Key concepts - **`trace_eval=True`**:The essential parameter that enables in-line evaluation. It tells the system to find the current active span and attach the evaluation results to it as span attributes. 
diff --git a/src/pages/docs/observe/features/manual-tracing/instrument-with-traceai-helpers.mdx b/src/pages/docs/observe/features/manual-tracing/instrument-with-traceai-helpers.mdx index 34d4368e..ec909b6e 100644 --- a/src/pages/docs/observe/features/manual-tracing/instrument-with-traceai-helpers.mdx +++ b/src/pages/docs/observe/features/manual-tracing/instrument-with-traceai-helpers.mdx @@ -1,6 +1,24 @@ --- -title: "Instrument with traceAI Helpers for Easier Tracing" -description: "Future AGI's traceAI library offers convenient abstractions that streamline your manual instrumentation process for LLM and agent tracing." +title: "Instrument with traceAI Helpers" +description: "Use FITracer decorators and context managers (@tracer.agent/chain/tool, using_attributes) to create typed spans with less boilerplate than raw OpenTelemetry." +page_type: "how-to" +products: ["traceAI"] +task_intent: "Use FITracer helpers to create typed spans" +audience: "engineer" +difficulty: "intermediate" +time_to_complete: "10 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "fitracer decorators traceai" +geo: + direct_answer: true +related: + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + concept: "/docs/tracing/concepts/spans" + feature: "/docs/observe/features/llm-tracing" --- ## About @@ -19,6 +37,13 @@ Manual tracing with raw OpenTelemetry means writing a lot of setup code for ever --- +## Prerequisites + +- Tracing set up with a `FITracer` — see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). +- A function or block you want to trace as an agent, chain, or tool span. + +--- + ## How to @@ -295,6 +320,25 @@ Manual tracing with raw OpenTelemetry means writing a lot of setup code for ever --- +## Verify + +Run a decorated function once, then open the trace: + +- The span shows the **kind** (agent/chain/tool) you used, with input and output captured. 
+- Nested helpers appear as child spans in the tree. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Input/output not captured | The decorator wasn't applied, or the call ran outside it. | Decorate with `@tracer.agent/chain/tool`, or run inside the context manager. | +| Wrong span kind | Used the generic tracer instead of the typed helper. | Use the matching `FITracer` helper for the operation. | +| No spans | `FITracer` not built from a registered provider. | Create it from `register(...)` as in setup. | + +--- + ## Key concepts - **`FITracer`**: Future AGI wrapper around the standard OTel tracer. Adds `set_input()` / `set_output()` / `set_tool()` on spans, automatic context injection, and typed decorators (`@tracer.chain`, `@tracer.agent`, `@tracer.tool`, `@tracer.llm`, `@tracer.retriever`). diff --git a/src/pages/docs/observe/features/manual-tracing/langfuse-integration.mdx b/src/pages/docs/observe/features/manual-tracing/langfuse-integration.mdx index 331acd61..74c56490 100644 --- a/src/pages/docs/observe/features/manual-tracing/langfuse-integration.mdx +++ b/src/pages/docs/observe/features/manual-tracing/langfuse-integration.mdx @@ -1,6 +1,24 @@ --- -title: "Langfuse Integration with Future AGI Evaluation Results" -description: "Integrate Future AGI evaluations with Langfuse to attach evaluation scores and results directly to your Langfuse traces." +title: "Attach FutureAGI Evals to Langfuse Traces" +description: "Run FutureAGI evaluations and write the scores back onto your Langfuse traces, so eval results live alongside the traces you already collect in Langfuse." 
+page_type: "how-to" +products: ["fi.evals"] +task_intent: "Send FutureAGI eval scores to Langfuse traces" +audience: "engineer" +difficulty: "intermediate" +time_to_complete: "10 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "futureagi evals langfuse integration" +geo: + direct_answer: true +related: + feature: "/docs/observe/features/evals" + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + concept: "/docs/tracing/concepts/traces" --- ## About @@ -17,6 +35,13 @@ Langfuse provides tracing but does not have a built-in evaluation engine. This i --- +## Prerequisites + +- A Langfuse project with traces, and its API keys. +- `fi.evals` available with `FI_API_KEY` and `FI_SECRET_KEY`. + +--- + ## How to @@ -108,6 +133,23 @@ Langfuse provides tracing but does not have a built-in evaluation engine. This i --- +## Verify + +Run an eval and push the score, then open the trace in Langfuse: + +- The FutureAGI score appears as a score on the matching Langfuse trace. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Score not on the Langfuse trace | Trace/observation ID mismatch. | Map the eval result to the correct Langfuse trace ID. | +| Auth error | Wrong Langfuse or FutureAGI keys. | Recheck both sets of credentials. | + +--- + ## Key concepts - **`platform="langfuse"`**:The essential parameter that directs evaluation results to Langfuse and links them with the current active span. 
diff --git a/src/pages/docs/observe/features/manual-tracing/log-prompt-templates.mdx b/src/pages/docs/observe/features/manual-tracing/log-prompt-templates.mdx index c2588f61..5971f0ae 100644 --- a/src/pages/docs/observe/features/manual-tracing/log-prompt-templates.mdx +++ b/src/pages/docs/observe/features/manual-tracing/log-prompt-templates.mdx @@ -1,6 +1,24 @@ --- -title: "Logging Prompt Templates and Variables in Future AGI Spans" -description: "Attach prompt template data to spans so Future AGI can surface it in the prompt playground for testing changes without deploying." +title: "Log Prompt Templates and Variables" +description: "Attach prompt template name, version, and variables to spans so FutureAGI surfaces them in the prompt playground for editing and re-running without a deploy." +page_type: "how-to" +products: ["traceAI"] +task_intent: "Attach prompt template data to spans" +audience: "engineer" +difficulty: "intermediate" +time_to_complete: "5 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "log prompt templates llm tracing" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/spans" + how_to: "/docs/observe/features/manual-tracing/add-attributes-metadata-tags" + feature: "/docs/observe/features/llm-tracing" --- ## About @@ -17,6 +35,13 @@ LLM outputs depend entirely on the prompt, but the prompt itself is not captured --- +## Prerequisites + +- Tracing set up — see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). +- A prompt template (name/version) you want to surface in the playground. + +--- + ## How to @@ -83,6 +108,25 @@ LLM outputs depend entirely on the prompt, but the prompt itself is not captured --- +## Verify + +Run a request inside the `using_attributes` (or `using_prompt_template`) block, then open the trace: + +- The span carries the prompt template attributes (name/label/version and variables). 
+- The template appears in the **prompt playground**, where you can edit variables and re-run. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Template not on the span | The LLM call ran outside the `using_*` block. | Make the model call **inside** the context-manager block. | +| Variables missing | Used `using_attributes` name-only. | Use `using_prompt_template` to attach the raw template, version, and variables. | +| Nothing in the playground | The instrumentor wasn't attached. | Confirm `OpenAIInstrumentor().instrument(...)` ran before the call. | + +--- + ## Key concepts - **`using_attributes`**: Context manager that enriches the current OpenTelemetry context with prompt template fields. All spans created by auto-instrumentors within the block carry the template data as span attributes. diff --git a/src/pages/docs/observe/features/manual-tracing/mask-span-attributes.mdx b/src/pages/docs/observe/features/manual-tracing/mask-span-attributes.mdx index 3865a1ca..484c1c5e 100644 --- a/src/pages/docs/observe/features/manual-tracing/mask-span-attributes.mdx +++ b/src/pages/docs/observe/features/manual-tracing/mask-span-attributes.mdx @@ -1,6 +1,24 @@ --- title: "Mask Span Attributes: Redact Sensitive Trace Data" -description: "Redact sensitive inputs, outputs, images, and embeddings from spans before export, using environment variables or TraceConfig in code." +description: "Redact inputs, outputs, messages, images, and embeddings from spans before export, using FI_HIDE_* environment variables or TraceConfig in code." 
+page_type: "how-to" +products: ["traceAI"] +task_intent: "Redact sensitive data from spans before export" +audience: "engineer" +difficulty: "intermediate" +time_to_complete: "5 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "redact pii from llm traces" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/spans" + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + feature: "/docs/observe/features/llm-tracing" --- ## About @@ -18,6 +36,17 @@ Traces often contain sensitive data: user messages, API responses, PII, or large --- +## Prerequisites + +- Tracing set up — see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). +- A decision on what's sensitive: inputs, outputs, messages, images, or embeddings. + + + Environment variables apply at **process start** — already-running workers keep the values they loaded, so restart them to pick up a change. Redaction operates on span **payloads**, not span, trace, or session **names**; never put secrets or PII in those names. + + +--- + ## How to @@ -88,6 +117,25 @@ Traces often contain sensitive data: user messages, API responses, PII, or large --- +## Verify + +Set one flag (e.g. `FI_HIDE_INPUTS=true`), restart the app, run a request, and open the trace: + +- The masked field shows as hidden/redacted in the span detail, not as the raw value. +- Unmasked fields still display normally. A redacted field is expected behavior, not a missing trace. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Data still visible after setting an env var | The worker was already running. | Restart the process; env vars apply at startup. | +| Code and env var disagree | `TraceConfig` takes precedence over env vars. | Set it at one level; precedence is `TraceConfig` > env var > default. | +| Base64 images bloat the payload | Image length not capped. 
| Set `FI_BASE64_IMAGE_MAX_LENGTH` or `hide_input_images`. | + +--- + ## Key concepts - **`TraceConfig`**:An object accepted by all traceAI auto-instrumentors. Use it to specify masking settings directly in code, scoped to a single instrumentor. diff --git a/src/pages/docs/observe/features/manual-tracing/semantic-conventions.mdx b/src/pages/docs/observe/features/manual-tracing/semantic-conventions.mdx index ebbcec89..9d7f7820 100644 --- a/src/pages/docs/observe/features/manual-tracing/semantic-conventions.mdx +++ b/src/pages/docs/observe/features/manual-tracing/semantic-conventions.mdx @@ -1,6 +1,21 @@ --- -title: "FI Semantic Conventions: Standard Span Attribute Keys" -description: "Use standardized attribute keys for spans to ensure consistent, queryable trace data across LLM models, frameworks, and vendors." +title: "Semantic Conventions: Standard Span Attribute Keys" +description: "The standardized attribute keys for spans — span kinds, message, document, embedding, and tool-call attributes — that keep trace data consistent and queryable." +page_type: "reference" +products: ["traceAI"] +audience: "engineer" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "span attribute semantic conventions" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/spans" + how_to: "/docs/observe/features/manual-tracing/add-attributes-metadata-tags" + feature: "/docs/observe/features/llm-tracing" --- ## About @@ -860,7 +875,7 @@ Every LLM provider returns data in a different format. Without a standard set of - **`RerankerAttributes`**: Attribute keys for reranker spans (input/output documents, query, model name, top-k). - **`EmbeddingAttributes`**: Attribute keys for embedding spans (text and vector). - **`ToolCallAttributes`**: Attribute keys for tool call objects generated by an LLM (ID, function name, arguments). 
-- **`FiSpanKindValues`**: Enumeration of valid values for `fi.span.kind`: `LLM`, `CHAIN`, `RETRIEVER`, `RERANKER`, `EMBEDDING`, `AGENT`, `TOOL`, `GUARDRAIL`, `EVALUATOR`, `UNKNOWN`. +- **`FiSpanKindValues`**: Enumeration of valid values for `fi.span.kind`: `LLM`, `CHAIN`, `RETRIEVER`, `RERANKER`, `EMBEDDING`, `AGENT`, `TOOL`, `GUARDRAIL`, `EVALUATOR`, `UNKNOWN`, `CONVERSATION`, `VECTOR_DB`, `A2A_CLIENT`, `A2A_SERVER`. - **Flattening**: OpenTelemetry span attributes must be simple scalar types or flat lists. Nested objects (such as lists of messages) must be flattened with index prefixes like `llm.input_messages.0.message.role`. --- diff --git a/src/pages/docs/observe/features/manual-tracing/set-session-user-id.mdx b/src/pages/docs/observe/features/manual-tracing/set-session-user-id.mdx index 91f4a21f..622011c8 100644 --- a/src/pages/docs/observe/features/manual-tracing/set-session-user-id.mdx +++ b/src/pages/docs/observe/features/manual-tracing/set-session-user-id.mdx @@ -1,6 +1,24 @@ --- -title: "Set Session ID and User ID on Spans in Future AGI" -description: "Add SessionID and UserID as span attributes to group and filter traces by conversation session and end user in Future AGI." +title: "Set Session and User IDs on Spans" +description: "Add session.id and user.id to spans so traces group into conversations and roll up per end user — set them directly or with context helpers." +page_type: "how-to" +products: ["traceAI"] +task_intent: "Attach session and user IDs to spans" +audience: "engineer" +difficulty: "intermediate" +time_to_complete: "5 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "set session id user id spans" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/sessions-and-users" + feature: "/docs/observe/features/session" + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" --- ## About @@ -17,6 +35,13 @@ Traces are isolated by default. 
Without a session or user identifier, there is n --- +## Prerequisites + +- Tracing set up — see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). +- Stable, non-sensitive session and user identifiers (avoid raw PII). + +--- + ## How to @@ -297,6 +322,25 @@ Traces are isolated by default. Without a session or user identifier, there is n --- +## Verify + +Run a request inside the `using_session` / `using_user` block, then open Observe: + +- The trace appears in the [Sessions](/docs/observe/features/session) view under your `session.id`, and under the user in the [Users](/docs/observe/features/users) view. +- Opening the trace shows `session.id` / `user.id` on the span attributes. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| Trace not grouped into a session or user | The model call ran outside the context block. | Make the call **inside** the `using_session` / `using_user` / `using_attributes` block. | +| JS/TS: IDs not propagating | Baggage not set on the active context. | Use `propagation.createBaggage()` + `context.with()` as shown. | +| Empty grouping | An empty-string ID was passed. | IDs must be non-empty strings. | + +--- + ## Key concepts - **`using_session`**:Context manager that adds `session.id` to the OpenTelemetry context. All spans from traceAI auto-instrumentors within the block will carry this attribute. Input must be a non-empty string. diff --git a/src/pages/docs/observe/features/manual-tracing/set-up-tracing.mdx b/src/pages/docs/observe/features/manual-tracing/set-up-tracing.mdx index 20f8fb08..00f04936 100644 --- a/src/pages/docs/observe/features/manual-tracing/set-up-tracing.mdx +++ b/src/pages/docs/observe/features/manual-tracing/set-up-tracing.mdx @@ -1,6 +1,24 @@ --- -title: "Set Up Tracing with Future AGI and OpenTelemetry" -description: "Connect your application to Future AGI by registering a tracer provider and adding instrumentation with auto-instrumentors or manual OpenTelemetry spans." 
+title: "Set Up Tracing with OpenTelemetry" +description: "Connect your app to FutureAGI: register a tracer provider, add an auto-instrumentor or manual OpenTelemetry spans, and confirm traces arrive in your project." +page_type: "how-to" +products: ["traceAI"] +task_intent: "Instrument an application to send traces to FutureAGI" +audience: "engineer" +difficulty: "beginner" +time_to_complete: "10 minutes" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "set up opentelemetry tracing futureagi" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/traces" + reference: "/docs/observe/features/manual-tracing/semantic-conventions" + feature: "/docs/observe/features/llm-tracing" --- ## About @@ -15,7 +33,15 @@ Tracing captures every LLM call, tool invocation, or custom operation in your ap - **Experiment tracking**: Register an Experiment project with eval tags and version names to compare prompt or model changes across runs. - **Custom spans**: Use `FITracer` to manually create spans for operations that auto-instrumentors don't cover. - **Privacy control**: Use `TraceConfig` to redact sensitive inputs, outputs, or messages before they leave your app. -- **Any Python or JS/TS app**: Works with any application via OpenTelemetry. Auto-instrumentors cover 20+ frameworks. +- **Any Python or JS/TS app**: Works with any application via OpenTelemetry. Auto-instrumentors cover a wide range of frameworks — see the [Auto Instrumentation catalog](/docs/tracing/auto). + +--- + +## Prerequisites + +- **Python 3.9+** or **Node 18+**. +- A FutureAGI account with your **`FI_API_KEY`** and **`FI_SECRET_KEY`** from the [dashboard](https://app.futureagi.com/dashboard/keys). +- The core `fi-instrumentation-otel` package plus the instrumentor for your framework (e.g. `traceAI-openai`). 
--- @@ -291,6 +317,31 @@ Tracing captures every LLM call, tool invocation, or custom operation in your ap +--- + +## Verify + +Send one request through your app, then open the project in the FutureAGI dashboard: + +- A new **trace** appears within a few seconds, named after your top-level operation. +- Opening it shows spans with **input**, **output**, **latency**, **model**, and **token count**. +- For an Observe project, find it under **Observe → your project → Tracing**. + +If the trace is there with those fields, instrumentation is working end to end. + +--- + +## Troubleshooting + +| Symptom | Cause | Fix | +|---|---|---| +| No trace appears | A short script exited before the batch exporter flushed. | Call `trace_provider.force_flush()` before the process ends, or set `batch=False` in `register()`. | +| Spans are missing | The instrumentor ran after the framework client was created. | Call `register()` and `instrument()` **before** constructing the client. | +| Auth or connection error | Wrong or missing keys. | Confirm `FI_API_KEY` and `FI_SECRET_KEY` are set for this workspace. | +| gRPC errors | gRPC transport without its dependency. | Install `fi-instrumentation-otel[grpc]`, or use `Transport.HTTP`. | + +--- + ## Key concepts - **`register()`**: Single setup call that configures the OTLP exporter, span processor, and project scope. Returns a `TracerProvider`. diff --git a/src/pages/docs/observe/features/quickstart.mdx b/src/pages/docs/observe/features/quickstart.mdx index f810737d..f054d71d 100644 --- a/src/pages/docs/observe/features/quickstart.mdx +++ b/src/pages/docs/observe/features/quickstart.mdx @@ -1,20 +1,45 @@ --- -title: "Set Up Observability with Future AGI Observe" -description: "Instrument your application and send traces to an Observe project so you can monitor LLM calls, latency, and cost in one place." 
+title: "Quickstart: Send Your First Trace to Observe" +description: "Install traceAI, register an Observe project, run one OpenAI request, and confirm the trace appears in FutureAGI with model, latency, and token cost — in about 5 minutes." +page_type: "quickstart" +products: ["traceAI"] +time_to_complete: "5 minutes" +primary_language: "python" +success_artifact: "trace" +audience: "engineer" +difficulty: "beginner" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +last_screenshotted: "2026-05-26" +schema_type: "HowTo" +api_keys_needed: ["FI_API_KEY", "FI_SECRET_KEY", "OPENAI_API_KEY"] +dashboard_verification_path: "Observe > your project > Tracing" +seo: + primary_query: "send first llm trace to futureagi" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/traces" + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + feature: "/docs/observe/features/llm-tracing" --- ## About -This is how you connect your application to Future AGI so LLM calls are captured in the Observe dashboard. Register a project, instrument your app, and every request appears automatically with its inputs, outputs, cost, latency, and token usage. +Send your first traced request to FutureAGI in about five minutes. You'll install the traceAI instrumentor, register an Observe project, run one normal OpenAI chat completion, and confirm the request shows up in the [trace explorer](/docs/observe/features/llm-tracing) with its model, latency, and token cost. Everything after this — sessions, evals, users, alerts — builds on traces, so this is the page to start from. --- -## When to use +## Prerequisites -- **First-time setup**: Get traces flowing into the Observe dashboard so you can start monitoring production LLM calls. -- **Production monitoring**: See latency, cost, and token usage for every LLM call in one place instead of scraping logs. 
-- **Debugging**: Tie a user report or failure to a specific trace and span so you can reproduce and fix issues. -- **Baseline for other Observe features**: Sessions, evals, user tracking, and alerts all require traces to be set up first. +- **Python 3.9+** (or Node 18+ for the TypeScript path). +- A **FutureAGI account** and your **`FI_API_KEY`** + **`FI_SECRET_KEY`** from the [dashboard keys page](https://app.futureagi.com/dashboard/keys). +- An **`OPENAI_API_KEY`** (this example traces an OpenAI call). + + + Pin packages to the version you test against. The commands below were last verified on **2026-05-25**; add a version (e.g. `traceAI-openai==<version>`) before shipping to CI so a future release can't silently change behavior. + --- @@ -22,7 +47,7 @@ This is how you connect your application to Future AGI so LLM calls are captured - - Install the core instrumentation package and the framework instrumentor for your LLM provider. + Install the core instrumentation package and the instrumentor for your provider. ```bash Python @@ -34,93 +59,71 @@ This is how you connect your application to Future AGI so LLM calls are captured - - Set environment variables so the SDK can connect to Future AGI. Get your API keys from the [dashboard](https://app.futureagi.com/dashboard/keys). + + Set your keys as environment variables. The placeholder values below are for illustration only — in real code, export the keys in your shell or load them from a secrets manager, and never hardcode them in source. ```python Python import os - os.environ["FI_API_KEY"] = "YOUR_API_KEY" - os.environ["FI_SECRET_KEY"] = "YOUR_SECRET_KEY" + os.environ["FI_API_KEY"] = "YOUR_FI_API_KEY" + os.environ["FI_SECRET_KEY"] = "YOUR_FI_SECRET_KEY" ``` - ```typescript - process.env.FI_API_KEY = FI_API_KEY; - process.env.FI_SECRET_KEY = FI_SECRET_KEY; + ```typescript JS/TS + process.env.FI_API_KEY = "YOUR_FI_API_KEY"; + process.env.FI_SECRET_KEY = "YOUR_FI_SECRET_KEY"; ``` - - Call `register` with `project_type` set to Observe and a `project_name`. Optionally set `transport` (e.g. GRPC or HTTP). + + `register` returns a tracer provider. 
Set `project_type` to `OBSERVE` and give the project a name. - ```python + ```python Python from fi_instrumentation import register, Transport from fi_instrumentation.fi_types import ProjectType trace_provider = register( project_type=ProjectType.OBSERVE, - project_name="FUTURE_AGI", + project_name="my-first-project", transport=Transport.GRPC, ) ``` - ```typescript + ```typescript JS/TS import { register, ProjectType } from "@traceai/fi-core"; const traceProvider = register({ project_type: ProjectType.OBSERVE, - project_name: "FUTURE_AGI" + project_name: "my-first-project", }); ``` - - Use one of two options: - - - **Auto Instrumentor**: For supported frameworks (e.g. OpenAI). Use Future AGI's [Auto Instrumentation](/docs/tracing/auto); recommended for most apps. - - **Manual tracing**: For custom spans, use [OpenTelemetry](/docs/tracing/concepts/otel). [Learn more →](/docs/observe/features/manual-tracing/set-up-tracing) - - Example with the OpenAI instrumentor: install the package, instrument with your trace provider, then use the OpenAI client as usual. Traces appear in your [Observe dashboard](https://app.futureagi.com/dashboard/projects/observe). - - - ```python - pip install traceAI-openai - ``` - ```typescript - npm install @traceai/openai - ``` - + + Attach the OpenAI instrumentor to the provider, then call OpenAI as you normally would. 
- ```python + ```python Python from traceai_openai import OpenAIInstrumentor + from openai import OpenAI OpenAIInstrumentor().instrument(tracer_provider=trace_provider) - ``` - ```typescript - import { OpenAIInstrumentation } from "@traceai/openai"; - - const openaiInstrumentation = new OpenAIInstrumentation({}); - ``` - - - - ```python - from openai import OpenAI - os.environ["OPENAI_API_KEY"] = "your-openai-api-key" + os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY" client = OpenAI() - completion = client.chat.completions.create( model="gpt-4o", - messages=[{"role": "user", "content": "Write a one-sentence bedtime story about a unicorn."}] + messages=[{"role": "user", "content": "Write a one-sentence bedtime story about a unicorn."}], ) print(completion.choices[0].message.content) ``` - ```typescript + ```typescript JS/TS + import { OpenAIInstrumentation } from "@traceai/openai"; import { OpenAI } from "openai"; + new OpenAIInstrumentation({}); + const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY }); const completion = await client.chat.completions.create({ model: "gpt-4o", @@ -129,27 +132,52 @@ This is how you connect your application to Future AGI so LLM calls are captured console.log(completion.choices[0].message.content); ``` + + Expected terminal output (the model text varies): + + ```text + Under a sky of silver stars, a gentle unicorn dipped its horn into a moonlit + pool and wished every sleeping child sweet dreams. + ``` -For supported frameworks and more options, see the [Auto Instrumentation](/docs/tracing/auto) page. +--- + +## Confirm the trace + +Open **Observe → your project → Tracing**. Within a few seconds you should see one new trace row for the request, showing **Status OK**, the **gpt-4o** model, the **latency**, and the **token count**. Click it to read the prompt, the completion, and the span timing. 
+ +Observe trace explorer with one new OpenAI trace showing OK status, model, latency, and token columns +*Your request, now a trace. If the row is here with an OK status, instrumentation is working end to end.* + +--- + +## Troubleshooting + +| Symptom | Fix | +|---|---| +| No trace appears | If this is a short script that exits immediately, the batch exporter may not have flushed — call `trace_provider.force_flush()` before the process ends. | +| Still nothing | Widen the date picker (it defaults to the past 7 days) and turn on **Auto refresh**. | +| Wrong or empty project | Confirm `project_name` matches the project you're viewing, and that `FI_API_KEY`/`FI_SECRET_KEY` are the keys for this workspace. | + +For custom spans and frameworks beyond OpenAI, see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing) and [Auto Instrumentation](/docs/tracing/auto). --- ## Next Steps - - Run evaluations on your traced spans to score quality. + + Read and debug the traces you started capturing. - - Group traces into sessions for multi-turn analysis. + + Score the quality of production responses. - - View activity and metrics per end user. + + Group traces into multi-turn conversations. - + Get notified when metrics cross a threshold. - diff --git a/src/pages/docs/observe/features/session.mdx b/src/pages/docs/observe/features/session.mdx index 7644ef9a..82cac85f 100644 --- a/src/pages/docs/observe/features/session.mdx +++ b/src/pages/docs/observe/features/session.mdx @@ -1,123 +1,124 @@ --- -title: "Group Traces by Session: Multi-turn Conversation Analysis" -description: "Group traces into sessions so you can view and analyze multi-turn conversations, chatbot flows, and per-session metrics in Observe." +title: "Sessions: Group Traces into Conversations" +description: "Group traces into sessions so you can read a full multi-turn conversation, see per-session duration, cost, and tokens, and debug the turn where something went wrong." 
+page_type: "feature-deep-dive" +products: ["traceAI"] +feature: "Sessions" +feature_status: "stable" +ui_surfaces: ["Observe > Sessions"] +audience: "engineer" +difficulty: "beginner" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +last_screenshotted: "2026-05-26" +schema_type: "TechArticle" +seo: + primary_query: "group llm traces into sessions" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/sessions-and-users" + how_to: "/docs/observe/features/manual-tracing/set-session-user-id" + feature: "/docs/observe/features/llm-tracing" --- ## About -Sessions group related traces together under a single identifier. A chatbot conversation, a multi-step user journey, or any sequence of LLM calls that belong to the same flow can be tracked as one session. The Observe dashboard shows sessions with their duration, cost, and token usage so you can review the full flow, drill into individual traces, and spot where things went wrong. +A session is one multi-turn conversation, reassembled from its traces. When a chatbot answers five messages, that's five separate [traces](/docs/tracing/concepts/traces) — a session ties them back together under one ID so you can read the whole exchange in order, see its total duration, cost, and tokens, and jump to the exact turn that failed. You create a session by setting the `session.id` attribute on your spans; Observe groups every trace that shares a value into one session. --- ## When to use -- **Chatbot and multi-turn flows**: Group all traces for a single conversation so you can review the full exchange and debug a specific turn. -- **User journey analysis**: Treat one user's sequence of requests as a session to understand behavior and find drop-off points. -- **Session-level metrics**: See total duration, cost, and tokens for an entire session instead of checking each trace individually. 
-- **Filtering and drill-down**: Filter sessions by time range, open a session to see its traces, then open a trace to see spans and eval results. +- **Chatbot and multi-turn flows** — read a full conversation instead of disconnected requests, and debug one turn in context. +- **User-journey analysis** — treat a sequence of requests as one flow to find where users drop off or escalate. +- **Session-level metrics** — see total duration, cost, and token usage for a whole conversation at once. +- **Triage** — filter sessions by time, open one to see its traces, then open a trace for span-level detail and [eval results](/docs/observe/features/evals). --- -## How to - - - - For a trace to appear in a session, the span must carry a **session identifier** via the `session.id` attribute. All traces with the same session name in the same project form one session. The backend creates the session automatically when the first trace with that identifier arrives. - - - - When creating a span manually, set the attribute so the trace is attached to the session: - - - ```python Python - from fi_instrumentation import register, FITracer - - trace_provider = register( - project_type=ProjectType.OBSERVE, - project_name="PROJECT_NAME", - ) - - tracer = FITracer(trace_provider.get_tracer(__name__)) - - with tracer.start_as_current_span( - f"SPAN_NAME", - ) as span: - span.set_status(Status(StatusCode.OK)) - span.set_attribute("session.id", "session123") - span.set_attribute("input.value", "input") - span.set_attribute("output.value", "output") - ``` - ```javascript JS/TS - const { register, ProjectType } = require("@traceai/fi-core"); - - const traceProvider = register({ - projectType: ProjectType.OBSERVE, - projectName: "FUTURE_AGI" - }); - - const tracer = traceProvider.getTracer("manual-instrumentation-example"); - - tracer.startActiveSpan("HandleFunctionCall", {}, (span) => { - span.setAttribute("session.id", "my-session-id"); - span.end(); - }); - ``` - - - - - To tag all spans in 
a block with the same session, use context so every span gets `session.id` automatically: - - - ```python Python - from fi_instrumentation import using_session - - with using_session(session_id="my-session-id"): - # All spans created within this block get session.id = "my-session-id" - ... - ``` - ```javascript JS/TS - import { context, propagation } from "@opentelemetry/api"; - - const sessionId = "my-js-session-id"; - - const activeContext = context.active(); - const baggageWithSession = propagation.createBaggage({ - "session.id": { value: sessionId } - }); - const newContext = propagation.setBaggage(activeContext, baggageWithSession); - - context.with(newContext, () => { - // All spans created within this block get session.id = "my-js-session-id" - }); - ``` - - - - - In the Observe UI, open the project and go to the Sessions view. You can filter by time range, see a list of sessions with duration and metrics, open a session to see its traces, and click **View Trace** for span-level detail and [eval](/docs/observe/features/evals) results. - - - - - For more on setting `session.id` with Trace AI helpers and context, see the [manual tracing guide](/docs/observe/features/manual-tracing/set-session-user-id). - +## When not to use + +- **Debugging a single request** — use the [trace explorer](/docs/observe/features/llm-tracing); a session is the wrong granularity. +- **Rolling up by person, not conversation** — use [Users](/docs/observe/features/users), which spans all of a user's sessions. +- **Aggregate trends across many sessions** — build a [Dashboard](/docs/observe/features/dashboard). + +--- + +## Set it up + +A trace joins a session when its spans carry the `session.id` attribute. Set it directly on a span, or set it once for a block with a context helper so every span inside inherits it. 
+ + +```python Python +from fi_instrumentation import using_session + +# Every span created in this block gets session.id = "my-session-id" +with using_session(session_id="my-session-id"): + ... +``` +```javascript JS/TS +import { context, propagation } from "@opentelemetry/api"; + +const baggage = propagation.createBaggage({ "session.id": { value: "my-session-id" } }); +const ctx = propagation.setBaggage(context.active(), baggage); +context.with(ctx, () => { + // All spans created here get session.id = "my-session-id" +}); +``` + + +To set it on a single manual span instead, call `span.set_attribute("session.id", "...")`. Full patterns are in [Set session and user IDs](/docs/observe/features/manual-tracing/set-session-user-id). + +--- + +## What you see in Observe + +Open the project and switch to the **Sessions** tab. Each row is one conversation. + +Observe Sessions tab listing conversations with first and last message, duration, total cost, and trace count +*One row per conversation. Sort by Total Cost or Total Traces to find the longest or most expensive sessions.* + +| Column | What it shows | +|---|---| +| **Session Id** | The shared identifier for the conversation. | +| **First Message** | The opening message. | +| **Last Message** | The most recent message. | +| **Duration** | How long the conversation lasted. | +| **Total Cost** | Combined cost of all calls in the session. | +| **Total Traces** | How many requests were part of it. | + +Open a session to see its traces in order, then open any trace for the span tree, eval scores, and annotations. + +--- + +## Inputs and edge cases + +| Input | Detail | +|---|---| +| `session.id` (attribute) | Required for a trace to join a session. Any non-empty string; use a stable conversation ID. | + +- A session is **created automatically** when the first trace carrying its `session.id` arrives — there's nothing to pre-register. 
+- A trace **without** a `session.id` doesn't belong to a session; it still appears in the trace explorer. +- Sessions are scoped to a project; the same `session.id` used in two different projects identifies two different sessions. +- Don't put raw PII in the `session.id`. --- -## Next Steps +## Related features - - Connect the SDK and start capturing traces. + + How grouping by conversation and user works. - - Run evaluations on your traced spans to score quality. + + Roll traces and sessions up per end user. - - View activity and metrics per end user. + + Every way to attach a session ID. - - Get notified when metrics cross a threshold. + + Score the responses inside a session. diff --git a/src/pages/docs/observe/features/users.mdx b/src/pages/docs/observe/features/users.mdx index 5f689619..0813c296 100644 --- a/src/pages/docs/observe/features/users.mdx +++ b/src/pages/docs/observe/features/users.mdx @@ -1,118 +1,121 @@ --- -title: "User Dashboard: Per-User Trace and Session Analytics" -description: "View all traces, sessions, and metrics per end user in one place so you can debug, analyze behavior, and optimize at the user level." +title: "Users: Per-User Trace and Session Analytics" +description: "Group every trace and session by end user so you can debug one customer's full history, find who's driving cost, and spot quality drops per user." 
+page_type: "feature-deep-dive" +products: ["traceAI"] +feature: "Users" +feature_status: "stable" +ui_surfaces: ["Observe > Users"] +audience: "engineer" +difficulty: "beginner" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +last_screenshotted: "2026-05-26" +schema_type: "TechArticle" +seo: + primary_query: "per user llm analytics observability" +geo: + direct_answer: true +related: + concept: "/docs/tracing/concepts/sessions-and-users" + how_to: "/docs/observe/features/manual-tracing/set-session-user-id" + feature: "/docs/observe/features/session" --- ## About -The **user dashboard** in Observe groups all traces and sessions by end user. Each user row shows aggregated metrics like cost, tokens, latency, error count, eval pass rate, and guardrail triggers. You identify users by setting a `user.id` attribute on your spans. Once the backend sees that attribute, it creates a user entry and links all matching spans to it. Open any user to see their full activity: traces, sessions, and metrics in one view. +The Users view groups every [trace](/docs/tracing/concepts/traces) and [session](/docs/observe/features/session) by end user. Each row shows the user's activity at a glance — trace count, session count, and when they were first and last active — and you open a user for their full history: every session and trace, with per-user cost, evals, and guardrail results. So you can answer "what happened to this customer?" without writing a query. You identify a user by setting the `user.id` attribute on your spans; Observe creates the user on first sight and links every matching span to it. --- ## When to use -- **A user reports a bug**: Open their row in the dashboard, see every trace and session they triggered, and pinpoint which request failed and why. -- **Costs spike unexpectedly**: Sort users by cost or token usage to find who is driving the increase and whether it is normal usage or a runaway loop. 
-- **You need to measure engagement**: Check activation date, last active, active days, and session counts per user to see who is adopting the product and who dropped off. -- **Eval scores drop for a segment**: Filter users by eval pass rate to find accounts with low quality scores, then drill into their traces to understand the pattern. -- **Support asks "what happened to this user?"**: Search by user ID, open their detail view, and walk through their traces and sessions without writing a single query. +- **A user reports a bug** — open their row, see every trace and session they triggered, find the failed request. +- **Cost spikes** — open a heavy user to see cost and token usage per session and spot a runaway loop. +- **Engagement** — read the First Active and Last Active dates and the session counts to see who adopted and who dropped off. +- **Quality regressions in a segment** — open users and check their eval results to find low-quality accounts, then drill into their traces. --- -## How to +## When not to use - - For a span to count under a user in the dashboard, it must carry a **user identifier**. In the OTLP path this comes from the span attribute **`user.id`**. When a span is ingested with this attribute (for an Observe project), the backend gets or creates an `EndUser` for that project and organization with that `user_id` (and optional `user_id_type`) and links the observation span to that end user. All spans with the same `user.id` in the same project contribute to that user's metrics and appear in their detail view. - +- **One conversation, not a person** — use [Sessions](/docs/observe/features/session). +- **A single request** — use the [trace explorer](/docs/observe/features/llm-tracing). +- **Authentication or identity** — `user.id` is a grouping key, not an auth identity; Observe never verifies it. - - Set **`user.id`** (required). 
You can also set **`user.id.type`** (email, phone, uuid, custom), **`user.id.hash`**, and **`user.metadata`** (JSON) for display or filtering. +--- + +## Set it up - - ```python Python - from fi_instrumentation import register, FITracer - from fi_instrumentation.fi_types import ProjectType - from opentelemetry.trace import Status, StatusCode +A span counts under a user when it carries `user.id`. Set it for a block with a context helper, or directly on a span. You can also set `user.id.type` (`email` | `phone` | `uuid` | `custom`), `user.id.hash`, and `user.metadata`. - trace_provider = register( - project_type=ProjectType.OBSERVE, - project_name="PROJECT_NAME", + +```python Python +from fi_instrumentation import using_attributes + +with using_attributes(user_id="newuser@example.com", session_id="new-session"): + response = client.chat.completions.create( + model="gpt-4o", + messages=[{"role": "user", "content": "Write a haiku."}], ) - tracer = FITracer(trace_provider.get_tracer(__name__)) - - with tracer.start_as_current_span("SPAN_NAME") as span: - span.set_status(Status(StatusCode.OK)) - span.set_attribute("user.id", "vivek.gupta") - span.set_attribute("user.id.type", "email") # email | phone | uuid | custom - span.set_attribute("user.id.hash", "") # optional - span.set_attribute("user.metadata", {}) # optional - span.set_attribute("input.value", "input") - span.set_attribute("output.value", "output") - ``` - ```javascript JS/TS - const { register, ProjectType } = require("@traceai/fi-core"); - - const traceProvider = register({ - projectType: ProjectType.OBSERVE, - projectName: "FUTURE_AGI" - }); - const tracer = traceProvider.getTracer("manual-instrumentation-example"); - - tracer.startActiveSpan("SPAN_NAME", {}, (span) => { - span.setAttribute("user.id", "vivek.gupta"); - span.setAttribute("user.id.type", "email"); - span.end(); - }); - ``` - - - - - To tag all spans in a block with the same user, use a context that sets `user.id` (and optional type/metadata) 
so every span in that block is linked to that end user. With the Python SDK you can use **`using_attributes`** and pass `user_id` (and optionally `session_id`). - - - ```python Python - from fi_instrumentation import using_attributes - - with using_attributes(user_id="newuser@example.com", session_id="new-session"): - response = client.chat.completions.create( - model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "Write a haiku."}], - max_tokens=20, - ) - ``` - - - - - - Open the project and go to the **Users** (user dashboard) view. - - Table columns: user_id, activation date, last active, trace count, error count, session count, avg latency, LLM calls, eval pass rate, guardrail triggers, tokens, cost. - - Search by user ID; apply filters as needed. - - Click a user for detail: **Summary** metrics, **Traces** tab (trace ID, session, latency, input/output, evals, cost, annotations), **Sessions** tab (session ID, time range, trace count, evals, cost). - ![Dashboard](/images/docs/observe/5.png) - - - - - End users are unique per project and organization by `(user_id, user_id_type)`. Sending the same `user.id` (and type) on spans in the same Observe project ties those spans to one end user in the dashboard. - +``` +```javascript JS/TS +tracer.startActiveSpan("handle_request", {}, (span) => { + span.setAttribute("user.id", "newuser@example.com"); + span.setAttribute("user.id.type", "email"); + span.end(); +}); +``` + + + + `user.id` is exported in span data. Use a stable but non-sensitive identifier — a hashed customer ID, not a raw email or phone number — or set `user.id.hash`. See [Mask span attributes](/docs/observe/features/manual-tracing/mask-span-attributes). + + +--- + +## What you see in Observe + +Open the project and go to the **Users** view. Each row is one end user. 
+Observe Users view listing end users with User ID, First Active, Last Active, number of traces, and number of sessions columns +*One row per user, with trace and session counts rolled up. Sort by trace or session count to find your most active users, then open one for the full breakdown.* + +The list has six columns: **User ID**, **First Active**, **Last Active**, **No. of Traces**, **No. of Sessions**, and **Actions**. Click a user for their detail view, where cost, evals, and guardrail results are broken down per session and trace. + +User detail view with summary metrics, a Traces tab, and a Sessions tab for one end user +*User detail: a Traces tab and a Sessions tab. The Sessions tab lists each session with its first and last message, duration, total traces, and total cost.* + +--- + +## Inputs and edge cases + +| Input | Detail | +|---|---| +| `user.id` (attribute) | Required to attribute a span to a user. Any non-empty string. | +| `user.id.type` | Optional: `email`, `phone`, `uuid`, or `custom`. | +| `user.id.hash`, `user.metadata` | Optional, for display and filtering. | + +- Users are **unique per project + organization** by `(user_id, user_id_type)` — the same `user.id` with a different type is a different user. +- A user is **created on first sight** of a span carrying `user.id`; nothing to pre-register. +- Spans without `user.id` are unattributed; they still appear in the trace explorer. --- -## Next Steps +## Related features - - Connect the SDK and start capturing traces. + + How grouping by user and conversation works. - - Run evaluations on your traced spans to score quality. + + Group a user's traces into conversations. - - Group traces into sessions for multi-turn analysis. + + Every way to attach a user ID. - - Get notified when metrics cross a threshold. + + Keep PII out of exported spans. 
diff --git a/src/pages/docs/observe/features/voice.mdx b/src/pages/docs/observe/features/voice.mdx index e526ce5c..20b3e5d1 100644 --- a/src/pages/docs/observe/features/voice.mdx +++ b/src/pages/docs/observe/features/voice.mdx @@ -1,106 +1,122 @@ --- -title: "Voice Observability: Call Logs as Traces in Observe" -description: "Connect a voice provider like Vapi or Retell and get call logs as traces in Observe without any SDK instrumentation or code changes." +title: "Voice Observability: Call Logs as Traces" +description: "Connect a voice provider like Vapi or Retell and get every call as a trace in Observe — transcript, recording, cost, and duration — with no SDK or code changes." +page_type: "feature-deep-dive" +products: ["traceAI"] +feature: "Voice observability" +feature_status: "stable" +ui_surfaces: ["Observe > Agent definitions", "Observe > Projects"] +audience: "engineer" +difficulty: "beginner" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +last_screenshotted: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "voice agent observability vapi retell" +geo: + direct_answer: true +related: + feature: "/docs/observe/features/llm-tracing" + how_to: "/docs/observe/features/evals" + concept: "/docs/tracing/concepts/traces" --- - ## About -Voice agents are hard to debug. Conversations happen in real time, across multiple turns, and when something goes wrong you usually find out from a user complaint, not a log. **Voice observability** fixes this by pulling call logs from your voice provider into Observe automatically. No SDK or code changes needed. Connect a provider (Vapi, or Retell) using its API key and assistant ID, and every call shows up as a trace with its transcript, recording URLs, cost, and duration. From there you can run [evaluations](/docs/observe/features/evals), set [alerts](/docs/observe/features/alerts), search, filter, and export, the same way you would with any other trace. 
+Voice agents are hard to debug: conversations happen in real time, across turns, and you usually hear about failures from a user, not a log. Voice observability pulls call logs from your voice provider into Observe automatically — **no SDK or code changes**. Connect a provider (Vapi, Retell, or Eleven Labs) with its API key and assistant ID, and every call shows up as a [trace](/docs/tracing/concepts/traces) with its transcript, recording URLs, cost, and duration. From there you can run [evals](/docs/observe/features/evals), set [alerts](/docs/observe/features/alerts), and filter and export, exactly like any other trace. + --- ## When to use -- **Visibility into voice agent calls**: See all conversations for a voice agent in one project without adding SDK instrumentation. -- **Evaluate voice conversations**: Run evals (quality, bias, adherence) on conversation spans from voice calls. -- **Alerts on voice metrics**: Set monitors on voice project metrics and get notified when something degrades. -- **Transcripts and recordings for debugging**: Access transcript and recording URLs from the trace view. -- **Multiple voice providers**: Support for Vapi, Retell so you can monitor agents regardless of provider. +- **See every voice call** for an agent in one project, without instrumenting code. +- **Evaluate conversations** — run quality, bias, or adherence evals on voice call spans. +- **Alert on voice metrics** — get notified when a voice project degrades. +- **Debug with transcripts and recordings** — open a call to read the transcript and play the recording. --- -## How to +## When not to use + +- **A text/SDK app** — that uses normal instrumentation; start at the [quickstart](/docs/observe/features/quickstart). +- **An unsupported provider** — only Vapi, Retell, and Eleven Labs are supported today (see below). 
+ +--- + +## Connect a voice provider - From your voice provider's dashboard, obtain: - - **API key** - - **Assistant ID** (or agent ID) - - These are required when observability is enabled. Supported providers: [Vapi](https://dashboard.vapi.ai), [Retell](https://www.retellai.com/). + From your provider's dashboard, get the **API key** and **Assistant ID** (a.k.a. agent ID). Supported providers: [Vapi](https://dashboard.vapi.ai), [Retell](https://www.retellai.com/), [Eleven Labs](https://elevenlabs.io). The API key and Assistant ID are both required when observability is enabled. - Go to the **Agent definition** section and click **Create agent definition**. - ![Agent definition list](/screenshot/product/observe/voice/agent_definition_list.png) + Open **Agent definitions** and create one. Fill in the agent name and provider; the API key and Assistant ID are masked. Check **Enable Observability**, then **Create** — you're returned to the list with the new agent. - Fill in agent name, provider, and other required fields. The API key and Assistant ID are masked for security. - ![Create agent definition form](/screenshot/product/observe/voice/agent_definition_filled.png) + Create agent definition form with provider, masked API key, Assistant ID, and Enable Observability toggle + *The API key and Assistant ID are only required when Observability is on.* - Check **Enable Observability**. The API key and Assistant ID are required only if observability is enabled. - ![Agent definition details](/screenshot/product/observe/voice/agent_definition_details.jpeg) - - Click **Create**. You are redirected to the agent list where the new agent is now visible. - ![Agent definition list with new agent](/screenshot/product/observe/voice/agent_definition_list_with_new.jpeg) + Agent definitions list showing the newly created voice agent + *Your agent definitions. Each one maps to a project that collects its calls.* - Open the **Projects** tab. 
A project with the same name as your agent lists all call logs. - ![Projects list](/screenshot/product/observe/voice/project_list.png) + Open **Projects** — a project named after your agent collects its calls. Open it to see the voice table (calls with status, duration, cost), then click a call for the detail drawer. - Open the project to see the voice observability table (calls with status, duration, cost). - ![Voice observability table](/screenshot/product/observe/voice/voice_observability_table.png) + Voice project table listing calls with status, duration, and cost + *Each row is a call, captured as a trace.* - Click a call to open the detail drawer (transcript, recording URLs, call data). - ![Call log detail drawer](/screenshot/product/observe/voice/call_log_detail_drawer_marked.jpeg) + Call detail drawer with transcript, recording URLs, and call metadata + *Inside a call: transcript, recording URLs, and call data — the context a user complaint never gives you.* - Click an agent definition to open the edit form. You can edit any field. - - - If you **disable** observability, the API key and Assistant ID become optional. - - If you **enable** observability (or keep it on), API key and Assistant ID are required. - - - - ![Agent with observability disabled](/screenshot/product/observe/voice/agent_update_observability_disabled.png) - - - ![Agent with observability enabled](/screenshot/product/observe/voice/agent_update_observability_enabled.png) - - + Open an agent definition to edit it. Disabling observability makes the API key and Assistant ID optional; enabling it makes them required again. +--- + +## Edge cases and limits + +- Voice projects use **call-shaped columns** (Call Details, Status, Duration, Avg Latency, Turn Count, Tokens, Cost) rather than the text trace columns. +- The **API key and Assistant ID are mandatory** whenever observability is enabled; the agent definition won't capture calls without them. 
+- Transcripts and recordings can contain sensitive customer audio — treat the recording URLs as sensitive data. + +--- + ## Supported providers - [Vapi](https://dashboard.vapi.ai) - [Retell](https://www.retellai.com/) +- [Eleven Labs](https://elevenlabs.io) --- -## Next Steps +## Related features - - Connect the SDK and start capturing traces. + + Inspect voice calls alongside your other traces. - - Run evaluations on your traced spans to score quality. + + Score voice conversations for quality and safety. - - Get notified when metrics cross a threshold. + + Get notified when a voice project degrades. - - View activity and metrics per end user. + + Instrument a text/SDK app instead. diff --git a/src/pages/docs/observe/index.mdx b/src/pages/docs/observe/index.mdx index 478e3c5f..b38c76f3 100644 --- a/src/pages/docs/observe/index.mdx +++ b/src/pages/docs/observe/index.mdx @@ -1,45 +1,91 @@ --- -title: "Future AGI Observe: Monitor LLM Apps in Production" -description: "Monitor and evaluate LLM applications in production with real-time tracing, session analysis, cost tracking, and alerting." +title: "Observe: Monitor LLM Apps in Production" +description: "Observe is FutureAGI's production monitoring for LLM apps — replay any request, track quality, cost, and latency, score responses with evals, and get alerted the moment something degrades." +page_type: "product-overview" +products: ["traceAI"] +audience: "engineer" +status: "review" +owner: "observability" +last_screenshotted: "2026-05-25" +schema_type: "SoftwareApplication" +seo: + primary_query: "llm production observability monitoring" +geo: + direct_answer: true +related: + quickstart: "/docs/observe/features/quickstart" + concept: "/docs/tracing/concepts/traces" + feature: "/docs/observe/features/llm-tracing" --- ## About -Observability is how you monitor your AI application after it goes live. 
Once your app is in production, things change: user inputs vary, model behavior shifts, and issues come up that testing never caught. Observability gives you a continuous view of how your application is performing so you can stay on top of it. +Observe is where you monitor your AI application in production. When an answer is wrong, a request is slow, or cost spikes, it shows you exactly what your app did on that request — so you find the cause instead of guessing, and you catch problems before your users report them. You can replay any request end to end, watch quality, cost, and latency trends, group activity by conversation or by end user, score responses automatically, and get alerted the moment something degrades. -It tracks every response your application generates, groups them by session and user, scores them for quality, and alerts you when something goes wrong. Instead of finding out about problems from users, you see them in the dashboard first. +Under the hood, Observe records each request as a [trace](/docs/tracing/concepts/traces) — but day to day you work with the views built on top: the trace explorer, sessions, users, evals, dashboards, and alerts. If you're starting fresh, send your [first trace](/docs/observe/features/quickstart); if traces are already flowing, open the [trace explorer](/docs/observe/features/llm-tracing). -Sessions Overview +Observe dashboard listing production traces with status, latency, and token columns +*Every production request, captured as a trace and ready to inspect, group, score, or alert on.* --- -## How Observability Connects to Other Features +## What you work with -- **Prototype**: After you promote a winning version in Prototype, its traces continue flowing into Observe so you can monitor production performance against the same quality criteria. [Learn more](/docs/prototype) -- **Evaluation**: Observability uses the same built-in eval templates to score production traces automatically. 
Any eval you configured in Prototype or Datasets runs the same way here. [Learn more](/docs/evaluation) -- **Alerts**: Observability feeds into the alerting system so you are notified when quality, cost, or latency crosses a threshold in production. [Learn more](/docs/observe/features/alerts) +Everything in Observe is built on the [trace](/docs/tracing/concepts/traces), with the rest layered on top. Day to day, these are the things you'll use: + +| Object | What it is | Where you use it | +|---|---|---| +| **Trace** | The full record of one request, made of spans. | [Trace explorer](/docs/observe/features/llm-tracing) | +| **Span** | One step inside a trace (model, tool, retrieval, eval). | [Trace explorer](/docs/observe/features/llm-tracing) | +| **Session** | Traces from one multi-turn conversation. | [Sessions](/docs/observe/features/session) | +| **User** | All traces and sessions from one end user. | [Users](/docs/observe/features/users) | +| **Eval score** | A quality score attached to a trace or span. | [Evals](/docs/observe/features/evals) | +| **Dashboard** | Custom charts over your trace metrics. | [Dashboards](/docs/observe/features/dashboard) | +| **Alert** | A notification when a metric crosses a threshold. | [Alerts](/docs/observe/features/alerts) | --- -## Getting Started with Observability +## Start here - - Connect the SDK and start capturing traces in minutes. + + Instrument your app and see a request in Observe in ~5 minutes. + + + The mental model Observe is built on. - - Run evaluations on observed traces and sessions. + + Inspect, filter, and debug production requests. + + Score production responses for quality. + + + +--- + +## How Observe connects to the rest of FutureAGI + +- **Tracing** — Observe reads the spans your app emits through traceAI. Start at [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). 
+- **Evaluation** — the same eval templates you use elsewhere run against production traces here; see [Run evals on traces](/docs/observe/features/evals). +- **Alerts** — Observe metrics feed the [alerting system](/docs/observe/features/alerts) so threshold breaches reach you by email or Slack. +- **Voice** — connect a voice provider and calls arrive as traces; see [Voice observability](/docs/observe/features/voice). + +--- + +## Next Steps + + - Group and analyze multi-turn interactions. + Group and analyze multi-turn conversations. - - Track and analyze activity by user. + + Track activity, cost, and quality per end user. - - Configure alerts for real-time issue detection. + + Build custom charts over your trace metrics. - - Monitor voice agent interactions and call quality. + + Monitor voice-agent calls as traces. diff --git a/src/pages/docs/observe/reference/dashboard-metric-definitions.mdx b/src/pages/docs/observe/reference/dashboard-metric-definitions.mdx new file mode 100644 index 00000000..a8db9d79 --- /dev/null +++ b/src/pages/docs/observe/reference/dashboard-metric-definitions.mdx @@ -0,0 +1,66 @@ +--- +title: "Dashboard Metric Definitions" +description: "Reference for the metrics, aggregations, and granularities available in Observe dashboard widgets — what each metric measures and how it's rolled up." +page_type: "reference" +products: ["traceAI"] +audience: "engineer" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "dashboard metric definitions observability" +geo: + direct_answer: true +related: + feature: "/docs/observe/features/dashboard" + how_to: "/docs/observe/features/alerts" +--- + +## About + +Each [dashboard](/docs/observe/features/dashboard) widget plots one metric, rolled up by an aggregation over a time granularity. This page defines the metrics and the aggregation and granularity options. 
+ +## Metrics + +| Metric | What it measures | +|---|---| +| Span count | Number of spans matching the widget's filters. | +| Error count | Number of spans/traces with `ERROR` status. | +| Span response time | Latency of spans. | +| LLM response time | Latency of LLM spans specifically. | +| Token usage | Tokens consumed (prompt + completion). | +| Cost | Computed cost of model calls. | +| Eval pass-rate | Share of evaluated spans that passed their eval. | + +The widget editor lists the metrics available for your project. + +## Aggregations + +| Aggregation | Result | +|---|---| +| Sum | Total across the bucket. | +| Average | Mean across the bucket. | +| Median | 50th percentile. | +| Count | Number of matching records. | +| Distinct count | Number of unique values. | +| Min / Max | Smallest / largest value in the bucket. | + +## Granularity + +Buckets the time axis: **minute, hour, day, week, month**. Available options adjust to the selected time range (a 12-month range won't offer minute granularity). + +## Reading a metric correctly + +A widget's number is always *metric × aggregation × granularity × filters*. "Average LLM response time per hour" and "max LLM response time per day" come from the same spans but answer different questions. If a number looks wrong, see [Dashboard numbers look wrong](/docs/observe/troubleshooting/dashboard-numbers-look-wrong). + +## Related + + + + Build widgets from these metrics. + + + Alert when one of these metrics crosses a threshold. + + diff --git a/src/pages/docs/observe/reference/export-formats.mdx b/src/pages/docs/observe/reference/export-formats.mdx new file mode 100644 index 00000000..797b2eae --- /dev/null +++ b/src/pages/docs/observe/reference/export-formats.mdx @@ -0,0 +1,55 @@ +--- +title: "Trace Export and Endpoints" +description: "Reference for getting trace data out of Observe — exporting the current view, the available formats, and the OTLP endpoints traces are sent to (cloud and self-hosted)." 
+page_type: "reference" +products: ["traceAI"] +audience: "engineer" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "export traces endpoints futureagi" +geo: + direct_answer: true +related: + feature: "/docs/observe/features/llm-tracing" + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" +--- + +## About + +There are two directions for trace data: **out of** Observe (exporting what you're viewing) and **into** Observe (the OTLP endpoints traceAI sends to). This page covers both. + +## Export from the trace explorer + +The download icon in the [trace explorer](/docs/observe/features/llm-tracing) header exports the **current view** — the traces that match your active filters and time range. + +| Format | Use for | +|---|---| +| CSV | Spreadsheet analysis, sharing, importing elsewhere. | + +## Ingestion endpoints + +traceAI exports spans over OTLP to FutureAGI. The transport and target are environment-driven: + +| Variable | Transport | Default | +|---|---|---| +| `FI_BASE_URL` | HTTP collector | FutureAGI cloud | +| `FI_GRPC_URL` | gRPC collector | FutureAGI cloud | + +- **Cloud:** leave the defaults; set `FI_API_KEY` and `FI_SECRET_KEY`. +- **Self-hosted:** point `FI_BASE_URL` / `FI_GRPC_URL` at your own collector host so spans stay in your network. + +Choose the transport with `transport=Transport.HTTP` (default) or `Transport.GRPC` in `register()`. See [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). + +## Related + + + + Filter, then export the current view. + + + Configure the OTLP endpoint and transport. 
+ + diff --git a/src/pages/docs/observe/reference/trace-filter-syntax.mdx b/src/pages/docs/observe/reference/trace-filter-syntax.mdx new file mode 100644 index 00000000..b1252cdc --- /dev/null +++ b/src/pages/docs/observe/reference/trace-filter-syntax.mdx @@ -0,0 +1,70 @@ +--- +title: "Trace Filter Syntax" +description: "Reference for filtering traces in Observe — the three filter modes, the properties you can filter on, and the operators available in Basic mode." +page_type: "reference" +products: ["traceAI"] +audience: "engineer" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "trace filter syntax observability" +geo: + direct_answer: true +related: + feature: "/docs/observe/features/llm-tracing" + concept: "/docs/tracing/concepts/spans" +--- + +## About + +The **Filter** panel in the [trace explorer](/docs/observe/features/llm-tracing) narrows which traces are shown. It offers three modes — plain-language AI search, a Basic property/condition/value builder, and a Query expression for power users. This page lists the modes, the properties you can filter on, and the Basic operators. + +## Filter modes + +| Mode | Use it for | +|---|---| +| **AI search** | Describe what you want in plain English (e.g. *"errors on gpt-4o today"*) and the filter is built for you. | +| **Basic** | Pick a property, a condition, and a value. Add several; they apply together (AND). | +| **Query** | Write a filter expression directly, for complex conditions. | + +## Filterable properties + +| Property | Description | +|---|---| +| `Trace ID` | The unique trace identifier. | +| `Trace Name` | The top-level task name (e.g. `support_agent.run`). | +| `Span Name` | The name of a span within the trace. | +| `Status` | `OK` or `ERROR`. | +| `Model` | The model on an LLM span (e.g. `gpt-4o`). | +| `Node Type` | The span/observation type (`llm`, `chain`, `tool`, …). | +| `Span Kind` | The `fi.span.kind` value. 
| +| `User ID` | The `user.id` attribute. | +| `Provider` | The model provider. | +| `Service / Trace Name` | The service or trace name. | +| Eval scores | Filter by an eval score attached to the span. | +| Annotation values | Filter by a human annotation value. | + +## Basic operators + +Operators available depend on the property's type. Common ones: + +| Operator | Applies to | +|---|---| +| `is` / `is not` | Exact match (status, model, enums). | +| `contains` | Substring match (names, inputs). | +| `greater than` / `less than` | Numeric values (latency, tokens, eval score). | + +The Basic builder shows the available operators for each property. + +## Related + + + + Where filters are applied and saved as views. + + + The fields these properties map to. + + diff --git a/src/pages/docs/observe/troubleshooting/alerts-did-not-fire.mdx b/src/pages/docs/observe/troubleshooting/alerts-did-not-fire.mdx new file mode 100644 index 00000000..f436b4a4 --- /dev/null +++ b/src/pages/docs/observe/troubleshooting/alerts-did-not-fire.mdx @@ -0,0 +1,67 @@ +--- +title: "An alert did not fire" +description: "A metric crossed your threshold but no alert arrived. Usual causes: the monitor's frequency hasn't elapsed, it's muted, the threshold direction is wrong, or notifications are misconfigured." +page_type: "troubleshooting" +products: ["traceAI"] +failure_surface: "dashboard" +symptom: "alert did not fire" +audience: "engineer" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +support_escalation: "support@futureagi.com" +schema_type: "TechArticle" +seo: + primary_query: "alert did not fire monitor" +geo: + direct_answer: true +related: + feature: "/docs/observe/features/alerts" + how_to: "/docs/observe/features/dashboard" +--- + +## About + +A metric crossed what you thought was the threshold, but no email or Slack arrived. 
The usual causes are timing (the monitor only evaluates on its schedule), the monitor being muted, the threshold direction or value being set differently than you remember, or the notification channel itself failing. Check the schedule and mute state first — those explain most "missing" alerts. + +## Symptoms + +- A metric clearly breached the limit but no notification came. +- Alerts used to arrive and stopped. +- The alert log shows nothing for the period you expected. + +## Run the smoke test + +Open the monitor and read its **frequency, mute state, operator, threshold value, and notification channels**, then check the alert log for recent entries. + +## Fix the common causes + +1. **Frequency hasn't elapsed.** A monitor evaluates on `alert_frequency` (minimum 5, default 60 minutes). A brief breach between runs may not be caught — set a lower `alert_frequency` (a shorter interval between evaluations) if you need faster detection. +2. **Monitor is muted.** `is_mute` stops notifications while the monitor keeps evaluating; unmute it. +3. **Threshold direction or value.** `threshold_operator` (`Greater than` / `Less than`) and the critical value must match the breach you expect — a "less than" monitor won't fire on a spike. +4. **Percentage-change baseline.** A percentage-change monitor needs enough history in its `auto_threshold_time_window` to compute a baseline; a new project may not alert yet. +5. **Notification channel.** Verify `notification_emails` (up to 5) and/or `slack_webhook_url`; a bad webhook silently drops the message. + +## Verify the fix + +Set a deliberately easy threshold, wait one `alert_frequency` cycle, and confirm an alert log entry plus the email/Slack message arrive. Then restore the real threshold. + +## Prevent this in your app + +- Match `alert_frequency` to how fast you need to know — don't leave it at 60 if minutes matter. +- Test each notification channel once when you create the monitor.
+ +## Still not working + +Send the monitor name, its config, and the breach timestamp to support@futureagi.com. + +## Related + + + + How monitors and thresholds are configured. + + + Confirm the metric trend the alert watches. + + diff --git a/src/pages/docs/observe/troubleshooting/dashboard-numbers-look-wrong.mdx b/src/pages/docs/observe/troubleshooting/dashboard-numbers-look-wrong.mdx new file mode 100644 index 00000000..3efdb447 --- /dev/null +++ b/src/pages/docs/observe/troubleshooting/dashboard-numbers-look-wrong.mdx @@ -0,0 +1,67 @@ +--- +title: "Dashboard numbers look wrong" +description: "A dashboard widget shows a number you didn't expect. Almost always it's the time range, granularity, aggregation, filters, or sampling — not bad data." +page_type: "troubleshooting" +products: ["traceAI"] +failure_surface: "dashboard" +symptom: "dashboard numbers look wrong" +audience: "engineer" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +support_escalation: "support@futureagi.com" +schema_type: "TechArticle" +seo: + primary_query: "dashboard numbers wrong observability" +geo: + direct_answer: true +related: + feature: "/docs/observe/features/dashboard" + how_to: "/docs/observe/features/llm-tracing" +--- + +## About + +A widget shows a number that doesn't match what you expected — cost too low, latency too high, a count that seems off. Almost always the data is right and the *query* is reading it differently than you assumed: the time range, granularity, aggregation, or filters change what a widget reports. Check those four before suspecting the underlying traces. + +## Symptoms + +- A metric looks far higher or lower than reality. +- Two widgets that "should" match don't. +- A number changed when you only changed the time range or granularity. + +## Run the smoke test + +Open the widget editor and read its **time range, granularity, aggregation, group-by, and filters**. 
Then cross-check one value against the [trace explorer](/docs/observe/features/llm-tracing) for the same window. + +## Fix the common causes + +1. **Time range / granularity.** A chart reflects the selected window and bucket size. *Average latency per hour* and *per day* give different numbers from the same traces. +2. **Aggregation mismatch.** Sum vs. average vs. median answer different questions — confirm the widget uses the one you mean. +3. **Filters narrowing the data.** A widget filter (model, status, attribute) silently excludes traces; clear it to compare against the full set. +4. **Eval sampling.** If a metric is built on evals run at a sampling rate, it covers a *subset* of spans, not all of them. +5. **Timezone.** Day boundaries follow the dashboard timezone — an apparent gap may be a boundary effect. + +## Verify the fix + +Set the widget's time range and granularity to match your expectation, clear extra filters, and confirm the value lines up with a trace-explorer count for the same window. + +## Prevent this in your app + +- Label widgets with their aggregation and window so readers don't misread them. +- Keep one "all traffic, no filters" reference widget to sanity-check the others. + +## Still not working + +If a value still can't be reconciled with the trace list for the same window, send the dashboard, widget config, and window to support@futureagi.com. + +## Related + + + + How widgets are configured. + + + Cross-check a number against the raw traces. + + diff --git a/src/pages/docs/observe/troubleshooting/missing-attributes.mdx b/src/pages/docs/observe/troubleshooting/missing-attributes.mdx new file mode 100644 index 00000000..978ffc4e --- /dev/null +++ b/src/pages/docs/observe/troubleshooting/missing-attributes.mdx @@ -0,0 +1,67 @@ +--- +title: "Spans or attributes are missing from a trace" +description: "The trace appears but spans are missing or fields like input/output are blank. 
Usual causes: masking is on, the instrumentor isn't attached, or a custom key isn't indexed." +page_type: "troubleshooting" +products: ["traceAI"] +failure_surface: "sdk" +symptom: "spans or attributes missing from trace" +audience: "engineer" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +support_escalation: "support@futureagi.com" +schema_type: "TechArticle" +seo: + primary_query: "missing spans attributes trace" +geo: + direct_answer: true +related: + feature: "/docs/observe/features/llm-tracing" + how_to: "/docs/observe/features/manual-tracing/add-attributes-metadata-tags" +--- + +## About + +The trace shows up, but it's incomplete — a nested span is missing, or fields like input and output are blank, or a custom attribute you set isn't there. The usual causes are redaction being switched on (in which case blank is *expected*), the framework's instrumentor not being attached, or an attribute set on the wrong span or after it closed. Check redaction first, because a hidden field is working as designed, not a bug. + +## Symptoms + +- A span's **input/output show as hidden or blank**. +- A framework's child spans (e.g. nested LangGraph nodes) don't appear. +- A custom attribute you set isn't on the span, or you can't filter by it. + +## Run the smoke test + +Open the trace, click the span, and check the **attributes** list. Compare what's missing against the causes below. + +## Fix the common causes + +1. **Redaction is on (check first).** If `FI_HIDE_INPUTS`/`FI_HIDE_OUTPUTS` or `TraceConfig` masking is set, those fields are intentionally hidden — that's expected. See [Mask span attributes](/docs/observe/features/manual-tracing/mask-span-attributes). +2. **Instrumentor not attached for that framework.** Each framework needs its own instrumentor attached to the provider; install and `instrument()` the one for the missing spans. +3. 
**Custom attribute set on the wrong span / after close.** Set attributes while the span is active; a value set after the `with` block closes is dropped. +4. **Custom key isn't indexed for filtering.** The UI filters on standard keys — use a [semantic-convention](/docs/observe/features/manual-tracing/semantic-conventions) key where one exists. +5. **Unsupported value type.** Attribute values must be string, bool, int, float, or an array of those. + +## Verify the fix + +Re-run one request and confirm the previously-missing span or attribute now shows in the span detail (and that you can filter by it, for standard keys). + +## Prevent this in your app + +- Decide masking deliberately and document it, so blank fields aren't mistaken for bugs. +- Prefer semantic-convention keys for anything you'll filter or evaluate on. + +## Still not working + +Send the trace ID, the framework + instrumentor versions, and the attribute you expected, to support@futureagi.com. + +## Related + + + + How attributes get onto spans. + + + Why a field might be intentionally hidden. + + diff --git a/src/pages/docs/observe/troubleshooting/no-traces-appearing.mdx b/src/pages/docs/observe/troubleshooting/no-traces-appearing.mdx new file mode 100644 index 00000000..6b1b3cb9 --- /dev/null +++ b/src/pages/docs/observe/troubleshooting/no-traces-appearing.mdx @@ -0,0 +1,66 @@ +--- +title: "No traces appear in Observe" +description: "Your app ran but no trace shows up in Observe. The usual causes are an unflushed short-lived script, instrumenting after the client is created, wrong keys, or a narrow time window." 
+page_type: "troubleshooting" +products: ["traceAI"] +failure_surface: "sdk" +symptom: "no traces appear in observe" +audience: "engineer" +status: "review" +owner: "observability" +last_tested: "2026-05-25" +support_escalation: "support@futureagi.com" +schema_type: "TechArticle" +seo: + primary_query: "no traces appearing futureagi" +geo: + direct_answer: true +related: + feature: "/docs/observe/features/llm-tracing" + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" +--- + +## About + +You instrumented your app and ran it, but no trace shows up in the [trace explorer](/docs/observe/features/llm-tracing). The most common cause is a short-lived script that exited before its spans flushed; the next most common are instrumenting *after* the client was created, the wrong keys or project, or a time window that hides the trace. Work the checks below in order — the first two fix the large majority of cases — and you'll have a trace within a few seconds of a test request. + +## Symptoms + +- A request ran with no error, but no new row appears in the trace list. +- A short script (a one-off `python app.py`) never produces a trace. +- Traces appeared before but stopped after a code change. + +## Run the smoke test + +Send one request, then open **Observe → your project → Tracing** with **Auto refresh** on and the date range set to **Today**. If the trace still isn't there, go through the causes below. + +## Fix the common causes + +1. **Short script exited before flush (most common).** The batch exporter sends on an interval; a script that finishes immediately can exit first. Call `trace_provider.force_flush()` before the process ends, or pass `batch=False` to `register()`. +2. **Instrumented after the client was created.** Call `register()` and the instrumentor **before** constructing the framework client — otherwise the client isn't wrapped. +3.
**Wrong keys or project.** Confirm `FI_API_KEY` and `FI_SECRET_KEY` are this workspace's keys, and that `project_name` matches the project you're viewing. +4. **Time window or refresh.** The date picker defaults to the past 7 days; widen it and enable Auto refresh. + +## Verify the fix + +Send one more request. A new trace should appear within seconds, **Status OK**, with input, output, latency, and model populated. + +## Prevent this in your app + +- Add `trace_provider.force_flush()` to short scripts and job runners. +- Call `register()` + `instrument()` once at startup, before any client is built. + +## Still not working + +Collect your `project_name`, a request timestamp, your installed `fi-instrumentation-otel` and instrumentor versions, and any stderr, and contact support@futureagi.com. + +## Related + + + + The setup this page diagnoses. + + + Where traces should appear. + + diff --git a/src/pages/docs/tracing/concepts/index.mdx b/src/pages/docs/tracing/concepts/index.mdx index 59d88314..960e7b33 100644 --- a/src/pages/docs/tracing/concepts/index.mdx +++ b/src/pages/docs/tracing/concepts/index.mdx @@ -1,23 +1,46 @@ --- -title: "Understanding Observability: LLM Tracing Core Concepts" -description: "Core concepts behind LLM observability in Future AGI: what gets captured, how data is structured, and why monitoring matters for AI applications." +title: "Tracing Concepts" +description: "The core concepts behind LLM observability in FutureAGI: traces, spans, OpenTelemetry, and traceAI — what gets captured, how it's structured, and why it matters." +page_type: "concept" +diataxis: "explanation" +products: ["traceAI"] +concept_family: "tracing" +concept_level: "foundational" +primary_question: "What are the core tracing concepts in FutureAGI?" +direct_answer: "FutureAGI observability is built on traces and spans, collected via OpenTelemetry and the traceAI SDK built on top of it. 
Your app emits spans, each tagged with a trace ID; all spans that share an ID form one trace, and that traced data powers the whole UI." +audience: "engineer" +status: "review" +owner: "observability" +last_diagram_reviewed: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "llm observability core concepts" +geo: + direct_answer: true +related: + concepts: ["/docs/tracing/concepts/traces", "/docs/tracing/concepts/spans"] + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + feature: "/docs/observe/features/llm-tracing" --- ## About LLM observability is the practice of capturing, structuring, and analyzing everything that happens inside your AI application. Every LLM call, retrieval, tool execution, and agent decision is recorded as structured data that you can search, filter, score, and alert on. -Future AGI's observability stack is built on OpenTelemetry. Your application sends traces to the platform, and everything else (dashboards, evals, sessions, alerts) runs on top of that traced data. Without tracing, there is nothing to observe. +FutureAGI's observability stack is built on OpenTelemetry. Your application sends traces to the platform, and everything else (dashboards, evals, sessions, alerts) runs on top of that traced data. --- ## The Tracing Pipeline -Your app emits **spans** (LLM calls, tool calls, chain steps) via OpenTelemetry or the traceAI SDK. The backend receives them over HTTP or gRPC, groups them into **traces**, and stores them by project.
-``` -Your App → traceAI / OpenTelemetry SDK → OTLP (HTTP or gRPC) → Future AGI Backend → Observe Dashboard -``` +|traceAI / OTel SDK| B[OTLP: HTTP or gRPC] + B --> C[FutureAGI backend] + C --> D[Observe dashboard]`} /> Each **trace** is one request or execution. Each **span** is one operation (LLM, tool, retriever, etc.) with input, output, timing, and optional cost and tokens. That data powers the entire UI: trace list, span detail, [sessions](/docs/observe/features/session), [evals](/docs/observe/features/evals), and [alerts](/docs/observe/features/alerts). diff --git a/src/pages/docs/tracing/concepts/otel.mdx b/src/pages/docs/tracing/concepts/otel.mdx index 6b136d3f..626d7274 100644 --- a/src/pages/docs/tracing/concepts/otel.mdx +++ b/src/pages/docs/tracing/concepts/otel.mdx @@ -1,18 +1,92 @@ --- -title: "What is OpenTelemetry? Future AGI Tracing Explained" -description: "Learn how Future AGI uses OpenTelemetry for vendor-neutral, high-performance tracing of AI applications with standardized telemetry collection." +title: "What Is OpenTelemetry?" +description: "OpenTelemetry is the open, vendor-neutral standard FutureAGI uses to collect and export traces — your app emits OTel spans that traceAI sends to Observe." +page_type: "concept" +diataxis: "explanation" +products: ["traceAI"] +concept_family: "tracing" +concept_level: "foundational" +primary_question: "What is OpenTelemetry and how does FutureAGI use it?" +direct_answer: "OpenTelemetry (OTel) is the open standard for collecting traces, metrics, and logs. FutureAGI builds on it: your app emits OTel spans that traceAI exports to Observe." 
+audience: "engineer" +status: "review" +owner: "observability" +last_diagram_reviewed: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "what is opentelemetry llm tracing" +geo: + direct_answer: true +related: + concepts: ["/docs/tracing/concepts/traceai"] + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + feature: "/docs/observe/features/llm-tracing" --- -[OpenTelemetry (OTel)](https://opentelemetry.io/) is an open-source observability framework designed for collecting, processing, and exporting traces, metrics, and logs from applications. It provides a standardized way to instrument applications and infrastructure to gain insights into their performance and behavior. +## About -We use OTel at Future AGI because it's vendor-agnostic, open source, and highly performant. It's a standard that includes batch processing of traces and spans in the magnitude of billions. +[OpenTelemetry](https://opentelemetry.io/) (OTel) is the open, vendor-neutral standard for collecting traces, metrics, and logs from software. It defines how a [span](/docs/tracing/concepts/spans) is structured and how spans are exported, so any tool that speaks OTel can read them. FutureAGI is built on it: your app emits OTel spans, and [traceAI](/docs/tracing/concepts/traceai) exports them over OTLP to Observe. Because it's a standard, the same instrumentation works across languages, frameworks, and backends. -## Why Use It? +--- + +## Why it matters + +Building on a standard is what keeps you free of vendor lock-in. Your instrumentation isn't proprietary to FutureAGI — the same OTel spans can go to any OTel-compatible backend, and FutureAGI can ingest spans from anything that emits them. OTel is also built for scale: batch export handles high-volume tracing without blocking your app. So the choice to standardize on OTel is why "set up tracing once" works regardless of your stack.
+ +--- + +## Mental model + +OTel sits between your app and FutureAGI: your code (or an instrumentor) creates spans, a processor batches them, and an exporter ships them over OTLP. + + B[OTel SDK: create spans] + B --> C[Span processor: batch] + C -->|OTLP HTTP/gRPC| D[FutureAGI backend] + D --> E[Observe]`} /> + +traceAI plugs into this pipeline — it's the layer that creates *LLM-shaped* spans and configures the exporter to point at FutureAGI. + +--- -- 🔓 **Vendor-neutral**: Not locked to any specific provider -- 🌐 **Open source**: Free and community-driven -- ⚡ **High performance**: Handles billions of traces efficiently +## When to use + +- You want instrumentation that isn't locked to one vendor. +- You already emit OTel spans and want them in Observe. +- You're tracing across multiple languages or services and need one standard. +- You need high-volume tracing with batched export. + +--- + +## What it isn't + +- **OTel is not a backend.** It collects and exports spans; storing and displaying them is FutureAGI's job. +- **OTel is not traceAI.** traceAI is the FutureAGI layer *on top of* OTel that adds LLM-specific spans and conventions. See [traceAI](/docs/tracing/concepts/traceai). +- **OTel is not logging.** It's structured, timed traces — not flat log lines. + +--- + +## How FutureAGI uses OpenTelemetry + +`register()` configures an OTel `TracerProvider` with an OTLP exporter pointed at FutureAGI, and a span processor (batched by default). Auto-instrumentors and manual spans both feed that provider. You can send over **HTTP** (default) or **gRPC**. Self-hosted deployments point the exporter at their own collector URL. See [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). + +--- -OTel collects traces, metrics, and logs to monitor system performance and events. +## Related concepts -You can learn more about how we trace applications using OpenTelemetry on our [traceAI](/docs/tracing/concepts/traceai) page. 
+ + + The FutureAGI layer built on OpenTelemetry. + + + The unit OpenTelemetry collects. + + + Configure the OTel exporter to FutureAGI. + + + Read the spans once they arrive. + + diff --git a/src/pages/docs/tracing/concepts/sessions-and-users.mdx b/src/pages/docs/tracing/concepts/sessions-and-users.mdx new file mode 100644 index 00000000..957f2748 --- /dev/null +++ b/src/pages/docs/tracing/concepts/sessions-and-users.mdx @@ -0,0 +1,103 @@ +--- +title: "What Are Sessions and Users?" +description: "Sessions and users are how FutureAGI groups traces — a session is one multi-turn conversation, a user is one end user across all their sessions and traces." +page_type: "concept" +diataxis: "explanation" +products: ["traceAI"] +concept_family: "tracing" +concept_level: "foundational" +primary_question: "What are sessions and users in FutureAGI?" +direct_answer: "A session groups the traces of one multi-turn conversation; a user groups every session and trace from one end user. Both are set with attributes on your spans." +audience: "engineer" +status: "review" +owner: "observability" +last_diagram_reviewed: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "group llm traces by session and user" +geo: + direct_answer: true +related: + concepts: ["/docs/tracing/concepts/traces"] + how_to: "/docs/observe/features/manual-tracing/set-session-user-id" + feature: "/docs/observe/features/session" +--- + +## About + +Sessions and users are the two ways FutureAGI groups individual traces into something larger. + +A single [trace](/docs/tracing/concepts/traces) is one request. But a chatbot conversation is many requests, and one customer has many conversations. A **session** ties together the traces of one multi-turn interaction — a whole support chat, start to finish. A **user** ties together every session and trace belonging to one end user. You create both by setting an attribute on your spans (`session.id` and `user.id`); FutureAGI does the grouping. 
Use them when a single trace is too small a unit — when you need a customer's whole history, not one message. + +--- + +## Why it matters + +Most real problems span more than one request. A bug report says "the assistant kept losing track" — that is a *session* problem, visible only when you read the conversation in order. A cost spike traces back to *one user* hammering an expensive model — visible only when you roll traces up per user. Without these groupings you are stuck reading isolated requests and reconstructing context by hand. With them, FutureAGI shows per-session and per-user metrics — trace count, latency, cost, eval pass-rate — so behavior over time becomes legible. + +--- + +## Mental model + +Users contain sessions; sessions contain traces; traces contain spans. It is one hierarchy, built from two attributes you set. + + S1["Session (session.id)"] + U --> S2["Session (session.id)"] + S1 --> T1["Trace"] + S1 --> T2["Trace"] + S2 --> T3["Trace"]`} /> + +A trace with a `session.id` joins (or starts) that session; a trace with a `user.id` is attributed to that user. A trace can carry both, one, or neither. + +--- + +## When to use + +- A user reports an issue that played out over several turns, and you need the whole conversation. +- You're analyzing drop-off or escalation across a multi-step chatbot flow. +- A cost or latency spike looks tied to specific end users and you want a per-user breakdown. +- You're checking whether quality (eval pass-rate) differs across users or cohorts. +- You want to filter or group the trace list by conversation or by person. + +--- + +## What it isn't + +- **A session is not a trace.** A trace is one request; a session is the set of traces in one conversation. See [Traces](/docs/tracing/concepts/traces). +- **A user is not a session.** A user spans many sessions over time; a session is one sitting. 
+- **A user ID is not an auth identity.** It is whatever stable identifier you choose to set (often a hashed customer ID) — FutureAGI does not authenticate it; it only groups by it. Avoid putting raw PII in it. + +--- + +## How FutureAGI represents sessions and users + +Both are span attributes you set in your instrumentation: + +| Attribute | What it does | +|---|---| +| `session.id` | Groups every trace carrying the same value into one session. | +| `user.id` | Attributes every trace carrying the same value to one user. | + +A session is created when the first trace with a given `session.id` arrives; a user is tracked the same way for `user.id`. You can see the groupings in the [Sessions](/docs/observe/features/session) and [Users](/docs/observe/features/users) views, each with its own rolled-up metrics. To set the attributes, see [Set session and user IDs](/docs/observe/features/manual-tracing/set-session-user-id). + +--- + +## Related concepts + + + + The single request that sessions and users group together. + + + Add the attributes that drive the grouping. + + + Read multi-turn conversations and per-session metrics. + + + Read per-user activity, cost, and quality. + + diff --git a/src/pages/docs/tracing/concepts/spans.mdx b/src/pages/docs/tracing/concepts/spans.mdx index cd7699f6..e8a19a2f 100644 --- a/src/pages/docs/tracing/concepts/spans.mdx +++ b/src/pages/docs/tracing/concepts/spans.mdx @@ -1,82 +1,112 @@ --- -title: "What are Spans? LLM, Tool, and Chain Span Types" -description: "Understand spans in Future AGI tracing. Learn about span types including LLM, tool, chain, retriever, and embedding spans with their attributes." +title: "What Is a Span?" +description: "A span is one operation inside a trace in FutureAGI — a model call, tool call, retrieval, or evaluator run — with its inputs, outputs, timing, and cost."
+page_type: "concept" +diataxis: "explanation" +products: ["traceAI"] +concept_family: "tracing" +concept_level: "foundational" +primary_question: "What is a span in FutureAGI?" +direct_answer: "A span is one timed operation inside a trace — a model call, tool call, retrieval, agent step, or evaluator run — recording its inputs, outputs, duration, and cost." +audience: "engineer" +status: "review" +owner: "observability" +last_diagram_reviewed: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "what is a span llm tracing" +geo: + direct_answer: true +related: + concepts: ["/docs/tracing/concepts/traces"] + how_to: "/docs/observe/features/manual-tracing/create-tool-spans" + reference: "/docs/observe/features/manual-tracing/semantic-conventions" --- -Spans are the fundamental units of tracing in observability frameworks, providing structured, event-level data for monitoring, debugging, and performance analysis. A span represents a discrete operation executed within a system, capturing execution timing, hierarchical relationships, and metadata relevant to the operation’s context. +## About -They are aggregated into traces, which collectively depict the flow of execution across various system components. This document provides an in-depth technical analysis of spans, their attributes, classifications, and their role in system observability. +A span is one operation inside a trace. ---- - -## Structure of Spans - -A span consists of multiple attributes that encapsulate its execution details. These attributes can be categorized into the following sections: - -- **Identification and context** provide the span's unique ID, trace ID, and optional parent span ID, establishing hierarchical relationships. It may also include a project reference for system-wide organization. - -- **Execution details** define the operation recorded, including a descriptive name, span type (e.g., function call, API request, database query), and input/output data. 
If an operation fails, error metadata captures failure details like error codes, messages, and stack traces. - -- **Timing and performance** track execution efficiency through start and end timestamps, latency measurement, and resource usage, such as computational cost or token consumption for LLM-related spans. - -- **Metadata and custom attributes** provide additional context via tags, annotations, and JSON-based extensible fields. Execution environment details, including host machine, service instance, and deployment version, further enrich observability. +In FutureAGI, a span records a single step in an AI request — a model call, tool call, retrieval, agent decision, guardrail check, evaluator run, or custom function. A trace is the whole request; spans are the steps inside it. Each span carries its own input and output, start and end time, duration, and (for model calls) token counts and cost. Spans matter because they show *where* latency, cost, errors, or bad outputs entered the system — the trace tells you a request was slow, the span tells you which step was slow. --- -## Types of Spans -Spans are categorized based on the type of operation they capture. This classification ensures structured trace analysis and aids in performance monitoring. +## Why it matters -- **Tool Spans** -It tracks operations executed by external tools or functions. It captures essential details, including the tool’s name, description, parameters, and performance metrics, enabling comprehensive monitoring of tool interactions. +A response is the sum of its steps, and failures usually live in one step, not the whole request. A span isolates that step: it shows the exact prompt sent to the model, the arguments passed to a tool, the chunks a retriever returned, or the score an evaluator assigned. 
traceAI captures spans automatically for supported frameworks, and lets you [add custom spans](/docs/observe/features/manual-tracing/create-tool-spans) where automatic instrumentation does not reach — so the part of your pipeline you most need to debug is never a black box. -- **Chain Spans** -It represents individual steps in a sequential workflow where data flows through multiple interconnected operations. It facilitates the visualization and analysis of execution pipelines, helping optimize process efficiency and detect bottlenecks. +--- -- **LLM Spans** -It captures interactions with large language models, recording input prompts, generated completions, token usage, and invocation parameters. These spans provide insights into model performance, response times, and computational costs. +## Mental model -- **Retriever Spans** -It logs data retrieval operations, such as querying a database or fetching documents from an index. It stores search parameters and results, ensuring traceability and facilitating performance assessment of retrieval mechanisms. +Spans nest. A parent span (say, an agent) contains the child spans it triggered, and each child can have children of its own. The nesting is how FutureAGI reconstructs which step caused which. -- **Embedding Spans** -It tracks text-to-vector transformations used in machine learning applications. It records embedding vectors, associated model metadata, and processing details, supporting efficient monitoring of vectorization processes. + B["chain span"] + A --> C["tool span"] + B --> D["retriever span"] + B --> E["llm span"]`} /> -- **Agent Spans** -It documents actions performed by autonomous agents, including decision-making logic and tool interactions. It captures the rationale behind an agent’s choices, providing transparency into automated workflows and AI-driven decision processes. +Each box is a span with its own timing and attributes. 
The edges are parent-child links recorded on the spans, which is what lets the [trace explorer](/docs/observe/features/llm-tracing) draw the tree. -- **Reranker Spans** -It logs result reordering or ranking adjustments based on specific scoring criteria. It retains input documents and their updated rankings, facilitating analysis of ranking models and relevance optimization. +--- -- **Unknown Spans** -It serves as a fallback for operations that do not fit predefined span types. It ensures that all observed activities are recorded, even when their category is not explicitly defined. +## When to use -- **Guardrail Spans** -It monitors compliance and enforce safety rules within a system. It captures validation results, applied policies, and compliance status, ensuring adherence to predefined operational constraints. +Spans aren't optional or an alternative to traces — every trace is made of spans, captured automatically. You'll look at an individual span when: -- **Evaluator Spans** -It represents assessment activities conducted to measure system performance or model effectiveness. It tracks evaluation metrics, scoring data, and feedback, supporting the continuous improvement of models and workflows. +- A request is slow and you need to find the single step responsible. +- An answer is wrong and you want the exact prompt and output of the step that produced it. +- A tool or retrieval misbehaved and you need its real arguments and results. +- You're adding instrumentation and need to decide what each step should capture. +- You want an eval or annotation attached to a specific step, not the whole request. --- -## Span Attributes +## What it isn't -Attributes are key-value pairs that contain metadata that can be used to annotate a span to carry information about the operation it is tracking. +- **A span is not a trace.** A trace is the full request; a span is one operation in it. See [Traces](/docs/tracing/concepts/traces). 
+- **A span is not a log line.** A log is a flat text event; a span is a timed unit with structured input, output, status, and attributes, linked to a parent. +- **A span is not an event.** Events are point-in-time markers *inside* a span (e.g. an exception); the span is the operation that contains them. -For example, if a span invokes an LLM, the model name, the invocation parameters, the token count etc. +--- -### Attribute Rules +## How FutureAGI represents a span -1. **Keys**: Must be non-null string values -2. **Values**: Must be one of the following non-null types: - - String - - Boolean - - Floating point value - - Integer - - Array of any of the above types +Every span has identification (its own ID, the trace ID, and an optional parent span ID), execution details (name, kind, input, output, error metadata), timing (start, end, latency), and attributes (key-value metadata, including token counts and cost for model calls). The **kind** classifies the operation, set via `fi.span.kind`: -### Semantic Attributes +| Span kind | Captures | +|---|---| +| `LLM` | A model call — prompt, completion, tokens, cost, model name. | +| `TOOL` | A function or external tool call — name, arguments, output. | +| `CHAIN` | A step in a sequential pipeline. | +| `RETRIEVER` | A search or document fetch — query and results. | +| `EMBEDDING` | A text-to-vector operation. | +| `RERANKER` | A reordering of retrieved results. | +| `AGENT` | An autonomous decision step. | +| `GUARDRAIL` | A safety or policy check. | +| `EVALUATOR` | A quality assessment attached to the span. | +| `UNKNOWN` | Fallback for an unclassified operation. | -Semantic Attributes are standardized naming conventions for common metadata present in typical operations. Using semantic attribute naming is recommended to ensure consistency across systems. +Attribute keys must be non-null strings; values must be a string, boolean, integer, float, or an array of those. 
Use the standard keys so spans stay queryable — see [semantic conventions](/docs/observe/features/manual-tracing/semantic-conventions). -> See [semantic conventions](/docs/observe/features/manual-tracing/semantic-conventions) for more information. +--- +## Related concepts + + + + The full request that spans are grouped into. + + + Add custom spans where auto-instrumentation stops. + + + The standard attribute keys for spans. + + + Inspect spans inside a trace. + + diff --git a/src/pages/docs/tracing/concepts/traceai.mdx b/src/pages/docs/tracing/concepts/traceai.mdx index 39c2f188..63566050 100644 --- a/src/pages/docs/tracing/concepts/traceai.mdx +++ b/src/pages/docs/tracing/concepts/traceai.mdx @@ -1,36 +1,106 @@ --- -title: "What is traceAI? Future AGI Open-Source Tracing SDK" -description: "Learn about traceAI, Future AGI's open-source package for standardized AI application tracing built on OpenTelemetry with framework-specific instrumentors." ---- - -An OSS package to enable standardized tracing of AI applications and frameworks - -traceAI is a set of conventions and plugins that is complimentary to OpenTelemetry to enable tracing of AI applications. It instruments and monitors different code executions across models, frameworks, and vendors and maps them to a set of standardized attributes for traces and spans. - -traceAI is natively supported by Future AGI, but can be used with any OpenTelemetry-compatible backend as well. traceAI provides a set of instrumentations for popular machine learning SDKs and frameworks in a variety of languages. - -## Python - -| Package | Description | Version | -|---------|-------------|----------| -| `traceAI-openai` | traceAI Instrumentation for OpenAI. | [![PyPI](https://img.shields.io/pypi/v/traceAI-openai)](https://pypi.org/project/traceAI-openai)| -| `traceAI-anthropic` | traceAI Instrumentation for Anthropic. 
| [![PyPI](https://img.shields.io/pypi/v/traceAI-anthropic)](https://pypi.org/project/traceAI-anthropic)| -| `traceAI-llamaindex` | traceAI Instrumentation for LlamaIndex. | [![PyPI](https://img.shields.io/pypi/v/traceAI-llamaindex)](https://pypi.org/project/traceAI-llamaindex)| -| `traceAI-langchain` | traceAI Instrumentation for LangChain. | [![PyPI](https://img.shields.io/pypi/v/traceAI-langchain)](https://pypi.org/project/traceAI-langchain)| -| `traceAI-mcp` | traceAI Instrumentation for MCP. | [![PyPI](https://img.shields.io/pypi/v/traceAI-mcp)](https://pypi.org/project/traceAI-mcp)| -| `traceAI-mistralai` | traceAI Instrumentation for MistralAI. | [![PyPI](https://img.shields.io/pypi/v/traceAI-mistralai)](https://pypi.org/project/traceAI-mistralai)| -| `traceAI-vertexai` | traceAI Instrumentation for VertexAI. | [![PyPI](https://img.shields.io/pypi/v/traceAI-vertexai)](https://pypi.org/project/traceAI-vertexai)| -| `traceAI-google-genai` | traceAI Instrumentation for Google GenAI. | [![PyPI](https://img.shields.io/pypi/v/traceAI-google-genai)](https://pypi.org/project/traceAI-google-genai)| -| `traceAI-google-adk` | traceAI Instrumentation for Google ADK. | [![PyPI](https://img.shields.io/pypi/v/traceAI-google-adk)](https://pypi.org/project/traceAI-google-adk) -| `traceAI-crewai` | traceAI Instrumentation for CrewAI. | [![PyPI](https://img.shields.io/pypi/v/traceAI-crewai)](https://pypi.org/project/traceAI-crewai)| -| `traceAI-haystack` | traceAI Instrumentation for Haystack. | [![PyPI](https://img.shields.io/pypi/v/traceAI-haystack)](https://pypi.org/project/traceAI-haystack)| -| `traceAI-litellm` | traceAI Instrumentation for liteLLM. | [![PyPI](https://img.shields.io/pypi/v/traceAI-litellm)](https://pypi.org/project/traceAI-litellm)| -| `traceAI-groq` | traceAI Instrumentation for Groq. | [![PyPI](https://img.shields.io/pypi/v/traceAI-groq)](https://pypi.org/project/traceAI-groq)| -| `traceAI-autogen` | traceAI Instrumentation for Autogen. 
| [![PyPI](https://img.shields.io/pypi/v/traceAI-autogen)](https://pypi.org/project/traceAI-autogen)| -| `traceAI-guardrails` | traceAI Instrumentation for Guardrails. | [![PyPI](https://img.shields.io/pypi/v/traceAI-guardrails)](https://pypi.org/project/traceAI-guardrails)| -| `traceAI-openai-agents` | traceAI Instrumentation for OpenAI Agents. | [![PyPI](https://img.shields.io/pypi/v/traceAI-openai-agents)](https://pypi.org/project/traceAI-openai-agents)| -| `traceAI-smolagents` | traceAI Instrumentation for SmolAgents. | [![PyPI](https://img.shields.io/pypi/v/traceAI-smolagents)](https://pypi.org/project/traceAI-smolagents)| -| `traceAI-dspy` | traceAI Instrumentation for DSPy. | [![PyPI](https://img.shields.io/pypi/v/traceAI-dspy)](https://pypi.org/project/traceAI-dspy)| -| `traceAI-bedrock` | traceAI Instrumentation for AWS Bedrock. | [![PyPI](https://img.shields.io/pypi/v/traceAI-bedrock)](https://pypi.org/project/traceAI-bedrock)| -| `traceAI-portkey` | traceAI Instrumentation for Portkey. | [![PyPI](https://img.shields.io/pypi/v/traceAI-portkey)](https://pypi.org/project/traceAI-portkey)| -| `traceAI-instructor` | traceAI Instrumentation for Instructor. | [![PyPI](https://img.shields.io/pypi/v/traceAI-instructor)](https://pypi.org/project/traceAI-instructor)| \ No newline at end of file +title: "What Is traceAI?" +description: "traceAI is FutureAGI's open-source instrumentation layer on OpenTelemetry — it captures LLM, tool, and retrieval calls as standardized spans across frameworks." +page_type: "concept" +diataxis: "explanation" +products: ["traceAI"] +concept_family: "tracing" +concept_level: "foundational" +primary_question: "What is traceAI?" +direct_answer: "traceAI is FutureAGI's open-source instrumentation layer on OpenTelemetry. It captures model, tool, and retrieval calls as standardized spans across frameworks and vendors." 
+audience: "engineer" +status: "review" +owner: "observability" +last_diagram_reviewed: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "what is traceai instrumentation" +geo: + direct_answer: true +related: + concepts: ["/docs/tracing/concepts/otel"] + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + feature: "/docs/observe/features/llm-tracing" +--- + +## About + +traceAI is FutureAGI's open-source instrumentation layer, built on [OpenTelemetry](/docs/tracing/concepts/otel). It's a set of conventions and per-framework instrumentors that capture what your AI app does — model calls, tool calls, retrievals, agent steps — and map them to standardized [span](/docs/tracing/concepts/spans) attributes. Add the instrumentor for your framework, and those calls become traces in Observe without hand-writing spans. traceAI is natively supported by FutureAGI but emits standard OTel, so it works with any OTel-compatible backend too. + +--- + +## Why it matters + +Raw OpenTelemetry knows nothing about LLMs — it has no concept of a prompt, a completion, token cost, or a tool call. traceAI fills that gap: it turns framework calls into *LLM-shaped* spans with consistent keys, so a LangChain trace and an OpenAI trace look the same in Observe and are queryable the same way. That standardization is what makes filtering, evals, and dashboards work across different stacks. + +--- + +## Mental model + +traceAI is the adapter between your framework and OpenTelemetry: the instrumentor wraps the framework, produces standardized spans, and hands them to the OTel pipeline that exports to FutureAGI. + + B[traceAI instrumentor] + B --> C[Standardized OTel spans] + C --> D[FutureAGI Observe]`} /> + +You pick the instrumentor that matches your framework; the rest of the pipeline is the same OTel flow for everyone. + +--- + +## When to use + +- You want LLM/agent calls traced without writing spans by hand. 
+- You use a supported framework (OpenAI, LangChain, LlamaIndex, CrewAI, …). +- You want consistent, queryable span attributes across different SDKs. +- You want instrumentation that stays portable across OTel backends. + +--- + +## What it isn't + +- **traceAI is not a backend.** It produces spans; FutureAGI stores and displays them. +- **traceAI is not a replacement for OpenTelemetry.** It's complementary — conventions and instrumentors *on top of* OTel. See [OpenTelemetry](/docs/tracing/concepts/otel). +- **traceAI is not only for FutureAGI.** It emits standard OTel and works with any compatible backend. + +--- + +## How FutureAGI represents traceAI + +traceAI ships as the core `fi-instrumentation-otel` package plus a per-framework instrumentor you install alongside it. A sample of the Python instrumentors: + +| Package | Instruments | +|---|---| +| `traceAI-openai` | OpenAI | +| `traceAI-anthropic` | Anthropic | +| `traceAI-langchain` | LangChain | +| `traceAI-llamaindex` | LlamaIndex | +| `traceAI-crewai` | CrewAI | +| `traceAI-bedrock` | AWS Bedrock | +| `traceAI-litellm` | LiteLLM | +| `traceAI-google-adk` | Google ADK | +| `traceAI-dspy` | DSPy | +| `traceAI-haystack` | Haystack | + +See the full, current list in the [Auto Instrumentation catalog](/docs/tracing/auto). To wire one up, see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). + +--- + +## Related concepts + + + + The standard traceAI is built on. + + + What traceAI produces. + + + Install an instrumentor and start capturing. + + + Every supported framework. + + diff --git a/src/pages/docs/tracing/concepts/traces.mdx b/src/pages/docs/tracing/concepts/traces.mdx index 1df11412..043b56f3 100644 --- a/src/pages/docs/tracing/concepts/traces.mdx +++ b/src/pages/docs/tracing/concepts/traces.mdx @@ -1,25 +1,107 @@ --- -title: "What are Traces? 
Understanding Trace Structure" -description: "A trace in Future AGI represents the full execution flow of an AI request, composed of spans that capture each LLM call, tool use, and retrieval step." +title: "What Is a Trace?" +description: "A trace is the complete record of one AI request — every model call, tool call, retrieval, output, latency, and cost for a single response." +page_type: "concept" +diataxis: "explanation" +products: ["traceAI"] +concept_family: "tracing" +concept_level: "foundational" +primary_question: "What is a trace in FutureAGI?" +direct_answer: "A trace is the complete record of one AI request: the ordered spans — model calls, tool calls, retrievals, and evaluator runs — that produced a single response." +audience: "engineer" +status: "review" +owner: "observability" +last_diagram_reviewed: "2026-05-25" +schema_type: "TechArticle" +seo: + primary_query: "what is a trace llm observability" +geo: + direct_answer: true +related: + concepts: ["/docs/tracing/concepts/spans"] + how_to: "/docs/observe/features/manual-tracing/set-up-tracing" + feature: "/docs/observe/features/llm-tracing" --- -## Key Features -1. **Execution Flow:** -A trace captures the entire lifecycle of a request, from initiation to completion. It records the sequence of operations and their interactions, providing a detailed map of the request's journey through the system. -2. **Span Aggregation:** -Traces are composed of multiple spans, each representing a discrete operation. By aggregating these spans, traces offer a structured view of the execution flow, highlighting dependencies and interactions between different components. -3. **Performance Analysis:** -Traces are essential for performance analysis, as they allow teams to measure latency, identify bottlenecks, and optimize system efficiency. By examining the execution flow, teams can pinpoint areas for improvement and ensure optimal performance. -4. 
**Debugging and Diagnostics:** -Traces provide a detailed execution path, enabling teams to trace unexpected behaviors and diagnose issues effectively. By following the flow of a request, teams can identify the root cause of errors and implement corrective measures. +## About + +A trace is the complete record of one AI request. + +For example, when a support agent answers a refund question, the reply is only the last artifact. The trace shows everything that produced it: the user input, the model's intent classification, the order-lookup tool call, the retrieved policy text, the final generation, and the latency, token cost, and evaluator scores at each step. A trace is made of **spans** — one per operation — linked into a tree by a shared trace ID. Reach for a trace when you need to debug why a request failed, reproduce a production issue, or turn a bad response into a regression test. + +--- + +## Why it matters + +Without traces, a wrong or slow AI response is a dead end — you see the output but not the five steps that caused it, so debugging becomes guesswork over logs. A trace turns that into a readable execution path: you can see that the retriever returned the wrong policy chunk, or that one tool call took four seconds, or that an eval flagged the answer as unsupported. In FutureAGI, traceAI captures this automatically for supported frameworks and sends it to Observe, where latency, cost, errors, and quality are all attached to the same request. + +--- + +## Mental model + +A trace is a **tree** — a branching hierarchy where each step nests under the step that triggered it. The whole request *is* the trace; its **root span** is the operation that started the request, and every other span hangs beneath the step that called it. 
For example, a single support-agent request looks like this: + + S1["llm.intent_classification"] + T --> S2["tool.check_order_status"] + T --> S3["chain.generate_reply"] + S3 --> S4["retriever.knowledge_base"] + S3 --> S5["llm.response_generation"]`} /> + +Every span in that tree shares one trace ID, so FutureAGI can reassemble the steps — even when they run across async tasks or services — into a single request you can read top to bottom. --- -## Use Cases -1. **Dependency Analysis:** Traces help in understanding the dependencies between different operations within a system, allowing teams to optimize workflows and improve efficiency. -2. **Performance Monitoring:** By measuring latency across spans, traces can identify performance bottlenecks and areas for optimization, ensuring that the system operates at peak efficiency. -3. **Error Diagnosis:** Traces provide a detailed execution path, allowing teams to trace unexpected behaviors from input to output and diagnose issues effectively. +## When to use + +- A user reports a bad answer and you need the exact request to see what the AI actually did. +- A request was slow and you want to find which step — model, tool, or retrieval — spent the time. +- You're reproducing a production failure and need the real inputs, not a paraphrase. +- You want to promote a real production request into a dataset for evals or fine-tuning. +- You're reviewing how an agent reasoned, step by step, before it answered. + +--- + +## What it isn't + +- **A trace is not a span.** A span is one operation; a trace is the whole request made of many spans. See [Spans](/docs/tracing/concepts/spans). +- **A trace is not a session.** A session groups the traces of one multi-turn conversation; a trace is a single request inside it. See [Sessions](/docs/observe/features/session).
+- **A trace is not a log line.** Logs are flat text events; a trace is a timed, structured tree with inputs, outputs, cost, and [eval scores](/docs/observe/features/evals) attached to each step. + +--- + +## How FutureAGI represents a trace + +traceAI emits OpenTelemetry spans; FutureAGI groups them by trace ID and stores the tree. In the [trace explorer](/docs/observe/features/llm-tracing), one row is one trace, and opening it shows the span tree plus per-step input, output, latency, token counts, cost, and any eval or annotation attached. A trace carries: + +| Field | What it holds | +|---|---| +| **Trace ID** | The shared identifier that links every span in the request. | +| **Trace name** | The top-level task, e.g. `support_agent.run`. | +| **Spans** | The ordered operations — model, tool, retriever, evaluator — that ran. | +| **Status** | OK, or ERROR if any span failed. | +| **Latency / tokens / cost** | Totals rolled up from the spans. | +| **Session & user IDs** | Optional links that group the trace by conversation and end user. | + +To get traces flowing, see [Set up tracing](/docs/observe/features/manual-tracing/set-up-tracing). To name the operations inside a trace consistently, see [semantic conventions](/docs/observe/features/manual-tracing/semantic-conventions). --- -In summary, traces are a vital component of observability frameworks, providing a structured and comprehensive view of the execution flow within a system. They enable teams to analyze dependencies, monitor performance, and diagnose issues, ensuring the reliability and efficiency of the system. +## Related concepts + + + + The individual operations a trace is built from. + + + Read and debug traces in Observe. + + + Instrument your app so it emits traces. + + + Group multiple traces into one conversation. 
+ + diff --git a/src/pages/index.astro b/src/pages/index.astro index 8e92d422..228f3adf 100644 --- a/src/pages/index.astro +++ b/src/pages/index.astro @@ -36,7 +36,7 @@ const sections = [ color: "blue", href: "/docs/observe", links: [ - { title: "Quickstart", href: "/docs/observe/quickstart" }, + { title: "Quickstart", href: "/docs/observe/features/quickstart" }, { title: "Tracing", href: "/docs/tracing" }, ] }, diff --git a/src/plugins/vite-docs-transform.mjs b/src/plugins/vite-docs-transform.mjs index dab3550b..2dffacfc 100644 --- a/src/plugins/vite-docs-transform.mjs +++ b/src/plugins/vite-docs-transform.mjs @@ -22,6 +22,7 @@ const COMPONENT_MAP = { CopyButton: '@docs/CopyButton.astro', Expandable: '@docs/Expandable.astro', Icon: '@docs/Icon.astro', + Mermaid: '@docs/Mermaid.astro', Note: '@docs/Note.astro', ParamField: '@docs/ParamField.astro', Prerequisites: '@docs/Prerequisites.astro',