From 995a2ff1e6ed273d253e89348de882a5104da77a Mon Sep 17 00:00:00 2001 From: sk-portkey Date: Tue, 18 Nov 2025 14:14:46 +0530 Subject: [PATCH 1/2] feat: usage and rate limit policies --- openapi.yaml | 1337 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 1101 insertions(+), 236 deletions(-) diff --git a/openapi.yaml b/openapi.yaml index 5a14fb37..c1c6fe49 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -106,6 +106,11 @@ tags: description: Get overall summary for the selected time bucket. - name: Analytics > Groups description: Get grouped metrics for the selected time bucket. + - name: Usage Limits Policies + description: Manage usage limits policies to control total usage over time + - name: Rate Limits Policies + description: Manage rate limits policies to control request or token rates + paths: # Note: When adding an endpoint, make sure you also add it in the `groups` section, in the end of this file, @@ -18798,237 +18803,689 @@ paths: - object - data -components: - securitySchemes: - Portkey-Key: - type: apiKey - in: header - name: x-portkey-api-key - Virtual-Key: - type: apiKey - in: header - name: x-portkey-virtual-key - Provider-Auth: - type: http - scheme: "bearer" - Provider-Name: - type: apiKey - in: header - name: x-portkey-provider - Config: - type: apiKey - in: header - name: x-portkey-config - Custom-Host: - type: apiKey - in: header - name: x-portkey-custom-host + /policies/usage-limits: + post: + tags: + - Usage Limits Policies + summary: Create Usage Limits Policy + description: Create a new usage limits policy to control total usage (cost or tokens) over a period. + operationId: createUsageLimitsPolicy + security: + - BearerAuth: [] + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateUsageLimitsPolicyRequest' + examples: + monthlyCostLimit: + summary: Monthly Cost Limit per API Key + value: + name: Monthly Cost Limit per API Key + conditions: + - key: workspace_id + value: workspace-123 + group_by: + - key: api_key + type: cost + credit_limit: 1000.0 + alert_threshold: 800.0 + periodic_reset: monthly + tokenLimit: + summary: Token Limit per User + value: + name: Token Limit per User + conditions: + - key: workspace_id + value: workspace-123 + group_by: + - key: metadata.user_id + type: tokens + credit_limit: 1000000 + periodic_reset: weekly + responses: + '200': + description: Policy created successfully + content: + application/json: + schema: + $ref: '#/components/schemas/CreatePolicyResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error - parameters: - TimeOfGenerationMin: - in: query - name: time_of_generation_min - required: true - schema: - type: string - format: date-time - description: Minimum time of generation (ISO8601 format) - example: "2024-08-23T15:50:23+05:30" - TimeOfGenerationMax: - in: query - name: time_of_generation_max - required: true - schema: - type: string - format: date-time - description: Maximum time of generation (ISO8601 format) - example: "2024-08-23T15:50:23+05:30" - TotalUnitsMin: - in: query - name: total_units_min - schema: - type: integer - minimum: 0 - description: Minimum total units (tokens) - TotalUnitsMax: - in: query - name: total_units_max - schema: - type: integer - minimum: 0 - description: Maximum total units (tokens) - CostMin: - in: query - name: cost_min - schema: - type: number - minimum: 0 - description: Minimum cost (in cents) - CostMax: - in: query - name: cost_max - schema: - type: number - minimum: 0 - description: Maximum cost (in cents) - PromptTokenMin: - in: query - name: prompt_token_min - schema: - type: integer - minimum: 0 - description: Minimum number of prompt tokens - PromptTokenMax: - in: query - name: prompt_token_max - schema: - type: integer - minimum: 0 - description: Maximum number of prompt tokens - CompletionTokenMin: - in: query - name: completion_token_min - schema: - type: integer - minimum: 0 - description: Minimum number of completion tokens - CompletionTokenMax: - in: query - name: completion_token_max - schema: - type: integer - minimum: 0 - description: Maximum number of completion tokens - StatusCode: - in: query - name: status_code - schema: - type: string - description: Comma separated response status codes - example: 401,403 - PageSize: - in: query - name: page_size - schema: - type: integer - minimum: 0 - description: Number of items per page - CurrentPage: - in: query - name: current_page - schema: - type: integer - minimum: 0 - description: Current page number - WeightedFeedbackMin: - in: query - name: weighted_feedback_min - schema: - type: number - minimum: -10 - maximum: 10 - description: Minimum weighted feedback score - WeightedFeedbackMax: - in: query - name: weighted_feedback_max - schema: - type: number - minimum: -10 - maximum: 10 - description: Maximum weighted feedback score - OrderBy: - in: query - name: order_by - schema: - type: string - description: Field to order results by - OrderByType: - in: query - name: order_by_type - schema: - type: string - description: Type of ordering (e.g., asc, desc) - VirtualKeys: - in: query - name: virtual_keys - schema: - type: string - description: Comma separated virtual key slugs - example: vk-slug-1,vk-slug-2 - Configs: - in: query - name: configs - schema: - type: string - description: Comma separated config slugs - example: pc-config-slug-1,pc-config-slug-2 - WorkspaceSlug: - in: query - name: workspace_slug - schema: - type: string - description: Workspace slug filter. If a workspace API key is being used, this filter will not be taken into consideration. If an organisation API key is used and no workspace slug is passed, default workspace will be used. - ApiKeyIds: - in: query - name: api_key_ids - schema: - type: string - description: Comma separated API key UUIDs - example: 765768a9-b4ec-4694-962c-d55f40cdb0dc,7c22af5a-8119-46b8-8d9b-bad3ad382387 - Metadata: - in: query - name: metadata - schema: - type: string - description: Stringifed json object with key value metadata pairs - example: '{"_user":"user_1", "env": "staging"}' - AiOrgModel: - in: query - name: ai_org_model - schema: - type: string - description: Comma separated ai provider and model combination. Double underscore (__) should be used as a separator for each provider and model combination - example: openai__gpt-3.5-turbo,azure-openai__gpt-35-turbo - TraceId: - in: query - name: trace_id - schema: - type: string - description: Comma separated trace IDs - example: my-unique-trace-1,my-unique-trace-2 - SpanId: - in: query - name: span_id - schema: - type: string - description: Comma separated span IDs - example: my-unique-span-1,my-unique-span-2 - PromptSlug: - in: query - name: prompt_slug - schema: - type: string - description: Comma separated prompt slugs - example: prompt-slug-1,prompt-slug-2 - PortkeyTraceId: - in: header - name: x-portkey-trace-id - schema: - type: string - description: An ID you can pass to refer to one or more requests later on. If not provided, Portkey generates a trace ID automatically for each request. [Docs](https://portkey.ai/docs/product/observability/traces) - required: false - PortkeySpanId: - in: header - name: x-portkey-span-id - schema: - type: string - description: An ID you can pass to refer to a span under a trace. - required: false - PortkeySpanName: - in: header - name: x-portkey-span-name - schema: - type: string + get: + tags: + - Usage Limits Policies + summary: List Usage Limits Policies + description: List all usage limits policies with optional filtering. + operationId: listUsageLimitsPolicies + security: + - BearerAuth: [] + - ApiKeyAuth: [] + parameters: + - $ref: '#/components/parameters/WorkspaceIdQuery' + - name: status + in: query + description: Filter by status + required: false + schema: + type: string + enum: [active, archived] + default: active + - name: type + in: query + description: Filter by policy type + required: false + schema: + type: string + enum: [cost, tokens] + - $ref: '#/components/parameters/PageSize' + - $ref: '#/components/parameters/CurrentPage' + responses: + '200': + description: List of usage limits policies + content: + application/json: + schema: + $ref: '#/components/schemas/UsageLimitsPolicyListResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error + + /policies/usage-limits/{policyUsageLimitsId}: + get: + tags: + - Usage Limits Policies + summary: Get Usage Limits Policy + description: Get a single usage limits policy by ID. + operationId: getUsageLimitsPolicy + security: + - BearerAuth: [] + - ApiKeyAuth: [] + parameters: + - $ref: '#/components/parameters/PolicyUsageLimitsId' + - name: status + in: query + description: Filter by status + required: false + schema: + type: string + enum: [active, archived] + default: active + - name: include_usage + in: query + description: Include usage information for each value key + required: false + schema: + type: boolean + default: false + responses: + '200': + description: Usage limits policy details + content: + application/json: + schema: + $ref: '#/components/schemas/UsageLimitsPolicyResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error + + put: + tags: + - Usage Limits Policies + summary: Update Usage Limits Policy + description: Update an existing usage limits policy. + operationId: updateUsageLimitsPolicy + security: + - BearerAuth: [] + - ApiKeyAuth: [] + parameters: + - $ref: '#/components/parameters/PolicyUsageLimitsId' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateUsageLimitsPolicyRequest' + example: + credit_limit: 2000.0 + alert_threshold: 1500.0 + reset_usage_for_value: api-key-123 + responses: + '200': + description: Policy updated successfully + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error + + delete: + tags: + - Usage Limits Policies + summary: Delete Usage Limits Policy + description: Archive (soft delete) a usage limits policy. + operationId: deleteUsageLimitsPolicy + security: + - BearerAuth: [] + - ApiKeyAuth: [] + parameters: + - $ref: '#/components/parameters/PolicyUsageLimitsId' + responses: + '200': + description: OK + headers: + Content-Type: + schema: + type: string + example: application/json + content: + application/json: + schema: + type: object + example: {} + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error + + /policies/rate-limits: + post: + tags: + - Rate Limits Policies + summary: Create Rate Limits Policy + description: Create a new rate limits policy to control the rate of requests or tokens consumed per minute, hour, or day. + operationId: createRateLimitsPolicy + security: + - BearerAuth: [] + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateRateLimitsPolicyRequest' + examples: + requestsPerMinute: + summary: 100 Requests per Minute per API Key + value: + name: 100 RPM per API Key + conditions: + - key: workspace_id + value: workspace-123 + group_by: + - key: api_key + type: requests + unit: rpm + value: 100 + tokensPerHour: + summary: 10K Tokens per Hour per User + value: + name: 10K Tokens per Hour per User + conditions: + - key: workspace_id + value: workspace-123 + group_by: + - key: metadata.user_id + type: tokens + unit: rph + value: 10000 + responses: + '200': + description: Policy created successfully + content: + application/json: + schema: + $ref: '#/components/schemas/CreatePolicyResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '500': + description: Server error + + get: + tags: + - Rate Limits Policies + summary: List Rate Limits Policies + description: List all rate limits policies with optional filtering. + operationId: listRateLimitsPolicies + security: + - BearerAuth: [] + - ApiKeyAuth: [] + parameters: + - $ref: '#/components/parameters/WorkspaceIdQuery' + - name: status + in: query + description: Filter by status + required: false + schema: + type: string + enum: [active, archived] + default: active + - name: type + in: query + description: Filter by policy type + required: false + schema: + type: string + enum: [requests, tokens] + - name: unit + in: query + description: Filter by rate unit + required: false + schema: + type: string + enum: [rpm, rph, rpd] + - $ref: '#/components/parameters/PageSize' + - $ref: '#/components/parameters/CurrentPage' + responses: + '200': + description: List of rate limits policies + content: + application/json: + schema: + $ref: '#/components/schemas/RateLimitsPolicyListResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error + + /policies/rate-limits/{rateLimitsPolicyId}: + get: + tags: + - Rate Limits Policies + summary: Get Rate Limits Policy + description: Get a single rate limits policy by ID. + operationId: getRateLimitsPolicy + security: + - BearerAuth: [] + - ApiKeyAuth: [] + parameters: + - $ref: '#/components/parameters/RateLimitsPolicyId' + - name: status + in: query + description: Filter by status + required: false + schema: + type: string + enum: [active, archived] + default: active + responses: + '200': + description: Rate limits policy details + content: + application/json: + schema: + $ref: '#/components/schemas/RateLimitsPolicyResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error + + put: + tags: + - Rate Limits Policies + summary: Update Rate Limits Policy + description: Update an existing rate limits policy. + operationId: updateRateLimitsPolicy + security: + - BearerAuth: [] + - ApiKeyAuth: [] + parameters: + - $ref: '#/components/parameters/RateLimitsPolicyId' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateRateLimitsPolicyRequest' + example: + value: 200 + unit: rph + responses: + '200': + description: OK + headers: + Content-Type: + schema: + type: string + example: application/json + content: + application/json: + schema: + type: object + example: {} + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error + + delete: + tags: + - Rate Limits Policies + summary: Delete Rate Limits Policy + description: Delete a rate limits policy. + operationId: deleteRateLimitsPolicy + security: + - BearerAuth: [] + - ApiKeyAuth: [] + parameters: + - $ref: '#/components/parameters/RateLimitsPolicyId' + responses: + '200': + description: OK + headers: + Content-Type: + schema: + type: string + example: application/json + content: + application/json: + schema: + type: object + example: {} + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error + +components: + securitySchemes: + Portkey-Key: + type: apiKey + in: header + name: x-portkey-api-key + Virtual-Key: + type: apiKey + in: header + name: x-portkey-virtual-key + Provider-Auth: + type: http + scheme: "bearer" + Provider-Name: + type: apiKey + in: header + name: x-portkey-provider + Config: + type: apiKey + in: header + name: x-portkey-config + Custom-Host: + type: apiKey + in: header + name: x-portkey-custom-host + + parameters: + TimeOfGenerationMin: + in: query + name: time_of_generation_min + required: true + schema: + type: string + format: date-time + description: Minimum time of generation (ISO8601 format) + example: "2024-08-23T15:50:23+05:30" + TimeOfGenerationMax: + in: query + name: time_of_generation_max + required: true + schema: + type: string + format: date-time + description: Maximum time of generation (ISO8601 format) + example: "2024-08-23T15:50:23+05:30" + TotalUnitsMin: + in: query + name: total_units_min + schema: + type: integer + minimum: 0 + description: Minimum total units (tokens) + TotalUnitsMax: + in: query + name: total_units_max + schema: + type: integer + minimum: 0 + description: Maximum total units (tokens) + CostMin: + in: query + name: cost_min + schema: + type: number + minimum: 0 + description: Minimum cost (in cents) + CostMax: + in: query + name: cost_max + schema: + type: number + minimum: 0 + description: Maximum cost (in cents) + PromptTokenMin: + in: query + name: prompt_token_min + schema: + type: integer + minimum: 0 + description: Minimum number of prompt tokens + PromptTokenMax: + in: query + name: prompt_token_max + schema: + type: integer + minimum: 0 + description: Maximum number of prompt tokens + CompletionTokenMin: + in: query + name: completion_token_min + schema: + type: integer + minimum: 0 + description: Minimum number of completion tokens + CompletionTokenMax: + in: query + name: completion_token_max + schema: + type: integer + minimum: 0 + description: Maximum number of completion tokens + StatusCode: + in: query + name: status_code + schema: + type: string + description: Comma separated response status codes + example: 401,403 + PageSize: + in: query + name: page_size + schema: + type: integer + minimum: 0 + description: Number of items per page + CurrentPage: + in: query + name: current_page + schema: + type: integer + minimum: 0 + description: Current page number + WeightedFeedbackMin: + in: query + name: weighted_feedback_min + schema: + type: number + minimum: -10 + maximum: 10 + description: Minimum weighted feedback score + WeightedFeedbackMax: + in: query + name: weighted_feedback_max + schema: + type: number + minimum: -10 + maximum: 10 + description: Maximum weighted feedback score + OrderBy: + in: query + name: order_by + schema: + type: string + description: Field to order results by + OrderByType: + in: query + name: order_by_type + schema: + type: string + description: Type of ordering (e.g., asc, desc) + VirtualKeys: + in: query + name: virtual_keys + schema: + type: string + description: Comma separated virtual key slugs + example: vk-slug-1,vk-slug-2 + Configs: + in: query + name: configs + schema: + type: string + description: Comma separated config slugs + example: pc-config-slug-1,pc-config-slug-2 + WorkspaceSlug: + in: query + name: workspace_slug + schema: + type: string + description: Workspace slug filter. If a workspace API key is being used, this filter will not be taken into consideration. If an organisation API key is used and no workspace slug is passed, default workspace will be used. + ApiKeyIds: + in: query + name: api_key_ids + schema: + type: string + description: Comma separated API key UUIDs + example: 765768a9-b4ec-4694-962c-d55f40cdb0dc,7c22af5a-8119-46b8-8d9b-bad3ad382387 + Metadata: + in: query + name: metadata + schema: + type: string + description: Stringifed json object with key value metadata pairs + example: '{"_user":"user_1", "env": "staging"}' + AiOrgModel: + in: query + name: ai_org_model + schema: + type: string + description: Comma separated ai provider and model combination. Double underscore (__) should be used as a separator for each provider and model combination + example: openai__gpt-3.5-turbo,azure-openai__gpt-35-turbo + TraceId: + in: query + name: trace_id + schema: + type: string + description: Comma separated trace IDs + example: my-unique-trace-1,my-unique-trace-2 + SpanId: + in: query + name: span_id + schema: + type: string + description: Comma separated span IDs + example: my-unique-span-1,my-unique-span-2 + PromptSlug: + in: query + name: prompt_slug + schema: + type: string + description: Comma separated prompt slugs + example: prompt-slug-1,prompt-slug-2 + PortkeyTraceId: + in: header + name: x-portkey-trace-id + schema: + type: string + description: An ID you can pass to refer to one or more requests later on. If not provided, Portkey generates a trace ID automatically for each request. [Docs](https://portkey.ai/docs/product/observability/traces) + required: false + PortkeySpanId: + in: header + name: x-portkey-span-id + schema: + type: string + description: An ID you can pass to refer to a span under a trace. + required: false + PortkeySpanName: + in: header + name: x-portkey-span-name + schema: + type: string description: Name for the Span ID required: false PortkeyParentSpanId: @@ -19050,13 +19507,37 @@ components: name: x-portkey-cache-namespace schema: type: string - description: Partition your Portkey cache store based on custom strings, ignoring metadata and other headers - PortkeyCacheForceRefresh: - in: header - name: x-portkey-cache-force-refresh + description: Partition your Portkey cache store based on custom strings, ignoring metadata and other headers + PortkeyCacheForceRefresh: + in: header + name: x-portkey-cache-force-refresh + schema: + type: boolean + description: Forces a cache refresh for your request by making a new API call and storing the updated value + + PolicyUsageLimitsId: + name: policyUsageLimitsId + in: path + required: true + description: Usage limits policy UUID + schema: + type: string + format: uuid + RateLimitsPolicyId: + name: rateLimitsPolicyId + in: path + required: true + description: Rate limits policy UUID + schema: + type: string + format: uuid + WorkspaceIdQuery: + name: workspace_id + in: query + required: false + description: Workspace ID or slug schema: - type: boolean - description: Forces a cache refresh for your request by making a new API call and storing the updated value + type: string schemas: Error: @@ -32319,6 +32800,390 @@ components: VertexBatchParams: type: object + Condition: + type: object + required: + - key + - value + properties: + key: + type: string + description: | + Condition key. Valid values: + - `api_key` - Apply to a specific API key + - `organisation_id` - Apply to an organization + - `workspace_id` - Apply to a workspace + - `metadata.*` - Apply based on custom metadata fields (e.g., `metadata.user_id`, `metadata.team`) + example: workspace_id + value: + type: string + description: Condition value + example: workspace-123 + + GroupBy: + type: object + required: + - key + properties: + key: + type: string + description: | + Group by key. Valid values: + - `api_key` - Group by API key + - `organisation_id` - Group by organization + - `workspace_id` - Group by workspace + - `metadata.*` - Group by custom metadata fields + example: api_key + + CreateUsageLimitsPolicyRequest: + type: object + required: + - conditions + - group_by + - type + - credit_limit + properties: + name: + type: string + maxLength: 255 + description: Policy name + example: Monthly Cost Limit + conditions: + type: array + minItems: 1 + items: + $ref: '#/components/schemas/Condition' + description: Array of conditions that define which requests the policy applies to + group_by: + type: array + minItems: 1 + items: + $ref: '#/components/schemas/GroupBy' + description: Array of group by fields that define how usage is aggregated + type: + type: string + enum: [cost, tokens] + description: Policy type + credit_limit: + type: number + minimum: 0 + description: Maximum usage allowed + alert_threshold: + type: number + nullable: true + minimum: 0 + description: Threshold at which to send alerts. Must be less than credit_limit. + periodic_reset: + type: string + nullable: true + enum: [monthly, weekly] + description: Reset period. If not provided, limit is cumulative. + workspace_id: + type: string + description: Workspace ID or slug. Required if not using API key authentication. + organisation_id: + type: string + format: uuid + description: Organization ID. Required if not using API key authentication. + + UpdateUsageLimitsPolicyRequest: + type: object + properties: + name: + type: string + maxLength: 255 + description: Policy name + credit_limit: + type: number + minimum: 0 + description: Maximum usage allowed + alert_threshold: + type: number + nullable: true + minimum: 0 + description: Threshold at which to send alerts. Must be less than credit_limit. + periodic_reset: + type: string + nullable: true + enum: [monthly, weekly] + description: Reset period. Set to null to remove periodic reset. + reset_usage_for_value: + type: string + description: Reset usage for a specific value key (e.g., API key). This will reset the usage counter for that key to 0. + + CreateRateLimitsPolicyRequest: + type: object + required: + - conditions + - group_by + - type + - unit + - value + properties: + name: + type: string + maxLength: 255 + description: Policy name + example: 100 Requests per Minute + conditions: + type: array + minItems: 1 + items: + $ref: '#/components/schemas/Condition' + description: Array of conditions that define which requests the policy applies to + group_by: + type: array + minItems: 1 + items: + $ref: '#/components/schemas/GroupBy' + description: Array of group by fields that define how usage is aggregated + type: + type: string + enum: [requests, tokens] + description: Policy type + unit: + type: string + enum: [rpm, rph, rpd] + description: | + Rate unit: + - `rpm` - Requests/Tokens per minute + - `rph` - Requests/Tokens per hour + - `rpd` - Requests/Tokens per day + value: + type: number + description: Rate limit value + workspace_id: + type: string + description: Workspace ID or slug. Required if not using API key authentication. + organisation_id: + type: string + format: uuid + description: Organization ID. Required if not using API key authentication. + + UpdateRateLimitsPolicyRequest: + type: object + properties: + name: + type: string + maxLength: 255 + description: Policy name + unit: + type: string + enum: [rpm, rph, rpd] + description: Rate unit + value: + type: number + description: Rate limit value + + UsageLimitsPolicy: + type: object + required: + - id + - type + - status + - workspace_id + - organisation_id + - created_at + - last_updated_at + properties: + id: + type: string + format: uuid + description: Policy UUID + name: + type: string + nullable: true + description: Policy name + conditions: + type: array + items: + $ref: '#/components/schemas/Condition' + description: Array of conditions + group_by: + type: array + items: + $ref: '#/components/schemas/GroupBy' + description: Array of group by fields + type: + type: string + enum: [cost, tokens] + description: Policy type + credit_limit: + type: number + description: Maximum usage allowed + alert_threshold: + type: number + nullable: true + description: Alert threshold + periodic_reset: + type: string + nullable: true + enum: [monthly, weekly] + description: Reset period + status: + type: string + enum: [active, archived] + description: Policy status + workspace_id: + type: string + format: uuid + description: Workspace UUID + organisation_id: + type: string + format: uuid + description: Organization UUID + created_at: + type: string + format: date-time + description: Creation timestamp + last_updated_at: + type: string + format: date-time + description: Last update timestamp + value_key_usage_map: + type: object + additionalProperties: + $ref: '#/components/schemas/ValueKeyUsage' + description: Map of value keys to usage information (only included when include_usage=true) + + ValueKeyUsage: + type: object + properties: + current_usage: + type: number + description: Current usage value + status: + type: string + enum: [active, exhausted] + description: Usage status + is_threshold_alerts_sent: + type: boolean + description: Whether threshold alerts have been sent + is_exhausted_alerts_sent: + type: boolean + description: Whether exhausted alerts have been sent + + RateLimitsPolicy: + type: object + required: + - id + - type + - unit + - value + - status + - workspace_id + - organisation_id + - created_at + - last_updated_at + properties: + id: + type: string + format: uuid + description: Policy UUID + name: + type: string + nullable: true + description: Policy name + conditions: + type: array + items: + $ref: '#/components/schemas/Condition' + description: Array of conditions + group_by: + type: array + items: + $ref: '#/components/schemas/GroupBy' + description: Array of group by fields + type: + type: string + enum: [requests, tokens] + description: Policy type + unit: + type: string + enum: [rpm, rph, rpd] + description: Rate unit + value: + type: number + description: Rate limit value + status: + type: string + enum: [active, archived] + description: Policy status + workspace_id: + type: string + format: uuid + description: Workspace UUID + organisation_id: + type: string + format: uuid + description: Organization UUID + created_at: + type: string + format: date-time + description: Creation timestamp + last_updated_at: + type: string + format: date-time + description: Last update timestamp + + CreatePolicyResponse: + type: object + properties: + id: + type: string + format: uuid + description: Created policy UUID + object: + type: string + description: Resource type + example: policy_usage_limits + + UsageLimitsPolicyListResponse: + type: object + properties: + object: + type: string + example: list + data: + type: array + items: + $ref: '#/components/schemas/UsageLimitsPolicy' + total: + type: integer + description: Total number of policies + + UsageLimitsPolicyResponse: + allOf: + - $ref: '#/components/schemas/UsageLimitsPolicy' + - type: object + properties: + object: + type: string + example: policy_usage_limits + + RateLimitsPolicyListResponse: + type: object + properties: + object: + type: string + example: list + data: + type: array + items: + $ref: '#/components/schemas/RateLimitsPolicy' + total: + type: integer + description: Total number of policies + + RateLimitsPolicyResponse: + allOf: + - $ref: '#/components/schemas/RateLimitsPolicy' + - type: object + properties: + object: + type: string + example: policy_rate_limits + security: - Portkey-Key: [] From 0b2bb0741c8e9c3344bfc418cc32b018911ee387 Mon Sep 17 00:00:00 2001 From: sk-portkey Date: Tue, 18 Nov 2025 14:28:17 +0530 Subject: [PATCH 2/2] feat: usage and rate limit policies --- openapi.yaml | 2021 +++++++++++++++++++++++++------------------------- 1 file changed, 1009 insertions(+), 1012 deletions(-) diff --git a/openapi.yaml b/openapi.yaml index c1c6fe49..69bed1f7 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -17143,112 +17143,176 @@ paths: }) console.log(apiKey); - /analytics/graphs/requests: - servers: *ControlPlaneServers + /policies/usage-limits: + post: + tags: + - Usage Limits Policies + summary: Create Usage Limits Policy + description: Create a new usage limits policy to control total usage (cost or tokens) over a period. + operationId: createUsageLimitsPolicy + security: + - Portkey-Key: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateUsageLimitsPolicyRequest' + examples: + monthlyCostLimit: + summary: Monthly Cost Limit per API Key + value: + name: Monthly Cost Limit per API Key + conditions: + - key: workspace_id + value: workspace-123 + group_by: + - key: api_key + type: cost + credit_limit: 1000.0 + alert_threshold: 800.0 + periodic_reset: monthly + tokenLimit: + summary: Token Limit per User + value: + name: Token Limit per User + conditions: + - key: workspace_id + value: workspace-123 + group_by: + - key: metadata.user_id + type: tokens + credit_limit: 1000000 + periodic_reset: weekly + responses: + '200': + description: Policy created successfully + content: + application/json: + schema: + $ref: '#/components/schemas/CreatePolicyResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error + get: tags: - - Analytics > Graphs - summary: Get requests graph + - Usage Limits Policies + summary: List Usage Limits Policies + description: List all usage limits policies with optional filtering. + operationId: listUsageLimitsPolicies + security: + - Portkey-Key: [] parameters: - - $ref: "#/components/parameters/TimeOfGenerationMin" - - $ref: "#/components/parameters/TimeOfGenerationMax" - - $ref: "#/components/parameters/TotalUnitsMin" - - $ref: "#/components/parameters/TotalUnitsMax" - - $ref: "#/components/parameters/CostMin" - - $ref: "#/components/parameters/CostMax" - - $ref: "#/components/parameters/PromptTokenMin" - - $ref: "#/components/parameters/PromptTokenMax" - - $ref: "#/components/parameters/CompletionTokenMin" - - $ref: "#/components/parameters/CompletionTokenMax" - - $ref: "#/components/parameters/StatusCode" - - $ref: "#/components/parameters/WeightedFeedbackMin" - - $ref: "#/components/parameters/WeightedFeedbackMax" - - $ref: "#/components/parameters/VirtualKeys" - - $ref: "#/components/parameters/Configs" - - $ref: "#/components/parameters/WorkspaceSlug" - - $ref: "#/components/parameters/ApiKeyIds" - - $ref: "#/components/parameters/Metadata" - - $ref: "#/components/parameters/AiOrgModel" - - $ref: "#/components/parameters/TraceId" - - $ref: "#/components/parameters/SpanId" - - $ref: "#/components/parameters/PromptSlug" + - $ref: '#/components/parameters/WorkspaceIdQuery' + - name: status + in: query + description: Filter by status + required: false + schema: + type: string + enum: [active, archived] + default: active + - name: type + in: query + description: Filter by policy type + required: false + schema: + type: string + enum: [cost, tokens] + - $ref: '#/components/parameters/PageSize' + - $ref: '#/components/parameters/CurrentPage' responses: - "200": - description: OK - headers: - Content-Type: - schema: - type: string - example: application/json + '200': + description: List of usage limits policies content: application/json: schema: - type: object - properties: - summary: - type: object - properties: - total: - type: integer - description: Total requests across all data points - required: - - total - data_points: - type: array - items: - type: object - properties: - timestamp: - type: string - format: date-time - description: The timestamp for the data point bucket - total: - type: integer - description: Total requests for this data point bucket - required: - - timestamp - - total - description: An array of data points, each with a timestamp and metrics - object: - type: string - description: The type of object being returned - enum: [analytics-graph] - required: - - summary - - data_points - - object + $ref: '#/components/schemas/UsageLimitsPolicyListResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error - /analytics/graphs/cost: - servers: *ControlPlaneServers + /policies/usage-limits/{policyUsageLimitsId}: get: tags: - - Analytics > Graphs - summary: Get cost graph + - Usage Limits Policies + summary: Get Usage Limits Policy + description: Get a single usage limits policy by ID. + operationId: getUsageLimitsPolicy + security: + - Portkey-Key: [] parameters: - - $ref: "#/components/parameters/TimeOfGenerationMin" - - $ref: "#/components/parameters/TimeOfGenerationMax" - - $ref: "#/components/parameters/TotalUnitsMin" - - $ref: "#/components/parameters/TotalUnitsMax" - - $ref: "#/components/parameters/CostMin" - - $ref: "#/components/parameters/CostMax" - - $ref: "#/components/parameters/PromptTokenMin" - - $ref: "#/components/parameters/PromptTokenMax" - - $ref: "#/components/parameters/CompletionTokenMin" - - $ref: "#/components/parameters/CompletionTokenMax" - - $ref: "#/components/parameters/StatusCode" - - $ref: "#/components/parameters/WeightedFeedbackMin" - - $ref: "#/components/parameters/WeightedFeedbackMax" - - $ref: "#/components/parameters/VirtualKeys" - - $ref: "#/components/parameters/Configs" - - $ref: "#/components/parameters/WorkspaceSlug" - - $ref: "#/components/parameters/ApiKeyIds" - - $ref: "#/components/parameters/Metadata" - - $ref: "#/components/parameters/AiOrgModel" - - $ref: "#/components/parameters/TraceId" - - $ref: "#/components/parameters/SpanId" - - $ref: "#/components/parameters/PromptSlug" + - $ref: '#/components/parameters/PolicyUsageLimitsId' + - name: status + in: query + description: Filter by status + required: false + schema: + type: string + enum: [active, archived] + default: active + - name: include_usage + in: query + description: Include usage information for each value key + required: false + schema: + type: boolean + default: false responses: - "200": + '200': + description: Usage limits policy details + content: + application/json: + schema: + $ref: '#/components/schemas/UsageLimitsPolicyResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error + + put: + tags: + - Usage Limits Policies + summary: Update Usage Limits Policy + description: Update an existing usage limits policy. + operationId: updateUsageLimitsPolicy + security: + - Portkey-Key: [] + parameters: + - $ref: '#/components/parameters/PolicyUsageLimitsId' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateUsageLimitsPolicyRequest' + example: + credit_limit: 2000.0 + alert_threshold: 1500.0 + reset_usage_for_value: api-key-123 + responses: + '200': description: OK headers: Content-Type: @@ -17259,79 +17323,30 @@ paths: application/json: schema: type: object - properties: - summary: - type: object - properties: - total: - type: integer - description: Total cost in cents across all data points - avg: - type: integer - description: Average cost per request across all data points - required: - - total - - avg - data_points: - type: array - items: - type: object - properties: - timestamp: - type: string - format: date-time - description: The timestamp for the data point bucket - total: - type: integer - description: Total cost in cents for this data point bucket - avg: - type: integer - description: Average cost per request for this data point bucket - required: - - timestamp - - total - - avg - description: An array of data points, each with a timestamp and metrics - object: - type: string - description: The type of object being returned - enum: [analytics-graph] - required: - - summary - - data_points - - object + example: {} + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error - /analytics/graphs/latency: - servers: *ControlPlaneServers - get: + delete: tags: - - Analytics > Graphs - summary: Get latency graph + - Usage Limits Policies + summary: Delete Usage Limits Policy + description: Archive (soft delete) a usage limits policy. + operationId: deleteUsageLimitsPolicy + security: + - Portkey-Key: [] parameters: - - $ref: "#/components/parameters/TimeOfGenerationMin" - - $ref: "#/components/parameters/TimeOfGenerationMax" - - $ref: "#/components/parameters/TotalUnitsMin" - - $ref: "#/components/parameters/TotalUnitsMax" - - $ref: "#/components/parameters/CostMin" - - $ref: "#/components/parameters/CostMax" - - $ref: "#/components/parameters/PromptTokenMin" - - $ref: "#/components/parameters/PromptTokenMax" - - $ref: "#/components/parameters/CompletionTokenMin" - - $ref: "#/components/parameters/CompletionTokenMax" - - $ref: "#/components/parameters/StatusCode" - - $ref: "#/components/parameters/WeightedFeedbackMin" - - $ref: "#/components/parameters/WeightedFeedbackMax" - - $ref: "#/components/parameters/VirtualKeys" - - $ref: "#/components/parameters/Configs" - - $ref: "#/components/parameters/WorkspaceSlug" - - $ref: "#/components/parameters/ApiKeyIds" - - $ref: "#/components/parameters/Metadata" - - $ref: "#/components/parameters/AiOrgModel" - - $ref: "#/components/parameters/TraceId" - - $ref: "#/components/parameters/SpanId" - - $ref: "#/components/parameters/PromptSlug" + - $ref: '#/components/parameters/PolicyUsageLimitsId' responses: - "200": + '200': description: OK headers: Content-Type: @@ -17342,179 +17357,184 @@ paths: application/json: schema: type: object - properties: - summary: - type: object - properties: - avg: - type: integer - description: Average latency in ms across all data points - p50: - type: integer - description: 50th percentile latency in ms across all data points - p90: - type: integer - description: 90th percentile latency in ms across all data points - p99: - type: integer - description: 99th percentile latency in ms across all data points + example: {} + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error - required: - - avg - - p50 - - p90 - - p99 - data_points: - type: array - items: - type: object - properties: - timestamp: - type: string - format: date-time - description: The timestamp for the data point bucket - avg: - type: integer - description: Average latency in ms for this data point bucket - p50: - type: integer - description: 50th percentile latency in ms for this data point bucket - p90: - type: integer - description: 90th percentile latency in ms for this data point bucket - p99: - type: integer - description: 99th percentile latency in ms for this data point bucket - required: - - timestamp - - avg - - p50 - - p90 - - p99 - description: An array of data points, each with a timestamp and metrics - object: - type: string - description: The type of object being returned - enum: [analytics-graph] - required: - - summary - - data_points - - object + /policies/rate-limits: + post: + tags: + - Rate Limits Policies + summary: Create Rate Limits Policy + description: Create a new rate limits policy to control the rate of requests or tokens consumed per minute, hour, or day. + operationId: createRateLimitsPolicy + security: + - Portkey-Key: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateRateLimitsPolicyRequest' + examples: + requestsPerMinute: + summary: 100 Requests per Minute per API Key + value: + name: 100 RPM per API Key + conditions: + - key: workspace_id + value: workspace-123 + group_by: + - key: api_key + type: requests + unit: rpm + value: 100 + tokensPerHour: + summary: 10K Tokens per Hour per User + value: + name: 10K Tokens per Hour per User + conditions: + - key: workspace_id + value: workspace-123 + group_by: + - key: metadata.user_id + type: tokens + unit: rph + value: 10000 + responses: + '200': + description: Policy created successfully + content: + application/json: + schema: + $ref: '#/components/schemas/CreatePolicyResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '500': + description: Server error - /analytics/graphs/tokens: - servers: *ControlPlaneServers get: tags: - - Analytics > Graphs - summary: Get tokens graph + - Rate Limits Policies + summary: List Rate Limits Policies + description: List all rate limits policies with optional filtering. + operationId: listRateLimitsPolicies + security: + - Portkey-Key: [] parameters: - - $ref: "#/components/parameters/TimeOfGenerationMin" - - $ref: "#/components/parameters/TimeOfGenerationMax" - - $ref: "#/components/parameters/TotalUnitsMin" - - $ref: "#/components/parameters/TotalUnitsMax" - - $ref: "#/components/parameters/CostMin" - - $ref: "#/components/parameters/CostMax" - - $ref: "#/components/parameters/PromptTokenMin" - - $ref: "#/components/parameters/PromptTokenMax" - - $ref: "#/components/parameters/CompletionTokenMin" - - $ref: "#/components/parameters/CompletionTokenMax" - - $ref: "#/components/parameters/StatusCode" - - $ref: "#/components/parameters/WeightedFeedbackMin" - - $ref: "#/components/parameters/WeightedFeedbackMax" - - $ref: "#/components/parameters/VirtualKeys" - - $ref: "#/components/parameters/Configs" - - $ref: "#/components/parameters/WorkspaceSlug" - - $ref: "#/components/parameters/ApiKeyIds" - - $ref: "#/components/parameters/Metadata" - - $ref: "#/components/parameters/AiOrgModel" - - $ref: "#/components/parameters/TraceId" - - $ref: "#/components/parameters/SpanId" - - $ref: "#/components/parameters/PromptSlug" + - $ref: '#/components/parameters/WorkspaceIdQuery' + - name: status + in: query + description: Filter by status + required: false + schema: + type: string + enum: [active, archived] + default: active + - name: type + in: query + description: Filter by policy type + required: false + schema: + type: string + enum: [requests, tokens] + - name: unit + in: query + description: Filter by rate unit + required: false + schema: + type: string + enum: [rpm, rph, rpd] + - $ref: '#/components/parameters/PageSize' + - $ref: '#/components/parameters/CurrentPage' responses: - "200": - description: OK - headers: - Content-Type: - schema: - type: string - example: application/json + '200': + description: List of rate limits policies content: application/json: schema: - type: object - properties: - summary: - type: object - properties: - total: - type: integer - description: Total tokens across all data points - avg: - type: integer - description: Average tokens per request across all data points - required: - - total - - avg - data_points: - type: array - items: - type: object - properties: - timestamp: - type: string - format: date-time - description: The timestamp for the data point bucket - total: - type: integer - description: Total tokens for this data point bucket - avg: - type: integer - description: Average tokens per request for this data point bucket - required: - - timestamp - - avg - - total - description: An array of data points, each with a timestamp and metrics - object: - type: string - description: The type of object being returned - enum: [analytics-graph] - required: - - summary - - data_points - - object + $ref: '#/components/schemas/RateLimitsPolicyListResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error - /analytics/graphs/users: - servers: *ControlPlaneServers + /policies/rate-limits/{rateLimitsPolicyId}: get: tags: - - Analytics > Graphs - summary: Get users graph. Returns unique user count across different time buckets + - Rate Limits Policies + summary: Get Rate Limits Policy + description: Get a single rate limits policy by ID. + operationId: getRateLimitsPolicy + security: + - Portkey-Key: [] parameters: - - $ref: "#/components/parameters/TimeOfGenerationMin" - - $ref: "#/components/parameters/TimeOfGenerationMax" - - $ref: "#/components/parameters/TotalUnitsMin" - - $ref: "#/components/parameters/TotalUnitsMax" - - $ref: "#/components/parameters/CostMin" - - $ref: "#/components/parameters/CostMax" - - $ref: "#/components/parameters/PromptTokenMin" - - $ref: "#/components/parameters/PromptTokenMax" - - $ref: "#/components/parameters/CompletionTokenMin" - - $ref: "#/components/parameters/CompletionTokenMax" - - $ref: "#/components/parameters/StatusCode" - - $ref: "#/components/parameters/WeightedFeedbackMin" - - $ref: "#/components/parameters/WeightedFeedbackMax" - - $ref: "#/components/parameters/VirtualKeys" - - $ref: "#/components/parameters/Configs" - - $ref: "#/components/parameters/WorkspaceSlug" - - $ref: "#/components/parameters/ApiKeyIds" - - $ref: "#/components/parameters/Metadata" - - $ref: "#/components/parameters/AiOrgModel" - - $ref: "#/components/parameters/TraceId" - - $ref: "#/components/parameters/SpanId" - - $ref: "#/components/parameters/PromptSlug" + - $ref: '#/components/parameters/RateLimitsPolicyId' + - name: status + in: query + description: Filter by status + required: false + schema: + type: string + enum: [active, archived] + default: active responses: - "200": + '200': + description: Rate limits policy details + content: + application/json: + schema: + $ref: '#/components/schemas/RateLimitsPolicyResponse' + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error + + put: + tags: + - Rate Limits Policies + summary: Update Rate Limits Policy + description: Update an existing rate limits policy. + operationId: updateRateLimitsPolicy + security: + - Portkey-Key: [] + parameters: + - $ref: '#/components/parameters/RateLimitsPolicyId' + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateRateLimitsPolicyRequest' + example: + value: 200 + unit: rph + responses: + '200': description: OK headers: Content-Type: @@ -17525,46 +17545,58 @@ paths: application/json: schema: type: object - properties: - summary: - type: object - properties: - total: - type: integer - description: Total unique users across all data points - required: - - total - data_points: - type: array - items: - type: object - properties: - timestamp: - type: string - format: date-time - description: The timestamp for the data point bucket - total: - type: integer - description: Total unique users for this data point bucket - required: - - timestamp - - total - description: An array of data points, each with a timestamp and metrics - object: - type: string - description: The type of object being returned - enum: [analytics-graph] - required: - - summary - - data_points - - object + example: {} + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error + + delete: + tags: + - Rate Limits Policies + summary: Delete Rate Limits Policy + description: Delete a rate limits policy. + operationId: deleteRateLimitsPolicy + security: + - Portkey-Key: [] + parameters: + - $ref: '#/components/parameters/RateLimitsPolicyId' + responses: + '200': + description: OK + headers: + Content-Type: + schema: + type: string + example: application/json + content: + application/json: + schema: + type: object + example: {} + '400': + description: Bad request + '401': + description: Unauthorized + '403': + description: Forbidden + '404': + description: Policy not found + '500': + description: Server error - /analytics/graphs/users/requests: + /analytics/graphs/requests: servers: *ControlPlaneServers get: tags: - Analytics > Graphs - summary: Get users requests graph. Returns average requests per user across different time buckets + summary: Get requests graph parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -17607,16 +17639,8 @@ paths: total: type: integer description: Total requests across all data points - unique: - type: integer - description: Total unique users across all data points - avg: - type: integer - description: Average requests per user across all data points required: - total - - unique - - avg data_points: type: array items: @@ -17626,13 +17650,13 @@ paths: type: string format: date-time description: The timestamp for the data point bucket - avg: + total: type: integer - description: Average requests per user for this data point bucket + description: Total requests for this data point bucket required: - timestamp - - avg - description: An array of data points, each with a timestamp and metrics + - total + description: An array of data points, each with a timestamp and metrics object: type: string description: The type of object being returned @@ -17642,12 +17666,12 @@ paths: - data_points - object - /analytics/graphs/errors: + /analytics/graphs/cost: servers: *ControlPlaneServers get: tags: - Analytics > Graphs - summary: Get errors graph + summary: Get cost graph parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -17689,9 +17713,13 @@ paths: properties: total: type: integer - description: Total errors across all data points + description: Total cost in cents across all data points + avg: + type: integer + description: Average cost per request across all data points required: - total + - avg data_points: type: array items: @@ -17703,10 +17731,14 @@ paths: description: The timestamp for the data point bucket total: type: integer - description: Total errors this data point bucket + description: Total cost in cents for this data point bucket + avg: + type: integer + description: Average cost per request for this data point bucket required: - timestamp - total + - avg description: An array of data points, each with a timestamp and metrics object: type: string @@ -17717,12 +17749,12 @@ paths: - data_points - object - /analytics/graphs/errors/rate: + /analytics/graphs/latency: servers: *ControlPlaneServers get: tags: - Analytics > Graphs - summary: Get percentage error rate graph + summary: Get latency graph parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -17762,11 +17794,24 @@ paths: summary: type: object properties: - rate: + avg: type: integer - description: Percentage error rate across all data points + description: Average latency in ms across all data points + p50: + type: integer + description: 50th percentile latency in ms across all data points + p90: + type: integer + description: 90th percentile latency in ms across all data points + p99: + type: integer + description: 99th percentile latency in ms across all data points + required: - - rate + - avg + - p50 + - p90 + - p99 data_points: type: array items: @@ -17776,12 +17821,24 @@ paths: type: string format: date-time description: The timestamp for the data point bucket - rate: + avg: type: integer - description: Percentage error rate for this data point bucket + description: Average latency in ms for this data point bucket + p50: + type: integer + description: 50th percentile latency in ms for this data point bucket + p90: + type: integer + description: 90th percentile latency in ms for this data point bucket + p99: + type: integer + description: 99th percentile latency in ms for this data point bucket required: - timestamp - - rate + - avg + - p50 + - p90 + - p99 description: An array of data points, each with a timestamp and metrics object: type: string @@ -17792,12 +17849,12 @@ paths: - data_points - object - /analytics/graphs/errors/stacks: + /analytics/graphs/tokens: servers: *ControlPlaneServers get: tags: - Analytics > Graphs - summary: Get status code wise stacked error graph + summary: Get tokens graph parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -17839,9 +17896,13 @@ paths: properties: total: type: integer - description: Total errors across all data points + description: Total tokens across all data points + avg: + type: integer + description: Average tokens per request across all data points required: - total + - avg data_points: type: array items: @@ -17851,20 +17912,16 @@ paths: type: string format: date-time description: The timestamp for the data point bucket - stats: - type: array - items: - type: object - properties: - response_status_code: - type: integer - description: Response status code - count: - type: integer - description: Total occurences of this response status code + total: + type: integer + description: Total tokens for this data point bucket + avg: + type: integer + description: Average tokens per request for this data point bucket required: - timestamp - - stats + - avg + - total description: An array of data points, each with a timestamp and metrics object: type: string @@ -17875,12 +17932,12 @@ paths: - data_points - object - /analytics/graphs/errors/status-codes: + /analytics/graphs/users: servers: *ControlPlaneServers get: tags: - Analytics > Graphs - summary: Get status code wise grouped error graph. + summary: Get users graph. Returns unique user count across different time buckets parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -17920,29 +17977,26 @@ paths: summary: type: object properties: - total_errors: - type: integer - description: Total errors across all data points - unique_error_codes: + total: type: integer - description: Unique error codes across all data points + description: Total unique users across all data points required: - - total_errors - - unique_error_codes + - total data_points: type: array items: type: object properties: - status_code: - type: integer - description: Response status code - count: + timestamp: + type: string + format: date-time + description: The timestamp for the data point bucket + total: type: integer - description: Occurences of this response status code + description: Total unique users for this data point bucket required: - - status_code - - count + - timestamp + - total description: An array of data points, each with a timestamp and metrics object: type: string @@ -17953,12 +18007,12 @@ paths: - data_points - object - /analytics/graphs/requests/rescued: + /analytics/graphs/users/requests: servers: *ControlPlaneServers get: tags: - Analytics > Graphs - summary: Get retry and fallback rescued requests graph + summary: Get users requests graph. Returns average requests per user across different time buckets parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -17998,15 +18052,19 @@ paths: summary: type: object properties: - retry: + total: type: integer - description: Total requests rescued using retries across all data points - fallback: + description: Total requests across all data points + unique: type: integer - description: Total requests rescued using fallback across all data points + description: Total unique users across all data points + avg: + type: integer + description: Average requests per user across all data points required: - - retry - - fallback + - total + - unique + - avg data_points: type: array items: @@ -18016,24 +18074,12 @@ paths: type: string format: date-time description: The timestamp for the data point bucket - retry: - type: array - items: - type: object - properties: - retry_success_count: - type: integer - description: "Retry attempt count at which the request was rescued" - count: - type: integer - description: "Total requests rescued at this retry attempt" - fallback: + avg: type: integer - description: Total requests rescued using fallback for this data point bucket + description: Average requests per user for this data point bucket required: - timestamp - - retry - - fallback + - avg description: An array of data points, each with a timestamp and metrics object: type: string @@ -18044,12 +18090,12 @@ paths: - data_points - object - /analytics/graphs/cache/hit-rate: + /analytics/graphs/errors: servers: *ControlPlaneServers get: tags: - Analytics > Graphs - summary: Get cache hit rate graph + summary: Get errors graph parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -18091,13 +18137,9 @@ paths: properties: total: type: integer - description: Total cache hits across all data points - rate: - type: integer - description: Percentage cache hit rate across all data points + description: Total errors across all data points required: - total - - rate data_points: type: array items: @@ -18107,28 +18149,12 @@ paths: type: string format: date-time description: The timestamp for the data point bucket - simple_hits: - type: integer - description: Total simple cache hits for this data point bucket - semantic_hits: - type: integer - description: Total semantic cache hits for this data point bucket - rate: - type: integer - description: Percentage cache hit rate for this data point bucket - cumulative_simple_cache_savings: - type: integer - description: Cumulative simple cache cost savings in cents based on all previous data point buckets and this bucket - cumulative_semantic_cache_savings: + total: type: integer - description: Cumulative semantic cache cost savings in cents based on all previous data point buckets and this bucket + description: Total errors this data point bucket required: - timestamp - - simple_hits - - semantic_hits - - rate - - cumulative_simple_cache_savings - - cumulative_semantic_cache_savings + - total description: An array of data points, each with a timestamp and metrics object: type: string @@ -18139,12 +18165,12 @@ paths: - data_points - object - /analytics/graphs/cache/latency: + /analytics/graphs/errors/rate: servers: *ControlPlaneServers get: tags: - Analytics > Graphs - summary: Get cache hit latency graph + summary: Get percentage error rate graph parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -18183,6 +18209,12 @@ paths: properties: summary: type: object + properties: + rate: + type: integer + description: Percentage error rate across all data points + required: + - rate data_points: type: array items: @@ -18192,12 +18224,12 @@ paths: type: string format: date-time description: The timestamp for the data point bucket - avg: + rate: type: integer - description: Average latency (in ms) for cache hit for this data point bucket + description: Percentage error rate for this data point bucket required: - timestamp - - avg + - rate description: An array of data points, each with a timestamp and metrics object: type: string @@ -18208,12 +18240,12 @@ paths: - data_points - object - /analytics/graphs/feedbacks: + /analytics/graphs/errors/stacks: servers: *ControlPlaneServers get: tags: - Analytics > Graphs - summary: Get feedbacks graph + summary: Get status code wise stacked error graph parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -18255,7 +18287,7 @@ paths: properties: total: type: integer - description: Total feedbacks across all data points + description: Total errors across all data points required: - total data_points: @@ -18265,14 +18297,22 @@ paths: properties: timestamp: type: string - format: date-time - description: The timestamp for the data point bucket - total: - type: integer - description: Total feedbacks for this data point bucket + format: date-time + description: The timestamp for the data point bucket + stats: + type: array + items: + type: object + properties: + response_status_code: + type: integer + description: Response status code + count: + type: integer + description: Total occurences of this response status code required: - timestamp - - total + - stats description: An array of data points, each with a timestamp and metrics object: type: string @@ -18283,12 +18323,12 @@ paths: - data_points - object - /analytics/graphs/feedbacks/scores: + /analytics/graphs/errors/status-codes: servers: *ControlPlaneServers get: tags: - Analytics > Graphs - summary: Get score-wise feedbacks distribution graph + summary: Get status code wise grouped error graph. parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -18328,25 +18368,29 @@ paths: summary: type: object properties: - total: + total_errors: type: integer - description: Total feedbacks across all data points + description: Total errors across all data points + unique_error_codes: + type: integer + description: Unique error codes across all data points required: - - total + - total_errors + - unique_error_codes data_points: type: array items: type: object properties: - score: + status_code: type: integer - description: Feedback value for which total is calculated - total: + description: Response status code + count: type: integer - description: Total feedbacks for this feedback score + description: Occurences of this response status code required: - - score - - total + - status_code + - count description: An array of data points, each with a timestamp and metrics object: type: string @@ -18357,12 +18401,12 @@ paths: - data_points - object - /analytics/graphs/feedbacks/weighted: + /analytics/graphs/requests/rescued: servers: *ControlPlaneServers get: tags: - Analytics > Graphs - summary: Get weighted feedbacks graph. Weighted feedback is (value * score) + summary: Get retry and fallback rescued requests graph parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -18402,11 +18446,15 @@ paths: summary: type: object properties: - avg: + retry: type: integer - description: Average weighted feedback across all data points + description: Total requests rescued using retries across all data points + fallback: + type: integer + description: Total requests rescued using fallback across all data points required: - - avg + - retry + - fallback data_points: type: array items: @@ -18416,12 +18464,24 @@ paths: type: string format: date-time description: The timestamp for the data point bucket - avg: + retry: + type: array + items: + type: object + properties: + retry_success_count: + type: integer + description: "Retry attempt count at which the request was rescued" + count: + type: integer + description: "Total requests rescued at this retry attempt" + fallback: type: integer - description: Average weighted feedback for this data point bucket + description: Total requests rescued using fallback for this data point bucket required: - timestamp - - avg + - retry + - fallback description: An array of data points, each with a timestamp and metrics object: type: string @@ -18432,12 +18492,12 @@ paths: - data_points - object - /analytics/graphs/feedbacks/ai-models: + /analytics/graphs/cache/hit-rate: servers: *ControlPlaneServers get: tags: - Analytics > Graphs - summary: Get feedbacks per ai_models graph + summary: Get cache hit rate graph parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -18476,24 +18536,47 @@ paths: properties: summary: type: object + properties: + total: + type: integer + description: Total cache hits across all data points + rate: + type: integer + description: Percentage cache hit rate across all data points + required: + - total + - rate data_points: type: array items: type: object properties: - ai_model: + timestamp: type: string - description: AI model for which feedback data is calculated - total: + format: date-time + description: The timestamp for the data point bucket + simple_hits: type: integer - description: Total feedbacks for this ai_model requests - avg_weighted_feedback: + description: Total simple cache hits for this data point bucket + semantic_hits: type: integer - description: Average weighted feedback for this ai_model requests + description: Total semantic cache hits for this data point bucket + rate: + type: integer + description: Percentage cache hit rate for this data point bucket + cumulative_simple_cache_savings: + type: integer + description: Cumulative simple cache cost savings in cents based on all previous data point buckets and this bucket + cumulative_semantic_cache_savings: + type: integer + description: Cumulative semantic cache cost savings in cents based on all previous data point buckets and this bucket required: - - ai_model - - total - - avg_weighted_feedback + - timestamp + - simple_hits + - semantic_hits + - rate + - cumulative_simple_cache_savings + - cumulative_semantic_cache_savings description: An array of data points, each with a timestamp and metrics object: type: string @@ -18504,12 +18587,12 @@ paths: - data_points - object - /analytics/summary/cache: + /analytics/graphs/cache/latency: servers: *ControlPlaneServers get: tags: - - Analytics > Summary - summary: Get cache summary data for the selected time period + - Analytics > Graphs + summary: Get cache hit latency graph parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -18548,33 +18631,37 @@ paths: properties: summary: type: object - properties: - hits: - type: integer - description: Total cache hits - avg_latency: - type: integer - description: Average latency for a cache hit - total_requests: - type: integer - description: Total requests - cache_speedup: - type: integer - description: Percentage speedup for cache hits compared to non cache hit requests + data_points: + type: array + items: + type: object + properties: + timestamp: + type: string + format: date-time + description: The timestamp for the data point bucket + avg: + type: integer + description: Average latency (in ms) for cache hit for this data point bucket + required: + - timestamp + - avg + description: An array of data points, each with a timestamp and metrics object: type: string description: The type of object being returned - enum: [analytics-summary] + enum: [analytics-graph] required: - summary + - data_points - object - /analytics/groups/users: + /analytics/graphs/feedbacks: servers: *ControlPlaneServers get: tags: - - Analytics > Groups - summary: Get metadata users grouped data. + - Analytics > Graphs + summary: Get feedbacks graph parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -18593,8 +18680,6 @@ paths: - $ref: "#/components/parameters/Configs" - $ref: "#/components/parameters/WorkspaceSlug" - $ref: "#/components/parameters/ApiKeyIds" - - $ref: "#/components/parameters/CurrentPage" - - $ref: "#/components/parameters/PageSize" - $ref: "#/components/parameters/Metadata" - $ref: "#/components/parameters/AiOrgModel" - $ref: "#/components/parameters/TraceId" @@ -18613,41 +18698,45 @@ paths: schema: type: object properties: - object: - type: string - enum: [list] - total: - type: integer - description: Total records present across all pages - data: + summary: + type: object + properties: + total: + type: integer + description: Total feedbacks across all data points + required: + - total + data_points: type: array items: type: object properties: - user: - type: string - description: The user for which the data is calculated - requests: - type: string - description: Total requests made by this user - cost: - type: string - description: Total cost in cents for the requests made by this user - object: + timestamp: type: string - description: The type of object being returned - enum: [analytics-group] + format: date-time + description: The timestamp for the data point bucket + total: + type: integer + description: Total feedbacks for this data point bucket + required: + - timestamp + - total + description: An array of data points, each with a timestamp and metrics + object: + type: string + description: The type of object being returned + enum: [analytics-graph] required: - - total + - summary + - data_points - object - - data - /analytics/groups/ai-models: + /analytics/graphs/feedbacks/scores: servers: *ControlPlaneServers get: tags: - - Analytics > Groups - summary: Get ai model grouped data. + - Analytics > Graphs + summary: Get score-wise feedbacks distribution graph parameters: - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" @@ -18666,8 +18755,6 @@ paths: - $ref: "#/components/parameters/Configs" - $ref: "#/components/parameters/WorkspaceSlug" - $ref: "#/components/parameters/ApiKeyIds" - - $ref: "#/components/parameters/CurrentPage" - - $ref: "#/components/parameters/PageSize" - $ref: "#/components/parameters/Metadata" - $ref: "#/components/parameters/AiOrgModel" - $ref: "#/components/parameters/TraceId" @@ -18686,44 +18773,45 @@ paths: schema: type: object properties: - object: - type: string - enum: [list] - total: - type: integer - description: Total records present across all pages - data: + summary: + type: object + properties: + total: + type: integer + description: Total feedbacks across all data points + required: + - total + data_points: type: array items: type: object properties: - ai_model: - type: string - description: The ai model for which the data is calculated - requests: - type: string - description: Total requests made for this ai model - object: - type: string - description: The type of object being returned - enum: [analytics-group] + score: + type: integer + description: Feedback value for which total is calculated + total: + type: integer + description: Total feedbacks for this feedback score + required: + - score + - total + description: An array of data points, each with a timestamp and metrics + object: + type: string + description: The type of object being returned + enum: [analytics-graph] required: - - total + - summary + - data_points - object - - data - /analytics/groups/metadata/{metadataKey}: + /analytics/graphs/feedbacks/weighted: servers: *ControlPlaneServers get: tags: - - Analytics > Groups - summary: Get metadata key based grouped data. + - Analytics > Graphs + summary: Get weighted feedbacks graph. Weighted feedback is (value * score) parameters: - - name: metadataKey - in: path - schema: - type: string - required: true - $ref: "#/components/parameters/TimeOfGenerationMin" - $ref: "#/components/parameters/TimeOfGenerationMax" - $ref: "#/components/parameters/TotalUnitsMin" @@ -18741,8 +18829,6 @@ paths: - $ref: "#/components/parameters/Configs" - $ref: "#/components/parameters/WorkspaceSlug" - $ref: "#/components/parameters/ApiKeyIds" - - $ref: "#/components/parameters/CurrentPage" - - $ref: "#/components/parameters/PageSize" - $ref: "#/components/parameters/Metadata" - $ref: "#/components/parameters/AiOrgModel" - $ref: "#/components/parameters/TraceId" @@ -18761,251 +18847,70 @@ paths: schema: type: object properties: - object: - type: string - enum: [list] - total: - type: integer - description: Total records present across all pages - data: + summary: + type: object + properties: + avg: + type: integer + description: Average weighted feedback across all data points + required: + - avg + data_points: type: array items: type: object properties: - metadata_value: - type: string - description: Value of the metadata on which grouping has been done - requests: - type: integer - description: Total requests made with this metadata - cost: - type: integer - description: Total cost for all requests made with this metadata - avg_tokens: - type: integer - description: Average tokens per request for all requests made with this metadata - avg_weighted_feedback: - type: integer - description: Average weighted feedback for all requests made with this metadata - requests_with_feedback: - type: integer - description: Total requests with feedback - last_seen: + timestamp: type: string format: date-time - description: The last seen timestamp for this metadata - object: - type: string - description: The type of object being returned - enum: [analytics-group] - required: - - total - - object - - data - - /policies/usage-limits: - post: - tags: - - Usage Limits Policies - summary: Create Usage Limits Policy - description: Create a new usage limits policy to control total usage (cost or tokens) over a period. - operationId: createUsageLimitsPolicy - security: - - BearerAuth: [] - - ApiKeyAuth: [] - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateUsageLimitsPolicyRequest' - examples: - monthlyCostLimit: - summary: Monthly Cost Limit per API Key - value: - name: Monthly Cost Limit per API Key - conditions: - - key: workspace_id - value: workspace-123 - group_by: - - key: api_key - type: cost - credit_limit: 1000.0 - alert_threshold: 800.0 - periodic_reset: monthly - tokenLimit: - summary: Token Limit per User - value: - name: Token Limit per User - conditions: - - key: workspace_id - value: workspace-123 - group_by: - - key: metadata.user_id - type: tokens - credit_limit: 1000000 - periodic_reset: weekly - responses: - '200': - description: Policy created successfully - content: - application/json: - schema: - $ref: '#/components/schemas/CreatePolicyResponse' - '400': - description: Bad request - '401': - description: Unauthorized - '403': - description: Forbidden - '404': - description: Policy not found - '500': - description: Server error - - get: - tags: - - Usage Limits Policies - summary: List Usage Limits Policies - description: List all usage limits policies with optional filtering. - operationId: listUsageLimitsPolicies - security: - - BearerAuth: [] - - ApiKeyAuth: [] - parameters: - - $ref: '#/components/parameters/WorkspaceIdQuery' - - name: status - in: query - description: Filter by status - required: false - schema: - type: string - enum: [active, archived] - default: active - - name: type - in: query - description: Filter by policy type - required: false - schema: - type: string - enum: [cost, tokens] - - $ref: '#/components/parameters/PageSize' - - $ref: '#/components/parameters/CurrentPage' - responses: - '200': - description: List of usage limits policies - content: - application/json: - schema: - $ref: '#/components/schemas/UsageLimitsPolicyListResponse' - '400': - description: Bad request - '401': - description: Unauthorized - '403': - description: Forbidden - '404': - description: Policy not found - '500': - description: Server error - - /policies/usage-limits/{policyUsageLimitsId}: - get: - tags: - - Usage Limits Policies - summary: Get Usage Limits Policy - description: Get a single usage limits policy by ID. - operationId: getUsageLimitsPolicy - security: - - BearerAuth: [] - - ApiKeyAuth: [] - parameters: - - $ref: '#/components/parameters/PolicyUsageLimitsId' - - name: status - in: query - description: Filter by status - required: false - schema: - type: string - enum: [active, archived] - default: active - - name: include_usage - in: query - description: Include usage information for each value key - required: false - schema: - type: boolean - default: false - responses: - '200': - description: Usage limits policy details - content: - application/json: - schema: - $ref: '#/components/schemas/UsageLimitsPolicyResponse' - '400': - description: Bad request - '401': - description: Unauthorized - '403': - description: Forbidden - '404': - description: Policy not found - '500': - description: Server error - - put: - tags: - - Usage Limits Policies - summary: Update Usage Limits Policy - description: Update an existing usage limits policy. - operationId: updateUsageLimitsPolicy - security: - - BearerAuth: [] - - ApiKeyAuth: [] - parameters: - - $ref: '#/components/parameters/PolicyUsageLimitsId' - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/UpdateUsageLimitsPolicyRequest' - example: - credit_limit: 2000.0 - alert_threshold: 1500.0 - reset_usage_for_value: api-key-123 - responses: - '200': - description: Policy updated successfully - content: - application/json: - schema: - $ref: '#/components/schemas/SuccessResponse' - '400': - description: Bad request - '401': - description: Unauthorized - '403': - description: Forbidden - '404': - description: Policy not found - '500': - description: Server error + description: The timestamp for the data point bucket + avg: + type: integer + description: Average weighted feedback for this data point bucket + required: + - timestamp + - avg + description: An array of data points, each with a timestamp and metrics + object: + type: string + description: The type of object being returned + enum: [analytics-graph] + required: + - summary + - data_points + - object - delete: + /analytics/graphs/feedbacks/ai-models: + servers: *ControlPlaneServers + get: tags: - - Usage Limits Policies - summary: Delete Usage Limits Policy - description: Archive (soft delete) a usage limits policy. - operationId: deleteUsageLimitsPolicy - security: - - BearerAuth: [] - - ApiKeyAuth: [] + - Analytics > Graphs + summary: Get feedbacks per ai_models graph parameters: - - $ref: '#/components/parameters/PolicyUsageLimitsId' + - $ref: "#/components/parameters/TimeOfGenerationMin" + - $ref: "#/components/parameters/TimeOfGenerationMax" + - $ref: "#/components/parameters/TotalUnitsMin" + - $ref: "#/components/parameters/TotalUnitsMax" + - $ref: "#/components/parameters/CostMin" + - $ref: "#/components/parameters/CostMax" + - $ref: "#/components/parameters/PromptTokenMin" + - $ref: "#/components/parameters/PromptTokenMax" + - $ref: "#/components/parameters/CompletionTokenMin" + - $ref: "#/components/parameters/CompletionTokenMax" + - $ref: "#/components/parameters/StatusCode" + - $ref: "#/components/parameters/WeightedFeedbackMin" + - $ref: "#/components/parameters/WeightedFeedbackMax" + - $ref: "#/components/parameters/VirtualKeys" + - $ref: "#/components/parameters/Configs" + - $ref: "#/components/parameters/WorkspaceSlug" + - $ref: "#/components/parameters/ApiKeyIds" + - $ref: "#/components/parameters/Metadata" + - $ref: "#/components/parameters/AiOrgModel" + - $ref: "#/components/parameters/TraceId" + - $ref: "#/components/parameters/SpanId" + - $ref: "#/components/parameters/PromptSlug" responses: - '200': + "200": description: OK headers: Content-Type: @@ -19016,188 +18921,208 @@ paths: application/json: schema: type: object - example: {} - '400': - description: Bad request - '401': - description: Unauthorized - '403': - description: Forbidden - '404': - description: Policy not found - '500': - description: Server error - - /policies/rate-limits: - post: - tags: - - Rate Limits Policies - summary: Create Rate Limits Policy - description: Create a new rate limits policy to control the rate of requests or tokens consumed per minute, hour, or day. - operationId: createRateLimitsPolicy - security: - - BearerAuth: [] - - ApiKeyAuth: [] - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateRateLimitsPolicyRequest' - examples: - requestsPerMinute: - summary: 100 Requests per Minute per API Key - value: - name: 100 RPM per API Key - conditions: - - key: workspace_id - value: workspace-123 - group_by: - - key: api_key - type: requests - unit: rpm - value: 100 - tokensPerHour: - summary: 10K Tokens per Hour per User - value: - name: 10K Tokens per Hour per User - conditions: - - key: workspace_id - value: workspace-123 - group_by: - - key: metadata.user_id - type: tokens - unit: rph - value: 10000 - responses: - '200': - description: Policy created successfully - content: - application/json: - schema: - $ref: '#/components/schemas/CreatePolicyResponse' - '400': - description: Bad request - '401': - description: Unauthorized - '403': - description: Forbidden - '500': - description: Server error + properties: + summary: + type: object + data_points: + type: array + items: + type: object + properties: + ai_model: + type: string + description: AI model for which feedback data is calculated + total: + type: integer + description: Total feedbacks for this ai_model requests + avg_weighted_feedback: + type: integer + description: Average weighted feedback for this ai_model requests + required: + - ai_model + - total + - avg_weighted_feedback + description: An array of data points, each with a timestamp and metrics + object: + type: string + description: The type of object being returned + enum: [analytics-graph] + required: + - summary + - data_points + - object + /analytics/summary/cache: + servers: *ControlPlaneServers get: tags: - - Rate Limits Policies - summary: List Rate Limits Policies - description: List all rate limits policies with optional filtering. - operationId: listRateLimitsPolicies - security: - - BearerAuth: [] - - ApiKeyAuth: [] + - Analytics > Summary + summary: Get cache summary data for the selected time period parameters: - - $ref: '#/components/parameters/WorkspaceIdQuery' - - name: status - in: query - description: Filter by status - required: false - schema: - type: string - enum: [active, archived] - default: active - - name: type - in: query - description: Filter by policy type - required: false - schema: - type: string - enum: [requests, tokens] - - name: unit - in: query - description: Filter by rate unit - required: false - schema: - type: string - enum: [rpm, rph, rpd] - - $ref: '#/components/parameters/PageSize' - - $ref: '#/components/parameters/CurrentPage' + - $ref: "#/components/parameters/TimeOfGenerationMin" + - $ref: "#/components/parameters/TimeOfGenerationMax" + - $ref: "#/components/parameters/TotalUnitsMin" + - $ref: "#/components/parameters/TotalUnitsMax" + - $ref: "#/components/parameters/CostMin" + - $ref: "#/components/parameters/CostMax" + - $ref: "#/components/parameters/PromptTokenMin" + - $ref: "#/components/parameters/PromptTokenMax" + - $ref: "#/components/parameters/CompletionTokenMin" + - $ref: "#/components/parameters/CompletionTokenMax" + - $ref: "#/components/parameters/StatusCode" + - $ref: "#/components/parameters/WeightedFeedbackMin" + - $ref: "#/components/parameters/WeightedFeedbackMax" + - $ref: "#/components/parameters/VirtualKeys" + - $ref: "#/components/parameters/Configs" + - $ref: "#/components/parameters/WorkspaceSlug" + - $ref: "#/components/parameters/ApiKeyIds" + - $ref: "#/components/parameters/Metadata" + - $ref: "#/components/parameters/AiOrgModel" + - $ref: "#/components/parameters/TraceId" + - $ref: "#/components/parameters/SpanId" + - $ref: "#/components/parameters/PromptSlug" responses: - '200': - description: List of rate limits policies + "200": + description: OK + headers: + Content-Type: + schema: + type: string + example: application/json content: application/json: schema: - $ref: '#/components/schemas/RateLimitsPolicyListResponse' - '400': - description: Bad request - '401': - description: Unauthorized - '403': - description: Forbidden - '404': - description: Policy not found - '500': - description: Server error + type: object + properties: + summary: + type: object + properties: + hits: + type: integer + description: Total cache hits + avg_latency: + type: integer + description: Average latency for a cache hit + total_requests: + type: integer + description: Total requests + cache_speedup: + type: integer + description: Percentage speedup for cache hits compared to non cache hit requests + object: + type: string + description: The type of object being returned + enum: [analytics-summary] + required: + - summary + - object - /policies/rate-limits/{rateLimitsPolicyId}: + /analytics/groups/users: + servers: *ControlPlaneServers get: tags: - - Rate Limits Policies - summary: Get Rate Limits Policy - description: Get a single rate limits policy by ID. - operationId: getRateLimitsPolicy - security: - - BearerAuth: [] - - ApiKeyAuth: [] - parameters: - - $ref: '#/components/parameters/RateLimitsPolicyId' - - name: status - in: query - description: Filter by status - required: false - schema: - type: string - enum: [active, archived] - default: active + - Analytics > Groups + summary: Get metadata users grouped data. + parameters: + - $ref: "#/components/parameters/TimeOfGenerationMin" + - $ref: "#/components/parameters/TimeOfGenerationMax" + - $ref: "#/components/parameters/TotalUnitsMin" + - $ref: "#/components/parameters/TotalUnitsMax" + - $ref: "#/components/parameters/CostMin" + - $ref: "#/components/parameters/CostMax" + - $ref: "#/components/parameters/PromptTokenMin" + - $ref: "#/components/parameters/PromptTokenMax" + - $ref: "#/components/parameters/CompletionTokenMin" + - $ref: "#/components/parameters/CompletionTokenMax" + - $ref: "#/components/parameters/StatusCode" + - $ref: "#/components/parameters/WeightedFeedbackMin" + - $ref: "#/components/parameters/WeightedFeedbackMax" + - $ref: "#/components/parameters/VirtualKeys" + - $ref: "#/components/parameters/Configs" + - $ref: "#/components/parameters/WorkspaceSlug" + - $ref: "#/components/parameters/ApiKeyIds" + - $ref: "#/components/parameters/CurrentPage" + - $ref: "#/components/parameters/PageSize" + - $ref: "#/components/parameters/Metadata" + - $ref: "#/components/parameters/AiOrgModel" + - $ref: "#/components/parameters/TraceId" + - $ref: "#/components/parameters/SpanId" + - $ref: "#/components/parameters/PromptSlug" responses: - '200': - description: Rate limits policy details + "200": + description: OK + headers: + Content-Type: + schema: + type: string + example: application/json content: application/json: schema: - $ref: '#/components/schemas/RateLimitsPolicyResponse' - '400': - description: Bad request - '401': - description: Unauthorized - '403': - description: Forbidden - '404': - description: Policy not found - '500': - description: Server error + type: object + properties: + object: + type: string + enum: [list] + total: + type: integer + description: Total records present across all pages + data: + type: array + items: + type: object + properties: + user: + type: string + description: The user for which the data is calculated + requests: + type: string + description: Total requests made by this user + cost: + type: string + description: Total cost in cents for the requests made by this user + object: + type: string + description: The type of object being returned + enum: [analytics-group] + required: + - total + - object + - data - put: + /analytics/groups/ai-models: + servers: *ControlPlaneServers + get: tags: - - Rate Limits Policies - summary: Update Rate Limits Policy - description: Update an existing rate limits policy. - operationId: updateRateLimitsPolicy - security: - - BearerAuth: [] - - ApiKeyAuth: [] + - Analytics > Groups + summary: Get ai model grouped data. parameters: - - $ref: '#/components/parameters/RateLimitsPolicyId' - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/UpdateRateLimitsPolicyRequest' - example: - value: 200 - unit: rph + - $ref: "#/components/parameters/TimeOfGenerationMin" + - $ref: "#/components/parameters/TimeOfGenerationMax" + - $ref: "#/components/parameters/TotalUnitsMin" + - $ref: "#/components/parameters/TotalUnitsMax" + - $ref: "#/components/parameters/CostMin" + - $ref: "#/components/parameters/CostMax" + - $ref: "#/components/parameters/PromptTokenMin" + - $ref: "#/components/parameters/PromptTokenMax" + - $ref: "#/components/parameters/CompletionTokenMin" + - $ref: "#/components/parameters/CompletionTokenMax" + - $ref: "#/components/parameters/StatusCode" + - $ref: "#/components/parameters/WeightedFeedbackMin" + - $ref: "#/components/parameters/WeightedFeedbackMax" + - $ref: "#/components/parameters/VirtualKeys" + - $ref: "#/components/parameters/Configs" + - $ref: "#/components/parameters/WorkspaceSlug" + - $ref: "#/components/parameters/ApiKeyIds" + - $ref: "#/components/parameters/CurrentPage" + - $ref: "#/components/parameters/PageSize" + - $ref: "#/components/parameters/Metadata" + - $ref: "#/components/parameters/AiOrgModel" + - $ref: "#/components/parameters/TraceId" + - $ref: "#/components/parameters/SpanId" + - $ref: "#/components/parameters/PromptSlug" responses: - '200': + "200": description: OK headers: Content-Type: @@ -19208,31 +19133,71 @@ paths: application/json: schema: type: object - example: {} - '400': - description: Bad request - '401': - description: Unauthorized - '403': - description: Forbidden - '404': - description: Policy not found - '500': - description: Server error + properties: + object: + type: string + enum: [list] + total: + type: integer + description: Total records present across all pages + data: + type: array + items: + type: object + properties: + ai_model: + type: string + description: The ai model for which the data is calculated + requests: + type: string + description: Total requests made for this ai model + object: + type: string + description: The type of object being returned + enum: [analytics-group] + required: + - total + - object + - data - delete: + /analytics/groups/metadata/{metadataKey}: + servers: *ControlPlaneServers + get: tags: - - Rate Limits Policies - summary: Delete Rate Limits Policy - description: Delete a rate limits policy. - operationId: deleteRateLimitsPolicy - security: - - BearerAuth: [] - - ApiKeyAuth: [] + - Analytics > Groups + summary: Get metadata key based grouped data. parameters: - - $ref: '#/components/parameters/RateLimitsPolicyId' + - name: metadataKey + in: path + schema: + type: string + required: true + - $ref: "#/components/parameters/TimeOfGenerationMin" + - $ref: "#/components/parameters/TimeOfGenerationMax" + - $ref: "#/components/parameters/TotalUnitsMin" + - $ref: "#/components/parameters/TotalUnitsMax" + - $ref: "#/components/parameters/CostMin" + - $ref: "#/components/parameters/CostMax" + - $ref: "#/components/parameters/PromptTokenMin" + - $ref: "#/components/parameters/PromptTokenMax" + - $ref: "#/components/parameters/CompletionTokenMin" + - $ref: "#/components/parameters/CompletionTokenMax" + - $ref: "#/components/parameters/StatusCode" + - $ref: "#/components/parameters/WeightedFeedbackMin" + - $ref: "#/components/parameters/WeightedFeedbackMax" + - $ref: "#/components/parameters/VirtualKeys" + - $ref: "#/components/parameters/Configs" + - $ref: "#/components/parameters/WorkspaceSlug" + - $ref: "#/components/parameters/ApiKeyIds" + - $ref: "#/components/parameters/CurrentPage" + - $ref: "#/components/parameters/PageSize" + - $ref: "#/components/parameters/Metadata" + - $ref: "#/components/parameters/AiOrgModel" + - $ref: "#/components/parameters/TraceId" + - $ref: "#/components/parameters/SpanId" + - $ref: "#/components/parameters/PromptSlug" responses: - '200': + "200": description: OK headers: Content-Type: @@ -19243,17 +19208,49 @@ paths: application/json: schema: type: object - example: {} - '400': - description: Bad request - '401': - description: Unauthorized - '403': - description: Forbidden - '404': - description: Policy not found - '500': - description: Server error + properties: + object: + type: string + enum: [list] + total: + type: integer + description: Total records present across all pages + data: + type: array + items: + type: object + properties: + metadata_value: + type: string + description: Value of the metadata on which grouping has been done + requests: + type: integer + description: Total requests made with this metadata + cost: + type: integer + description: Total cost for all requests made with this metadata + avg_tokens: + type: integer + description: Average tokens per request for all requests made with this metadata + avg_weighted_feedback: + type: integer + description: Average weighted feedback for all requests made with this metadata + requests_with_feedback: + type: integer + description: Total requests with feedback + last_seen: + type: string + format: date-time + description: The last seen timestamp for this metadata + object: + type: string + description: The type of object being returned + enum: [analytics-group] + required: + - total + - object + - data + components: securitySchemes: