Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
257 changes: 249 additions & 8 deletions spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -8529,7 +8529,7 @@
"tags": [
"orgs"
],
"summary": "Register a new S3 dataset that Zoo can assume into on behalf of the caller's org.",
"summary": "Register a new org dataset.",
"description": "If the dataset lives in S3, call `/org/dataset/s3/policies` first so you can generate the trust, permission, and bucket policies scoped to your dataset before invoking this endpoint.",
"operationId": "create_org_dataset",
"requestBody": {
Expand Down Expand Up @@ -9891,6 +9891,218 @@
}
}
},
"/org/datasets/{id}/uploads": {
"post": {
"tags": [
"orgs"
],
"summary": "Upload source files into a Zoo-managed dataset.",
"description": "This endpoint accepts `multipart/form-data` where each file part becomes a source object in the dataset. Paths are normalized and must be relative.",
"operationId": "upload_org_dataset_files",
"parameters": [
{
"in": "path",
"name": "id",
"description": "The identifier of the org dataset to upload files into.",
"required": true,
"schema": {
"$ref": "#/components/schemas/Uuid"
}
}
],
"requestBody": {
"content": {
"multipart/form-data": {
"schema": {
"type": "string",
"format": "binary"
}
}
Comment on lines +9914 to +9920
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The multipart/form-data schema is incorrectly defined for multiple file uploads. The description states "each file part becomes a source object" (line 9900), and the response includes uploaded_files (plural), but the schema is defined as a single string with binary format. This will not properly handle multiple file parts in a multipart request.

Fix: Change the schema to properly accept multiple files:

"schema": {
  "type": "object",
  "additionalProperties": {
    "type": "string",
    "format": "binary"
  }
}

Or use an array if files share the same field name:

"schema": {
  "type": "array",
  "items": {
    "type": "string",
    "format": "binary"
  }
}
Suggested change
- "content": {
- "multipart/form-data": {
- "schema": {
- "type": "string",
- "format": "binary"
- }
- }
+ "content": {
+ "multipart/form-data": {
+ "schema": {
+ "type": "object",
+ "additionalProperties": {
+ "type": "string",
+ "format": "binary"
+ }
+ }
+ }

Spotted by Graphite Agent

Fix in Graphite


Is this helpful? React 👍 or 👎 to let us know.

},
"required": true
},
"responses": {
"202": {
"description": "successfully enqueued operation",
"headers": {
"Access-Control-Allow-Credentials": {
"description": "Access-Control-Allow-Credentials header.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"Access-Control-Allow-Headers": {
"description": "Access-Control-Allow-Headers header. This is a comma-separated list of headers.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"Access-Control-Allow-Methods": {
"description": "Access-Control-Allow-Methods header.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"Access-Control-Allow-Origin": {
"description": "Access-Control-Allow-Origin header.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"Content-Location": {
"description": "The Content-Location header for responses that are not the final destination. This is used to indicate where the resource can be found once it is finished.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"Location": {
"description": "The location header for redirects and letting users know if there is a websocket they can listen to for status updates on their operation.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"Set-Cookie": {
"description": "Set-Cookie header.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"X-Api-Call-Id": {
"description": "ID for this request. We return it so that users can report this to us and help us debug their problems.",
"style": "simple",
"required": true,
"schema": {
"type": "string"
}
}
},
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/UploadOrgDatasetFilesResponse"
}
}
}
},
"4XX": {
"$ref": "#/components/responses/Error"
},
"5XX": {
"$ref": "#/components/responses/Error"
}
}
},
"options": {
"tags": [
"hidden"
],
"summary": "OPTIONS endpoint.",
"description": "This is necessary for some preflight requests, specifically POST, PUT, and DELETE.",
"operationId": "options_upload_org_dataset_files",
"parameters": [
{
"in": "path",
"name": "id",
"description": "The identifier of the org dataset.",
"required": true,
"schema": {
"$ref": "#/components/schemas/Uuid"
}
}
],
"responses": {
"204": {
"description": "successful operation, no content",
"headers": {
"Access-Control-Allow-Credentials": {
"description": "Access-Control-Allow-Credentials header.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"Access-Control-Allow-Headers": {
"description": "Access-Control-Allow-Headers header. This is a comma-separated list of headers.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"Access-Control-Allow-Methods": {
"description": "Access-Control-Allow-Methods header.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"Access-Control-Allow-Origin": {
"description": "Access-Control-Allow-Origin header.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"Content-Location": {
"description": "The Content-Location header for responses that are not the final destination. This is used to indicate where the resource can be found once it is finished.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"Location": {
"description": "The location header for redirects and letting users know if there is a websocket they can listen to for status updates on their operation.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"Set-Cookie": {
"description": "Set-Cookie header.",
"style": "simple",
"schema": {
"nullable": true,
"type": "string"
}
},
"X-Api-Call-Id": {
"description": "ID for this request. We return it so that users can report this to us and help us debug their problems.",
"style": "simple",
"required": true,
"schema": {
"type": "string"
}
}
}
},
"4XX": {
"$ref": "#/components/responses/Error"
},
"5XX": {
"$ref": "#/components/responses/Error"
}
}
}
},
"/org/members": {
"get": {
"tags": [
Expand Down Expand Up @@ -32317,7 +32529,7 @@
},
"output_file": {
"nullable": true,
"description": "The output file. In the case of TextToCad this is a link to a file in a GCP bucket.",
"description": "The output directory reference for generated files. Stored as `blob://bucket/key` for new rows; legacy rows may contain a key-only value.",
"type": "string"
},
"project_name": {
Expand Down Expand Up @@ -41103,7 +41315,7 @@
},
"output_path": {
"nullable": true,
"description": "Path where the processed file output is stored, when available.",
"description": "Location reference where the processed file output is stored, when available. New records use `blob://bucket/key`; legacy records may still contain key-only values.",
"type": "string"
},
"started_at": {
Expand Down Expand Up @@ -41356,7 +41568,8 @@
"type": "object",
"properties": {
"access_role_arn": {
"description": "Identity we assume when accessing the dataset. Must be configured with the org's `aws_external_id` per AWS confused deputy guidance. See <https://docs.aws.amazon.com/IAM/latest/UserGuide/confused-deputy.html>.",
"nullable": true,
"description": "Identity we assume when accessing the dataset. Required when `provider` is `s3`; ignored for Zoo-managed datasets. Must be configured with the org's `aws_external_id` per AWS confused deputy guidance. See <https://docs.aws.amazon.com/IAM/latest/UserGuide/confused-deputy.html>.",
"type": "string"
},
"provider": {
Expand All @@ -41368,14 +41581,13 @@
]
},
"uri": {
"description": "Fully-qualified URI for the dataset contents.",
"nullable": true,
"description": "Fully-qualified URI for the dataset contents. Required when `provider` is `s3`; ignored for Zoo-managed datasets.",
"type": "string"
}
},
"required": [
"access_role_arn",
"provider",
"uri"
"provider"
]
},
"OrgDatasetStatus": {
Expand Down Expand Up @@ -44306,6 +44518,13 @@
"enum": [
"s3"
]
},
{
"description": "Zoo-managed dataset storage backed by the API's internal object store.",
"type": "string",
"enum": [
"zoo_managed"
]
}
]
},
Expand Down Expand Up @@ -47889,6 +48108,28 @@
"image"
]
},
"UploadOrgDatasetFilesResponse": {
"description": "Response payload for uploading files into a Zoo-managed dataset.",
"type": "object",
"properties": {
"queued_conversions": {
"description": "Number of conversion jobs newly queued.",
"type": "integer",
"format": "uint",
"minimum": 0
},
"uploaded_files": {
"description": "Number of files accepted and stored.",
"type": "integer",
"format": "uint",
"minimum": 0
}
},
"required": [
"queued_conversions",
"uploaded_files"
]
},
"User": {
"description": "A user.",
"type": "object",
Expand Down
Loading