From 99fbbf2c03e9b7652c102f85a908f748f87aa7da Mon Sep 17 00:00:00 2001
From: Robert Yates
Date: Fri, 30 Jan 2026 21:35:33 -0500
Subject: [PATCH 1/2] added read_binary_file tool, piggybacking on the support
 that now exists for embedded resources

---
 .../__tests__/read-binary-file.test.ts        | 233 ++++++++++++++++++
 src/filesystem/index.ts                       | 106 +++++++-
 2 files changed, 335 insertions(+), 4 deletions(-)
 create mode 100644 src/filesystem/__tests__/read-binary-file.test.ts

diff --git a/src/filesystem/__tests__/read-binary-file.test.ts b/src/filesystem/__tests__/read-binary-file.test.ts
new file mode 100644
index 0000000000..ef85731c33
--- /dev/null
+++ b/src/filesystem/__tests__/read-binary-file.test.ts
@@ -0,0 +1,233 @@
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import * as fs from 'fs/promises';
+import * as path from 'path';
+import * as os from 'os';
+import { pathToFileURL } from 'url';
+import { Client } from '@modelcontextprotocol/sdk/client/index.js';
+import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
+
+/**
+ * Integration tests for read_binary_file tool.
+ * Tests that binary files (Excel, PDF, images, etc.) can be read and returned as embedded resources.
+ */
+describe('read_binary_file tool', () => {
+  let client: Client;
+  let transport: StdioClientTransport;
+  let testDir: string;
+
+  beforeEach(async () => {
+    // Create a temp directory for testing
+    // Use realpath to resolve symlinks (e.g., /var -> /private/var on macOS)
+    const tempDir = await fs.mkdtemp(path.join(os.tmpdir(), 'mcp-fs-binary-test-'));
+    testDir = await fs.realpath(tempDir);
+
+    // Create a minimal valid .xlsx file (ZIP format with PK header)
+    const xlsxPath = path.join(testDir, 'test.xlsx');
+    const minimalXlsx = Buffer.from([
+      0x50, 0x4B, 0x03, 0x04, // ZIP local file header signature
+      0x14, 0x00, 0x00, 0x00, 0x08, 0x00, // version, flags, compression
+      0x00, 0x00, 0x00, 0x00, // time, date
+      0x00, 0x00, 0x00, 0x00, // CRC-32
+      0x00, 0x00, 0x00, 0x00, // compressed size
+      0x00, 0x00, 0x00, 0x00, // uncompressed size
+      0x00, 0x00, // filename length
+      0x00, 0x00, // extra field length
+    ]);
+    await fs.writeFile(xlsxPath, minimalXlsx);
+
+    // Create a minimal .xls file (OLE2 format)
+    const xlsPath = path.join(testDir, 'test.xls');
+    const minimalXls = Buffer.from([
+      0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1, // OLE2 signature
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    ]);
+    await fs.writeFile(xlsPath, minimalXls);
+
+    // Create a simple PNG file (1x1 red pixel)
+    const pngPath = path.join(testDir, 'test.png');
+    const pngData = Buffer.from([
+      0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature
+      0x00, 0x00, 0x00, 0x0D, 0x49, 0x48, 0x44, 0x52, // IHDR chunk
+      0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, // 1x1 dimensions
+      0x08, 0x02, 0x00, 0x00, 0x00, 0x90, 0x77, 0x53,
+      0xDE, 0x00, 0x00, 0x00, 0x0C, 0x49, 0x44, 0x41,
+      0x54, 0x08, 0xD7, 0x63, 0xF8, 0xCF, 0xC0, 0x00,
+      0x00, 0x03, 0x01, 0x01, 0x00, 0x18, 0xDD, 0x8D,
+      0xB4, 0x00, 0x00, 0x00, 0x00, 0x49, 0x45, 0x4E,
+      0x44, 0xAE, 0x42, 0x60, 0x82
+    ]);
+    await fs.writeFile(pngPath, pngData);
+
+    // Create a PDF file
+    const pdfPath = path.join(testDir, 'test.pdf');
+    const minimalPdf = Buffer.from('%PDF-1.4\n%EOF\n');
+    await fs.writeFile(pdfPath, minimalPdf);
+
+    // Start the MCP server
+    const serverPath = path.resolve(__dirname, '../dist/index.js');
+    transport = new StdioClientTransport({
+      command: 'node',
+      args: [serverPath, testDir],
+    });
+
+    client = new Client({
+      name: 'test-client',
+      version: '1.0.0',
+    }, {
+      capabilities: {}
+    });
+
+    await client.connect(transport);
+  });
+
+  afterEach(async () => {
+    await client?.close();
+    await fs.rm(testDir, { recursive: true, force: true });
+  });
+
+  it('should read .xlsx file and return as embedded resource', async () => {
+    const xlsxPath = path.join(testDir, 'test.xlsx');
+
+    const result = await client.callTool({
+      name: 'read_binary_file',
+      arguments: { path: xlsxPath }
+    });
+
+    // Check that we got content back
+    expect(result.content).toBeDefined();
+    expect(Array.isArray(result.content)).toBe(true);
+    const content = result.content as Array<any>;
+    expect(content.length).toBeGreaterThan(0);
+
+    // Check the content structure - should be a resource
+    const contentItem = content[0];
+    expect(contentItem.type).toBe('resource');
+    expect(contentItem.resource).toBeDefined();
+
+    // Check resource properties
+    const resource = contentItem.resource;
+    expect(resource.uri).toBeDefined();
+    expect(resource.uri).toBe(pathToFileURL(xlsxPath).href);
+    expect(resource.mimeType).toBe('application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
+    expect(resource.blob).toBeDefined();
+
+    // Blob should be valid base64
+    expect(typeof resource.blob).toBe('string');
+    expect(resource.blob.length).toBeGreaterThan(0);
+    expect(/^[A-Za-z0-9+/]*={0,2}$/.test(resource.blob)).toBe(true);
+
+    // Verify we can decode the base64 back to original
+    const decoded = Buffer.from(resource.blob, 'base64');
+    const original = await fs.readFile(xlsxPath);
+    expect(decoded.equals(original)).toBe(true);
+  });
+
+  it('should read .xls file and return as embedded resource', async () => {
+    const xlsPath = path.join(testDir, 'test.xls');
+
+    const result = await client.callTool({
+      name: 'read_binary_file',
+      arguments: { path: xlsPath }
+    });
+
+    const content = result.content as Array<any>;
+    const contentItem = content[0];
+
+    expect(contentItem.type).toBe('resource');
+    expect(contentItem.resource.mimeType).toBe('application/vnd.ms-excel');
+    expect(contentItem.resource.blob).toBeDefined();
+
+    // Verify data integrity
+    const decoded = Buffer.from(contentItem.resource.blob, 'base64');
+    const original = await fs.readFile(xlsPath);
+    expect(decoded.equals(original)).toBe(true);
+  });
+
+  it('should read PNG file and return as embedded resource', async () => {
+    const pngPath = path.join(testDir, 'test.png');
+
+    const result = await client.callTool({
+      name: 'read_binary_file',
+      arguments: { path: pngPath }
+    });
+
+    const content = result.content as Array<any>;
+    const contentItem = content[0];
+
+    expect(contentItem.type).toBe('resource');
+    expect(contentItem.resource.mimeType).toBe('image/png');
+    expect(contentItem.resource.blob).toBeDefined();
+
+    // Verify data integrity
+    const decoded = Buffer.from(contentItem.resource.blob, 'base64');
+    const original = await fs.readFile(pngPath);
+    expect(decoded.equals(original)).toBe(true);
+  });
+
+  it('should read PDF file and return as embedded resource', async () => {
+    const pdfPath = path.join(testDir, 'test.pdf');
+
+    const result = await client.callTool({
+      name: 'read_binary_file',
+      arguments: { path: pdfPath }
+    });
+
+    const content = result.content as Array<any>;
+    const contentItem = content[0];
+
+    expect(contentItem.type).toBe('resource');
+    expect(contentItem.resource.mimeType).toBe('application/pdf');
+    expect(contentItem.resource.blob).toBeDefined();
+
+    // Verify data integrity
+    const decoded = Buffer.from(contentItem.resource.blob, 'base64');
+    const original = await fs.readFile(pdfPath);
+    expect(decoded.equals(original)).toBe(true);
+  });
+
+  it('should handle large files efficiently', async () => {
+    // Create a larger file (100KB)
+    const largePath = path.join(testDir, 'large.xlsx');
+    const largeData = Buffer.alloc(100 * 1024);
+    // Add ZIP header to make it look like a valid xlsx
+    largeData.write('PK\x03\x04', 0);
+    await fs.writeFile(largePath, largeData);
+
+    const startTime = Date.now();
+    const result = await client.callTool({
+      name: 'read_binary_file',
+      arguments: { path: largePath }
+    });
+    const endTime = Date.now();
+
+    // Should complete in reasonable time
+    expect(endTime - startTime).toBeLessThan(2000);
+
+    // Verify the data
+    const content = result.content as Array<any>;
+    const contentItem = content[0];
+    expect(contentItem.type).toBe('resource');
+
+    const decoded = Buffer.from(contentItem.resource.blob, 'base64');
+    expect(decoded.length).toBe(100 * 1024);
+  });
+
+  it('should handle unknown file extensions with generic MIME type', async () => {
+    const unknownPath = path.join(testDir, 'test.xyz');
+    await fs.writeFile(unknownPath, Buffer.from('test data'));
+
+    const result = await client.callTool({
+      name: 'read_binary_file',
+      arguments: { path: unknownPath }
+    });
+
+    const content = result.content as Array<any>;
+    const contentItem = content[0];
+
+    expect(contentItem.type).toBe('resource');
+    expect(contentItem.resource.mimeType).toBe('application/octet-stream');
+    expect(contentItem.resource.blob).toBeDefined();
+  });
+});
+
+// Made with Bob
diff --git a/src/filesystem/index.ts b/src/filesystem/index.ts
index 48a599fae1..0c425741ba 100644
--- a/src/filesystem/index.ts
+++ b/src/filesystem/index.ts
@@ -264,19 +264,117 @@ server.registerTool(
     const mimeType = mimeTypes[extension] || "application/octet-stream";
     const data = await readFileAsBase64Stream(validPath);
 
+    // Determine the content type from the MIME type.
+    // Note: this tool only emits "image" and "audio" content items, so any other
+    // binary file (Excel, PDF, etc.) is labelled "image" purely as a base64 carrier.
+    // Prefer read_binary_file for those files; it returns an embedded resource instead.
     const type = mimeType.startsWith("image/")
       ? "image"
       : mimeType.startsWith("audio/")
         ? "audio"
-        // Fallback for other binary types, not officially supported by the spec but has been used for some time
-        : "blob";
-    const contentItem = { type: type as 'image' | 'audio' | 'blob', data, mimeType };
+        : "image"; // Use "image" for all other binary types as fallback
+
+    const contentItem = {
+      type: type as 'image' | 'audio',
+      data,
+      mimeType
+    };
+
     return {
       content: [contentItem],
       structuredContent: { content: [contentItem] }
-    } as unknown as CallToolResult;
+    };
   }
 );
 
+server.registerTool(
+  "read_binary_file",
+  {
+    title: "Read Binary File",
+    description:
+      "Read any binary file (Excel, PDF, images, etc.) and return it as an embedded resource with base64-encoded content. " +
+      "This is the recommended way to read binary files as it properly handles all file types. " +
+      "Only works within allowed directories.",
+    inputSchema: {
+      path: z.string()
+    },
+    outputSchema: {
+      content: z.array(z.object({
+        type: z.literal("resource"),
+        resource: z.object({
+          uri: z.string(),
+          mimeType: z.string(),
+          blob: z.string()
+        })
+      }))
+    },
+    annotations: { readOnlyHint: true }
+  },
+  async (args: { path: string }): Promise<CallToolResult> => {
+    const validPath = await validatePath(args.path);
+    const extension = path.extname(validPath).toLowerCase();
+
+    // Extended MIME type mapping for binary files
+    const mimeTypes: Record<string, string> = {
+      // Images
+      ".png": "image/png",
+      ".jpg": "image/jpeg",
+      ".jpeg": "image/jpeg",
+      ".gif": "image/gif",
+      ".webp": "image/webp",
+      ".bmp": "image/bmp",
+      ".svg": "image/svg+xml",
+      ".ico": "image/x-icon",
+      // Audio
+      ".mp3": "audio/mpeg",
+      ".wav": "audio/wav",
+      ".ogg": "audio/ogg",
+      ".flac": "audio/flac",
+      ".m4a": "audio/mp4",
+      // Video
+      ".mp4": "video/mp4",
+      ".webm": "video/webm",
+      ".mov": "video/quicktime",
+      // Documents
+      ".pdf": "application/pdf",
+      ".doc": "application/msword",
+      ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+      ".xls": "application/vnd.ms-excel",
+      ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+      ".xlsm": "application/vnd.ms-excel.sheet.macroEnabled.12",
+      ".ppt": "application/vnd.ms-powerpoint",
+      ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
+      // Archives
+      ".zip": "application/zip",
+      ".tar": "application/x-tar",
+      ".gz": "application/gzip",
+      ".7z": "application/x-7z-compressed",
+      ".rar": "application/vnd.rar",
+    };
+
+    const mimeType = mimeTypes[extension] || "application/octet-stream";
+    const blob = await readFileAsBase64Stream(validPath);
+
+    // Build a well-formed file:// URI (percent-encodes spaces, '#', '?', '%' and handles Windows paths)
+    const uri = (await import("node:url")).pathToFileURL(validPath).href;
+
+    const resourceContent = {
+      type: "resource" as const,
+      resource: {
+        uri,
+        mimeType,
+        blob
+      }
+    };
+
+    return {
+      content: [resourceContent],
+      structuredContent: {
+        content: [resourceContent]
+      }
+    };
+  }
+);
+
 server.registerTool(
   "read_multiple_files",

From 30957b383071500fe65abe89a76f129e38442b56 Mon Sep 17 00:00:00 2001
From: Robert Yates
Date: Fri, 30 Jan 2026 21:43:52 -0500
Subject: [PATCH 2/2] readme updated

---
 src/filesystem/README.md | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/filesystem/README.md b/src/filesystem/README.md
index e9ddc2b1e2..2cc2afac83 100644
--- a/src/filesystem/README.md
+++ b/src/filesystem/README.md
@@ -81,6 +81,16 @@ The server's directory access control follows this flow:
     - `path` (string)
   - Streams the file and returns base64 data with the corresponding MIME type
 
+- **read_binary_file**
+  - Read any binary file (Excel, PDF, images, etc.) as base64-encoded data
+  - Inputs:
+    - `path` (string)
+  - Returns file as an embedded resource with:
+    - Base64-encoded content in `blob` property
+    - Automatic MIME type detection
+    - Support for Excel (.xlsx, .xls), PDF, images, and other binary formats
+  - Use this for files that need to be processed as binary data
+
 - **read_multiple_files**
   - Read multiple files simultaneously
   - Input: `paths` (string[])
@@ -190,6 +200,7 @@ The mapping for filesystem tools is:
 |-----------------------------|--------------|----------------|-----------------|--------------------------------------------------|
 | `read_text_file`            | `true`       | –              | –               | Pure read                                        |
 | `read_media_file`           | `true`       | –              | –               | Pure read                                        |
+| `read_binary_file`          | `true`       | –              | –               | Pure read                                        |
 | `read_multiple_files`       | `true`       | –              | –               | Pure read                                        |
 | `list_directory`            | `true`       | –              | –               | Pure read                                        |
 | `list_directory_with_sizes` | `true`       | –              | –               | Pure read                                        |