From 64dbe48f645dcbb279af856a2065ed6e6cf16bab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 28 May 2026 13:44:51 +0000 Subject: [PATCH 1/2] Optimize detection allocations --- .../BaseFileTypeInterrogator.cs | 95 ++++++++++++------- 1 file changed, 59 insertions(+), 36 deletions(-) diff --git a/src/FileTypeInterrogator/BaseFileTypeInterrogator.cs b/src/FileTypeInterrogator/BaseFileTypeInterrogator.cs index d5b21eb..a54ca09 100644 --- a/src/FileTypeInterrogator/BaseFileTypeInterrogator.cs +++ b/src/FileTypeInterrogator/BaseFileTypeInterrogator.cs @@ -1,4 +1,5 @@ using System; +using System.Buffers; using System.Collections.Generic; using System.IO; using System.Linq; @@ -11,6 +12,9 @@ namespace FileTypeInterrogator /// public abstract class BaseFileTypeInterrogator : IFileTypeInterrogator { + private static readonly UTF8Encoding utf8WithBomEncoding = new UTF8Encoding(true, true); + private static readonly UTF8Encoding utf8WithoutBomEncoding = new UTF8Encoding(false, true); + private static readonly byte[] utf8Bom = utf8WithBomEncoding.GetPreamble(); private readonly Lazy> lazyFileTypes; private readonly FileTypeInfo asciiFileType = new FileTypeInfo("ASCII Text", "txt", "text/plain", null); private readonly FileTypeInfo utf8FileType = new FileTypeInfo("UTF-8 Text", "txt", "text/plain", null); @@ -43,13 +47,29 @@ public FileTypeInfo DetectType(Stream inputStream) if (inputStream.CanSeek) inputStream.Position = 0; - byte[] byteBuffer = new byte[inputStream.Length]; - _ = inputStream.Read(byteBuffer, 0, byteBuffer.Length); + int bufferSize = checked((int)inputStream.Length); + byte[] byteBuffer = ArrayPool.Shared.Rent(bufferSize); + try + { + int bytesRead = 0; + while (bytesRead < bufferSize) + { + int read = inputStream.Read(byteBuffer, bytesRead, bufferSize - bytesRead); + if (read == 0) + break; - if (inputStream.CanSeek) - inputStream.Position = 0; + bytesRead += read; + } - return DetectType(byteBuffer); + return DetectType(byteBuffer, bytesRead); + } + finally + { + if (inputStream.CanSeek) + inputStream.Position = 0; + + ArrayPool.Shared.Return(byteBuffer); + } } /// @@ -58,25 +78,32 @@ public FileTypeInfo DetectType(Stream inputStream) /// The file contents to check. /// public FileTypeInfo DetectType(byte[] fileContent) + { + return DetectType(fileContent, fileContent?.Length ?? 0); + } + + private FileTypeInfo DetectType(byte[] fileContent, int length) { if (fileContent == null) throw new ArgumentNullException(nameof(fileContent)); - if (fileContent.Length == 0) + if (length == 0) throw new ArgumentException("input must not be empty"); + ReadOnlySpan input = fileContent.AsSpan(0, length); + // iterate over each type and determine if we have a match based on file signature. foreach (var fileTypeInfo in AvailableTypes) { // if we found a match return the matching filetypeinfo - if (IsMatchingType(fileContent, fileTypeInfo)) + if (IsMatchingType(input, fileTypeInfo)) return fileTypeInfo; } - if (IsAscii(fileContent)) + if (IsAscii(input)) return asciiFileType; - if (IsUTF8(fileContent, out bool hasBOM)) + if (IsUTF8(fileContent, length, out bool hasBOM)) return hasBOM ? utf8FileTypeWithBOM : utf8FileType; return null; @@ -108,11 +135,15 @@ public IEnumerable GetAvailableMimeTypes() /// public bool IsType(byte[] fileContent, string extensionAliasOrMimeType) { - foreach (var fileTypeInfo in AvailableTypes.Where(t => - t.FileType.Equals(extensionAliasOrMimeType, StringComparison.OrdinalIgnoreCase) || - t.MimeType.Equals(extensionAliasOrMimeType, StringComparison.OrdinalIgnoreCase) || - (t.Alias != null && t.Alias.Contains(extensionAliasOrMimeType, StringComparer.OrdinalIgnoreCase)))) + foreach (var fileTypeInfo in AvailableTypes) { + if (!(fileTypeInfo.FileType.Equals(extensionAliasOrMimeType, StringComparison.OrdinalIgnoreCase) || + fileTypeInfo.MimeType.Equals(extensionAliasOrMimeType, StringComparison.OrdinalIgnoreCase) || + (fileTypeInfo.Alias != null && fileTypeInfo.Alias.Contains(extensionAliasOrMimeType, StringComparer.OrdinalIgnoreCase)))) + { + continue; + } + if (IsMatchingType(fileContent, fileTypeInfo)) return true; } @@ -131,23 +162,17 @@ private static bool IsMatchingType(ReadOnlySpan input, FileTypeInfo type) // some file types have the same header // but different signature in another location, if its one of these determine what the true file type is - if (isMatch && type.SubHeader != null) + if (isMatch && type.SubHeader != null && type.SubHeader.Length > 0) { - // find all indices of matching the 1st byte of the additional sequence - var matchingIndices = new List(); - for (int i = 0; i < input.Length; i++) + isMatch = false; + for (int i = 0; i <= input.Length - type.SubHeader.Length; i++) { if (input[i] == type.SubHeader[0]) - matchingIndices.Add(i); - } - - // investigate all of them for a match - foreach (int potentialMatchingIndex in matchingIndices) - { - isMatch = FindMatch(input, type.SubHeader, potentialMatchingIndex); - - if (isMatch) - break; + { + isMatch = FindMatch(input, type.SubHeader, i); + if (isMatch) + break; + } } } @@ -231,7 +256,7 @@ private static bool IsText(byte[] input, out bool hasBOM) bool isAscii = IsAscii(input); - return isAscii || IsUTF8(input, out hasBOM); + return isAscii || IsUTF8(input, input.Length, out hasBOM); } private static bool IsAscii(ReadOnlySpan input) @@ -245,20 +270,19 @@ private static bool IsAscii(ReadOnlySpan input) return true; } - private static bool IsUTF8(byte[] input, out bool hasBOM) + private static bool IsUTF8(byte[] input, int length, out bool hasBOM) { - UTF8Encoding utf8WithBOM = new UTF8Encoding(true, true); bool isUTF8 = true; - byte[] bom = utf8WithBOM.GetPreamble(); - int bomLength = bom.Length; + int bomLength = utf8Bom.Length; hasBOM = false; - if (input.Length >= bomLength && bom.SequenceEqual(input.Take(bomLength))) + ReadOnlySpan inputSpan = input.AsSpan(0, length); + if (length >= bomLength && inputSpan.Slice(0, bomLength).SequenceEqual(utf8Bom)) { try { - utf8WithBOM.GetString(input, bomLength, input.Length - bomLength); + utf8WithBomEncoding.GetString(input, bomLength, length - bomLength); hasBOM = true; } catch (ArgumentException) @@ -270,10 +294,9 @@ private static bool IsUTF8(byte[] input, out bool hasBOM) if (isUTF8 && !hasBOM) { - UTF8Encoding utf8WithoutBOM = new UTF8Encoding(false, true); try { - utf8WithoutBOM.GetString(input, 0, input.Length); + utf8WithoutBomEncoding.GetString(input, 0, length); isUTF8 = true; } catch (ArgumentException) From 28cbbcd38d8c2f56e4e9a3399101611f1f9099ab Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 28 May 2026 13:49:51 +0000 Subject: [PATCH 2/2] Apply remaining changes --- .../BaseFileTypeInterrogator.cs | 48 ++++++++++++------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/src/FileTypeInterrogator/BaseFileTypeInterrogator.cs b/src/FileTypeInterrogator/BaseFileTypeInterrogator.cs index a54ca09..98ad0c5 100644 --- a/src/FileTypeInterrogator/BaseFileTypeInterrogator.cs +++ b/src/FileTypeInterrogator/BaseFileTypeInterrogator.cs @@ -12,9 +12,9 @@ namespace FileTypeInterrogator /// public abstract class BaseFileTypeInterrogator : IFileTypeInterrogator { - private static readonly UTF8Encoding utf8WithBomEncoding = new UTF8Encoding(true, true); - private static readonly UTF8Encoding utf8WithoutBomEncoding = new UTF8Encoding(false, true); - private static readonly byte[] utf8Bom = utf8WithBomEncoding.GetPreamble(); + private static readonly UTF8Encoding Utf8WithBomEncoding = new UTF8Encoding(true, true); + private static readonly UTF8Encoding Utf8WithoutBomEncoding = new UTF8Encoding(false, true); + private static readonly byte[] Utf8Bom = Utf8WithBomEncoding.GetPreamble(); private readonly Lazy> lazyFileTypes; private readonly FileTypeInfo asciiFileType = new FileTypeInfo("ASCII Text", "txt", "text/plain", null); private readonly FileTypeInfo utf8FileType = new FileTypeInfo("UTF-8 Text", "txt", "text/plain", null); @@ -47,7 +47,11 @@ public FileTypeInfo DetectType(Stream inputStream) if (inputStream.CanSeek) inputStream.Position = 0; - int bufferSize = checked((int)inputStream.Length); + long streamLength = inputStream.Length; + if (streamLength > int.MaxValue) + throw new NotSupportedException("Streams larger than 2 GB are not supported."); + + int bufferSize = (int)streamLength; byte[] byteBuffer = ArrayPool.Shared.Rent(bufferSize); try { @@ -79,7 +83,10 @@ public FileTypeInfo DetectType(Stream inputStream) /// public FileTypeInfo DetectType(byte[] fileContent) { - return DetectType(fileContent, fileContent?.Length ?? 0); + if (fileContent == null) + throw new ArgumentNullException(nameof(fileContent)); + + return DetectType(fileContent, fileContent.Length); } private FileTypeInfo DetectType(byte[] fileContent, int length) @@ -134,18 +141,24 @@ public IEnumerable GetAvailableMimeTypes() /// The file type to validate. /// public bool IsType(byte[] fileContent, string extensionAliasOrMimeType) + { + if (fileContent == null) + throw new ArgumentNullException(nameof(fileContent)); + + return IsType(fileContent, fileContent.Length, extensionAliasOrMimeType); + } + + private bool IsType(byte[] fileContent, int length, string extensionAliasOrMimeType) { foreach (var fileTypeInfo in AvailableTypes) { - if (!(fileTypeInfo.FileType.Equals(extensionAliasOrMimeType, StringComparison.OrdinalIgnoreCase) || + if (fileTypeInfo.FileType.Equals(extensionAliasOrMimeType, StringComparison.OrdinalIgnoreCase) || fileTypeInfo.MimeType.Equals(extensionAliasOrMimeType, StringComparison.OrdinalIgnoreCase) || - (fileTypeInfo.Alias != null && fileTypeInfo.Alias.Contains(extensionAliasOrMimeType, StringComparer.OrdinalIgnoreCase)))) + (fileTypeInfo.Alias != null && fileTypeInfo.Alias.Contains(extensionAliasOrMimeType, StringComparer.OrdinalIgnoreCase))) { - continue; + if (IsMatchingType(fileContent.AsSpan(0, length), fileTypeInfo)) + return true; } - - if (IsMatchingType(fileContent, fileTypeInfo)) - return true; } if (extensionAliasOrMimeType.Equals("txt", StringComparison.OrdinalIgnoreCase) || @@ -162,10 +175,11 @@ private static bool IsMatchingType(ReadOnlySpan input, FileTypeInfo type) // some file types have the same header // but different signature in another location, if its one of these determine what the true file type is - if (isMatch && type.SubHeader != null && type.SubHeader.Length > 0) + int subHeaderLength = type.SubHeader?.Length ?? 0; + if (isMatch && subHeaderLength > 0) { isMatch = false; - for (int i = 0; i <= input.Length - type.SubHeader.Length; i++) + for (int i = 0; i <= input.Length - subHeaderLength; i++) { if (input[i] == type.SubHeader[0]) { @@ -273,16 +287,16 @@ private static bool IsAscii(ReadOnlySpan input) private static bool IsUTF8(byte[] input, int length, out bool hasBOM) { bool isUTF8 = true; - int bomLength = utf8Bom.Length; + int bomLength = Utf8Bom.Length; hasBOM = false; ReadOnlySpan inputSpan = input.AsSpan(0, length); - if (length >= bomLength && inputSpan.Slice(0, bomLength).SequenceEqual(utf8Bom)) + if (length >= bomLength && inputSpan.Slice(0, bomLength).SequenceEqual(Utf8Bom)) { try { - utf8WithBomEncoding.GetString(input, bomLength, length - bomLength); + Utf8WithBomEncoding.GetString(input, bomLength, length - bomLength); hasBOM = true; } catch (ArgumentException) @@ -296,7 +310,7 @@ private static bool IsUTF8(byte[] input, int length, out bool hasBOM) { try { - utf8WithoutBomEncoding.GetString(input, 0, length); + Utf8WithoutBomEncoding.GetString(input, 0, length); isUTF8 = true; } catch (ArgumentException)