diff --git a/src/ImageSharp/Compression/Zlib/ChunkedReadStream.cs b/src/ImageSharp/Compression/Zlib/ChunkedReadStream.cs new file mode 100644 index 0000000000..b697327fff --- /dev/null +++ b/src/ImageSharp/Compression/Zlib/ChunkedReadStream.cs @@ -0,0 +1,119 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. + +using SixLabors.ImageSharp.IO; + +namespace SixLabors.ImageSharp.Compression.Zlib; + +/// +/// A read-only stream over a sequence of length-delimited segments. Bytes are +/// pulled from the inner stream up to the current segment's remaining length; +/// when the segment is exhausted the supplied delegate is invoked to advance +/// to the next segment and return its length. The inner stream is not owned +/// and is not disposed. +/// +internal sealed class ChunkedReadStream : Stream +{ + private static readonly Func GetDataNoOp = () => 0; + + private readonly BufferedReadStream innerStream; + private readonly Func getData; + private int currentDataRemaining; + + public ChunkedReadStream(BufferedReadStream innerStream) + : this(innerStream, GetDataNoOp) + { + } + + public ChunkedReadStream(BufferedReadStream innerStream, Func getData) + { + this.innerStream = innerStream; + this.getData = getData; + } + + /// + public override bool CanRead => this.innerStream.CanRead; + + /// + public override bool CanSeek => false; + + /// + public override bool CanWrite => throw new NotSupportedException(); + + /// + public override long Length => throw new NotSupportedException(); + + /// + public override long Position { get => throw new NotSupportedException(); set => throw new NotSupportedException(); } + + /// + /// Sets the number of bytes available to read from the current segment. + /// Must be called before reading each segment. + /// + public void SetCurrentSegmentLength(int bytes) => this.currentDataRemaining = bytes; + + /// + public override void Flush() => throw new NotSupportedException(); + + /// + public override int ReadByte() + { + if (this.currentDataRemaining is 0) + { + this.currentDataRemaining = this.getData(); + if (this.currentDataRemaining is 0) + { + return -1; + } + } + + int value = this.innerStream.ReadByte(); + if (value is not -1) + { + this.currentDataRemaining--; + } + + return value; + } + + /// + public override int Read(byte[] buffer, int offset, int count) + { + // Decrement currentDataRemaining only by bytes actually returned by + // innerStream.Read; a short read otherwise underflows the segment + // counter and triggers getData() before the segment is truly drained. + int totalBytesRead = 0; + while (totalBytesRead < count) + { + if (this.currentDataRemaining is 0) + { + this.currentDataRemaining = this.getData(); + if (this.currentDataRemaining is 0) + { + break; + } + } + + int bytesToRead = Math.Min(count - totalBytesRead, this.currentDataRemaining); + int bytesRead = this.innerStream.Read(buffer, offset + totalBytesRead, bytesToRead); + if (bytesRead is 0) + { + break; + } + + this.currentDataRemaining -= bytesRead; + totalBytesRead += bytesRead; + } + + return totalBytesRead; + } + + /// + public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); + + /// + public override void SetLength(long value) => throw new NotSupportedException(); + + /// + public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException(); +} diff --git a/src/ImageSharp/Compression/Zlib/ZlibInflateStream.cs b/src/ImageSharp/Compression/Zlib/ZlibInflateStream.cs index 513171b179..11f34dac8a 100644 --- a/src/ImageSharp/Compression/Zlib/ZlibInflateStream.cs +++ b/src/ImageSharp/Compression/Zlib/ZlibInflateStream.cs @@ -8,9 +8,11 @@ namespace SixLabors.ImageSharp.Compression.Zlib; /// -/// Provides methods and properties for deframing streams from PNGs. +/// Reads chunked input, parses the zlib CMF/FLG header, and exposes a +/// over the remaining DEFLATE payload. The +/// Adler-32 trailer is not validated. /// -internal sealed class ZlibInflateStream : Stream +internal sealed class ZlibInflateStream : IDisposable { /// /// Used to read the Adler-32 and Crc-32 checksums. @@ -19,94 +21,13 @@ internal sealed class ZlibInflateStream : Stream /// private static readonly byte[] ChecksumBuffer = new byte[4]; - /// - /// A default delegate to get more data from the inner stream. - /// - private static readonly Func GetDataNoOp = () => 0; - - /// - /// The inner raw memory stream. - /// - private readonly BufferedReadStream innerStream; - - /// - /// A value indicating whether this instance of the given entity has been disposed. - /// - /// if this instance has been disposed; otherwise, . - /// - /// If the entity is disposed, it must not be disposed a second - /// time. The isDisposed field is set the first time the entity - /// is disposed. If the isDisposed field is true, then the Dispose() - /// method will not dispose again. This help not to prolong the entity's - /// life in the Garbage Collector. - /// - private bool isDisposed; - - /// - /// The current data remaining to be read. - /// - private int currentDataRemaining; - - /// - /// Delegate to get more data once we've exhausted the current data remaining. - /// - private readonly Func getData; - - /// - /// When true, the inflated payload is treated as a raw DEFLATE stream with no zlib - /// CMF/FLG header (and no Adler-32 trailer). This is required to decode IDATs in - /// Apple's proprietary CgBI PNG variant. - /// - private readonly bool noHeader; + private readonly ChunkedReadStream segmentStream; - /// - /// Initializes a new instance of the class. - /// - /// The inner raw stream. public ZlibInflateStream(BufferedReadStream innerStream) - : this(innerStream, GetDataNoOp, noHeader: false) - { - } + => this.segmentStream = new ChunkedReadStream(innerStream); - /// - /// Initializes a new instance of the class. - /// - /// The inner raw stream. - /// A delegate to get more data from the inner stream. public ZlibInflateStream(BufferedReadStream innerStream, Func getData) - : this(innerStream, getData, noHeader: false) - { - } - - /// - /// Initializes a new instance of the class. - /// - /// The inner raw stream. - /// A delegate to get more data from the inner stream. - /// - /// When , the payload is treated as raw DEFLATE with no zlib header. - /// - public ZlibInflateStream(BufferedReadStream innerStream, Func getData, bool noHeader) - { - this.innerStream = innerStream; - this.getData = getData; - this.noHeader = noHeader; - } - - /// - public override bool CanRead => this.innerStream.CanRead; - - /// - public override bool CanSeek => false; - - /// - public override bool CanWrite => throw new NotSupportedException(); - - /// - public override long Length => throw new NotSupportedException(); - - /// - public override long Position { get => throw new NotSupportedException(); set => throw new NotSupportedException(); } + => this.segmentStream = new ChunkedReadStream(innerStream, getData); /// /// Gets the compressed stream over the deframed inner stream. @@ -114,15 +35,16 @@ public ZlibInflateStream(BufferedReadStream innerStream, Func getData, bool public DeflateStream? CompressedStream { get; private set; } /// - /// Adds new bytes from a frame found in the original stream. + /// Sets the length of the next segment of compressed input and, on first + /// call, parses the zlib header. /// - /// The current remaining data according to the chunk length. - /// Whether the chunk to be inflated is a critical chunk. + /// The remaining data length for the current segment. + /// Whether to throw on a malformed zlib header. /// The . [MemberNotNullWhen(true, nameof(CompressedStream))] public bool AllocateNewBytes(int bytes, bool isCriticalChunk) { - this.currentDataRemaining = bytes; + this.segmentStream.SetCurrentSegmentLength(bytes); if (this.CompressedStream is null) { return this.InitializeInflateStream(isCriticalChunk); @@ -131,114 +53,15 @@ public bool AllocateNewBytes(int bytes, bool isCriticalChunk) return true; } - /// - public override void Flush() => throw new NotSupportedException(); - - /// - public override int ReadByte() + public void Dispose() { - this.currentDataRemaining--; - return this.innerStream.ReadByte(); - } - - /// - public override int Read(byte[] buffer, int offset, int count) - { - if (this.currentDataRemaining is 0) - { - // Last buffer was read in its entirety, let's make sure we don't actually have more in additional IDAT chunks. - this.currentDataRemaining = this.getData(); - - if (this.currentDataRemaining is 0) - { - return 0; - } - } - - int bytesToRead = Math.Min(count, this.currentDataRemaining); - this.currentDataRemaining -= bytesToRead; - int totalBytesRead = this.innerStream.Read(buffer, offset, bytesToRead); - long innerStreamLength = this.innerStream.Length; - - // Keep reading data until we've reached the end of the stream or filled the buffer. - int bytesRead = 0; - offset += totalBytesRead; - while (this.currentDataRemaining is 0 && totalBytesRead < count) - { - this.currentDataRemaining = this.getData(); - - if (this.currentDataRemaining is 0) - { - return totalBytesRead; - } - - offset += bytesRead; - - if (offset >= innerStreamLength || offset >= count) - { - return totalBytesRead; - } - - bytesToRead = Math.Min(count - totalBytesRead, this.currentDataRemaining); - this.currentDataRemaining -= bytesToRead; - bytesRead = this.innerStream.Read(buffer, offset, bytesToRead); - if (bytesRead == 0) - { - return totalBytesRead; - } - - totalBytesRead += bytesRead; - } - - return totalBytesRead; - } - - /// - public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException(); - - /// - public override void SetLength(long value) => throw new NotSupportedException(); - - /// - public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException(); - - /// - protected override void Dispose(bool disposing) - { - if (this.isDisposed) - { - return; - } - - if (disposing) - { - // Dispose managed resources. - if (this.CompressedStream != null) - { - this.CompressedStream.Dispose(); - this.CompressedStream = null; - } - } - - base.Dispose(disposing); - - // Call the appropriate methods to clean up - // unmanaged resources here. - // Note disposing is done. - this.isDisposed = true; + this.CompressedStream?.Dispose(); + this.segmentStream?.Dispose(); } [MemberNotNullWhen(true, nameof(CompressedStream))] private bool InitializeInflateStream(bool isCriticalChunk) { - // Apple CgBI IDATs omit the zlib CMF/FLG header and the Adler-32 trailer, - // wrapping a raw DEFLATE payload directly. Skip the header parsing in that mode. - if (this.noHeader) - { - this.CompressedStream = new DeflateStream(this, CompressionMode.Decompress, true); - return true; - } - // Read the zlib header : http://tools.ietf.org/html/rfc1950 // CMF(Compression Method and flags) // This byte is divided into a 4 - bit compression method and a @@ -250,9 +73,8 @@ private bool InitializeInflateStream(bool isCriticalChunk) // +---+---+ // |CMF|FLG| // +---+---+ - int cmf = this.innerStream.ReadByte(); - int flag = this.innerStream.ReadByte(); - this.currentDataRemaining -= 2; + int cmf = this.segmentStream.ReadByte(); + int flag = this.segmentStream.ReadByte(); if (cmf == -1 || flag == -1) { return false; @@ -290,16 +112,13 @@ private bool InitializeInflateStream(bool isCriticalChunk) { // We don't need this for inflate so simply skip by the next four bytes. // https://tools.ietf.org/html/rfc1950#page-6 - if (this.innerStream.Read(ChecksumBuffer, 0, 4) != 4) + if (this.segmentStream.Read(ChecksumBuffer, 0, 4) != 4) { return false; } - - this.currentDataRemaining -= 4; } - // Initialize the deflate BufferedReadStream. - this.CompressedStream = new DeflateStream(this, CompressionMode.Decompress, true); + this.CompressedStream = new DeflateStream(this.segmentStream, CompressionMode.Decompress, leaveOpen: true); return true; } diff --git a/src/ImageSharp/Formats/Png/PngCgbiProcessor.cs b/src/ImageSharp/Formats/Png/PngCgbiProcessor.cs new file mode 100644 index 0000000000..e4847ad67f --- /dev/null +++ b/src/ImageSharp/Formats/Png/PngCgbiProcessor.cs @@ -0,0 +1,319 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. + +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using SixLabors.ImageSharp.Common.Helpers; +using SixLabors.ImageSharp.PixelFormats; +using static SixLabors.ImageSharp.SimdUtils; + +namespace SixLabors.ImageSharp.Formats.Png; + +/// +/// Reverses the pixel mangling applied by Apple's CgBI PNG variant. CgBI files +/// (emitted by pngcrush -iphone) swap channel order from RGB(A) to BGR(A) +/// and premultiply RGB samples by alpha. This converts a defiltered scanline back +/// to standard PNG semantics in place so the existing scanline processors can +/// consume it unchanged. CgBI is only emitted for 8-bit truecolor (with or +/// without alpha); other color types are left alone. +/// +/// +/// See https://theapplewiki.com/wiki/PNG_CgBI_Format +/// +internal static class PngCgbiProcessor +{ + // Per-pixel byte indices that swap CgBI's BGRA layout to Rgba32's RGBA. + // MMShuffle3012 expands to [2, 1, 0, 3] per 4-byte pixel; the same 64-byte + // sequence seeds all three shuffle masks (Vector128/256 take a leading slice). + private static readonly byte[] BgraToRgbaShuffleBytes = BuildShuffleBytes(); + + private static readonly Vector128 BgraToRgbaShuffle128 = Vector128.Create(new ReadOnlySpan(BgraToRgbaShuffleBytes, 0, Vector128.Count)); + + private static readonly Vector256 BgraToRgbaShuffle256 = Vector256.Create(new ReadOnlySpan(BgraToRgbaShuffleBytes, 0, Vector256.Count)); + + private static readonly Vector512 BgraToRgbaShuffle512 = Vector512.Create(BgraToRgbaShuffleBytes); + + /// + /// Applies the inverse of Apple's CgBI pixel mangling to a defiltered scanline in place. + /// + /// The configuration used by the Rgb24 R/B swap. + /// The defiltered pixel bytes (without the leading filter byte). + /// The PNG color type from IHDR. + public static void ApplyTransform(Configuration configuration, Span scanline, PngColorType colorType) + { + if (colorType == PngColorType.RgbWithAlpha) + { + Span pixels = MemoryMarshal.Cast(scanline); + int i = 0; + + if (Vector512.IsHardwareAccelerated && pixels.Length >= Vector512.Count) + { + i = ApplyTransformVector512(scanline, pixels.Length); + } + + if (Vector256.IsHardwareAccelerated && Avx2.IsSupported && (pixels.Length - i) >= Vector256.Count) + { + i = ApplyTransformVector256(scanline, i, pixels.Length); + } + + if (Vector128.IsHardwareAccelerated && (pixels.Length - i) >= Vector128.Count) + { + i = ApplyTransformVector128(scanline, i, pixels.Length); + } + + for (; i < pixels.Length; i++) + { + ref Rgba32 pixel = ref pixels[i]; + pixel = new Rgba32(pixel.B, pixel.G, pixel.R, pixel.A); + UndoPremultiplicationScalar(ref pixel); + } + } + else if (colorType == PngColorType.Rgb) + { + // No alpha channel, so just swap R and B using built in SIMD-optimized pixel operations. + Span target = MemoryMarshal.Cast(scanline); + PixelOperations.Instance.FromBgr24Bytes(configuration, scanline, target, target.Length); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void UndoPremultiplicationScalar(ref Rgba32 pixel) + { + byte a = pixel.A; + if (a is 0 or byte.MaxValue) + { + return; + } + + // Reverse: c' = c * a / 255 => c = round(c' * 255 / a) + int half = a >> 1; + byte r = (byte)Math.Min(byte.MaxValue, ((pixel.R * byte.MaxValue) + half) / a); + byte g = (byte)Math.Min(byte.MaxValue, ((pixel.G * byte.MaxValue) + half) / a); + byte b = (byte)Math.Min(byte.MaxValue, ((pixel.B * byte.MaxValue) + half) / a); + pixel = new Rgba32(r, g, b, a); + } + + internal static int ApplyTransformVector512(Span scanline, int pixelCount) + { + ref byte scanlineRef = ref MemoryMarshal.GetReference(scanline); + int i = 0; + + // Indices stay within their own 4-byte pixel, so the per-pixel pattern + // is also valid under the per-128-bit-lane vpshufb that ShuffleNative + // selects on AVX-512BW hosts. + Vector512 shuffleMask = BgraToRgbaShuffle512; + + Vector512 zero = Vector512.Zero; + Vector512 one = Vector512.One; + Vector512 byteMax = Vector512.Create((int)byte.MaxValue); + + for (; i <= pixelCount - Vector512.Count; i += Vector512.Count) + { + ref byte blockRef = ref Unsafe.Add(ref scanlineRef, i * Unsafe.SizeOf()); + Vector512 bgra = Unsafe.ReadUnaligned>(ref blockRef); + Vector512 rgba = Vector512_.ShuffleNative(bgra, shuffleMask); + Vector512 packed = rgba.AsInt32(); + Vector512 alpha = Vector512.ShiftRightLogical(packed, 24); + + // Fully transparent and fully opaque pixels are identity cases for + // unpremultiplication. Masking them keeps the scalar behavior and lets + // safeAlpha avoid dividing by zero for alpha == 0. + Vector512 partialMask = ~(Vector512.Equals(alpha, zero) | Vector512.Equals(alpha, byteMax)); + + Vector512 r = packed & byteMax; + Vector512 g = Vector512.ShiftRightLogical(packed, 8) & byteMax; + Vector512 b = Vector512.ShiftRightLogical(packed, 16) & byteMax; + + Vector512 safeAlpha = Vector512.ConditionalSelect(partialMask, alpha, one); + Vector512 halfAlpha = Vector512.ShiftRightLogical(safeAlpha, 1); + Vector512 safeAlphaF = Vector512.ConvertToSingle(safeAlpha); + + // ConvertToInt32 truncates toward zero (cvttps2dq / fcvtzs); since + // every quotient here is non-negative, that matches the scalar + // ((c * 255) + (a >> 1)) / a integer-division floor. + Vector512 unpremultipliedR = Vector512.Min( + byteMax, + Vector512.ConvertToInt32(Vector512.ConvertToSingle((r * byteMax) + halfAlpha) / safeAlphaF)); + + Vector512 unpremultipliedG = Vector512.Min( + byteMax, + Vector512.ConvertToInt32(Vector512.ConvertToSingle((g * byteMax) + halfAlpha) / safeAlphaF)); + + Vector512 unpremultipliedB = Vector512.Min( + byteMax, + Vector512.ConvertToInt32(Vector512.ConvertToSingle((b * byteMax) + halfAlpha) / safeAlphaF)); + + // ConditionalSelect applies the expensive unpremultiply only to pixels + // where alpha is between 1 and 254; alpha 0 and 255 lanes keep the + // shuffled channel values exactly as the scalar path does. + Vector512 finalR = Vector512.ConditionalSelect(partialMask, unpremultipliedR, r); + Vector512 finalG = Vector512.ConditionalSelect(partialMask, unpremultipliedG, g); + Vector512 finalB = Vector512.ConditionalSelect(partialMask, unpremultipliedB, b); + + // Rgba32 is laid out as little-endian 0xAABBGGRR in an int lane, so + // shifting the unpacked channels back to byte offsets 0, 1, 2, and 3 + // recreates the in-memory RGBA bytes for the unaligned store. + Vector512 result = + finalR | + Vector512.ShiftLeft(finalG, 8) | + Vector512.ShiftLeft(finalB, 16) | + Vector512.ShiftLeft(alpha, 24); + + Unsafe.WriteUnaligned(ref blockRef, result.AsByte()); + } + + return i; + } + + internal static int ApplyTransformVector256(Span scanline, int startPixel, int pixelCount) + { + ref byte scanlineRef = ref MemoryMarshal.GetReference(scanline); + int i = startPixel; + + // vpshufb is 128-bit lane-local and uses only the low 4 bits of each + // index, so the same per-pixel [2,1,0,3] pattern in both lanes keeps + // every byte inside its own lane. + Vector256 shuffleMask = BgraToRgbaShuffle256; + + Vector256 zero = Vector256.Zero; + Vector256 one = Vector256.One; + Vector256 byteMax = Vector256.Create((int)byte.MaxValue); + + for (; i <= pixelCount - Vector256.Count; i += Vector256.Count) + { + ref byte blockRef = ref Unsafe.Add(ref scanlineRef, i * Unsafe.SizeOf()); + Vector256 bgra = Unsafe.ReadUnaligned>(ref blockRef); + Vector256 rgba = Vector256_.ShufflePerLane(bgra, shuffleMask); + Vector256 packed = rgba.AsInt32(); + Vector256 alpha = Vector256.ShiftRightLogical(packed, 24); + + // Fully transparent and fully opaque pixels are identity cases for + // unpremultiplication. Masking them keeps the scalar behavior and lets + // safeAlpha avoid dividing by zero for alpha == 0. + Vector256 partialMask = ~(Vector256.Equals(alpha, zero) | Vector256.Equals(alpha, byteMax)); + + Vector256 r = packed & byteMax; + Vector256 g = Vector256.ShiftRightLogical(packed, 8) & byteMax; + Vector256 b = Vector256.ShiftRightLogical(packed, 16) & byteMax; + + Vector256 safeAlpha = Vector256.ConditionalSelect(partialMask, alpha, one); + Vector256 halfAlpha = Vector256.ShiftRightLogical(safeAlpha, 1); + Vector256 safeAlphaF = Vector256.ConvertToSingle(safeAlpha); + + // ConvertToInt32 truncates toward zero (cvttps2dq / fcvtzs); since + // every quotient here is non-negative, that matches the scalar + // ((c * 255) + (a >> 1)) / a integer-division floor. + Vector256 unpremultipliedR = Vector256.Min( + byteMax, + Vector256.ConvertToInt32(Vector256.ConvertToSingle((r * byteMax) + halfAlpha) / safeAlphaF)); + + Vector256 unpremultipliedG = Vector256.Min( + byteMax, + Vector256.ConvertToInt32(Vector256.ConvertToSingle((g * byteMax) + halfAlpha) / safeAlphaF)); + + Vector256 unpremultipliedB = Vector256.Min( + byteMax, + Vector256.ConvertToInt32(Vector256.ConvertToSingle((b * byteMax) + halfAlpha) / safeAlphaF)); + + // ConditionalSelect applies the expensive unpremultiply only to pixels + // where alpha is between 1 and 254; alpha 0 and 255 lanes keep the + // shuffled channel values exactly as the scalar path does. + Vector256 finalR = Vector256.ConditionalSelect(partialMask, unpremultipliedR, r); + Vector256 finalG = Vector256.ConditionalSelect(partialMask, unpremultipliedG, g); + Vector256 finalB = Vector256.ConditionalSelect(partialMask, unpremultipliedB, b); + + // Rgba32 is laid out as little-endian 0xAABBGGRR in an int lane, so + // shifting the unpacked channels back to byte offsets 0, 1, 2, and 3 + // recreates the in-memory RGBA bytes for the unaligned store. + Vector256 result = + finalR | + Vector256.ShiftLeft(finalG, 8) | + Vector256.ShiftLeft(finalB, 16) | + Vector256.ShiftLeft(alpha, 24); + + Unsafe.WriteUnaligned(ref blockRef, result.AsByte()); + } + + return i; + } + + internal static int ApplyTransformVector128(Span scanline, int startPixel, int pixelCount) + { + ref byte scanlineRef = ref MemoryMarshal.GetReference(scanline); + int i = startPixel; + + Vector128 shuffleMask = BgraToRgbaShuffle128; + + Vector128 zero = Vector128.Zero; + Vector128 one = Vector128.One; + Vector128 byteMax = Vector128.Create((int)byte.MaxValue); + + for (; i <= pixelCount - Vector128.Count; i += Vector128.Count) + { + ref byte blockRef = ref Unsafe.Add(ref scanlineRef, i * Unsafe.SizeOf()); + Vector128 bgra = Unsafe.ReadUnaligned>(ref blockRef); + Vector128 rgba = Vector128_.ShuffleNative(bgra, shuffleMask); + Vector128 packed = rgba.AsInt32(); + Vector128 alpha = Vector128.ShiftRightLogical(packed, 24); + + // Fully transparent and fully opaque pixels are identity cases for + // unpremultiplication. Masking them keeps the scalar behavior and lets + // safeAlpha avoid dividing by zero for alpha == 0. + Vector128 partialMask = ~(Vector128.Equals(alpha, zero) | Vector128.Equals(alpha, byteMax)); + + Vector128 r = packed & byteMax; + Vector128 g = Vector128.ShiftRightLogical(packed, 8) & byteMax; + Vector128 b = Vector128.ShiftRightLogical(packed, 16) & byteMax; + + Vector128 safeAlpha = Vector128.ConditionalSelect(partialMask, alpha, one); + Vector128 halfAlpha = Vector128.ShiftRightLogical(safeAlpha, 1); + Vector128 safeAlphaF = Vector128.ConvertToSingle(safeAlpha); + + // ConvertToInt32 truncates toward zero (cvttps2dq / fcvtzs); since + // every quotient here is non-negative, that matches the scalar + // ((c * 255) + (a >> 1)) / a integer-division floor. + Vector128 unpremultipliedR = Vector128.Min( + byteMax, + Vector128.ConvertToInt32(Vector128.ConvertToSingle((r * byteMax) + halfAlpha) / safeAlphaF)); + + Vector128 unpremultipliedG = Vector128.Min( + byteMax, + Vector128.ConvertToInt32(Vector128.ConvertToSingle((g * byteMax) + halfAlpha) / safeAlphaF)); + + Vector128 unpremultipliedB = Vector128.Min( + byteMax, + Vector128.ConvertToInt32(Vector128.ConvertToSingle((b * byteMax) + halfAlpha) / safeAlphaF)); + + // ConditionalSelect applies the expensive unpremultiply only to pixels + // where alpha is between 1 and 254; alpha 0 and 255 lanes keep the + // shuffled channel values exactly as the scalar path does. + Vector128 finalR = Vector128.ConditionalSelect(partialMask, unpremultipliedR, r); + Vector128 finalG = Vector128.ConditionalSelect(partialMask, unpremultipliedG, g); + Vector128 finalB = Vector128.ConditionalSelect(partialMask, unpremultipliedB, b); + + // Rgba32 is laid out as little-endian 0xAABBGGRR in an int lane, so + // shifting the unpacked channels back to byte offsets 0, 1, 2, and 3 + // recreates the in-memory RGBA bytes for the unaligned store. + Vector128 result = + finalR | + Vector128.ShiftLeft(finalG, 8) | + Vector128.ShiftLeft(finalB, 16) | + Vector128.ShiftLeft(alpha, 24); + + Unsafe.WriteUnaligned(ref blockRef, result.AsByte()); + } + + return i; + } + + private static byte[] BuildShuffleBytes() + { + byte[] bytes = new byte[Vector512.Count]; + Span span = bytes; + Shuffle.MMShuffleSpan(ref span, Shuffle.MMShuffle3012); + + return bytes; + } +} diff --git a/src/ImageSharp/Formats/Png/PngDecoderCore.cs b/src/ImageSharp/Formats/Png/PngDecoderCore.cs index 84245254a2..f3e2bbdbe0 100644 --- a/src/ImageSharp/Formats/Png/PngDecoderCore.cs +++ b/src/ImageSharp/Formats/Png/PngDecoderCore.cs @@ -9,8 +9,6 @@ using System.IO.Hashing; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; using System.Text; using SixLabors.ImageSharp.Common.Helpers; using SixLabors.ImageSharp.Compression.Zlib; @@ -767,7 +765,7 @@ private int CalculateScanlineLength(int width) /// The length of the chunk that containing the compressed scanline data. /// The pixel data. /// The png metadata - /// A delegate to get more data from the inner stream for . + /// A delegate to get more data from the inner stream when chunk boundaries are crossed. /// The frame control /// The cancellation token. private void ReadScanlines( @@ -779,14 +777,34 @@ private void ReadScanlines( CancellationToken cancellationToken) where TPixel : unmanaged, IPixel { - using ZlibInflateStream inflateStream = new(this.currentStream, getData, noHeader: this.isCgbi); + // CgBI IDATs wrap a raw DEFLATE payload directly (no zlib CMF/FLG header + // and no Adler-32 trailer); skip the zlib header parser entirely. + if (this.isCgbi) + { + using ChunkedReadStream segmentStream = new(this.currentStream, getData); + segmentStream.SetCurrentSegmentLength(chunkLength); + using DeflateStream cgbiDataStream = new(segmentStream, CompressionMode.Decompress, leaveOpen: true); + this.DecodeFromDeflate(cgbiDataStream, image, pngMetadata, frameControl, cancellationToken); + return; + } + + using ZlibInflateStream inflateStream = new(this.currentStream, getData); if (!inflateStream.AllocateNewBytes(chunkLength, !this.hasImageData)) { return; } - DeflateStream dataStream = inflateStream.CompressedStream!; + this.DecodeFromDeflate(inflateStream.CompressedStream!, image, pngMetadata, frameControl, cancellationToken); + } + private void DecodeFromDeflate( + DeflateStream dataStream, + ImageFrame image, + PngMetadata pngMetadata, + in FrameControl frameControl, + CancellationToken cancellationToken) + where TPixel : unmanaged, IPixel + { if (this.header.InterlaceMethod is PngInterlaceMode.Adam7) { this.DecodeInterlacedPixelData(frameControl, dataStream, image, pngMetadata, cancellationToken); @@ -902,7 +920,7 @@ private void DecodePixelDataCore( if (this.isCgbi) { - this.ApplyCgbiTransform(scanSpan[1..], this.pngColorType); + PngCgbiProcessor.ApplyTransform(this.configuration, scanSpan[1..], this.pngColorType); } this.ProcessDefilteredScanline(frameControl, currentRow, scanSpan, imageFrame, pngMetadata, blendRowBuffer); @@ -1037,7 +1055,7 @@ private void DecodeInterlacedPixelDataCore( if (this.isCgbi) { - this.ApplyCgbiTransform(scanSpan[1..], this.pngColorType); + PngCgbiProcessor.ApplyTransform(this.configuration, scanSpan[1..], this.pngColorType); } Span rowSpan = imageBuffer.DangerousGetRowSpan(currentRow); @@ -1431,6 +1449,22 @@ private void ReadHeaderChunk(PngMetadata pngMetadata, ReadOnlySpan data) this.pngColorType = this.header.ColorType; this.Dimensions = new Size(this.header.Width, this.header.Height); + + // Apple's pngcrush emits the CgBI chunk before IHDR, so the header + // compatibility check is deferred until both chunks have been seen. + if (this.isCgbi) + { + ThrowIfInvalidCgbiContent(this.header); + } + } + + private static void ThrowIfInvalidCgbiContent(in PngHeader header) + { + if (header.BitDepth != 8 || (header.ColorType is not PngColorType.Rgb and not PngColorType.RgbWithAlpha)) + { + PngThrowHelper.ThrowInvalidImageContentException( + $"CgBI is only supported for 8-bit truecolor images. Was bit depth '{header.BitDepth}', color type '{header.ColorType}'."); + } } /// @@ -2493,303 +2527,4 @@ private static bool IsXmpTextData(ReadOnlySpan keywordBytes) private void SwapScanlineBuffers() => (this.scanline, this.previousScanline) = (this.previousScanline, this.scanline); - - /// - /// Applies the inverse of Apple's CgBI pixel mangling to a defiltered scanline. - /// CgBI PNGs are emitted by pngcrush -iphone with channel order swapped - /// from RGB(A) to BGR(A) and RGB samples premultiplied by alpha. This converts - /// the bytes back to standard PNG semantics in place so the existing scanline - /// processors can consume them unchanged. CgBI is only emitted for 8-bit - /// truecolor (with or without alpha); other color types are left alone. - /// - /// - /// See https://theapplewiki.com/wiki/PNG_CgBI_Format - /// - /// The defiltered pixel bytes (without the leading filter byte). - /// The PNG color type from IHDR. - private void ApplyCgbiTransform(Span scanline, PngColorType colorType) - { - if (colorType == PngColorType.RgbWithAlpha) - { - Span pixels = MemoryMarshal.Cast(scanline); - int i = 0; - - if (Vector512.IsHardwareAccelerated && pixels.Length >= 16) - { - i = ApplyCgbiTransformVector512(scanline, pixels.Length); - } - - if (Vector256.IsHardwareAccelerated && Avx2.IsSupported && (pixels.Length - i) >= 8) - { - i = ApplyCgbiTransformVector256(scanline, i, pixels.Length); - } - - if (Vector128.IsHardwareAccelerated && (pixels.Length - i) >= 4) - { - i = ApplyCgbiTransformVector128(scanline, i, pixels.Length); - } - - for (; i < pixels.Length; i++) - { - ref Rgba32 pixel = ref pixels[i]; - pixel = new Rgba32(pixel.B, pixel.G, pixel.R, pixel.A); - UndoCgbiPremultiplicationScalar(ref pixel); - } - } - else if (colorType == PngColorType.Rgb) - { - // No alpha channel, so just swap R and B using built in SIMD-optimized pixel operations. - Span target = MemoryMarshal.Cast(scanline); - PixelOperations.Instance.FromBgr24Bytes(this.configuration, scanline, target, target.Length); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void UndoCgbiPremultiplicationScalar(ref Rgba32 pixel) - { - byte a = pixel.A; - if (a is 0 or byte.MaxValue) - { - return; - } - - // Reverse: c' = c * a / 255 => c = round(c' * 255 / a) - int half = a >> 1; - byte r = (byte)Math.Min(byte.MaxValue, ((pixel.R * byte.MaxValue) + half) / a); - byte g = (byte)Math.Min(byte.MaxValue, ((pixel.G * byte.MaxValue) + half) / a); - byte b = (byte)Math.Min(byte.MaxValue, ((pixel.B * byte.MaxValue) + half) / a); - pixel = new Rgba32(r, g, b, a); - } - - private static int ApplyCgbiTransformVector512(Span scanline, int pixelCount) - { - ref byte scanlineRef = ref MemoryMarshal.GetReference(scanline); - int i = 0; - - Span temp = stackalloc byte[Vector512.Count]; - SimdUtils.Shuffle.MMShuffleSpan(ref temp, SimdUtils.Shuffle.MMShuffle3012); - - // MMShuffle3012 expands to [2, 1, 0, 3] for each 4-byte pixel, converting - // CgBI's BGRA byte order to Rgba32's RGBA layout while keeping alpha in place. - // The generated mask only swaps bytes inside each pixel, so it remains - // correct for the optimized 512-bit byte shuffle helper. - Vector512 shuffleMask = Unsafe.As>(ref MemoryMarshal.GetReference(temp)); - - Vector512 zero = Vector512.Zero; - Vector512 one = Vector512.One; - Vector512 byteMask = Vector512.Create(0xFF); - Vector512 opaque = Vector512.Create(0xFF); - Vector512 byteMax = Vector512.Create((int)byte.MaxValue); - - for (; i <= pixelCount - 16; i += 16) - { - ref byte blockRef = ref Unsafe.Add(ref scanlineRef, i * Unsafe.SizeOf()); - Vector512 bgra = Unsafe.ReadUnaligned>(ref blockRef); - Vector512 rgba = Vector512_.ShuffleNative(bgra, shuffleMask); - Vector512 packed = rgba.AsInt32(); - Vector512 alpha = Vector512.ShiftRightLogical(packed, 24); - - // Fully transparent and fully opaque pixels are identity cases for - // unpremultiplication. Masking them keeps the scalar behavior and lets - // safeAlpha avoid dividing by zero for alpha == 0. - Vector512 partialMask = ~(Vector512.Equals(alpha, zero) | Vector512.Equals(alpha, opaque)); - - Vector512 r = packed & byteMask; - Vector512 g = Vector512.ShiftRightLogical(packed, 8) & byteMask; - Vector512 b = Vector512.ShiftRightLogical(packed, 16) & byteMask; - - Vector512 safeAlpha = Vector512.ConditionalSelect(partialMask, alpha, one); - Vector512 halfAlpha = Vector512.ShiftRightLogical(safeAlpha, 1); - Vector512 safeAlphaF = Vector512.ConvertToSingle(safeAlpha); - - // The scalar path computes ((c * 255) + (a >> 1)) / a with integer - // division. Floor the positive quotient before converting so SIMD does - // not use the default round-to-nearest conversion and drift by one. - Vector512 unpremultipliedR = Vector512.Min( - byteMax, - Vector512.ConvertToInt32(Vector512.Floor(Vector512.ConvertToSingle((r * byteMax) + halfAlpha) / safeAlphaF))); - - Vector512 unpremultipliedG = Vector512.Min( - byteMax, - Vector512.ConvertToInt32(Vector512.Floor(Vector512.ConvertToSingle((g * byteMax) + halfAlpha) / safeAlphaF))); - - Vector512 unpremultipliedB = Vector512.Min( - byteMax, - Vector512.ConvertToInt32(Vector512.Floor(Vector512.ConvertToSingle((b * byteMax) + halfAlpha) / safeAlphaF))); - - // ConditionalSelect applies the expensive unpremultiply only to pixels - // where alpha is between 1 and 254; alpha 0 and 255 lanes keep the - // shuffled channel values exactly as the scalar path does. - Vector512 finalR = Vector512.ConditionalSelect(partialMask, unpremultipliedR, r); - Vector512 finalG = Vector512.ConditionalSelect(partialMask, unpremultipliedG, g); - Vector512 finalB = Vector512.ConditionalSelect(partialMask, unpremultipliedB, b); - - // Rgba32 is laid out as little-endian 0xAABBGGRR in an int lane, so - // shifting the unpacked channels back to byte offsets 0, 1, 2, and 3 - // recreates the in-memory RGBA bytes for the unaligned store. - Vector512 result = - finalR | - Vector512.ShiftLeft(finalG, 8) | - Vector512.ShiftLeft(finalB, 16) | - Vector512.ShiftLeft(alpha, 24); - - Unsafe.WriteUnaligned(ref blockRef, result.AsByte()); - } - - return i; - } - - private static int ApplyCgbiTransformVector256(Span scanline, int startPixel, int pixelCount) - { - ref byte scanlineRef = ref MemoryMarshal.GetReference(scanline); - int i = startPixel; - - Span temp = stackalloc byte[Vector512.Count]; - SimdUtils.Shuffle.MMShuffleSpan(ref temp, SimdUtils.Shuffle.MMShuffle3012); - - // MMShuffle3012 expands to [2, 1, 0, 3] for each 4-byte pixel, converting - // CgBI's BGRA byte order to Rgba32's RGBA layout while keeping alpha in place. - // Avx2.Shuffle is 128-bit lane-local, and the generated mask repeats inside - // each lane, so no byte ever needs to cross the lane boundary. - Vector256 shuffleMask = Unsafe.As>(ref MemoryMarshal.GetReference(temp)); - - Vector256 zero = Vector256.Zero; - Vector256 one = Vector256.One; - Vector256 byteMask = Vector256.Create(0xFF); - Vector256 opaque = Vector256.Create(0xFF); - Vector256 byteMax = Vector256.Create((int)byte.MaxValue); - - for (; i <= pixelCount - 8; i += 8) - { - ref byte blockRef = ref Unsafe.Add(ref scanlineRef, i * Unsafe.SizeOf()); - Vector256 bgra = Unsafe.ReadUnaligned>(ref blockRef); - Vector256 rgba = Vector256_.ShufflePerLane(bgra, shuffleMask); - Vector256 packed = rgba.AsInt32(); - Vector256 alpha = Vector256.ShiftRightLogical(packed, 24); - - // Fully transparent and fully opaque pixels are identity cases for - // unpremultiplication. Masking them keeps the scalar behavior and lets - // safeAlpha avoid dividing by zero for alpha == 0. - Vector256 partialMask = ~(Vector256.Equals(alpha, zero) | Vector256.Equals(alpha, opaque)); - - Vector256 r = packed & byteMask; - Vector256 g = Vector256.ShiftRightLogical(packed, 8) & byteMask; - Vector256 b = Vector256.ShiftRightLogical(packed, 16) & byteMask; - - Vector256 safeAlpha = Vector256.ConditionalSelect(partialMask, alpha, one); - Vector256 halfAlpha = Vector256.ShiftRightLogical(safeAlpha, 1); - Vector256 safeAlphaF = Vector256.ConvertToSingle(safeAlpha); - - // The scalar path computes ((c * 255) + (a >> 1)) / a with integer - // division. Floor the positive quotient before converting so SIMD does - // not use the default round-to-nearest conversion and drift by one. - Vector256 unpremultipliedR = Vector256.Min( - byteMax, - Vector256.ConvertToInt32(Vector256.Floor(Vector256.ConvertToSingle((r * byteMax) + halfAlpha) / safeAlphaF))); - - Vector256 unpremultipliedG = Vector256.Min( - byteMax, - Vector256.ConvertToInt32(Vector256.Floor(Vector256.ConvertToSingle((g * byteMax) + halfAlpha) / safeAlphaF))); - - Vector256 unpremultipliedB = Vector256.Min( - byteMax, - Vector256.ConvertToInt32(Vector256.Floor(Vector256.ConvertToSingle((b * byteMax) + halfAlpha) / safeAlphaF))); - - // ConditionalSelect applies the expensive unpremultiply only to pixels - // where alpha is between 1 and 254; alpha 0 and 255 lanes keep the - // shuffled channel values exactly as the scalar path does. - Vector256 finalR = Vector256.ConditionalSelect(partialMask, unpremultipliedR, r); - Vector256 finalG = Vector256.ConditionalSelect(partialMask, unpremultipliedG, g); - Vector256 finalB = Vector256.ConditionalSelect(partialMask, unpremultipliedB, b); - - // Rgba32 is laid out as little-endian 0xAABBGGRR in an int lane, so - // shifting the unpacked channels back to byte offsets 0, 1, 2, and 3 - // recreates the in-memory RGBA bytes for the unaligned store. - Vector256 result = - finalR | - Vector256.ShiftLeft(finalG, 8) | - Vector256.ShiftLeft(finalB, 16) | - Vector256.ShiftLeft(alpha, 24); - - Unsafe.WriteUnaligned(ref blockRef, result.AsByte()); - } - - return i; - } - - private static int ApplyCgbiTransformVector128(Span scanline, int startPixel, int pixelCount) - { - ref byte scanlineRef = ref MemoryMarshal.GetReference(scanline); - int i = startPixel; - - Span temp = stackalloc byte[Vector512.Count]; - SimdUtils.Shuffle.MMShuffleSpan(ref temp, SimdUtils.Shuffle.MMShuffle3012); - - // MMShuffle3012 expands to [2, 1, 0, 3] for each 4-byte pixel, converting - // CgBI's BGRA byte order to Rgba32's RGBA layout while keeping alpha in place. - Vector128 shuffleMask = Unsafe.As>(ref MemoryMarshal.GetReference(temp)); - - Vector128 zero = Vector128.Zero; - Vector128 one = Vector128.One; - Vector128 byteMask = Vector128.Create(0xFF); - Vector128 opaque = Vector128.Create(0xFF); - Vector128 byteMax = Vector128.Create((int)byte.MaxValue); - - for (; i <= pixelCount - 4; i += 4) - { - ref byte blockRef = ref Unsafe.Add(ref scanlineRef, i * Unsafe.SizeOf()); - Vector128 bgra = Unsafe.ReadUnaligned>(ref blockRef); - Vector128 rgba = Vector128_.ShuffleNative(bgra, shuffleMask); - Vector128 packed = rgba.AsInt32(); - Vector128 alpha = Vector128.ShiftRightLogical(packed, 24); - - // Fully transparent and fully opaque pixels are identity cases for - // unpremultiplication. Masking them keeps the scalar behavior and lets - // safeAlpha avoid dividing by zero for alpha == 0. - Vector128 partialMask = ~(Vector128.Equals(alpha, zero) | Vector128.Equals(alpha, opaque)); - - Vector128 r = packed & byteMask; - Vector128 g = Vector128.ShiftRightLogical(packed, 8) & byteMask; - Vector128 b = Vector128.ShiftRightLogical(packed, 16) & byteMask; - - Vector128 safeAlpha = Vector128.ConditionalSelect(partialMask, alpha, one); - Vector128 halfAlpha = Vector128.ShiftRightLogical(safeAlpha, 1); - Vector128 safeAlphaF = Vector128.ConvertToSingle(safeAlpha); - - // The scalar path computes ((c * 255) + (a >> 1)) / a with integer - // division. Floor the positive quotient before converting so SIMD does - // not use the default round-to-nearest conversion and drift by one. - Vector128 unpremultipliedR = Vector128.Min( - byteMax, - Vector128.ConvertToInt32(Vector128.Floor(Vector128.ConvertToSingle((r * byteMax) + halfAlpha) / safeAlphaF))); - - Vector128 unpremultipliedG = Vector128.Min( - byteMax, - Vector128.ConvertToInt32(Vector128.Floor(Vector128.ConvertToSingle((g * byteMax) + halfAlpha) / safeAlphaF))); - - Vector128 unpremultipliedB = Vector128.Min( - byteMax, - Vector128.ConvertToInt32(Vector128.Floor(Vector128.ConvertToSingle((b * byteMax) + halfAlpha) / safeAlphaF))); - - // ConditionalSelect applies the expensive unpremultiply only to pixels - // where alpha is between 1 and 254; alpha 0 and 255 lanes keep the - // shuffled channel values exactly as the scalar path does. - Vector128 finalR = Vector128.ConditionalSelect(partialMask, unpremultipliedR, r); - Vector128 finalG = Vector128.ConditionalSelect(partialMask, unpremultipliedG, g); - Vector128 finalB = Vector128.ConditionalSelect(partialMask, unpremultipliedB, b); - - // Rgba32 is laid out as little-endian 0xAABBGGRR in an int lane, so - // shifting the unpacked channels back to byte offsets 0, 1, 2, and 3 - // recreates the in-memory RGBA bytes for the unaligned store. - Vector128 result = - finalR | - Vector128.ShiftLeft(finalG, 8) | - Vector128.ShiftLeft(finalB, 16) | - Vector128.ShiftLeft(alpha, 24); - - Unsafe.WriteUnaligned(ref blockRef, result.AsByte()); - } - - return i; - } } diff --git a/tests/ImageSharp.Tests/Formats/Png/PngCgbiProcessorTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngCgbiProcessorTests.cs new file mode 100644 index 0000000000..426afb6d42 --- /dev/null +++ b/tests/ImageSharp.Tests/Formats/Png/PngCgbiProcessorTests.cs @@ -0,0 +1,174 @@ +// Copyright (c) Six Labors. +// Licensed under the Six Labors Split License. + +using System.Runtime.InteropServices; +using SixLabors.ImageSharp.Formats.Png; +using SixLabors.ImageSharp.PixelFormats; + +namespace SixLabors.ImageSharp.Tests.Formats.Png; + +[Trait("Format", "Png")] +public class PngCgbiProcessorTests +{ + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(3)] + [InlineData(4)] + [InlineData(7)] + [InlineData(8)] + [InlineData(15)] + [InlineData(16)] + [InlineData(17)] + [InlineData(31)] + [InlineData(32)] + [InlineData(33)] + [InlineData(64)] + public void ApplyTransform_RgbWithAlpha_MatchesScalar(int pixelCount) + { + // Drives the full V512/V256/V128/scalar dispatch, so it covers each + // path that is hardware-accelerated on the host plus the scalar tail. + byte[] input = CreateBgraScanline(pixelCount); + byte[] processorOutput = (byte[])input.Clone(); + byte[] scalarOutput = (byte[])input.Clone(); + + PngCgbiProcessor.ApplyTransform(Configuration.Default, processorOutput, PngColorType.RgbWithAlpha); + ApplyCgbiTransformScalarReference(scalarOutput); + + Assert.Equal(scalarOutput, processorOutput); + } + + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(3)] + [InlineData(4)] + [InlineData(7)] + [InlineData(8)] + [InlineData(15)] + [InlineData(16)] + [InlineData(17)] + [InlineData(31)] + [InlineData(32)] + [InlineData(33)] + [InlineData(64)] + public void ApplyTransformVector512_MatchesScalar(int pixelCount) => + // Vector512 uses Vector512_.ShuffleNative which falls back to the software + // Vector512.Shuffle when Avx512BW is unavailable, so the body runs regardless + // of whether Vector512 is hardware-accelerated on the host. + AssertVectorMatchesScalar( + pixelCount, + scanline => PngCgbiProcessor.ApplyTransformVector512(scanline, scanline.Length / 4), + blockSize: 16); + + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(3)] + [InlineData(4)] + [InlineData(7)] + [InlineData(8)] + [InlineData(15)] + [InlineData(16)] + [InlineData(17)] + [InlineData(31)] + [InlineData(32)] + [InlineData(64)] + public void ApplyTransformVector256_MatchesScalar(int pixelCount) => AssertVectorMatchesScalar( + pixelCount, + scanline => PngCgbiProcessor.ApplyTransformVector256(scanline, 0, scanline.Length / 4), + blockSize: 8); + + [Theory] + [InlineData(0)] + [InlineData(1)] + [InlineData(3)] + [InlineData(4)] + [InlineData(7)] + [InlineData(8)] + [InlineData(15)] + [InlineData(16)] + [InlineData(64)] + public void ApplyTransformVector128_MatchesScalar(int pixelCount) => AssertVectorMatchesScalar( + pixelCount, + scanline => PngCgbiProcessor.ApplyTransformVector128(scanline, 0, scanline.Length / 4), + blockSize: 4); + + private static void AssertVectorMatchesScalar(int pixelCount, Func applyVector, int blockSize) + { + byte[] input = CreateBgraScanline(pixelCount); + byte[] vectorOutput = (byte[])input.Clone(); + byte[] scalarOutput = (byte[])input.Clone(); + + int processed = applyVector(vectorOutput); + + int expectedProcessed = (pixelCount / blockSize) * blockSize; + Assert.Equal(expectedProcessed, processed); + + // The vector path is responsible for whole blocks only; remaining pixels are + // handled by the scalar tail in ApplyTransform. Run the scalar reference + // over every pixel and compare the prefix the vector path actually wrote. + ApplyCgbiTransformScalarReference(scalarOutput); + + Span vectorProcessed = vectorOutput.AsSpan(0, processed * 4); + Span scalarProcessed = scalarOutput.AsSpan(0, processed * 4); + Assert.True(vectorProcessed.SequenceEqual(scalarProcessed), $"Mismatch at pixelCount={pixelCount}"); + + // Pixels past the vector's processed prefix must be untouched. + Span vectorTail = vectorOutput.AsSpan(processed * 4); + Span inputTail = input.AsSpan(processed * 4); + Assert.True(vectorTail.SequenceEqual(inputTail)); + } + + private static byte[] CreateBgraScanline(int pixelCount) + { + // Deterministic mix of edge cases (a=0, a=255, partial alpha) and varied channels. + byte[] bytes = new byte[pixelCount * 4]; + for (int p = 0; p < pixelCount; p++) + { + byte a = (p % 7) switch + { + 0 => byte.MinValue, + 1 => byte.MaxValue, + _ => (byte)((((p * 37) + 23) & 0xFF) | 1) // never zero + }; + + // CgBI premultiplied BGRA: c' = c * a / 255 + byte r = (byte)((p * 13) & 0xFF); + byte g = (byte)((p * 29) & 0xFF); + byte b = (byte)((p * 53) & 0xFF); + r = (byte)((r * a) / byte.MaxValue); + g = (byte)((g * a) / byte.MaxValue); + b = (byte)((b * a) / byte.MaxValue); + + bytes[(p * 4) + 0] = b; + bytes[(p * 4) + 1] = g; + bytes[(p * 4) + 2] = r; + bytes[(p * 4) + 3] = a; + } + + return bytes; + } + + private static void ApplyCgbiTransformScalarReference(Span scanline) + { + Span pixels = MemoryMarshal.Cast(scanline); + for (int i = 0; i < pixels.Length; i++) + { + ref Rgba32 pixel = ref pixels[i]; + pixel = new Rgba32(pixel.B, pixel.G, pixel.R, pixel.A); + + byte a = pixel.A; + if (a is 0 or byte.MaxValue) + { + continue; + } + + int half = a >> 1; + byte r = (byte)Math.Min(byte.MaxValue, ((pixel.R * byte.MaxValue) + half) / a); + byte g = (byte)Math.Min(byte.MaxValue, ((pixel.G * byte.MaxValue) + half) / a); + byte b = (byte)Math.Min(byte.MaxValue, ((pixel.B * byte.MaxValue) + half) / a); + pixel = new Rgba32(r, g, b, a); + } + } +} diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs index 2e452b896d..2fbbe695e9 100644 --- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs +++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs @@ -767,6 +767,28 @@ public void Identify_AppleCgBI(string imagePath, int expectedWidth, int expected Assert.Equal(expectedHeight, imageInfo.Height); } + [Theory] + [InlineData(TestImages.Png.Cgbi.BitDepth16)] + [InlineData(TestImages.Png.Cgbi.Palette)] + public void Identify_CgBI_IncompatibleHeader_ThrowsInvalidImageContentException(string imagePath) + { + TestFile testFile = TestFile.Create(imagePath); + using MemoryStream stream = new(testFile.Bytes, false); + InvalidImageContentException ex = Assert.Throws(() => Image.Identify(stream)); + Assert.Contains("CgBI is only supported for 8-bit truecolor images", ex.Message); + } + + [Theory] + [WithFile(TestImages.Png.Cgbi.BitDepth16, PixelTypes.Rgba32)] + [WithFile(TestImages.Png.Cgbi.Palette, PixelTypes.Rgba32)] + public void Decode_CgBI_IncompatibleHeader_ThrowsInvalidImageContentException(TestImageProvider provider) + where TPixel : unmanaged, IPixel + { + InvalidImageContentException ex = Assert.Throws( + () => { using Image image = provider.GetImage(PngDecoder.Instance); }); + Assert.Contains("CgBI is only supported for 8-bit truecolor images", ex.Message); + } + [Theory] [WithFile(TestImages.Png.Splash, PixelTypes.Rgba32)] [WithFile(TestImages.Png.Bike, PixelTypes.Rgba32)] diff --git a/tests/ImageSharp.Tests/TestImages.cs b/tests/ImageSharp.Tests/TestImages.cs index 1b6ae56850..7b43ab262c 100644 --- a/tests/ImageSharp.Tests/TestImages.cs +++ b/tests/ImageSharp.Tests/TestImages.cs @@ -189,6 +189,10 @@ public static class Cgbi // Issue 410: https://github.com/SixLabors/ImageSharp/issues/410 public const string Issue410 = "Png/issues/Issue_410.png"; + + // Synthetic fixtures derived from colors.png to exercise CgBI validation. + public const string BitDepth16 = "Png/cgbi/colors-cgbi-bitdepth16.png"; + public const string Palette = "Png/cgbi/colors-cgbi-palette.png"; } public static class Bad diff --git a/tests/Images/Input/Png/cgbi/colors-cgbi-bitdepth16.png b/tests/Images/Input/Png/cgbi/colors-cgbi-bitdepth16.png new file mode 100644 index 0000000000..18cfa9246d --- /dev/null +++ b/tests/Images/Input/Png/cgbi/colors-cgbi-bitdepth16.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59610bc03f6ca867e5f71c574b3a0d1942c9e3a230c8a32bf3007cb82f286866 +size 12853 diff --git a/tests/Images/Input/Png/cgbi/colors-cgbi-palette.png b/tests/Images/Input/Png/cgbi/colors-cgbi-palette.png new file mode 100644 index 0000000000..f6406559b1 --- /dev/null +++ b/tests/Images/Input/Png/cgbi/colors-cgbi-palette.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3a2f20c69ae423523a8f41887e3f37257a338f2220c2ea44d35c87daf8c3aa3 +size 12853