diff --git a/src/ImageSharp/Compression/Zlib/ChunkedReadStream.cs b/src/ImageSharp/Compression/Zlib/ChunkedReadStream.cs
new file mode 100644
index 0000000000..b697327fff
--- /dev/null
+++ b/src/ImageSharp/Compression/Zlib/ChunkedReadStream.cs
@@ -0,0 +1,148 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using SixLabors.ImageSharp.IO;
+
+namespace SixLabors.ImageSharp.Compression.Zlib;
+
+/// <summary>
+/// A read-only stream over a sequence of length-delimited segments. Bytes are
+/// pulled from the inner stream up to the current segment's remaining length;
+/// when the segment is exhausted the supplied delegate is invoked to advance
+/// to the next segment and return its length. The inner stream is not owned
+/// and is not disposed.
+/// </summary>
+internal sealed class ChunkedReadStream : Stream
+{
+ /// <summary>
+ /// Default segment provider used when no delegate is supplied; it always reports that no further segment exists.
+ /// </summary>
+ private static readonly Func<int> GetDataNoOp = () => 0;
+
+ /// <summary>
+ /// The stream the segment bytes are read from.
+ /// </summary>
+ private readonly BufferedReadStream innerStream;
+
+ /// <summary>
+ /// Invoked when the current segment is drained; returns the length of the next segment, or 0 when there is none.
+ /// </summary>
+ private readonly Func<int> getData;
+
+ /// <summary>
+ /// The number of bytes of the current segment that have not been read yet.
+ /// </summary>
+ private int currentDataRemaining;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="ChunkedReadStream"/> class without a segment provider.
+ /// </summary>
+ /// <param name="innerStream">The inner stream to read from.</param>
+ public ChunkedReadStream(BufferedReadStream innerStream)
+ : this(innerStream, GetDataNoOp)
+ {
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="ChunkedReadStream"/> class.
+ /// </summary>
+ /// <param name="innerStream">The inner stream to read from.</param>
+ /// <param name="getData">A delegate that advances to the next segment and returns its length, or 0 when no segment remains.</param>
+ public ChunkedReadStream(BufferedReadStream innerStream, Func<int> getData)
+ {
+ this.innerStream = innerStream;
+ this.getData = getData;
+ }
+
+ /// <inheritdoc/>
+ public override bool CanRead => this.innerStream.CanRead;
+
+ /// <inheritdoc/>
+ public override bool CanSeek => false;
+
+ /// <inheritdoc/>
+ public override bool CanWrite => false;
+
+ /// <inheritdoc/>
+ public override long Length => throw new NotSupportedException();
+
+ /// <inheritdoc/>
+ public override long Position { get => throw new NotSupportedException(); set => throw new NotSupportedException(); }
+
+ /// <summary>
+ /// Sets the number of bytes available to read from the current segment.
+ /// Must be called before reading each segment.
+ /// </summary>
+ /// <param name="bytes">The length of the segment in bytes.</param>
+ public void SetCurrentSegmentLength(int bytes) => this.currentDataRemaining = bytes;
+
+ /// <inheritdoc/>
+ public override void Flush()
+ {
+ // Nothing is buffered for writing; flushing a read-only stream is a valid no-op.
+ }
+
+ /// <inheritdoc/>
+ public override int ReadByte()
+ {
+ // A non-positive count means the segment is drained (or was given an invalid length); ask for the next one.
+ if (this.currentDataRemaining <= 0)
+ {
+ this.currentDataRemaining = this.getData();
+ if (this.currentDataRemaining <= 0)
+ {
+ return -1;
+ }
+ }
+
+ int value = this.innerStream.ReadByte();
+ if (value is not -1)
+ {
+ this.currentDataRemaining--;
+ }
+
+ return value;
+ }
+
+ /// <inheritdoc/>
+ public override int Read(byte[] buffer, int offset, int count)
+ {
+ // Decrement currentDataRemaining only by the bytes innerStream.Read actually
+ // returned. Decrementing by the requested amount would, on a short read,
+ // drain the counter early and trigger getData() before the segment is done.
+ int totalBytesRead = 0;
+ while (totalBytesRead < count)
+ {
+ // Treat a non-positive length as "drained" so a negative value never reaches innerStream.Read as a count.
+ if (this.currentDataRemaining <= 0)
+ {
+ this.currentDataRemaining = this.getData();
+ if (this.currentDataRemaining <= 0)
+ {
+ break;
+ }
+ }
+
+ int bytesToRead = Math.Min(count - totalBytesRead, this.currentDataRemaining);
+ int bytesRead = this.innerStream.Read(buffer, offset + totalBytesRead, bytesToRead);
+ if (bytesRead is 0)
+ {
+ break;
+ }
+
+ this.currentDataRemaining -= bytesRead;
+ totalBytesRead += bytesRead;
+ }
+
+ return totalBytesRead;
+ }
+
+ /// <inheritdoc/>
+ public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();
+
+ /// <inheritdoc/>
+ public override void SetLength(long value) => throw new NotSupportedException();
+
+ /// <inheritdoc/>
+ public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();
+}
diff --git a/src/ImageSharp/Compression/Zlib/ZlibInflateStream.cs b/src/ImageSharp/Compression/Zlib/ZlibInflateStream.cs
index 513171b179..11f34dac8a 100644
--- a/src/ImageSharp/Compression/Zlib/ZlibInflateStream.cs
+++ b/src/ImageSharp/Compression/Zlib/ZlibInflateStream.cs
@@ -8,9 +8,11 @@
namespace SixLabors.ImageSharp.Compression.Zlib;
///
-/// Provides methods and properties for deframing streams from PNGs.
+/// Reads chunked input, parses the zlib CMF/FLG header, and exposes a
+/// <see cref="DeflateStream"/> over the remaining DEFLATE payload. The
+/// Adler-32 trailer is not validated.
///
-internal sealed class ZlibInflateStream : Stream
+internal sealed class ZlibInflateStream : IDisposable
{
///
/// Used to read the Adler-32 and Crc-32 checksums.
@@ -19,94 +21,13 @@ internal sealed class ZlibInflateStream : Stream
///
private static readonly byte[] ChecksumBuffer = new byte[4];
- ///
- /// A default delegate to get more data from the inner stream.
- ///
- private static readonly Func GetDataNoOp = () => 0;
-
- ///
- /// The inner raw memory stream.
- ///
- private readonly BufferedReadStream innerStream;
-
- ///
- /// A value indicating whether this instance of the given entity has been disposed.
- ///
- /// if this instance has been disposed; otherwise, .
- ///
- /// If the entity is disposed, it must not be disposed a second
- /// time. The isDisposed field is set the first time the entity
- /// is disposed. If the isDisposed field is true, then the Dispose()
- /// method will not dispose again. This help not to prolong the entity's
- /// life in the Garbage Collector.
- ///
- private bool isDisposed;
-
- ///
- /// The current data remaining to be read.
- ///
- private int currentDataRemaining;
-
- ///
- /// Delegate to get more data once we've exhausted the current data remaining.
- ///
- private readonly Func getData;
-
- ///
- /// When true, the inflated payload is treated as a raw DEFLATE stream with no zlib
- /// CMF/FLG header (and no Adler-32 trailer). This is required to decode IDATs in
- /// Apple's proprietary CgBI PNG variant.
- ///
- private readonly bool noHeader;
+ private readonly ChunkedReadStream segmentStream;
- ///
- /// Initializes a new instance of the class.
- ///
- /// The inner raw stream.
public ZlibInflateStream(BufferedReadStream innerStream)
- : this(innerStream, GetDataNoOp, noHeader: false)
- {
- }
+ => this.segmentStream = new ChunkedReadStream(innerStream);
- ///
- /// Initializes a new instance of the class.
- ///
- /// The inner raw stream.
- /// A delegate to get more data from the inner stream.
public ZlibInflateStream(BufferedReadStream innerStream, Func getData)
- : this(innerStream, getData, noHeader: false)
- {
- }
-
- ///
- /// Initializes a new instance of the class.
- ///
- /// The inner raw stream.
- /// A delegate to get more data from the inner stream.
- ///
- /// When , the payload is treated as raw DEFLATE with no zlib header.
- ///
- public ZlibInflateStream(BufferedReadStream innerStream, Func getData, bool noHeader)
- {
- this.innerStream = innerStream;
- this.getData = getData;
- this.noHeader = noHeader;
- }
-
- ///
- public override bool CanRead => this.innerStream.CanRead;
-
- ///
- public override bool CanSeek => false;
-
- ///
- public override bool CanWrite => throw new NotSupportedException();
-
- ///
- public override long Length => throw new NotSupportedException();
-
- ///
- public override long Position { get => throw new NotSupportedException(); set => throw new NotSupportedException(); }
+ => this.segmentStream = new ChunkedReadStream(innerStream, getData);
///
/// Gets the compressed stream over the deframed inner stream.
@@ -114,15 +35,16 @@ public ZlibInflateStream(BufferedReadStream innerStream, Func getData, bool
public DeflateStream? CompressedStream { get; private set; }
///
- /// Adds new bytes from a frame found in the original stream.
+ /// Sets the length of the next segment of compressed input and, on first
+ /// call, parses the zlib header.
///
- /// The current remaining data according to the chunk length.
- /// Whether the chunk to be inflated is a critical chunk.
+ /// The remaining data length for the current segment.
+ /// Whether to throw on a malformed zlib header.
/// The .
[MemberNotNullWhen(true, nameof(CompressedStream))]
public bool AllocateNewBytes(int bytes, bool isCriticalChunk)
{
- this.currentDataRemaining = bytes;
+ this.segmentStream.SetCurrentSegmentLength(bytes);
if (this.CompressedStream is null)
{
return this.InitializeInflateStream(isCriticalChunk);
@@ -131,114 +53,15 @@ public bool AllocateNewBytes(int bytes, bool isCriticalChunk)
return true;
}
- ///
- public override void Flush() => throw new NotSupportedException();
-
- ///
- public override int ReadByte()
+ public void Dispose()
{
- this.currentDataRemaining--;
- return this.innerStream.ReadByte();
- }
-
- ///
- public override int Read(byte[] buffer, int offset, int count)
- {
- if (this.currentDataRemaining is 0)
- {
- // Last buffer was read in its entirety, let's make sure we don't actually have more in additional IDAT chunks.
- this.currentDataRemaining = this.getData();
-
- if (this.currentDataRemaining is 0)
- {
- return 0;
- }
- }
-
- int bytesToRead = Math.Min(count, this.currentDataRemaining);
- this.currentDataRemaining -= bytesToRead;
- int totalBytesRead = this.innerStream.Read(buffer, offset, bytesToRead);
- long innerStreamLength = this.innerStream.Length;
-
- // Keep reading data until we've reached the end of the stream or filled the buffer.
- int bytesRead = 0;
- offset += totalBytesRead;
- while (this.currentDataRemaining is 0 && totalBytesRead < count)
- {
- this.currentDataRemaining = this.getData();
-
- if (this.currentDataRemaining is 0)
- {
- return totalBytesRead;
- }
-
- offset += bytesRead;
-
- if (offset >= innerStreamLength || offset >= count)
- {
- return totalBytesRead;
- }
-
- bytesToRead = Math.Min(count - totalBytesRead, this.currentDataRemaining);
- this.currentDataRemaining -= bytesToRead;
- bytesRead = this.innerStream.Read(buffer, offset, bytesToRead);
- if (bytesRead == 0)
- {
- return totalBytesRead;
- }
-
- totalBytesRead += bytesRead;
- }
-
- return totalBytesRead;
- }
-
- ///
- public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();
-
- ///
- public override void SetLength(long value) => throw new NotSupportedException();
-
- ///
- public override void Write(byte[] buffer, int offset, int count) => throw new NotSupportedException();
-
- ///
- protected override void Dispose(bool disposing)
- {
- if (this.isDisposed)
- {
- return;
- }
-
- if (disposing)
- {
- // Dispose managed resources.
- if (this.CompressedStream != null)
- {
- this.CompressedStream.Dispose();
- this.CompressedStream = null;
- }
- }
-
- base.Dispose(disposing);
-
- // Call the appropriate methods to clean up
- // unmanaged resources here.
- // Note disposing is done.
- this.isDisposed = true;
+ this.CompressedStream?.Dispose();
+ this.segmentStream?.Dispose();
}
[MemberNotNullWhen(true, nameof(CompressedStream))]
private bool InitializeInflateStream(bool isCriticalChunk)
{
- // Apple CgBI IDATs omit the zlib CMF/FLG header and the Adler-32 trailer,
- // wrapping a raw DEFLATE payload directly. Skip the header parsing in that mode.
- if (this.noHeader)
- {
- this.CompressedStream = new DeflateStream(this, CompressionMode.Decompress, true);
- return true;
- }
-
// Read the zlib header : http://tools.ietf.org/html/rfc1950
// CMF(Compression Method and flags)
// This byte is divided into a 4 - bit compression method and a
@@ -250,9 +73,8 @@ private bool InitializeInflateStream(bool isCriticalChunk)
// +---+---+
// |CMF|FLG|
// +---+---+
- int cmf = this.innerStream.ReadByte();
- int flag = this.innerStream.ReadByte();
- this.currentDataRemaining -= 2;
+ int cmf = this.segmentStream.ReadByte();
+ int flag = this.segmentStream.ReadByte();
if (cmf == -1 || flag == -1)
{
return false;
@@ -290,16 +112,13 @@ private bool InitializeInflateStream(bool isCriticalChunk)
{
// We don't need this for inflate so simply skip by the next four bytes.
// https://tools.ietf.org/html/rfc1950#page-6
- if (this.innerStream.Read(ChecksumBuffer, 0, 4) != 4)
+ if (this.segmentStream.Read(ChecksumBuffer, 0, 4) != 4)
{
return false;
}
-
- this.currentDataRemaining -= 4;
}
- // Initialize the deflate BufferedReadStream.
- this.CompressedStream = new DeflateStream(this, CompressionMode.Decompress, true);
+ this.CompressedStream = new DeflateStream(this.segmentStream, CompressionMode.Decompress, leaveOpen: true);
return true;
}
diff --git a/src/ImageSharp/Formats/Png/PngCgbiProcessor.cs b/src/ImageSharp/Formats/Png/PngCgbiProcessor.cs
new file mode 100644
index 0000000000..e4847ad67f
--- /dev/null
+++ b/src/ImageSharp/Formats/Png/PngCgbiProcessor.cs
@@ -0,0 +1,351 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using SixLabors.ImageSharp.Common.Helpers;
+using SixLabors.ImageSharp.PixelFormats;
+using static SixLabors.ImageSharp.SimdUtils;
+
+namespace SixLabors.ImageSharp.Formats.Png;
+
+/// <summary>
+/// Reverses the pixel mangling applied by Apple's CgBI PNG variant. CgBI files
+/// (emitted by <c>pngcrush -iphone</c>) swap channel order from RGB(A) to BGR(A)
+/// and premultiply RGB samples by alpha. This converts a defiltered scanline back
+/// to standard PNG semantics in place so the existing scanline processors can
+/// consume it unchanged. CgBI is only emitted for 8-bit truecolor (with or
+/// without alpha); other color types are left alone.
+/// </summary>
+/// <remarks>
+/// See https://theapplewiki.com/wiki/PNG_CgBI_Format
+/// </remarks>
+internal static class PngCgbiProcessor
+{
+ // Per-pixel byte indices that swap CgBI's BGRA layout to Rgba32's RGBA.
+ // MMShuffle3012 expands to [2, 1, 0, 3] per 4-byte pixel; the same 64-byte
+ // sequence seeds all three shuffle masks (Vector128/256 take a leading slice).
+ private static readonly byte[] BgraToRgbaShuffleBytes = BuildShuffleBytes();
+
+ private static readonly Vector128<byte> BgraToRgbaShuffle128 = Vector128.Create(new ReadOnlySpan<byte>(BgraToRgbaShuffleBytes, 0, Vector128<byte>.Count));
+
+ private static readonly Vector256<byte> BgraToRgbaShuffle256 = Vector256.Create(new ReadOnlySpan<byte>(BgraToRgbaShuffleBytes, 0, Vector256<byte>.Count));
+
+ private static readonly Vector512<byte> BgraToRgbaShuffle512 = Vector512.Create(BgraToRgbaShuffleBytes);
+
+ /// <summary>
+ /// Applies the inverse of Apple's CgBI pixel mangling to a defiltered scanline in place.
+ /// </summary>
+ /// <param name="configuration">The configuration used by the Rgb24 R/B swap.</param>
+ /// <param name="scanline">The defiltered pixel bytes (without the leading filter byte).</param>
+ /// <param name="colorType">The PNG color type from IHDR.</param>
+ public static void ApplyTransform(Configuration configuration, Span<byte> scanline, PngColorType colorType)
+ {
+ if (colorType == PngColorType.RgbWithAlpha)
+ {
+ Span<Rgba32> pixels = MemoryMarshal.Cast<byte, Rgba32>(scanline);
+ int i = 0;
+
+ if (Vector512.IsHardwareAccelerated && pixels.Length >= Vector512<int>.Count)
+ {
+ i = ApplyTransformVector512(scanline, pixels.Length);
+ }
+
+ if (Vector256.IsHardwareAccelerated && Avx2.IsSupported && (pixels.Length - i) >= Vector256<int>.Count)
+ {
+ i = ApplyTransformVector256(scanline, i, pixels.Length);
+ }
+
+ if (Vector128.IsHardwareAccelerated && (pixels.Length - i) >= Vector128<int>.Count)
+ {
+ i = ApplyTransformVector128(scanline, i, pixels.Length);
+ }
+
+ for (; i < pixels.Length; i++)
+ {
+ ref Rgba32 pixel = ref pixels[i];
+ pixel = new Rgba32(pixel.B, pixel.G, pixel.R, pixel.A);
+ UndoPremultiplicationScalar(ref pixel);
+ }
+ }
+ else if (colorType == PngColorType.Rgb)
+ {
+ // No alpha channel, so just swap R and B using built in SIMD-optimized pixel operations.
+ Span<Rgb24> target = MemoryMarshal.Cast<byte, Rgb24>(scanline);
+ PixelOperations<Rgb24>.Instance.FromBgr24Bytes(configuration, scanline, target, target.Length);
+ }
+ }
+
+ /// <summary>
+ /// Reverses alpha premultiplication of the R, G and B channels of one pixel.
+ /// Pixels whose alpha is 0 or 255 are returned unchanged.
+ /// </summary>
+ /// <param name="pixel">The premultiplied pixel, updated in place.</param>
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void UndoPremultiplicationScalar(ref Rgba32 pixel)
+ {
+ byte a = pixel.A;
+ if (a is 0 or byte.MaxValue)
+ {
+ return;
+ }
+
+ // Reverse: c' = c * a / 255 => c = round(c' * 255 / a)
+ int half = a >> 1;
+ byte r = (byte)Math.Min(byte.MaxValue, ((pixel.R * byte.MaxValue) + half) / a);
+ byte g = (byte)Math.Min(byte.MaxValue, ((pixel.G * byte.MaxValue) + half) / a);
+ byte b = (byte)Math.Min(byte.MaxValue, ((pixel.B * byte.MaxValue) + half) / a);
+ pixel = new Rgba32(r, g, b, a);
+ }
+
+ /// <summary>
+ /// Swaps BGRA to RGBA and reverses alpha premultiplication in place,
+ /// processing one 512-bit vector of pixels per iteration from the first pixel.
+ /// </summary>
+ /// <param name="scanline">The defiltered pixel bytes.</param>
+ /// <param name="pixelCount">The number of 4-byte pixels to consider in <paramref name="scanline"/>.</param>
+ /// <returns>The index of the first pixel left unprocessed.</returns>
+ internal static int ApplyTransformVector512(Span<byte> scanline, int pixelCount)
+ {
+ ref byte scanlineRef = ref MemoryMarshal.GetReference(scanline);
+ int i = 0;
+
+ // Indices stay within their own 4-byte pixel, so the per-pixel pattern
+ // is also valid under the per-128-bit-lane vpshufb that ShuffleNative
+ // selects on AVX-512BW hosts.
+ Vector512<byte> shuffleMask = BgraToRgbaShuffle512;
+
+ Vector512<int> zero = Vector512<int>.Zero;
+ Vector512<int> one = Vector512<int>.One;
+ Vector512<int> byteMax = Vector512.Create((int)byte.MaxValue);
+
+ for (; i <= pixelCount - Vector512<int>.Count; i += Vector512<int>.Count)
+ {
+ ref byte blockRef = ref Unsafe.Add(ref scanlineRef, i * Unsafe.SizeOf<Rgba32>());
+ Vector512<byte> bgra = Unsafe.ReadUnaligned<Vector512<byte>>(ref blockRef);
+ Vector512<byte> rgba = Vector512_.ShuffleNative(bgra, shuffleMask);
+ Vector512<int> packed = rgba.AsInt32();
+ Vector512<int> alpha = Vector512.ShiftRightLogical(packed, 24);
+
+ // Fully transparent and fully opaque pixels are identity cases for
+ // unpremultiplication. Masking them keeps the scalar behavior and lets
+ // safeAlpha avoid dividing by zero for alpha == 0.
+ Vector512<int> partialMask = ~(Vector512.Equals(alpha, zero) | Vector512.Equals(alpha, byteMax));
+
+ Vector512<int> r = packed & byteMax;
+ Vector512<int> g = Vector512.ShiftRightLogical(packed, 8) & byteMax;
+ Vector512<int> b = Vector512.ShiftRightLogical(packed, 16) & byteMax;
+
+ Vector512<int> safeAlpha = Vector512.ConditionalSelect(partialMask, alpha, one);
+ Vector512<int> halfAlpha = Vector512.ShiftRightLogical(safeAlpha, 1);
+ Vector512<float> safeAlphaF = Vector512.ConvertToSingle(safeAlpha);
+
+ // ConvertToInt32 truncates toward zero (cvttps2dq / fcvtzs); since
+ // every quotient here is non-negative, that matches the scalar
+ // ((c * 255) + (a >> 1)) / a integer-division floor.
+ Vector512<int> unpremultipliedR = Vector512.Min(
+ byteMax,
+ Vector512.ConvertToInt32(Vector512.ConvertToSingle((r * byteMax) + halfAlpha) / safeAlphaF));
+
+ Vector512<int> unpremultipliedG = Vector512.Min(
+ byteMax,
+ Vector512.ConvertToInt32(Vector512.ConvertToSingle((g * byteMax) + halfAlpha) / safeAlphaF));
+
+ Vector512<int> unpremultipliedB = Vector512.Min(
+ byteMax,
+ Vector512.ConvertToInt32(Vector512.ConvertToSingle((b * byteMax) + halfAlpha) / safeAlphaF));
+
+ // ConditionalSelect applies the expensive unpremultiply only to pixels
+ // where alpha is between 1 and 254; alpha 0 and 255 lanes keep the
+ // shuffled channel values exactly as the scalar path does.
+ Vector512<int> finalR = Vector512.ConditionalSelect(partialMask, unpremultipliedR, r);
+ Vector512<int> finalG = Vector512.ConditionalSelect(partialMask, unpremultipliedG, g);
+ Vector512<int> finalB = Vector512.ConditionalSelect(partialMask, unpremultipliedB, b);
+
+ // Rgba32 is laid out as little-endian 0xAABBGGRR in an int lane, so
+ // shifting the unpacked channels back to byte offsets 0, 1, 2, and 3
+ // recreates the in-memory RGBA bytes for the unaligned store.
+ Vector512<int> result =
+ finalR |
+ Vector512.ShiftLeft(finalG, 8) |
+ Vector512.ShiftLeft(finalB, 16) |
+ Vector512.ShiftLeft(alpha, 24);
+
+ Unsafe.WriteUnaligned(ref blockRef, result.AsByte());
+ }
+
+ return i;
+ }
+
+ /// <summary>
+ /// Swaps BGRA to RGBA and reverses alpha premultiplication in place,
+ /// processing one 256-bit vector of pixels per iteration.
+ /// </summary>
+ /// <param name="scanline">The defiltered pixel bytes.</param>
+ /// <param name="startPixel">The index of the first pixel to process.</param>
+ /// <param name="pixelCount">The number of 4-byte pixels to consider in <paramref name="scanline"/>.</param>
+ /// <returns>The index of the first pixel left unprocessed.</returns>
+ internal static int ApplyTransformVector256(Span<byte> scanline, int startPixel, int pixelCount)
+ {
+ ref byte scanlineRef = ref MemoryMarshal.GetReference(scanline);
+ int i = startPixel;
+
+ // vpshufb is 128-bit lane-local and uses only the low 4 bits of each
+ // index, so the same per-pixel [2,1,0,3] pattern in both lanes keeps
+ // every byte inside its own lane.
+ Vector256<byte> shuffleMask = BgraToRgbaShuffle256;
+
+ Vector256<int> zero = Vector256<int>.Zero;
+ Vector256<int> one = Vector256<int>.One;
+ Vector256<int> byteMax = Vector256.Create((int)byte.MaxValue);
+
+ for (; i <= pixelCount - Vector256<int>.Count; i += Vector256<int>.Count)
+ {
+ ref byte blockRef = ref Unsafe.Add(ref scanlineRef, i * Unsafe.SizeOf<Rgba32>());
+ Vector256<byte> bgra = Unsafe.ReadUnaligned<Vector256<byte>>(ref blockRef);
+ Vector256<byte> rgba = Vector256_.ShufflePerLane(bgra, shuffleMask);
+ Vector256<int> packed = rgba.AsInt32();
+ Vector256<int> alpha = Vector256.ShiftRightLogical(packed, 24);
+
+ // Fully transparent and fully opaque pixels are identity cases for
+ // unpremultiplication. Masking them keeps the scalar behavior and lets
+ // safeAlpha avoid dividing by zero for alpha == 0.
+ Vector256<int> partialMask = ~(Vector256.Equals(alpha, zero) | Vector256.Equals(alpha, byteMax));
+
+ Vector256<int> r = packed & byteMax;
+ Vector256<int> g = Vector256.ShiftRightLogical(packed, 8) & byteMax;
+ Vector256<int> b = Vector256.ShiftRightLogical(packed, 16) & byteMax;
+
+ Vector256<int> safeAlpha = Vector256.ConditionalSelect(partialMask, alpha, one);
+ Vector256<int> halfAlpha = Vector256.ShiftRightLogical(safeAlpha, 1);
+ Vector256<float> safeAlphaF = Vector256.ConvertToSingle(safeAlpha);
+
+ // ConvertToInt32 truncates toward zero (cvttps2dq / fcvtzs); since
+ // every quotient here is non-negative, that matches the scalar
+ // ((c * 255) + (a >> 1)) / a integer-division floor.
+ Vector256<int> unpremultipliedR = Vector256.Min(
+ byteMax,
+ Vector256.ConvertToInt32(Vector256.ConvertToSingle((r * byteMax) + halfAlpha) / safeAlphaF));
+
+ Vector256<int> unpremultipliedG = Vector256.Min(
+ byteMax,
+ Vector256.ConvertToInt32(Vector256.ConvertToSingle((g * byteMax) + halfAlpha) / safeAlphaF));
+
+ Vector256<int> unpremultipliedB = Vector256.Min(
+ byteMax,
+ Vector256.ConvertToInt32(Vector256.ConvertToSingle((b * byteMax) + halfAlpha) / safeAlphaF));
+
+ // ConditionalSelect applies the expensive unpremultiply only to pixels
+ // where alpha is between 1 and 254; alpha 0 and 255 lanes keep the
+ // shuffled channel values exactly as the scalar path does.
+ Vector256<int> finalR = Vector256.ConditionalSelect(partialMask, unpremultipliedR, r);
+ Vector256<int> finalG = Vector256.ConditionalSelect(partialMask, unpremultipliedG, g);
+ Vector256<int> finalB = Vector256.ConditionalSelect(partialMask, unpremultipliedB, b);
+
+ // Rgba32 is laid out as little-endian 0xAABBGGRR in an int lane, so
+ // shifting the unpacked channels back to byte offsets 0, 1, 2, and 3
+ // recreates the in-memory RGBA bytes for the unaligned store.
+ Vector256<int> result =
+ finalR |
+ Vector256.ShiftLeft(finalG, 8) |
+ Vector256.ShiftLeft(finalB, 16) |
+ Vector256.ShiftLeft(alpha, 24);
+
+ Unsafe.WriteUnaligned(ref blockRef, result.AsByte());
+ }
+
+ return i;
+ }
+
+ /// <summary>
+ /// Swaps BGRA to RGBA and reverses alpha premultiplication in place,
+ /// processing one 128-bit vector of pixels per iteration.
+ /// </summary>
+ /// <param name="scanline">The defiltered pixel bytes.</param>
+ /// <param name="startPixel">The index of the first pixel to process.</param>
+ /// <param name="pixelCount">The number of 4-byte pixels to consider in <paramref name="scanline"/>.</param>
+ /// <returns>The index of the first pixel left unprocessed.</returns>
+ internal static int ApplyTransformVector128(Span<byte> scanline, int startPixel, int pixelCount)
+ {
+ ref byte scanlineRef = ref MemoryMarshal.GetReference(scanline);
+ int i = startPixel;
+
+ Vector128<byte> shuffleMask = BgraToRgbaShuffle128;
+
+ Vector128<int> zero = Vector128<int>.Zero;
+ Vector128<int> one = Vector128<int>.One;
+ Vector128<int> byteMax = Vector128.Create((int)byte.MaxValue);
+
+ for (; i <= pixelCount - Vector128<int>.Count; i += Vector128<int>.Count)
+ {
+ ref byte blockRef = ref Unsafe.Add(ref scanlineRef, i * Unsafe.SizeOf<Rgba32>());
+ Vector128<byte> bgra = Unsafe.ReadUnaligned<Vector128<byte>>(ref blockRef);
+ Vector128<byte> rgba = Vector128_.ShuffleNative(bgra, shuffleMask);
+ Vector128<int> packed = rgba.AsInt32();
+ Vector128<int> alpha = Vector128.ShiftRightLogical(packed, 24);
+
+ // Fully transparent and fully opaque pixels are identity cases for
+ // unpremultiplication. Masking them keeps the scalar behavior and lets
+ // safeAlpha avoid dividing by zero for alpha == 0.
+ Vector128<int> partialMask = ~(Vector128.Equals(alpha, zero) | Vector128.Equals(alpha, byteMax));
+
+ Vector128<int> r = packed & byteMax;
+ Vector128<int> g = Vector128.ShiftRightLogical(packed, 8) & byteMax;
+ Vector128<int> b = Vector128.ShiftRightLogical(packed, 16) & byteMax;
+
+ Vector128<int> safeAlpha = Vector128.ConditionalSelect(partialMask, alpha, one);
+ Vector128<int> halfAlpha = Vector128.ShiftRightLogical(safeAlpha, 1);
+ Vector128<float> safeAlphaF = Vector128.ConvertToSingle(safeAlpha);
+
+ // ConvertToInt32 truncates toward zero (cvttps2dq / fcvtzs); since
+ // every quotient here is non-negative, that matches the scalar
+ // ((c * 255) + (a >> 1)) / a integer-division floor.
+ Vector128<int> unpremultipliedR = Vector128.Min(
+ byteMax,
+ Vector128.ConvertToInt32(Vector128.ConvertToSingle((r * byteMax) + halfAlpha) / safeAlphaF));
+
+ Vector128<int> unpremultipliedG = Vector128.Min(
+ byteMax,
+ Vector128.ConvertToInt32(Vector128.ConvertToSingle((g * byteMax) + halfAlpha) / safeAlphaF));
+
+ Vector128<int> unpremultipliedB = Vector128.Min(
+ byteMax,
+ Vector128.ConvertToInt32(Vector128.ConvertToSingle((b * byteMax) + halfAlpha) / safeAlphaF));
+
+ // ConditionalSelect applies the expensive unpremultiply only to pixels
+ // where alpha is between 1 and 254; alpha 0 and 255 lanes keep the
+ // shuffled channel values exactly as the scalar path does.
+ Vector128<int> finalR = Vector128.ConditionalSelect(partialMask, unpremultipliedR, r);
+ Vector128<int> finalG = Vector128.ConditionalSelect(partialMask, unpremultipliedG, g);
+ Vector128<int> finalB = Vector128.ConditionalSelect(partialMask, unpremultipliedB, b);
+
+ // Rgba32 is laid out as little-endian 0xAABBGGRR in an int lane, so
+ // shifting the unpacked channels back to byte offsets 0, 1, 2, and 3
+ // recreates the in-memory RGBA bytes for the unaligned store.
+ Vector128<int> result =
+ finalR |
+ Vector128.ShiftLeft(finalG, 8) |
+ Vector128.ShiftLeft(finalB, 16) |
+ Vector128.ShiftLeft(alpha, 24);
+
+ Unsafe.WriteUnaligned(ref blockRef, result.AsByte());
+ }
+
+ return i;
+ }
+
+ /// <summary>
+ /// Fills a buffer the size of a 512-bit byte vector using Shuffle.MMShuffleSpan with Shuffle.MMShuffle3012.
+ /// </summary>
+ /// <returns>The filled buffer used to seed the shuffle masks.</returns>
+ private static byte[] BuildShuffleBytes()
+ {
+ byte[] bytes = new byte[Vector512<byte>.Count];
+ Span<byte> span = bytes;
+ Shuffle.MMShuffleSpan(ref span, Shuffle.MMShuffle3012);
+
+ return bytes;
+ }
+}
diff --git a/src/ImageSharp/Formats/Png/PngDecoderCore.cs b/src/ImageSharp/Formats/Png/PngDecoderCore.cs
index 84245254a2..f3e2bbdbe0 100644
--- a/src/ImageSharp/Formats/Png/PngDecoderCore.cs
+++ b/src/ImageSharp/Formats/Png/PngDecoderCore.cs
@@ -9,8 +9,6 @@
using System.IO.Hashing;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
-using System.Runtime.Intrinsics;
-using System.Runtime.Intrinsics.X86;
using System.Text;
using SixLabors.ImageSharp.Common.Helpers;
using SixLabors.ImageSharp.Compression.Zlib;
@@ -767,7 +765,7 @@ private int CalculateScanlineLength(int width)
/// The length of the chunk that containing the compressed scanline data.
/// The pixel data.
/// The png metadata
- /// A delegate to get more data from the inner stream for .
+ /// A delegate to get more data from the inner stream when chunk boundaries are crossed.
/// The frame control
/// The cancellation token.
private void ReadScanlines(
@@ -779,14 +777,34 @@ private void ReadScanlines(
CancellationToken cancellationToken)
where TPixel : unmanaged, IPixel
{
- using ZlibInflateStream inflateStream = new(this.currentStream, getData, noHeader: this.isCgbi);
+ // CgBI IDATs wrap a raw DEFLATE payload directly (no zlib CMF/FLG header
+ // and no Adler-32 trailer); skip the zlib header parser entirely.
+ if (this.isCgbi)
+ {
+ using ChunkedReadStream segmentStream = new(this.currentStream, getData);
+ segmentStream.SetCurrentSegmentLength(chunkLength);
+ using DeflateStream cgbiDataStream = new(segmentStream, CompressionMode.Decompress, leaveOpen: true);
+ this.DecodeFromDeflate(cgbiDataStream, image, pngMetadata, frameControl, cancellationToken);
+ return;
+ }
+
+ using ZlibInflateStream inflateStream = new(this.currentStream, getData);
if (!inflateStream.AllocateNewBytes(chunkLength, !this.hasImageData))
{
return;
}
- DeflateStream dataStream = inflateStream.CompressedStream!;
+ this.DecodeFromDeflate(inflateStream.CompressedStream!, image, pngMetadata, frameControl, cancellationToken);
+ }
+ private void DecodeFromDeflate(
+ DeflateStream dataStream,
+ ImageFrame image,
+ PngMetadata pngMetadata,
+ in FrameControl frameControl,
+ CancellationToken cancellationToken)
+ where TPixel : unmanaged, IPixel
+ {
if (this.header.InterlaceMethod is PngInterlaceMode.Adam7)
{
this.DecodeInterlacedPixelData(frameControl, dataStream, image, pngMetadata, cancellationToken);
@@ -902,7 +920,7 @@ private void DecodePixelDataCore(
if (this.isCgbi)
{
- this.ApplyCgbiTransform(scanSpan[1..], this.pngColorType);
+ PngCgbiProcessor.ApplyTransform(this.configuration, scanSpan[1..], this.pngColorType);
}
this.ProcessDefilteredScanline(frameControl, currentRow, scanSpan, imageFrame, pngMetadata, blendRowBuffer);
@@ -1037,7 +1055,7 @@ private void DecodeInterlacedPixelDataCore(
if (this.isCgbi)
{
- this.ApplyCgbiTransform(scanSpan[1..], this.pngColorType);
+ PngCgbiProcessor.ApplyTransform(this.configuration, scanSpan[1..], this.pngColorType);
}
Span rowSpan = imageBuffer.DangerousGetRowSpan(currentRow);
@@ -1431,6 +1449,22 @@ private void ReadHeaderChunk(PngMetadata pngMetadata, ReadOnlySpan data)
this.pngColorType = this.header.ColorType;
this.Dimensions = new Size(this.header.Width, this.header.Height);
+
+ // Apple's pngcrush emits the CgBI chunk before IHDR, so the header
+ // compatibility check is deferred until both chunks have been seen.
+ if (this.isCgbi)
+ {
+ ThrowIfInvalidCgbiContent(this.header);
+ }
+ }
+
+ private static void ThrowIfInvalidCgbiContent(in PngHeader header)
+ {
+ bool isTruecolor = header.ColorType is PngColorType.Rgb or PngColorType.RgbWithAlpha; // RGB or RGBA only
+ if (header.BitDepth is not 8 || !isTruecolor)
+ {
+ PngThrowHelper.ThrowInvalidImageContentException($"CgBI is only supported for 8-bit truecolor images. Was bit depth '{header.BitDepth}', color type '{header.ColorType}'.");
+ }
 }
///
@@ -2493,303 +2527,4 @@ private static bool IsXmpTextData(ReadOnlySpan keywordBytes)
private void SwapScanlineBuffers()
=> (this.scanline, this.previousScanline) = (this.previousScanline, this.scanline);
-
- ///
- /// Applies the inverse of Apple's CgBI pixel mangling to a defiltered scanline.
- /// CgBI PNGs are emitted by pngcrush -iphone with channel order swapped
- /// from RGB(A) to BGR(A) and RGB samples premultiplied by alpha. This converts
- /// the bytes back to standard PNG semantics in place so the existing scanline
- /// processors can consume them unchanged. CgBI is only emitted for 8-bit
- /// truecolor (with or without alpha); other color types are left alone.
- ///
- ///
- /// See https://theapplewiki.com/wiki/PNG_CgBI_Format
- ///
- /// The defiltered pixel bytes (without the leading filter byte).
- /// The PNG color type from IHDR.
- private void ApplyCgbiTransform(Span scanline, PngColorType colorType)
- {
- if (colorType == PngColorType.RgbWithAlpha)
- {
- Span pixels = MemoryMarshal.Cast(scanline);
- int i = 0;
-
- if (Vector512.IsHardwareAccelerated && pixels.Length >= 16)
- {
- i = ApplyCgbiTransformVector512(scanline, pixels.Length);
- }
-
- if (Vector256.IsHardwareAccelerated && Avx2.IsSupported && (pixels.Length - i) >= 8)
- {
- i = ApplyCgbiTransformVector256(scanline, i, pixels.Length);
- }
-
- if (Vector128.IsHardwareAccelerated && (pixels.Length - i) >= 4)
- {
- i = ApplyCgbiTransformVector128(scanline, i, pixels.Length);
- }
-
- for (; i < pixels.Length; i++)
- {
- ref Rgba32 pixel = ref pixels[i];
- pixel = new Rgba32(pixel.B, pixel.G, pixel.R, pixel.A);
- UndoCgbiPremultiplicationScalar(ref pixel);
- }
- }
- else if (colorType == PngColorType.Rgb)
- {
- // No alpha channel, so just swap R and B using built in SIMD-optimized pixel operations.
- Span target = MemoryMarshal.Cast(scanline);
- PixelOperations.Instance.FromBgr24Bytes(this.configuration, scanline, target, target.Length);
- }
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- private static void UndoCgbiPremultiplicationScalar(ref Rgba32 pixel)
- {
- byte a = pixel.A;
- if (a is 0 or byte.MaxValue)
- {
- return;
- }
-
- // Reverse: c' = c * a / 255 => c = round(c' * 255 / a)
- int half = a >> 1;
- byte r = (byte)Math.Min(byte.MaxValue, ((pixel.R * byte.MaxValue) + half) / a);
- byte g = (byte)Math.Min(byte.MaxValue, ((pixel.G * byte.MaxValue) + half) / a);
- byte b = (byte)Math.Min(byte.MaxValue, ((pixel.B * byte.MaxValue) + half) / a);
- pixel = new Rgba32(r, g, b, a);
- }
-
- private static int ApplyCgbiTransformVector512(Span scanline, int pixelCount)
- {
- ref byte scanlineRef = ref MemoryMarshal.GetReference(scanline);
- int i = 0;
-
- Span temp = stackalloc byte[Vector512.Count];
- SimdUtils.Shuffle.MMShuffleSpan(ref temp, SimdUtils.Shuffle.MMShuffle3012);
-
- // MMShuffle3012 expands to [2, 1, 0, 3] for each 4-byte pixel, converting
- // CgBI's BGRA byte order to Rgba32's RGBA layout while keeping alpha in place.
- // The generated mask only swaps bytes inside each pixel, so it remains
- // correct for the optimized 512-bit byte shuffle helper.
- Vector512 shuffleMask = Unsafe.As>(ref MemoryMarshal.GetReference(temp));
-
- Vector512 zero = Vector512.Zero;
- Vector512 one = Vector512.One;
- Vector512 byteMask = Vector512.Create(0xFF);
- Vector512 opaque = Vector512.Create(0xFF);
- Vector512 byteMax = Vector512.Create((int)byte.MaxValue);
-
- for (; i <= pixelCount - 16; i += 16)
- {
- ref byte blockRef = ref Unsafe.Add(ref scanlineRef, i * Unsafe.SizeOf());
- Vector512 bgra = Unsafe.ReadUnaligned>(ref blockRef);
- Vector512 rgba = Vector512_.ShuffleNative(bgra, shuffleMask);
- Vector512 packed = rgba.AsInt32();
- Vector512 alpha = Vector512.ShiftRightLogical(packed, 24);
-
- // Fully transparent and fully opaque pixels are identity cases for
- // unpremultiplication. Masking them keeps the scalar behavior and lets
- // safeAlpha avoid dividing by zero for alpha == 0.
- Vector512 partialMask = ~(Vector512.Equals(alpha, zero) | Vector512.Equals(alpha, opaque));
-
- Vector512 r = packed & byteMask;
- Vector512 g = Vector512.ShiftRightLogical(packed, 8) & byteMask;
- Vector512 b = Vector512.ShiftRightLogical(packed, 16) & byteMask;
-
- Vector512 safeAlpha = Vector512.ConditionalSelect(partialMask, alpha, one);
- Vector512 halfAlpha = Vector512.ShiftRightLogical(safeAlpha, 1);
- Vector512 safeAlphaF = Vector512.ConvertToSingle(safeAlpha);
-
- // The scalar path computes ((c * 255) + (a >> 1)) / a with integer
- // division. Floor the positive quotient before converting so SIMD does
- // not use the default round-to-nearest conversion and drift by one.
- Vector512 unpremultipliedR = Vector512.Min(
- byteMax,
- Vector512.ConvertToInt32(Vector512.Floor(Vector512.ConvertToSingle((r * byteMax) + halfAlpha) / safeAlphaF)));
-
- Vector512 unpremultipliedG = Vector512.Min(
- byteMax,
- Vector512.ConvertToInt32(Vector512.Floor(Vector512.ConvertToSingle((g * byteMax) + halfAlpha) / safeAlphaF)));
-
- Vector512 unpremultipliedB = Vector512.Min(
- byteMax,
- Vector512.ConvertToInt32(Vector512.Floor(Vector512.ConvertToSingle((b * byteMax) + halfAlpha) / safeAlphaF)));
-
- // ConditionalSelect applies the expensive unpremultiply only to pixels
- // where alpha is between 1 and 254; alpha 0 and 255 lanes keep the
- // shuffled channel values exactly as the scalar path does.
- Vector512 finalR = Vector512.ConditionalSelect(partialMask, unpremultipliedR, r);
- Vector512 finalG = Vector512.ConditionalSelect(partialMask, unpremultipliedG, g);
- Vector512 finalB = Vector512.ConditionalSelect(partialMask, unpremultipliedB, b);
-
- // Rgba32 is laid out as little-endian 0xAABBGGRR in an int lane, so
- // shifting the unpacked channels back to byte offsets 0, 1, 2, and 3
- // recreates the in-memory RGBA bytes for the unaligned store.
- Vector512 result =
- finalR |
- Vector512.ShiftLeft(finalG, 8) |
- Vector512.ShiftLeft(finalB, 16) |
- Vector512.ShiftLeft(alpha, 24);
-
- Unsafe.WriteUnaligned(ref blockRef, result.AsByte());
- }
-
- return i;
- }
-
- private static int ApplyCgbiTransformVector256(Span scanline, int startPixel, int pixelCount)
- {
- ref byte scanlineRef = ref MemoryMarshal.GetReference(scanline);
- int i = startPixel;
-
- Span temp = stackalloc byte[Vector512.Count];
- SimdUtils.Shuffle.MMShuffleSpan(ref temp, SimdUtils.Shuffle.MMShuffle3012);
-
- // MMShuffle3012 expands to [2, 1, 0, 3] for each 4-byte pixel, converting
- // CgBI's BGRA byte order to Rgba32's RGBA layout while keeping alpha in place.
- // Avx2.Shuffle is 128-bit lane-local, and the generated mask repeats inside
- // each lane, so no byte ever needs to cross the lane boundary.
- Vector256 shuffleMask = Unsafe.As>(ref MemoryMarshal.GetReference(temp));
-
- Vector256 zero = Vector256.Zero;
- Vector256 one = Vector256.One;
- Vector256 byteMask = Vector256.Create(0xFF);
- Vector256 opaque = Vector256.Create(0xFF);
- Vector256 byteMax = Vector256.Create((int)byte.MaxValue);
-
- for (; i <= pixelCount - 8; i += 8)
- {
- ref byte blockRef = ref Unsafe.Add(ref scanlineRef, i * Unsafe.SizeOf());
- Vector256 bgra = Unsafe.ReadUnaligned>(ref blockRef);
- Vector256 rgba = Vector256_.ShufflePerLane(bgra, shuffleMask);
- Vector256 packed = rgba.AsInt32();
- Vector256 alpha = Vector256.ShiftRightLogical(packed, 24);
-
- // Fully transparent and fully opaque pixels are identity cases for
- // unpremultiplication. Masking them keeps the scalar behavior and lets
- // safeAlpha avoid dividing by zero for alpha == 0.
- Vector256 partialMask = ~(Vector256.Equals(alpha, zero) | Vector256.Equals(alpha, opaque));
-
- Vector256 r = packed & byteMask;
- Vector256 g = Vector256.ShiftRightLogical(packed, 8) & byteMask;
- Vector256 b = Vector256.ShiftRightLogical(packed, 16) & byteMask;
-
- Vector256 safeAlpha = Vector256.ConditionalSelect(partialMask, alpha, one);
- Vector256 halfAlpha = Vector256.ShiftRightLogical(safeAlpha, 1);
- Vector256 safeAlphaF = Vector256.ConvertToSingle(safeAlpha);
-
- // The scalar path computes ((c * 255) + (a >> 1)) / a with integer
- // division. Floor the positive quotient before converting so SIMD does
- // not use the default round-to-nearest conversion and drift by one.
- Vector256 unpremultipliedR = Vector256.Min(
- byteMax,
- Vector256.ConvertToInt32(Vector256.Floor(Vector256.ConvertToSingle((r * byteMax) + halfAlpha) / safeAlphaF)));
-
- Vector256 unpremultipliedG = Vector256.Min(
- byteMax,
- Vector256.ConvertToInt32(Vector256.Floor(Vector256.ConvertToSingle((g * byteMax) + halfAlpha) / safeAlphaF)));
-
- Vector256 unpremultipliedB = Vector256.Min(
- byteMax,
- Vector256.ConvertToInt32(Vector256.Floor(Vector256.ConvertToSingle((b * byteMax) + halfAlpha) / safeAlphaF)));
-
- // ConditionalSelect applies the expensive unpremultiply only to pixels
- // where alpha is between 1 and 254; alpha 0 and 255 lanes keep the
- // shuffled channel values exactly as the scalar path does.
- Vector256 finalR = Vector256.ConditionalSelect(partialMask, unpremultipliedR, r);
- Vector256 finalG = Vector256.ConditionalSelect(partialMask, unpremultipliedG, g);
- Vector256 finalB = Vector256.ConditionalSelect(partialMask, unpremultipliedB, b);
-
- // Rgba32 is laid out as little-endian 0xAABBGGRR in an int lane, so
- // shifting the unpacked channels back to byte offsets 0, 1, 2, and 3
- // recreates the in-memory RGBA bytes for the unaligned store.
- Vector256 result =
- finalR |
- Vector256.ShiftLeft(finalG, 8) |
- Vector256.ShiftLeft(finalB, 16) |
- Vector256.ShiftLeft(alpha, 24);
-
- Unsafe.WriteUnaligned(ref blockRef, result.AsByte());
- }
-
- return i;
- }
-
- private static int ApplyCgbiTransformVector128(Span scanline, int startPixel, int pixelCount)
- {
- ref byte scanlineRef = ref MemoryMarshal.GetReference(scanline);
- int i = startPixel;
-
- Span temp = stackalloc byte[Vector512.Count];
- SimdUtils.Shuffle.MMShuffleSpan(ref temp, SimdUtils.Shuffle.MMShuffle3012);
-
- // MMShuffle3012 expands to [2, 1, 0, 3] for each 4-byte pixel, converting
- // CgBI's BGRA byte order to Rgba32's RGBA layout while keeping alpha in place.
- Vector128 shuffleMask = Unsafe.As>(ref MemoryMarshal.GetReference(temp));
-
- Vector128 zero = Vector128.Zero;
- Vector128 one = Vector128.One;
- Vector128 byteMask = Vector128.Create(0xFF);
- Vector128 opaque = Vector128.Create(0xFF);
- Vector128 byteMax = Vector128.Create((int)byte.MaxValue);
-
- for (; i <= pixelCount - 4; i += 4)
- {
- ref byte blockRef = ref Unsafe.Add(ref scanlineRef, i * Unsafe.SizeOf());
- Vector128 bgra = Unsafe.ReadUnaligned>(ref blockRef);
- Vector128 rgba = Vector128_.ShuffleNative(bgra, shuffleMask);
- Vector128 packed = rgba.AsInt32();
- Vector128 alpha = Vector128.ShiftRightLogical(packed, 24);
-
- // Fully transparent and fully opaque pixels are identity cases for
- // unpremultiplication. Masking them keeps the scalar behavior and lets
- // safeAlpha avoid dividing by zero for alpha == 0.
- Vector128 partialMask = ~(Vector128.Equals(alpha, zero) | Vector128.Equals(alpha, opaque));
-
- Vector128 r = packed & byteMask;
- Vector128 g = Vector128.ShiftRightLogical(packed, 8) & byteMask;
- Vector128 b = Vector128.ShiftRightLogical(packed, 16) & byteMask;
-
- Vector128 safeAlpha = Vector128.ConditionalSelect(partialMask, alpha, one);
- Vector128 halfAlpha = Vector128.ShiftRightLogical(safeAlpha, 1);
- Vector128 safeAlphaF = Vector128.ConvertToSingle(safeAlpha);
-
- // The scalar path computes ((c * 255) + (a >> 1)) / a with integer
- // division. Floor the positive quotient before converting so SIMD does
- // not use the default round-to-nearest conversion and drift by one.
- Vector128 unpremultipliedR = Vector128.Min(
- byteMax,
- Vector128.ConvertToInt32(Vector128.Floor(Vector128.ConvertToSingle((r * byteMax) + halfAlpha) / safeAlphaF)));
-
- Vector128 unpremultipliedG = Vector128.Min(
- byteMax,
- Vector128.ConvertToInt32(Vector128.Floor(Vector128.ConvertToSingle((g * byteMax) + halfAlpha) / safeAlphaF)));
-
- Vector128 unpremultipliedB = Vector128.Min(
- byteMax,
- Vector128.ConvertToInt32(Vector128.Floor(Vector128.ConvertToSingle((b * byteMax) + halfAlpha) / safeAlphaF)));
-
- // ConditionalSelect applies the expensive unpremultiply only to pixels
- // where alpha is between 1 and 254; alpha 0 and 255 lanes keep the
- // shuffled channel values exactly as the scalar path does.
- Vector128 finalR = Vector128.ConditionalSelect(partialMask, unpremultipliedR, r);
- Vector128 finalG = Vector128.ConditionalSelect(partialMask, unpremultipliedG, g);
- Vector128 finalB = Vector128.ConditionalSelect(partialMask, unpremultipliedB, b);
-
- // Rgba32 is laid out as little-endian 0xAABBGGRR in an int lane, so
- // shifting the unpacked channels back to byte offsets 0, 1, 2, and 3
- // recreates the in-memory RGBA bytes for the unaligned store.
- Vector128 result =
- finalR |
- Vector128.ShiftLeft(finalG, 8) |
- Vector128.ShiftLeft(finalB, 16) |
- Vector128.ShiftLeft(alpha, 24);
-
- Unsafe.WriteUnaligned(ref blockRef, result.AsByte());
- }
-
- return i;
- }
}
diff --git a/tests/ImageSharp.Tests/Formats/Png/PngCgbiProcessorTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngCgbiProcessorTests.cs
new file mode 100644
index 0000000000..426afb6d42
--- /dev/null
+++ b/tests/ImageSharp.Tests/Formats/Png/PngCgbiProcessorTests.cs
@@ -0,0 +1,174 @@
+// Copyright (c) Six Labors.
+// Licensed under the Six Labors Split License.
+
+using System.Runtime.InteropServices;
+using SixLabors.ImageSharp.Formats.Png;
+using SixLabors.ImageSharp.PixelFormats;
+
+namespace SixLabors.ImageSharp.Tests.Formats.Png;
+
+[Trait("Format", "Png")]
+public class PngCgbiProcessorTests
+{
+    [Theory]
+    [InlineData(0)]
+    [InlineData(1)]
+    [InlineData(3)]
+    [InlineData(4)]
+    [InlineData(7)]
+    [InlineData(8)]
+    [InlineData(15)]
+    [InlineData(16)]
+    [InlineData(17)]
+    [InlineData(31)]
+    [InlineData(32)]
+    [InlineData(33)]
+    [InlineData(64)]
+    public void ApplyTransform_RgbWithAlpha_MatchesScalar(int pixelCount)
+    {
+        // Goes through the dispatching entry point rather than a single kernel, so
+        // whichever vector widths the host accelerates run together with the scalar
+        // tail. The pixel counts sit on and around the 4/8/16-pixel block sizes.
+        byte[] source = CreateBgraScanline(pixelCount);
+
+        // Expected result: the straightforward scalar reference on a private copy.
+        byte[] expected = source.AsSpan().ToArray();
+        ApplyCgbiTransformScalarReference(expected);
+
+        // Actual result: the production transform on a second private copy.
+        byte[] actual = source.AsSpan().ToArray();
+        PngCgbiProcessor.ApplyTransform(Configuration.Default, actual, PngColorType.RgbWithAlpha);
+
+        Assert.Equal(expected, actual);
+    }
+
+    [Theory]
+    [InlineData(0)]
+    [InlineData(1)]
+    [InlineData(3)]
+    [InlineData(4)]
+    [InlineData(7)]
+    [InlineData(8)]
+    [InlineData(15)]
+    [InlineData(16)]
+    [InlineData(17)]
+    [InlineData(31)]
+    [InlineData(32)]
+    [InlineData(33)]
+    [InlineData(64)]
+    public void ApplyTransformVector512_MatchesScalar(int pixelCount)
+    {
+        // The 512-bit kernel shuffles through Vector512_.ShuffleNative, which drops
+        // back to the software Vector512.Shuffle when Avx512BW is missing. The kernel
+        // can therefore be called directly here even on hosts where Vector512 is not
+        // hardware-accelerated.
+        AssertVectorMatchesScalar(
+            pixelCount,
+            applyVector: buffer => PngCgbiProcessor.ApplyTransformVector512(buffer, buffer.Length / 4),
+            blockSize: 16);
+    }
+
+    [Theory]
+    [InlineData(0)]
+    [InlineData(1)]
+    [InlineData(3)]
+    [InlineData(4)]
+    [InlineData(7)]
+    [InlineData(8)]
+    [InlineData(15)]
+    [InlineData(16)]
+    [InlineData(17)]
+    [InlineData(31)]
+    [InlineData(32)]
+    [InlineData(64)]
+    public void ApplyTransformVector256_MatchesScalar(int pixelCount)
+    {
+        // Calls the 256-bit kernel directly, starting at pixel 0, in blocks of 8 pixels.
+        // NOTE(review): the decoder's previous dispatcher only entered this kernel when
+        // Avx2.IsSupported was true; this test has no such guard. Confirm the kernel's
+        // per-lane shuffle has a fallback on hosts without AVX2.
+        AssertVectorMatchesScalar(
+            pixelCount,
+            applyVector: buffer => PngCgbiProcessor.ApplyTransformVector256(buffer, 0, buffer.Length / 4),
+            blockSize: 8);
+    }
+
+    [Theory]
+    [InlineData(0)]
+    [InlineData(1)]
+    [InlineData(3)]
+    [InlineData(4)]
+    [InlineData(7)]
+    [InlineData(8)]
+    [InlineData(15)]
+    [InlineData(16)]
+    [InlineData(64)]
+    public void ApplyTransformVector128_MatchesScalar(int pixelCount)
+    {
+        // Calls the 128-bit kernel directly, starting at pixel 0, in blocks of 4 pixels.
+        AssertVectorMatchesScalar(
+            pixelCount,
+            applyVector: buffer => PngCgbiProcessor.ApplyTransformVector128(buffer, 0, buffer.Length / 4),
+            blockSize: 4);
+    }
+
+    /// <summary>
+    /// Runs one fixed-width vector kernel over a synthetic scanline and verifies that
+    /// (1) it reports exactly the number of pixels contained in whole blocks,
+    /// (2) those pixels match the scalar reference, and
+    /// (3) every byte after them is left as it was in the input.
+    /// </summary>
+    /// <param name="pixelCount">Number of 4-byte pixels in the generated scanline.</param>
+    /// <param name="applyVector">Applies the kernel under test in place and returns how many pixels it handled.</param>
+    /// <param name="blockSize">Number of pixels the kernel consumes per iteration.</param>
+    private static void AssertVectorMatchesScalar(int pixelCount, Func<byte[], int> applyVector, int blockSize)
+    {
+        byte[] input = CreateBgraScanline(pixelCount);
+        byte[] vectorOutput = (byte[])input.Clone();
+        byte[] scalarOutput = (byte[])input.Clone();
+
+        int processed = applyVector(vectorOutput);
+
+        // Integer division truncates, giving the pixel count covered by complete blocks.
+        int expectedProcessed = (pixelCount / blockSize) * blockSize;
+        Assert.Equal(expectedProcessed, processed);
+
+        // The vector path is responsible for whole blocks only; remaining pixels are
+        // handled by the scalar tail in ApplyTransform. Run the scalar reference
+        // over every pixel and compare the prefix the vector path actually wrote.
+        ApplyCgbiTransformScalarReference(scalarOutput);
+
+        int processedBytes = processed * 4;
+        Span<byte> vectorProcessed = vectorOutput.AsSpan(0, processedBytes);
+        Span<byte> scalarProcessed = scalarOutput.AsSpan(0, processedBytes);
+        Assert.True(vectorProcessed.SequenceEqual(scalarProcessed), $"Mismatch at pixelCount={pixelCount}");
+
+        // Pixels past the vector's processed prefix must be untouched.
+        Span<byte> vectorTail = vectorOutput.AsSpan(processedBytes);
+        Span<byte> inputTail = input.AsSpan(processedBytes);
+        Assert.True(vectorTail.SequenceEqual(inputTail), $"Tail was modified at pixelCount={pixelCount}");
+    }
+
+    /// <summary>
+    /// Builds a reproducible scanline of premultiplied pixels stored as B, G, R, A bytes.
+    /// </summary>
+    /// <param name="pixelCount">Number of pixels to generate; the result holds four bytes per pixel.</param>
+    /// <returns>A new array of <c>pixelCount * 4</c> bytes.</returns>
+    private static byte[] CreateBgraScanline(int pixelCount)
+    {
+        // Fixed pattern, no randomness: pixels whose index is a multiple of 7 are fully
+        // transparent, the pixel after each of those is fully opaque, and the rest carry
+        // a varying alpha.
+        byte[] scanline = new byte[pixelCount * 4];
+
+        for (int index = 0, offset = 0; index < pixelCount; index++, offset += 4)
+        {
+            byte alpha;
+            switch (index % 7)
+            {
+                case 0:
+                    alpha = byte.MinValue;
+                    break;
+                case 1:
+                    alpha = byte.MaxValue;
+                    break;
+                default:
+                    // OR-ing in bit 0 keeps the value odd, hence never zero.
+                    alpha = (byte)((((index * 37) + 23) & 0xFF) | 1);
+                    break;
+            }
+
+            // Byte order within a pixel is B, G, R, A; each colour channel has its own multiplier.
+            scanline[offset + 0] = Premultiply(index * 53, alpha);
+            scanline[offset + 1] = Premultiply(index * 29, alpha);
+            scanline[offset + 2] = Premultiply(index * 13, alpha);
+            scanline[offset + 3] = alpha;
+        }
+
+        return scanline;
+
+        // CgBI premultiplication, c' = c * a / 255, with truncating integer division.
+        // The channel value c is the low byte of the supplied seed.
+        static byte Premultiply(int channelSeed, byte a) => (byte)(((channelSeed & 0xFF) * a) / byte.MaxValue);
+    }
+
+    /// <summary>
+    /// Scalar reference used as the expected result: for every 4-byte pixel it swaps
+    /// the first and third bytes, then, unless alpha is 0 or 255, divides each colour
+    /// channel by alpha with round-to-nearest and clamps to 255. Works in place.
+    /// </summary>
+    /// <param name="scanline">Pixel bytes to convert; reinterpreted as <see cref="Rgba32"/> values.</param>
+    private static void ApplyCgbiTransformScalarReference(Span<byte> scanline)
+    {
+        Span<Rgba32> pixels = MemoryMarshal.Cast<byte, Rgba32>(scanline);
+        for (int i = 0; i < pixels.Length; i++)
+        {
+            ref Rgba32 pixel = ref pixels[i];
+
+            // The input bytes are B, G, R, A, so viewed as Rgba32 the R and B members are
+            // exchanged; constructing from (B, G, R, A) puts them back in place.
+            pixel = new Rgba32(pixel.B, pixel.G, pixel.R, pixel.A);
+
+            byte a = pixel.A;
+            if (a is 0 or byte.MaxValue)
+            {
+                // Alpha 0 would divide by zero and alpha 255 needs no scaling, so both are left as swapped.
+                continue;
+            }
+
+            // Reverse c' = c * a / 255: adding a / 2 before dividing rounds to nearest.
+            int half = a >> 1;
+            byte r = (byte)Math.Min(byte.MaxValue, ((pixel.R * byte.MaxValue) + half) / a);
+            byte g = (byte)Math.Min(byte.MaxValue, ((pixel.G * byte.MaxValue) + half) / a);
+            byte b = (byte)Math.Min(byte.MaxValue, ((pixel.B * byte.MaxValue) + half) / a);
+            pixel = new Rgba32(r, g, b, a);
+        }
+    }
+}
diff --git a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs
index 2e452b896d..2fbbe695e9 100644
--- a/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs
+++ b/tests/ImageSharp.Tests/Formats/Png/PngDecoderTests.cs
@@ -767,6 +767,28 @@ public void Identify_AppleCgBI(string imagePath, int expectedWidth, int expected
Assert.Equal(expectedHeight, imageInfo.Height);
}
+    [Theory]
+    [InlineData(TestImages.Png.Cgbi.BitDepth16)]
+    [InlineData(TestImages.Png.Cgbi.Palette)]
+    public void Identify_CgBI_IncompatibleHeader_ThrowsInvalidImageContentException(string imagePath)
+    {
+        // The fixtures carry a CgBI chunk together with a header that is not 8-bit
+        // truecolor; Identify is expected to reject them.
+        TestFile testFile = TestFile.Create(imagePath);
+        using MemoryStream stream = new(testFile.Bytes, false);
+
+        InvalidImageContentException ex = Assert.Throws<InvalidImageContentException>(() => Image.Identify(stream));
+
+        Assert.Contains("CgBI is only supported for 8-bit truecolor images", ex.Message);
+    }
+
+    [Theory]
+    [WithFile(TestImages.Png.Cgbi.BitDepth16, PixelTypes.Rgba32)]
+    [WithFile(TestImages.Png.Cgbi.Palette, PixelTypes.Rgba32)]
+    public void Decode_CgBI_IncompatibleHeader_ThrowsInvalidImageContentException<TPixel>(TestImageProvider<TPixel> provider)
+        where TPixel : unmanaged, IPixel<TPixel>
+    {
+        // Full decode counterpart of the Identify test: the same fixtures must be
+        // rejected with the same message. The image is disposed in case decoding
+        // unexpectedly succeeds.
+        InvalidImageContentException ex = Assert.Throws<InvalidImageContentException>(
+            () => { using Image<TPixel> image = provider.GetImage(PngDecoder.Instance); });
+
+        Assert.Contains("CgBI is only supported for 8-bit truecolor images", ex.Message);
+    }
+
[Theory]
[WithFile(TestImages.Png.Splash, PixelTypes.Rgba32)]
[WithFile(TestImages.Png.Bike, PixelTypes.Rgba32)]
diff --git a/tests/ImageSharp.Tests/TestImages.cs b/tests/ImageSharp.Tests/TestImages.cs
index 1b6ae56850..7b43ab262c 100644
--- a/tests/ImageSharp.Tests/TestImages.cs
+++ b/tests/ImageSharp.Tests/TestImages.cs
@@ -189,6 +189,10 @@ public static class Cgbi
// Issue 410: https://github.com/SixLabors/ImageSharp/issues/410
public const string Issue410 = "Png/issues/Issue_410.png";
+
+ // Synthetic fixtures derived from colors.png to exercise CgBI validation.
+ public const string BitDepth16 = "Png/cgbi/colors-cgbi-bitdepth16.png";
+ public const string Palette = "Png/cgbi/colors-cgbi-palette.png";
}
public static class Bad
diff --git a/tests/Images/Input/Png/cgbi/colors-cgbi-bitdepth16.png b/tests/Images/Input/Png/cgbi/colors-cgbi-bitdepth16.png
new file mode 100644
index 0000000000..18cfa9246d
--- /dev/null
+++ b/tests/Images/Input/Png/cgbi/colors-cgbi-bitdepth16.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59610bc03f6ca867e5f71c574b3a0d1942c9e3a230c8a32bf3007cb82f286866
+size 12853
diff --git a/tests/Images/Input/Png/cgbi/colors-cgbi-palette.png b/tests/Images/Input/Png/cgbi/colors-cgbi-palette.png
new file mode 100644
index 0000000000..f6406559b1
--- /dev/null
+++ b/tests/Images/Input/Png/cgbi/colors-cgbi-palette.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3a2f20c69ae423523a8f41887e3f37257a338f2220c2ea44d35c87daf8c3aa3
+size 12853