Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions src/ImageSharp/Advanced/ParallelExecutionSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public readonly struct ParallelExecutionSettings
/// </summary>
/// <param name="maxDegreeOfParallelism">
/// The value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.
/// Set to <c>-1</c> to leave the degree of parallelism unbounded.
/// If set to <c>-1</c>, there is no limit on the number of concurrently running operations.
/// </param>
/// <param name="minimumPixelsProcessedPerTask">The value for <see cref="MinimumPixelsProcessedPerTask"/>.</param>
/// <param name="memoryAllocator">The <see cref="MemoryAllocator"/>.</param>
Expand All @@ -31,7 +31,7 @@ public ParallelExecutionSettings(
{
// Shall be compatible with ParallelOptions.MaxDegreeOfParallelism:
// https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.paralleloptions.maxdegreeofparallelism
if (maxDegreeOfParallelism == 0 || maxDegreeOfParallelism < -1)
if (maxDegreeOfParallelism is 0 or < -1)
{
throw new ArgumentOutOfRangeException(nameof(maxDegreeOfParallelism));
}
Expand All @@ -49,7 +49,7 @@ public ParallelExecutionSettings(
/// </summary>
/// <param name="maxDegreeOfParallelism">
/// The value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.
/// Set to <c>-1</c> to leave the degree of parallelism unbounded.
/// If set to <c>-1</c>, there is no limit on the number of concurrently running operations.
/// </param>
/// <param name="memoryAllocator">The <see cref="MemoryAllocator"/>.</param>
public ParallelExecutionSettings(int maxDegreeOfParallelism, MemoryAllocator memoryAllocator)
Expand All @@ -64,7 +64,7 @@ public ParallelExecutionSettings(int maxDegreeOfParallelism, MemoryAllocator mem

/// <summary>
/// Gets the value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.
/// A value of <c>-1</c> leaves the degree of parallelism unbounded.
/// A value of <c>-1</c> means there is no limit on the number of concurrently running operations.
/// </summary>
public int MaxDegreeOfParallelism { get; }

Expand Down Expand Up @@ -93,12 +93,10 @@ public ParallelExecutionSettings MultiplyMinimumPixelsPerTask(int multiplier)
}

/// <summary>
/// Get the default <see cref="SixLabors.ImageSharp.Advanced.ParallelExecutionSettings"/> for a <see cref="SixLabors.ImageSharp.Configuration"/>
/// Get the default <see cref="ParallelExecutionSettings"/> for a <see cref="Configuration"/>
/// </summary>
/// <param name="configuration">The <see cref="Configuration"/>.</param>
/// <returns>The <see cref="ParallelExecutionSettings"/>.</returns>
public static ParallelExecutionSettings FromConfiguration(Configuration configuration)
{
return new ParallelExecutionSettings(configuration.MaxDegreeOfParallelism, configuration.MemoryAllocator);
}
=> new(configuration.MaxDegreeOfParallelism, configuration.MemoryAllocator);
}
35 changes: 0 additions & 35 deletions src/ImageSharp/Advanced/ParallelRowIterator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ public static void IterateRows<T>(
where T : struct, IRowOperation
{
ValidateRectangle(rectangle);
ValidateSettings(parallelSettings);

int top = rectangle.Top;
int bottom = rectangle.Bottom;
Expand Down Expand Up @@ -109,7 +108,6 @@ public static void IterateRows<T, TBuffer>(
where TBuffer : unmanaged
{
ValidateRectangle(rectangle);
ValidateSettings(parallelSettings);

int top = rectangle.Top;
int bottom = rectangle.Bottom;
Expand Down Expand Up @@ -174,7 +172,6 @@ public static void IterateRowIntervals<T>(
where T : struct, IRowIntervalOperation
{
ValidateRectangle(rectangle);
ValidateSettings(parallelSettings);

int top = rectangle.Top;
int bottom = rectangle.Bottom;
Expand Down Expand Up @@ -236,7 +233,6 @@ public static void IterateRowIntervals<T, TBuffer>(
where TBuffer : unmanaged
{
ValidateRectangle(rectangle);
ValidateSettings(parallelSettings);

int top = rectangle.Top;
int bottom = rectangle.Bottom;
Expand Down Expand Up @@ -315,35 +311,4 @@ private static void ValidateRectangle(Rectangle rectangle)
0,
$"{nameof(rectangle)}.{nameof(rectangle.Height)}");
}

/// <summary>
/// Validates the supplied <see cref="ParallelExecutionSettings"/>.
/// </summary>
/// <param name="parallelSettings">The execution settings.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <see cref="ParallelExecutionSettings.MaxDegreeOfParallelism"/> or
/// <see cref="ParallelExecutionSettings.MinimumPixelsProcessedPerTask"/> is invalid.
/// </exception>
/// <exception cref="ArgumentNullException">
/// Thrown when <see cref="ParallelExecutionSettings.MemoryAllocator"/> is null.
/// This also guards the public <see cref="ParallelExecutionSettings"/> default value, which bypasses constructor validation.
/// </exception>
private static void ValidateSettings(in ParallelExecutionSettings parallelSettings)
{
// ParallelExecutionSettings is a public struct, so callers can pass default and bypass constructor validation.
if (parallelSettings.MaxDegreeOfParallelism is 0 or < -1)
{
throw new ArgumentOutOfRangeException(
$"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MaxDegreeOfParallelism)}");
}

Guard.MustBeGreaterThan(
parallelSettings.MinimumPixelsProcessedPerTask,
0,
$"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MinimumPixelsProcessedPerTask)}");

Guard.NotNull(
parallelSettings.MemoryAllocator,
$"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MemoryAllocator)}");
}
}
5 changes: 3 additions & 2 deletions src/ImageSharp/Configuration.cs
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,9 @@ public Configuration(params IImageFormatConfigurationModule[] configurationModul
/// <summary>
/// Gets or sets the maximum number of concurrent tasks enabled in ImageSharp algorithms
/// configured with this <see cref="Configuration"/> instance.
/// Set to <c>-1</c> to leave the degree of parallelism unbounded.
/// Initialized with <see cref="Environment.ProcessorCount"/> by default.
/// A positive value limits the number of concurrent operations to the set value.
/// If set to <c>-1</c>, there is no limit on the number of concurrently running operations.
/// Defaults to <see cref="Environment.ProcessorCount"/>.
/// </summary>
public int MaxDegreeOfParallelism
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.ColorProfiles.Companding;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;
using SixLabors.ImageSharp.Processing.Processors.Convolution.Parameters;
Expand Down Expand Up @@ -112,7 +113,7 @@ protected override void OnFrameApply(ImageFrame<TPixel> source)
}
else
{
ApplyInverseGammaExposureRowOperation operation = new(sourceRectangle, source.PixelBuffer, processingBuffer, this.Configuration, 1 / this.gamma);
ApplyInverseGammaExposureRowOperation operation = new(sourceRectangle, source.PixelBuffer, processingBuffer, this.Configuration, this.gamma);
ParallelRowIterator.IterateRows(
this.Configuration,
sourceRectangle,
Expand Down Expand Up @@ -305,15 +306,9 @@ public void Invoke(int y, Span<Vector4> span)
{
Span<TPixel> targetRowSpan = this.targetPixels.DangerousGetRowSpan(y)[this.bounds.X..];
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, targetRowSpan[..span.Length], span, PixelConversionModifiers.Premultiply);
ref Vector4 baseRef = ref MemoryMarshal.GetReference(span);

for (int x = 0; x < this.bounds.Width; x++)
{
ref Vector4 v = ref Unsafe.Add(ref baseRef, (uint)x);
v.X = MathF.Pow(v.X, this.gamma);
v.Y = MathF.Pow(v.Y, this.gamma);
v.Z = MathF.Pow(v.Z, this.gamma);
}
// Input is premultiplied [0,1] so the LUT is safe here.
GammaCompanding.Expand(span[..this.bounds.Width], this.gamma);

PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan);
}
Expand Down Expand Up @@ -367,44 +362,34 @@ public void Invoke(int y, Span<Vector4> span)
private readonly Buffer2D<TPixel> targetPixels;
private readonly Buffer2D<Vector4> sourceValues;
private readonly Configuration configuration;
private readonly float inverseGamma;
private readonly float gamma;

[MethodImpl(InliningOptions.ShortMethod)]
public ApplyInverseGammaExposureRowOperation(
Rectangle bounds,
Buffer2D<TPixel> targetPixels,
Buffer2D<Vector4> sourceValues,
Configuration configuration,
float inverseGamma)
float gamma)
{
this.bounds = bounds;
this.targetPixels = targetPixels;
this.sourceValues = sourceValues;
this.configuration = configuration;
this.inverseGamma = inverseGamma;
this.gamma = gamma;
}

/// <inheritdoc/>
[MethodImpl(InliningOptions.ShortMethod)]
public void Invoke(int y)
{
Vector4 low = Vector4.Zero;
Vector4 high = new(float.PositiveInfinity, float.PositiveInfinity, float.PositiveInfinity, float.PositiveInfinity);

Span<TPixel> targetPixelSpan = this.targetPixels.DangerousGetRowSpan(y)[this.bounds.X..];
Span<Vector4> sourceRowSpan = this.sourceValues.DangerousGetRowSpan(y)[this.bounds.X..];
ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceRowSpan);
Span<Vector4> sourceRowSpan = this.sourceValues.DangerousGetRowSpan(y).Slice(this.bounds.X, this.bounds.Width);

for (int x = 0; x < this.bounds.Width; x++)
{
ref Vector4 v = ref Unsafe.Add(ref sourceRef, (uint)x);
Vector4 clamp = Numerics.Clamp(v, low, high);
v.X = MathF.Pow(clamp.X, this.inverseGamma);
v.Y = MathF.Pow(clamp.Y, this.inverseGamma);
v.Z = MathF.Pow(clamp.Z, this.inverseGamma);
}
Numerics.Clamp(MemoryMarshal.Cast<Vector4, float>(sourceRowSpan), 0, 1F);
GammaCompanding.Compress(sourceRowSpan, this.gamma);

PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, sourceRowSpan[..this.bounds.Width], targetPixelSpan, PixelConversionModifiers.Premultiply);
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, sourceRowSpan, targetPixelSpan, PixelConversionModifiers.Premultiply);
}
}

Expand Down Expand Up @@ -433,17 +418,16 @@ public ApplyInverseGamma3ExposureRowOperation(

/// <inheritdoc/>
[MethodImpl(InliningOptions.ShortMethod)]
public unsafe void Invoke(int y)
public void Invoke(int y)
{
Span<Vector4> sourceRowSpan = this.sourceValues.DangerousGetRowSpan(y).Slice(this.bounds.X, this.bounds.Width);
ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceRowSpan);

Numerics.Clamp(MemoryMarshal.Cast<Vector4, float>(sourceRowSpan), 0, float.PositiveInfinity);
Numerics.Clamp(MemoryMarshal.Cast<Vector4, float>(sourceRowSpan), 0, 1F);
Numerics.CubeRootOnXYZ(sourceRowSpan);

Span<TPixel> targetPixelSpan = this.targetPixels.DangerousGetRowSpan(y)[this.bounds.X..];

PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, sourceRowSpan[..this.bounds.Width], targetPixelSpan, PixelConversionModifiers.Premultiply);
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, sourceRowSpan, targetPixelSpan, PixelConversionModifiers.Premultiply);
}
}
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.

using System.Buffers;
using System.Numerics;
using SixLabors.ImageSharp.Advanced;
using SixLabors.ImageSharp.Memory;
using SixLabors.ImageSharp.PixelFormats;

Expand Down Expand Up @@ -79,10 +79,16 @@ protected override void OnFrameApply(ImageFrame<TPixel> source)
this.Configuration,
this.PreserveAlpha);

ParallelRowIterator.IterateRows<Convolution2DRowOperation<TPixel>, Vector4>(
this.Configuration,
interest,
in operation);
// Convolution is memory-bandwidth-bound with low arithmetic intensity.
// Parallelization degrades performance due to cache line contention from
// overlapping source row reads. See #3111.
using IMemoryOwner<Vector4> buffer = allocator.Allocate<Vector4>(operation.GetRequiredBufferLength(interest));
Span<Vector4> span = buffer.Memory.Span;

for (int y = interest.Top; y < interest.Bottom; y++)
{
operation.Invoke(y, span);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any reason for Convolution2DRowOperation isolation? IMO removing it/moving it to a private method would simplify code.

}
}

Buffer2D<TPixel>.SwapOrCopyContent(source.PixelBuffer, targetPixels);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.

using System.Buffers;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
Expand Down Expand Up @@ -106,6 +107,12 @@ protected override void OnFrameApply(ImageFrame<TPixel> source)

mapXY.BuildSamplingOffsetMap(this.KernelX.Length, this.KernelX.Length, interest, this.BorderWrapModeX, this.BorderWrapModeY);

MemoryAllocator allocator = this.Configuration.MemoryAllocator;

// Convolution is memory-bandwidth-bound with low arithmetic intensity.
// Parallelization degrades performance due to cache line contention from
// overlapping source row reads. See #3111.

// Horizontal convolution
HorizontalConvolutionRowOperation horizontalOperation = new(
interest,
Expand All @@ -116,10 +123,15 @@ protected override void OnFrameApply(ImageFrame<TPixel> source)
this.Configuration,
this.PreserveAlpha);

ParallelRowIterator.IterateRows<HorizontalConvolutionRowOperation, Vector4>(
this.Configuration,
interest,
in horizontalOperation);
using (IMemoryOwner<Vector4> hBuffer = allocator.Allocate<Vector4>(horizontalOperation.GetRequiredBufferLength(interest)))
{
Span<Vector4> hSpan = hBuffer.Memory.Span;

for (int y = interest.Top; y < interest.Bottom; y++)
{
horizontalOperation.Invoke(y, hSpan);
}
}

// Vertical convolution
VerticalConvolutionRowOperation verticalOperation = new(
Expand All @@ -131,10 +143,15 @@ protected override void OnFrameApply(ImageFrame<TPixel> source)
this.Configuration,
this.PreserveAlpha);

ParallelRowIterator.IterateRows<VerticalConvolutionRowOperation, Vector4>(
this.Configuration,
interest,
in verticalOperation);
using (IMemoryOwner<Vector4> vBuffer = allocator.Allocate<Vector4>(verticalOperation.GetRequiredBufferLength(interest)))
{
Span<Vector4> vSpan = vBuffer.Memory.Span;

for (int y = interest.Top; y < interest.Bottom; y++)
{
verticalOperation.Invoke(y, vSpan);
}
}
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright (c) Six Labors.
// Licensed under the Six Labors Split License.

using System.Buffers;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
Expand Down Expand Up @@ -96,10 +97,17 @@ protected override void OnFrameApply(ImageFrame<TPixel> source)
map.BuildSamplingOffsetMap(this.KernelXY.Rows, this.KernelXY.Columns, interest, this.BorderWrapModeX, this.BorderWrapModeY);

RowOperation operation = new(interest, targetPixels, source.PixelBuffer, map, this.KernelXY, this.Configuration, this.PreserveAlpha);
ParallelRowIterator.IterateRows<RowOperation, Vector4>(
this.Configuration,
interest,
in operation);

// Convolution is memory-bandwidth-bound with low arithmetic intensity.
// Parallelization degrades performance due to cache line contention from
// overlapping source row reads. See #3111.
using IMemoryOwner<Vector4> buffer = allocator.Allocate<Vector4>(operation.GetRequiredBufferLength(interest));
Span<Vector4> span = buffer.Memory.Span;

for (int y = interest.Top; y < interest.Bottom; y++)
{
operation.Invoke(y, span);
}
}

Buffer2D<TPixel>.SwapOrCopyContent(source.PixelBuffer, targetPixels);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,15 @@ protected override void OnFrameApply(ImageFrame<TPixel> source)
processor.Apply(pass);
}

// Convolution is memory-bandwidth-bound with low arithmetic intensity.
// Parallelization degrades performance due to cache line contention from
// overlapping source row reads. See #3111.
RowOperation operation = new(source.PixelBuffer, pass.PixelBuffer, interest);
ParallelRowIterator.IterateRows(
this.Configuration,
interest,
in operation);

for (int y = interest.Top; y < interest.Bottom; y++)
{
operation.Invoke(y);
}
}
}

Expand Down
Loading
Loading