44using System . Diagnostics ;
55using System . Diagnostics . CodeAnalysis ;
66using System . Runtime . CompilerServices ;
7+ using System . Runtime . InteropServices ;
78using System . Runtime . Intrinsics ;
89using System . Runtime . Intrinsics . Arm ;
10+ using System . Runtime . Intrinsics . Wasm ;
911using System . Runtime . Intrinsics . X86 ;
1012
1113namespace SixLabors . ImageSharp . Common . Helpers ;
@@ -18,30 +20,36 @@ namespace SixLabors.ImageSharp.Common.Helpers;
1820/// </list>
1921/// Should only be used if the intrinsics are available.
2022/// </summary>
21- internal static class Vector128Utilities
23+ #pragma warning disable SA1649 // File name should match first type name
24+ internal static class Vector128_
25+ #pragma warning restore SA1649 // File name should match first type name
2226{
/// <summary>
/// Gets a value indicating whether native byte shuffle operations are supported.
/// </summary>
/// <remarks>
/// Requires <see cref="Vector128.IsHardwareAccelerated"/>. On x86/x64 processes the
/// result additionally depends on <see cref="Ssse3.IsSupported"/>; on any other
/// architecture hardware acceleration alone is sufficient.
/// </remarks>
public static bool SupportsShuffleNativeByte
{
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    get
    {
        // Without 128-bit hardware acceleration there is nothing to offer.
        if (!Vector128.IsHardwareAccelerated)
        {
            return false;
        }

        // Only x86/x64 needs the extra SSSE3 check; every other accelerated
        // architecture is reported as supported.
        bool isXArch = RuntimeInformation.ProcessArchitecture is Architecture.X86 or Architecture.X64;
        return !isXArch || Ssse3.IsSupported;
    }
}
4048
4149 /// <summary>
4250 /// Gets a value indicating whether right align operations are supported.
4351 /// </summary>
44- public static bool SupportsRightAlign
52+ public static bool SupportsAlignRight
4553 {
4654 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
4755 get => Ssse3 . IsSupported || AdvSimd . IsSupported ;
@@ -63,15 +71,21 @@ public static bool SupportsShiftByte
6371 /// <param name="control">The shuffle control byte.</param>
6472 /// <returns>The <see cref="Vector128{Single}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> ShuffleNative(Vector128<float> vector, [ConstantExpected] byte control)
{
    if (!Sse.IsSupported)
    {
        // Software path: unpack the four 2-bit lane selectors from the control byte,
        // lowest bits first. InverseMMShuffle is deliberately not used here so that
        // no cast is needed.
        int lane0 = control & 0x3;
        int lane1 = (control >> 2) & 0x3;
        int lane2 = (control >> 4) & 0x3;
        int lane3 = (control >> 6) & 0x3;

        return Vector128.Shuffle(vector, Vector128.Create(lane0, lane1, lane2, lane3));
    }

    // SSE path: shuffle the vector against itself using the control byte directly.
    return Sse.Shuffle(vector, vector, control);
}
7690
7791 /// <summary>
@@ -86,20 +100,18 @@ public static Vector128<float> Shuffle(Vector128<float> vector, [ConstantExpecte
86100 /// A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.
87101 /// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<byte> ShuffleNative(Vector128<byte> vector, Vector128<byte> indices)
{
    // x64: call the SSSE3 shuffle intrinsic directly to avoid additional
    // instructions (3 vs 1).
    // ARM and WASM: the cross-platform shuffle already produces optimal codegen.
    // This method does not throw on x86/x64 when SSSE3 is missing, so it should
    // never be used without checking for support first.
    return Ssse3.IsSupported
        ? Ssse3.Shuffle(vector, indices)
        : Vector128.Shuffle(vector, indices);
}
104116
105117 /// <summary>
@@ -193,6 +205,11 @@ public static Vector128<int> ConvertToInt32RoundToEven(Vector128<float> vector)
193205 return AdvSimd . ConvertToInt32RoundToEven ( vector ) ;
194206 }
195207
208+ if ( PackedSimd . IsSupported )
209+ {
210+ return PackedSimd . ConvertToInt32Saturate ( PackedSimd . RoundToNearest ( vector ) ) ;
211+ }
212+
196213 Vector128 < float > sign = vector & Vector128 . Create ( - 0F ) ;
197214 Vector128 < float > val_2p23_f32 = sign | Vector128 . Create ( 8388608F ) ;
198215
@@ -218,6 +235,11 @@ public static Vector128<float> RoundToNearestInteger(Vector128<float> vector)
218235 return AdvSimd . RoundToNearest ( vector ) ;
219236 }
220237
238+ if ( PackedSimd . IsSupported )
239+ {
240+ return PackedSimd . RoundToNearest ( vector ) ;
241+ }
242+
221243 Vector128 < float > sign = vector & Vector128 . Create ( - 0F ) ;
222244 Vector128 < float > val_2p23_f32 = sign | Vector128 . Create ( 8388608F ) ;
223245
@@ -270,8 +292,16 @@ public static Vector128<byte> PackUnsignedSaturate(Vector128<short> left, Vector
270292 return AdvSimd . ExtractNarrowingSaturateUnsignedUpper ( AdvSimd . ExtractNarrowingSaturateUnsignedLower ( left ) , right ) ;
271293 }
272294
273- ThrowUnreachableException ( ) ;
274- return default ;
295+ if ( PackedSimd . IsSupported )
296+ {
297+ return PackedSimd . ConvertNarrowingSaturateUnsigned ( left , right ) ;
298+ }
299+
300+ Vector128 < short > min = Vector128 . Create ( ( short ) byte . MinValue ) ;
301+ Vector128 < short > max = Vector128 . Create ( ( short ) byte . MaxValue ) ;
302+ Vector128 < ushort > lefClamped = Clamp ( left , min , max ) . AsUInt16 ( ) ;
303+ Vector128 < ushort > rightClamped = Clamp ( right , min , max ) . AsUInt16 ( ) ;
304+ return Vector128 . Narrow ( lefClamped , rightClamped ) ;
275305 }
276306
277307 /// <summary>
@@ -293,10 +323,30 @@ public static Vector128<short> PackSignedSaturate(Vector128<int> left, Vector128
293323 return AdvSimd . ExtractNarrowingSaturateUpper ( AdvSimd . ExtractNarrowingSaturateLower ( left ) , right ) ;
294324 }
295325
296- ThrowUnreachableException ( ) ;
297- return default ;
326+ if ( PackedSimd . IsSupported )
327+ {
328+ return PackedSimd . ConvertNarrowingSaturateSigned ( left , right ) ;
329+ }
330+
331+ Vector128 < int > min = Vector128 . Create ( ( int ) short . MinValue ) ;
332+ Vector128 < int > max = Vector128 . Create ( ( int ) short . MaxValue ) ;
333+ Vector128 < int > lefClamped = Clamp ( left , min , max ) ;
334+ Vector128 < int > rightClamped = Clamp ( right , min , max ) ;
335+ return Vector128 . Narrow ( lefClamped , rightClamped ) ;
298336 }
299337
/// <summary>
/// Clamps each element of a vector to the range given by a lower and an upper bound vector.
/// </summary>
/// <typeparam name="T">The type of the elements in the vector.</typeparam>
/// <param name="value">The vector whose elements are clamped.</param>
/// <param name="min">The vector of lower bounds.</param>
/// <param name="max">The vector of upper bounds.</param>
/// <returns>The clamped <see cref="Vector128{T}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<T> Clamp<T>(Vector128<T> value, Vector128<T> min, Vector128<T> max)
{
    // Raise to the lower bound first, then cap at the upper bound.
    Vector128<T> raised = Vector128.Max(value, min);
    return Vector128.Min(raised, max);
}
349+
// Throw helper: always raises an UnreachableException and never returns.
[DoesNotReturn]
private static void ThrowUnreachableException()
{
    throw new UnreachableException();
}
302352}
0 commit comments