Created
November 22, 2019 15:18
-
-
Save Const-me/2f743098cd3341fa8fc14a6cdc2dee98 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Diagnostics; | |
using System.Runtime.Intrinsics; | |
using System.Runtime.Intrinsics.X86; | |
namespace SimdBrightness
{
    /// <summary>Benchmark that converts a buffer of 4-byte RGBA pixels to 8-bit grayscale with SSE2 / SSE4.1 intrinsics.</summary>
    static class Program
    {
        /// <summary>Number of pixels consumed by one iteration of the vectorized loop.</summary>
        const int pixelsPerBlock = 16;

        /// <summary>Load 4 RGBA pixels (16 bytes) starting at <paramref name="src"/>.</summary>
        /// <param name="src">Address of the first pixel; uses the unaligned load, so no alignment is required.</param>
        /// <returns>One pixel per 32-bit lane, red in the lowest byte of each lane.</returns>
        static unsafe Vector128<int> load4( byte* src )
        {
            return Sse2.LoadVector128( (int*)src );
        }

        /// <summary>Pack the red channel of 8 pixels into ushort values in the [ 0 .. 0xFF00 ] interval.</summary>
        /// <param name="a">Pixels 0-3, one per 32-bit lane.</param>
        /// <param name="b">Pixels 4-7, one per 32-bit lane.</param>
        /// <param name="mask">Mask selecting the lowest byte of every lane (0x000000FF).</param>
        /// <returns>Red values moved into the high byte of each 16-bit lane.</returns>
        static Vector128<ushort> packRed( Vector128<int> a, Vector128<int> b, Vector128<int> mask )
        {
            a = Sse2.And( a, mask );
            b = Sse2.And( b, mask );
            // After packing, every 16-bit lane is 0x00RR. Shifting the whole register left by one byte
            // moves RR into the high byte; the byte shifted into each lane is the (zero) high byte of its neighbour.
            return Sse2.ShiftLeftLogical128BitLane( Sse41.PackUnsignedSaturate( a, b ), 1 );
        }

        /// <summary>Pack the green channel of 8 pixels into ushort values in the [ 0 .. 0xFF00 ] interval.</summary>
        /// <param name="a">Pixels 0-3, one per 32-bit lane.</param>
        /// <param name="b">Pixels 4-7, one per 32-bit lane.</param>
        /// <param name="mask">Mask selecting the second byte of every lane (0x0000FF00).</param>
        /// <returns>Green values in the high byte of each 16-bit lane.</returns>
        static Vector128<ushort> packGreen( Vector128<int> a, Vector128<int> b, Vector128<int> mask )
        {
            // Green already sits in the second byte, so masking alone yields the 0xGG00 layout.
            a = Sse2.And( a, mask );
            b = Sse2.And( b, mask );
            return Sse41.PackUnsignedSaturate( a, b );
        }

        /// <summary>Pack the blue channel of 8 pixels into ushort values in the [ 0 .. 0xFF00 ] interval.</summary>
        /// <param name="a">Pixels 0-3, one per 32-bit lane.</param>
        /// <param name="b">Pixels 4-7, one per 32-bit lane.</param>
        /// <param name="mask">Mask selecting the second byte of every lane (0x0000FF00).</param>
        /// <returns>Blue values in the high byte of each 16-bit lane.</returns>
        static Vector128<ushort> packBlue( Vector128<int> a, Vector128<int> b, Vector128<int> mask )
        {
            // Shifting the register right by one byte moves blue from the third byte into the second one.
            // The byte that enters the top of each lane comes from the next pixel and is discarded by the mask.
            a = Sse2.ShiftRightLogical128BitLane( a, 1 );
            b = Sse2.ShiftRightLogical128BitLane( b, 1 );
            a = Sse2.And( a, mask );
            b = Sse2.And( b, mask );
            return Sse41.PackUnsignedSaturate( a, b );
        }

        /// <summary>Split 8 RGBA pixels into RGB channels, 16 bit / channel; alpha is dropped.</summary>
        /// <param name="a">Pixels 0-3, one per 32-bit lane.</param>
        /// <param name="b">Pixels 4-7, one per 32-bit lane.</param>
        /// <param name="red">Receives the red channel, value in the high byte of each lane.</param>
        /// <param name="green">Receives the green channel, value in the high byte of each lane.</param>
        /// <param name="blue">Receives the blue channel, value in the high byte of each lane.</param>
        /// <param name="lowByte">0x000000FF in every 32-bit lane.</param>
        /// <param name="secondByte">0x0000FF00 in every 32-bit lane.</param>
        static void packRgb( Vector128<int> a, Vector128<int> b, out Vector128<ushort> red, out Vector128<ushort> green, out Vector128<ushort> blue, Vector128<int> lowByte, Vector128<int> secondByte )
        {
            red = packRed( a, b, lowByte );
            green = packGreen( a, b, secondByte );
            blue = packBlue( a, b, secondByte );
        }

        /// <summary>Compute brightness of 8 pixels.</summary>
        /// <param name="r">Red channel, value in the high byte of each 16-bit lane.</param>
        /// <param name="g">Green channel, same layout.</param>
        /// <param name="b">Blue channel, same layout.</param>
        /// <param name="redMul">Red weight broadcast to every lane, 16-bit fixed point.</param>
        /// <param name="greenMul">Green weight broadcast to every lane, 16-bit fixed point.</param>
        /// <param name="blueMul">Blue weight broadcast to every lane, 16-bit fixed point.</param>
        /// <returns>Eight 16-bit lanes, each holding a brightness value that fits in a byte.</returns>
        static Vector128<short> brightness( Vector128<ushort> r, Vector128<ushort> g, Vector128<ushort> b, Vector128<ushort> redMul, Vector128<ushort> greenMul, Vector128<ushort> blueMul )
        {
            // MultiplyHigh keeps the upper 16 bits of each 32-bit product, i.e. ( channel * weight ) / 0x10000.
            r = Sse2.MultiplyHigh( r, redMul );
            g = Sse2.MultiplyHigh( g, greenMul );
            b = Sse2.MultiplyHigh( b, blueMul );
            var result = Sse2.AddSaturate( Sse2.AddSaturate( r, g ), b );
            // The weighted sum is still scaled by 256; drop the fractional byte.
            return Vector128.AsInt16( Sse2.ShiftRightLogical( result, 8 ) );
        }

        // Channel weights as 16-bit fixed-point fractions of 0x10000; the casts truncate toward zero.
        const ushort mulRed = (ushort)( 0.29891 * 0x10000 );
        const ushort mulGreen = (ushort)( 0.58661 * 0x10000 );
        const ushort mulBlue = (ushort)( 0.11448 * 0x10000 );

        /// <summary>Apply one channel weight exactly the way the vector path does.</summary>
        /// <param name="channel">8-bit channel value.</param>
        /// <param name="weight">16-bit fixed-point weight.</param>
        /// <returns>Upper 16 bits of ( channel * 256 ) * weight.</returns>
        static uint weighChannel( byte channel, ushort weight )
        {
            return ( ( (uint)channel << 8 ) * weight ) >> 16;
        }

        /// <summary>Scalar brightness of a single RGBA pixel; produces the same byte as the vector path.</summary>
        /// <param name="pixel">Address of the pixel's red byte; the fourth (alpha) byte is not read.</param>
        static unsafe byte grayscalePixel( byte* pixel )
        {
            uint sum = weighChannel( pixel[ 0 ], mulRed ) + weighChannel( pixel[ 1 ], mulGreen ) + weighChannel( pixel[ 2 ], mulBlue );
            // Mirrors the saturating 16-bit additions of the vector path.
            sum = Math.Min( sum, (uint)ushort.MaxValue );
            return (byte)( sum >> 8 );
        }

        /// <summary>Convert buffer from RGBA to grayscale.</summary>
        /// <param name="src">Source pixels, 4 bytes each; at least <paramref name="count"/> * 4 bytes must be readable.</param>
        /// <param name="dst">Destination, 1 byte per pixel; at least <paramref name="count"/> bytes must be writable.</param>
        /// <param name="count">Number of pixels to convert; zero or negative values convert nothing.</param>
        /// <remarks>
        /// <para>If your image has line paddings, you'll want to call this once per line, not for the complete image.</para>
        /// <para>Full groups of 16 pixels go through the SSE4.1 path. The remaining pixels, or all of them when SSE4.1 is unavailable,
        /// are converted one by one with identical arithmetic, so neither buffer is accessed beyond <paramref name="count"/> pixels.</para>
        /// </remarks>
        static unsafe void convertToGrayscale( byte* src, byte* dst, long count )
        {
            long remaining = count;

            if( Sse41.IsSupported )
            {
                var lowByte = Vector128.Create( 0xFF );
                var secondByte = Vector128.Create( 0xFF00 );
                var redMul = Vector128.Create( mulRed );
                var greenMul = Vector128.Create( mulGreen );
                var blueMul = Vector128.Create( mulBlue );

                // Only whole groups are vectorized: each iteration reads 64 source bytes and stores 16 destination bytes.
                while( remaining >= pixelsPerBlock )
                {
                    var p1 = load4( src );
                    var p2 = load4( src + 16 );
                    var p3 = load4( src + 32 );
                    var p4 = load4( src + 48 );

                    packRgb( p1, p2, out var r, out var g, out var b, lowByte, secondByte );
                    var low = brightness( r, g, b, redMul, greenMul, blueMul );

                    packRgb( p3, p4, out r, out g, out b, lowByte, secondByte );
                    var hi = brightness( r, g, b, redMul, greenMul, blueMul );

                    var bytes = Sse2.PackUnsignedSaturate( low, hi );
                    Sse2.Store( dst, bytes );

                    src += 4 * pixelsPerBlock;
                    dst += pixelsPerBlock;
                    remaining -= pixelsPerBlock;
                }
            }

            // Scalar tail: leftover pixels, or the whole buffer when the vector path is unavailable.
            while( remaining > 0 )
            {
                *dst = grayscalePixel( src );
                src += 4;
                dst++;
                remaining--;
            }
        }

        // Benchmark size in pixels. NOTE(review): presumably chosen so the 4-byte-per-pixel source array
        // (2044 MiB) stays just under the 2 GiB mark - confirm against the target runtime's array limits.
        const long count = 1024 * 1024 * 511;

        /// <summary>Fills a buffer with pseudo-random pixels, converts it once and prints the elapsed time.</summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static unsafe void Main( string[] args )
        {
            byte[] source = new byte[ 4 * count ];
            // Fixed seed, so every run converts the same data.
            new Random( 11 ).NextBytes( source );
            byte[] dest = new byte[ count ];

            Stopwatch sw;
            fixed( byte* pSource = source )
            fixed( byte* pDest = dest )
            {
                // Only the conversion itself is timed, not the allocation or the random fill.
                sw = Stopwatch.StartNew();
                convertToGrayscale( pSource, pDest, count );
                sw.Stop();
            }
            Console.WriteLine( "{0}ms", sw.Elapsed.TotalMilliseconds );
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment